LLVM 23.0.0git
InstCombineCalls.cpp
Go to the documentation of this file.
1//===- InstCombineCalls.cpp -----------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the visitCall, visitInvoke, and visitCallBr functions.
10//
11//===----------------------------------------------------------------------===//
12
13#include "InstCombineInternal.h"
14#include "llvm/ADT/APFloat.h"
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/APSInt.h"
17#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/Loads.h"
32#include "llvm/IR/Attributes.h"
33#include "llvm/IR/BasicBlock.h"
34#include "llvm/IR/Constant.h"
35#include "llvm/IR/Constants.h"
36#include "llvm/IR/DataLayout.h"
37#include "llvm/IR/DebugInfo.h"
39#include "llvm/IR/Function.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/InstrTypes.h"
43#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Intrinsics.h"
47#include "llvm/IR/IntrinsicsAArch64.h"
48#include "llvm/IR/IntrinsicsAMDGPU.h"
49#include "llvm/IR/IntrinsicsARM.h"
50#include "llvm/IR/IntrinsicsHexagon.h"
51#include "llvm/IR/LLVMContext.h"
52#include "llvm/IR/Metadata.h"
55#include "llvm/IR/Statepoint.h"
56#include "llvm/IR/Type.h"
57#include "llvm/IR/User.h"
58#include "llvm/IR/Value.h"
59#include "llvm/IR/ValueHandle.h"
64#include "llvm/Support/Debug.h"
75#include <algorithm>
76#include <cassert>
77#include <cstdint>
78#include <optional>
79#include <utility>
80#include <vector>
81
82#define DEBUG_TYPE "instcombine"
84
85using namespace llvm;
86using namespace PatternMatch;
87
88STATISTIC(NumSimplified, "Number of library calls simplified");
89
91 "instcombine-guard-widening-window",
92 cl::init(3),
93 cl::desc("How wide an instruction window to bypass looking for "
94 "another guard"));
95
96/// Return the specified type promoted as it would be to pass though a va_arg
97/// area.
99 if (IntegerType* ITy = dyn_cast<IntegerType>(Ty)) {
100 if (ITy->getBitWidth() < 32)
101 return Type::getInt32Ty(Ty->getContext());
102 }
103 return Ty;
104}
105
106/// Recognize a memcpy/memmove from a trivially otherwise unused alloca.
107/// TODO: This should probably be integrated with visitAllocSites, but that
108/// requires a deeper change to allow either unread or unwritten objects.
110 auto *Src = MI->getRawSource();
111 while (isa<GetElementPtrInst>(Src)) {
112 if (!Src->hasOneUse())
113 return false;
114 Src = cast<Instruction>(Src)->getOperand(0);
115 }
116 return isa<AllocaInst>(Src) && Src->hasOneUse();
117}
118
120 Align DstAlign = getKnownAlignment(MI->getRawDest(), DL, MI, &AC, &DT);
121 MaybeAlign CopyDstAlign = MI->getDestAlign();
122 if (!CopyDstAlign || *CopyDstAlign < DstAlign) {
123 MI->setDestAlignment(DstAlign);
124 return MI;
125 }
126
127 Align SrcAlign = getKnownAlignment(MI->getRawSource(), DL, MI, &AC, &DT);
128 MaybeAlign CopySrcAlign = MI->getSourceAlign();
129 if (!CopySrcAlign || *CopySrcAlign < SrcAlign) {
130 MI->setSourceAlignment(SrcAlign);
131 return MI;
132 }
133
134 // If we have a store to a location which is known constant, we can conclude
135 // that the store must be storing the constant value (else the memory
136 // wouldn't be constant), and this must be a noop.
137 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
138 // Set the size of the copy to 0, it will be deleted on the next iteration.
139 MI->setLength((uint64_t)0);
140 return MI;
141 }
142
143 // If the source is provably undef, the memcpy/memmove doesn't do anything
144 // (unless the transfer is volatile).
145 if (hasUndefSource(MI) && !MI->isVolatile()) {
146 // Set the size of the copy to 0, it will be deleted on the next iteration.
147 MI->setLength((uint64_t)0);
148 return MI;
149 }
150
151 // If MemCpyInst length is 1/2/4/8 bytes then replace memcpy with
152 // load/store.
153 ConstantInt *MemOpLength = dyn_cast<ConstantInt>(MI->getLength());
154 if (!MemOpLength) return nullptr;
155
156 // Source and destination pointer types are always "i8*" for intrinsic. See
157 // if the size is something we can handle with a single primitive load/store.
158 // A single load+store correctly handles overlapping memory in the memmove
159 // case.
160 uint64_t Size = MemOpLength->getLimitedValue();
161 assert(Size && "0-sized memory transferring should be removed already.");
162
163 if (Size > 8 || (Size&(Size-1)))
164 return nullptr; // If not 1/2/4/8 bytes, exit.
165
166 // If it is an atomic and alignment is less than the size then we will
167 // introduce the unaligned memory access which will be later transformed
168 // into libcall in CodeGen. This is not evident performance gain so disable
169 // it now.
170 if (MI->isAtomic())
171 if (*CopyDstAlign < Size || *CopySrcAlign < Size)
172 return nullptr;
173
174 // Use an integer load+store unless we can find something better.
175 IntegerType* IntType = IntegerType::get(MI->getContext(), Size<<3);
176
177 // If the memcpy has metadata describing the members, see if we can get the
178 // TBAA, scope and noalias tags describing our copy.
179 AAMDNodes AACopyMD = MI->getAAMetadata().adjustForAccess(Size);
180
181 Value *Src = MI->getArgOperand(1);
182 Value *Dest = MI->getArgOperand(0);
183 LoadInst *L = Builder.CreateLoad(IntType, Src);
184 // Alignment from the mem intrinsic will be better, so use it.
185 L->setAlignment(*CopySrcAlign);
186 L->setAAMetadata(AACopyMD);
187 MDNode *LoopMemParallelMD =
188 MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
189 if (LoopMemParallelMD)
190 L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
191 MDNode *AccessGroupMD = MI->getMetadata(LLVMContext::MD_access_group);
192 if (AccessGroupMD)
193 L->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
194
195 StoreInst *S = Builder.CreateStore(L, Dest);
196 // Alignment from the mem intrinsic will be better, so use it.
197 S->setAlignment(*CopyDstAlign);
198 S->setAAMetadata(AACopyMD);
199 if (LoopMemParallelMD)
200 S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
201 if (AccessGroupMD)
202 S->setMetadata(LLVMContext::MD_access_group, AccessGroupMD);
203 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
204
205 if (auto *MT = dyn_cast<MemTransferInst>(MI)) {
206 // non-atomics can be volatile
207 L->setVolatile(MT->isVolatile());
208 S->setVolatile(MT->isVolatile());
209 }
210 if (MI->isAtomic()) {
211 // atomics have to be unordered
212 L->setOrdering(AtomicOrdering::Unordered);
214 }
215
216 // Set the size of the copy to 0, it will be deleted on the next iteration.
217 MI->setLength((uint64_t)0);
218 return MI;
219}
220
222 const Align KnownAlignment =
223 getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
224 MaybeAlign MemSetAlign = MI->getDestAlign();
225 if (!MemSetAlign || *MemSetAlign < KnownAlignment) {
226 MI->setDestAlignment(KnownAlignment);
227 return MI;
228 }
229
230 // If we have a store to a location which is known constant, we can conclude
231 // that the store must be storing the constant value (else the memory
232 // wouldn't be constant), and this must be a noop.
233 if (!isModSet(AA->getModRefInfoMask(MI->getDest()))) {
234 // Set the size of the copy to 0, it will be deleted on the next iteration.
235 MI->setLength((uint64_t)0);
236 return MI;
237 }
238
239 // Remove memset with an undef value.
240 // FIXME: This is technically incorrect because it might overwrite a poison
241 // value. Change to PoisonValue once #52930 is resolved.
242 if (isa<UndefValue>(MI->getValue())) {
243 // Set the size of the copy to 0, it will be deleted on the next iteration.
244 MI->setLength((uint64_t)0);
245 return MI;
246 }
247
248 // Extract the length and alignment and fill if they are constant.
249 ConstantInt *LenC = dyn_cast<ConstantInt>(MI->getLength());
250 ConstantInt *FillC = dyn_cast<ConstantInt>(MI->getValue());
251 if (!LenC || !FillC || !FillC->getType()->isIntegerTy(8))
252 return nullptr;
253 const uint64_t Len = LenC->getLimitedValue();
254 assert(Len && "0-sized memory setting should be removed already.");
255 const Align Alignment = MI->getDestAlign().valueOrOne();
256
257 // If it is an atomic and alignment is less than the size then we will
258 // introduce the unaligned memory access which will be later transformed
259 // into libcall in CodeGen. This is not evident performance gain so disable
260 // it now.
261 if (MI->isAtomic() && Alignment < Len)
262 return nullptr;
263
264 // memset(s,c,n) -> store s, c (for n=1,2,4,8)
265 if (Len <= 8 && isPowerOf2_32((uint32_t)Len)) {
266 Value *Dest = MI->getDest();
267
268 // Extract the fill value and store.
269 Constant *FillVal = ConstantInt::get(
270 MI->getContext(), APInt::getSplat(Len * 8, FillC->getValue()));
271 StoreInst *S = Builder.CreateStore(FillVal, Dest, MI->isVolatile());
272 S->copyMetadata(*MI, LLVMContext::MD_DIAssignID);
273 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(S)) {
274 if (llvm::is_contained(DbgAssign->location_ops(), FillC))
275 DbgAssign->replaceVariableLocationOp(FillC, FillVal);
276 }
277
278 S->setAlignment(Alignment);
279 if (MI->isAtomic())
281
282 // Set the size of the copy to 0, it will be deleted on the next iteration.
283 MI->setLength((uint64_t)0);
284 return MI;
285 }
286
287 return nullptr;
288}
289
290// TODO, Obvious Missing Transforms:
291// * Narrow width by halfs excluding zero/undef lanes
292Value *InstCombinerImpl::simplifyMaskedLoad(IntrinsicInst &II) {
293 Value *LoadPtr = II.getArgOperand(0);
294 const Align Alignment = II.getParamAlign(0).valueOrOne();
295
296 // If the mask is all ones or undefs, this is a plain vector load of the 1st
297 // argument.
298 if (maskIsAllOneOrUndef(II.getArgOperand(1))) {
299 LoadInst *L = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
300 "unmaskedload");
301 L->copyMetadata(II);
302 return L;
303 }
304
305 // If we can unconditionally load from this address, replace with a
306 // load/select idiom. TODO: use DT for context sensitive query
307 if (isDereferenceablePointer(LoadPtr, II.getType(),
308 II.getDataLayout(), &II, &AC)) {
309 LoadInst *LI = Builder.CreateAlignedLoad(II.getType(), LoadPtr, Alignment,
310 "unmaskedload");
311 LI->copyMetadata(II);
312 return Builder.CreateSelect(II.getArgOperand(1), LI, II.getArgOperand(2));
313 }
314
315 return nullptr;
316}
317
318// TODO, Obvious Missing Transforms:
319// * Single constant active lane -> store
320// * Narrow width by halfs excluding zero/undef lanes
321Instruction *InstCombinerImpl::simplifyMaskedStore(IntrinsicInst &II) {
322 Value *StorePtr = II.getArgOperand(1);
323 Align Alignment = II.getParamAlign(1).valueOrOne();
324 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
325 if (!ConstMask)
326 return nullptr;
327
328 // If the mask is all zeros, this instruction does nothing.
329 if (maskIsAllZeroOrUndef(ConstMask))
331
332 // If the mask is all ones, this is a plain vector store of the 1st argument.
333 if (maskIsAllOneOrUndef(ConstMask)) {
334 StoreInst *S =
335 new StoreInst(II.getArgOperand(0), StorePtr, false, Alignment);
336 S->copyMetadata(II);
337 return S;
338 }
339
340 if (isa<ScalableVectorType>(ConstMask->getType()))
341 return nullptr;
342
343 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
344 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
345 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
346 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
347 PoisonElts))
348 return replaceOperand(II, 0, V);
349
350 return nullptr;
351}
352
353// TODO, Obvious Missing Transforms:
354// * Single constant active lane load -> load
355// * Dereferenceable address & few lanes -> scalarize speculative load/selects
356// * Adjacent vector addresses -> masked.load
357// * Narrow width by halfs excluding zero/undef lanes
358// * Vector incrementing address -> vector masked load
359Instruction *InstCombinerImpl::simplifyMaskedGather(IntrinsicInst &II) {
360 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(1));
361 if (!ConstMask)
362 return nullptr;
363
364 // Vector splat address w/known mask -> scalar load
365 // Fold the gather to load the source vector first lane
366 // because it is reloading the same value each time
367 if (ConstMask->isAllOnesValue())
368 if (auto *SplatPtr = getSplatValue(II.getArgOperand(0))) {
369 auto *VecTy = cast<VectorType>(II.getType());
370 const Align Alignment = II.getParamAlign(0).valueOrOne();
371 LoadInst *L = Builder.CreateAlignedLoad(VecTy->getElementType(), SplatPtr,
372 Alignment, "load.scalar");
373 Value *Shuf =
374 Builder.CreateVectorSplat(VecTy->getElementCount(), L, "broadcast");
376 }
377
378 return nullptr;
379}
380
381// TODO, Obvious Missing Transforms:
382// * Single constant active lane -> store
383// * Adjacent vector addresses -> masked.store
384// * Narrow store width by halfs excluding zero/undef lanes
385// * Vector incrementing address -> vector masked store
386Instruction *InstCombinerImpl::simplifyMaskedScatter(IntrinsicInst &II) {
387 auto *ConstMask = dyn_cast<Constant>(II.getArgOperand(2));
388 if (!ConstMask)
389 return nullptr;
390
391 // If the mask is all zeros, a scatter does nothing.
392 if (maskIsAllZeroOrUndef(ConstMask))
394
395 // Vector splat address -> scalar store
396 if (auto *SplatPtr = getSplatValue(II.getArgOperand(1))) {
397 // scatter(splat(value), splat(ptr), non-zero-mask) -> store value, ptr
398 if (auto *SplatValue = getSplatValue(II.getArgOperand(0))) {
399 if (maskContainsAllOneOrUndef(ConstMask)) {
400 Align Alignment = II.getParamAlign(1).valueOrOne();
401 StoreInst *S = new StoreInst(SplatValue, SplatPtr, /*IsVolatile=*/false,
402 Alignment);
403 S->copyMetadata(II);
404 return S;
405 }
406 }
407 // scatter(vector, splat(ptr), splat(true)) -> store extract(vector,
408 // lastlane), ptr
409 if (ConstMask->isAllOnesValue()) {
410 Align Alignment = II.getParamAlign(1).valueOrOne();
411 VectorType *WideLoadTy = cast<VectorType>(II.getArgOperand(1)->getType());
412 ElementCount VF = WideLoadTy->getElementCount();
413 Value *RunTimeVF = Builder.CreateElementCount(Builder.getInt32Ty(), VF);
414 Value *LastLane = Builder.CreateSub(RunTimeVF, Builder.getInt32(1));
415 Value *Extract =
416 Builder.CreateExtractElement(II.getArgOperand(0), LastLane);
417 StoreInst *S =
418 new StoreInst(Extract, SplatPtr, /*IsVolatile=*/false, Alignment);
419 S->copyMetadata(II);
420 return S;
421 }
422 }
423 if (isa<ScalableVectorType>(ConstMask->getType()))
424 return nullptr;
425
426 // Use masked off lanes to simplify operands via SimplifyDemandedVectorElts
427 APInt DemandedElts = possiblyDemandedEltsInMask(ConstMask);
428 APInt PoisonElts(DemandedElts.getBitWidth(), 0);
429 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(0), DemandedElts,
430 PoisonElts))
431 return replaceOperand(II, 0, V);
432 if (Value *V = SimplifyDemandedVectorElts(II.getOperand(1), DemandedElts,
433 PoisonElts))
434 return replaceOperand(II, 1, V);
435
436 return nullptr;
437}
438
439/// This function transforms launder.invariant.group and strip.invariant.group
440/// like:
441/// launder(launder(%x)) -> launder(%x) (the result is not the argument)
442/// launder(strip(%x)) -> launder(%x)
443/// strip(strip(%x)) -> strip(%x) (the result is not the argument)
444/// strip(launder(%x)) -> strip(%x)
445/// This is legal because it preserves the most recent information about
446/// the presence or absence of invariant.group.
448 InstCombinerImpl &IC) {
449 auto *Arg = II.getArgOperand(0);
450 auto *StrippedArg = Arg->stripPointerCasts();
451 auto *StrippedInvariantGroupsArg = StrippedArg;
452 while (auto *Intr = dyn_cast<IntrinsicInst>(StrippedInvariantGroupsArg)) {
453 if (Intr->getIntrinsicID() != Intrinsic::launder_invariant_group &&
454 Intr->getIntrinsicID() != Intrinsic::strip_invariant_group)
455 break;
456 StrippedInvariantGroupsArg = Intr->getArgOperand(0)->stripPointerCasts();
457 }
458 if (StrippedArg == StrippedInvariantGroupsArg)
459 return nullptr; // No launders/strips to remove.
460
461 Value *Result = nullptr;
462
463 if (II.getIntrinsicID() == Intrinsic::launder_invariant_group)
464 Result = IC.Builder.CreateLaunderInvariantGroup(StrippedInvariantGroupsArg);
465 else if (II.getIntrinsicID() == Intrinsic::strip_invariant_group)
466 Result = IC.Builder.CreateStripInvariantGroup(StrippedInvariantGroupsArg);
467 else
469 "simplifyInvariantGroupIntrinsic only handles launder and strip");
470 if (Result->getType()->getPointerAddressSpace() !=
471 II.getType()->getPointerAddressSpace())
472 Result = IC.Builder.CreateAddrSpaceCast(Result, II.getType());
473
474 return cast<Instruction>(Result);
475}
476
478 assert((II.getIntrinsicID() == Intrinsic::cttz ||
479 II.getIntrinsicID() == Intrinsic::ctlz) &&
480 "Expected cttz or ctlz intrinsic");
481 bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
482 Value *Op0 = II.getArgOperand(0);
483 Value *Op1 = II.getArgOperand(1);
484 Value *X;
485 // ctlz(bitreverse(x)) -> cttz(x)
486 // cttz(bitreverse(x)) -> ctlz(x)
487 if (match(Op0, m_BitReverse(m_Value(X)))) {
488 Intrinsic::ID ID = IsTZ ? Intrinsic::ctlz : Intrinsic::cttz;
489 Function *F =
490 Intrinsic::getOrInsertDeclaration(II.getModule(), ID, II.getType());
491 return CallInst::Create(F, {X, II.getArgOperand(1)});
492 }
493
494 if (II.getType()->isIntOrIntVectorTy(1)) {
495 // ctlz/cttz i1 Op0 --> not Op0
496 if (match(Op1, m_Zero()))
497 return BinaryOperator::CreateNot(Op0);
498 // If zero is poison, then the input can be assumed to be "true", so the
499 // instruction simplifies to "false".
500 assert(match(Op1, m_One()) && "Expected ctlz/cttz operand to be 0 or 1");
501 return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(II.getType()));
502 }
503
504 // If ctlz/cttz is only used as a shift amount, set is_zero_poison to true.
505 if (II.hasOneUse() && match(Op1, m_Zero()) &&
506 match(II.user_back(), m_Shift(m_Value(), m_Specific(&II)))) {
507 II.dropUBImplyingAttrsAndMetadata();
508 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
509 }
510
511 Constant *C;
512
513 if (IsTZ) {
514 // cttz(-x) -> cttz(x)
515 if (match(Op0, m_Neg(m_Value(X))))
516 return IC.replaceOperand(II, 0, X);
517
518 // cttz(-x & x) -> cttz(x)
519 if (match(Op0, m_c_And(m_Neg(m_Value(X)), m_Deferred(X))))
520 return IC.replaceOperand(II, 0, X);
521
522 // cttz(sext(x)) -> cttz(zext(x))
523 if (match(Op0, m_OneUse(m_SExt(m_Value(X))))) {
524 auto *Zext = IC.Builder.CreateZExt(X, II.getType());
525 auto *CttzZext =
526 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, Zext, Op1);
527 return IC.replaceInstUsesWith(II, CttzZext);
528 }
529
530 // Zext doesn't change the number of trailing zeros, so narrow:
531 // cttz(zext(x)) -> zext(cttz(x)) if the 'ZeroIsPoison' parameter is 'true'.
532 if (match(Op0, m_OneUse(m_ZExt(m_Value(X)))) && match(Op1, m_One())) {
533 auto *Cttz = IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, X,
534 IC.Builder.getTrue());
535 auto *ZextCttz = IC.Builder.CreateZExt(Cttz, II.getType());
536 return IC.replaceInstUsesWith(II, ZextCttz);
537 }
538
539 // cttz(abs(x)) -> cttz(x)
540 // cttz(nabs(x)) -> cttz(x)
541 Value *Y;
543 if (SPF == SPF_ABS || SPF == SPF_NABS)
544 return IC.replaceOperand(II, 0, X);
545
547 return IC.replaceOperand(II, 0, X);
548
549 // cttz(shl(%const, %val), 1) --> add(cttz(%const, 1), %val)
550 if (match(Op0, m_Shl(m_ImmConstant(C), m_Value(X))) &&
551 match(Op1, m_One())) {
552 Value *ConstCttz =
553 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
554 return BinaryOperator::CreateAdd(ConstCttz, X);
555 }
556
557 // cttz(lshr exact (%const, %val), 1) --> sub(cttz(%const, 1), %val)
558 if (match(Op0, m_Exact(m_LShr(m_ImmConstant(C), m_Value(X)))) &&
559 match(Op1, m_One())) {
560 Value *ConstCttz =
561 IC.Builder.CreateBinaryIntrinsic(Intrinsic::cttz, C, Op1);
562 return BinaryOperator::CreateSub(ConstCttz, X);
563 }
564
565 // cttz(add(lshr(UINT_MAX, %val), 1)) --> sub(width, %val)
566 if (match(Op0, m_Add(m_LShr(m_AllOnes(), m_Value(X)), m_One()))) {
567 Value *Width =
568 ConstantInt::get(II.getType(), II.getType()->getScalarSizeInBits());
569 return BinaryOperator::CreateSub(Width, X);
570 }
571 } else {
572 // ctlz(lshr(%const, %val), 1) --> add(ctlz(%const, 1), %val)
573 if (match(Op0, m_LShr(m_ImmConstant(C), m_Value(X))) &&
574 match(Op1, m_One())) {
575 Value *ConstCtlz =
576 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
577 return BinaryOperator::CreateAdd(ConstCtlz, X);
578 }
579
580 // ctlz(shl nuw (%const, %val), 1) --> sub(ctlz(%const, 1), %val)
581 if (match(Op0, m_NUWShl(m_ImmConstant(C), m_Value(X))) &&
582 match(Op1, m_One())) {
583 Value *ConstCtlz =
584 IC.Builder.CreateBinaryIntrinsic(Intrinsic::ctlz, C, Op1);
585 return BinaryOperator::CreateSub(ConstCtlz, X);
586 }
587
588 // ctlz(~x & (x - 1)) -> bitwidth - cttz(x, false)
589 if (Op0->hasOneUse() &&
590 match(Op0,
592 Type *Ty = II.getType();
593 unsigned BitWidth = Ty->getScalarSizeInBits();
594 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
595 {X, IC.Builder.getFalse()});
596 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
597 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
598 }
599 }
600
601 // cttz(Pow2) -> Log2(Pow2)
602 // ctlz(Pow2) -> BitWidth - 1 - Log2(Pow2)
603 if (auto *R = IC.tryGetLog2(Op0, match(Op1, m_One()))) {
604 if (IsTZ)
605 return IC.replaceInstUsesWith(II, R);
606 BinaryOperator *BO = BinaryOperator::CreateSub(
607 ConstantInt::get(R->getType(), R->getType()->getScalarSizeInBits() - 1),
608 R);
609 BO->setHasNoSignedWrap();
611 return BO;
612 }
613
614 KnownBits Known = IC.computeKnownBits(Op0, &II);
615
616 // Create a mask for bits above (ctlz) or below (cttz) the first known one.
617 unsigned PossibleZeros = IsTZ ? Known.countMaxTrailingZeros()
618 : Known.countMaxLeadingZeros();
619 unsigned DefiniteZeros = IsTZ ? Known.countMinTrailingZeros()
620 : Known.countMinLeadingZeros();
621
622 // If all bits above (ctlz) or below (cttz) the first known one are known
623 // zero, this value is constant.
624 // FIXME: This should be in InstSimplify because we're replacing an
625 // instruction with a constant.
626 if (PossibleZeros == DefiniteZeros) {
627 auto *C = ConstantInt::get(Op0->getType(), DefiniteZeros);
628 return IC.replaceInstUsesWith(II, C);
629 }
630
631 // If the input to cttz/ctlz is known to be non-zero,
632 // then change the 'ZeroIsPoison' parameter to 'true'
633 // because we know the zero behavior can't affect the result.
634 if (!Known.One.isZero() ||
636 if (!match(II.getArgOperand(1), m_One()))
637 return IC.replaceOperand(II, 1, IC.Builder.getTrue());
638 }
639
640 // Add range attribute since known bits can't completely reflect what we know.
641 unsigned BitWidth = Op0->getType()->getScalarSizeInBits();
642 if (BitWidth != 1 && !II.hasRetAttr(Attribute::Range) &&
643 !II.getMetadata(LLVMContext::MD_range)) {
644 ConstantRange Range(APInt(BitWidth, DefiniteZeros),
645 APInt(BitWidth, PossibleZeros + 1));
646 II.addRangeRetAttr(Range);
647 return &II;
648 }
649
650 return nullptr;
651}
652
654 assert(II.getIntrinsicID() == Intrinsic::ctpop &&
655 "Expected ctpop intrinsic");
656 Type *Ty = II.getType();
657 unsigned BitWidth = Ty->getScalarSizeInBits();
658 Value *Op0 = II.getArgOperand(0);
659 Value *X, *Y;
660
661 // ctpop(bitreverse(x)) -> ctpop(x)
662 // ctpop(bswap(x)) -> ctpop(x)
663 if (match(Op0, m_BitReverse(m_Value(X))) || match(Op0, m_BSwap(m_Value(X))))
664 return IC.replaceOperand(II, 0, X);
665
666 // ctpop(rot(x)) -> ctpop(x)
667 if ((match(Op0, m_FShl(m_Value(X), m_Value(Y), m_Value())) ||
668 match(Op0, m_FShr(m_Value(X), m_Value(Y), m_Value()))) &&
669 X == Y)
670 return IC.replaceOperand(II, 0, X);
671
672 // ctpop(x | -x) -> bitwidth - cttz(x, false)
673 if (Op0->hasOneUse() &&
674 match(Op0, m_c_Or(m_Value(X), m_Neg(m_Deferred(X))))) {
675 auto *Cttz = IC.Builder.CreateIntrinsic(Intrinsic::cttz, Ty,
676 {X, IC.Builder.getFalse()});
677 auto *Bw = ConstantInt::get(Ty, APInt(BitWidth, BitWidth));
678 return IC.replaceInstUsesWith(II, IC.Builder.CreateSub(Bw, Cttz));
679 }
680
681 // ctpop(~x & (x - 1)) -> cttz(x, false)
682 if (match(Op0,
684 Function *F =
685 Intrinsic::getOrInsertDeclaration(II.getModule(), Intrinsic::cttz, Ty);
686 return CallInst::Create(F, {X, IC.Builder.getFalse()});
687 }
688
689 // Zext doesn't change the number of set bits, so narrow:
690 // ctpop (zext X) --> zext (ctpop X)
691 if (match(Op0, m_OneUse(m_ZExt(m_Value(X))))) {
692 Value *NarrowPop = IC.Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, X);
693 return CastInst::Create(Instruction::ZExt, NarrowPop, Ty);
694 }
695
696 KnownBits Known(BitWidth);
697 IC.computeKnownBits(Op0, Known, &II);
698
699 // If all bits are zero except for exactly one fixed bit, then the result
700 // must be 0 or 1, and we can get that answer by shifting to LSB:
701 // ctpop (X & 32) --> (X & 32) >> 5
702 // TODO: Investigate removing this as its likely unnecessary given the below
703 // `isKnownToBeAPowerOfTwo` check.
704 if ((~Known.Zero).isPowerOf2())
705 return BinaryOperator::CreateLShr(
706 Op0, ConstantInt::get(Ty, (~Known.Zero).exactLogBase2()));
707
708 // More generally we can also handle non-constant power of 2 patterns such as
709 // shl/shr(Pow2, X), (X & -X), etc... by transforming:
710 // ctpop(Pow2OrZero) --> icmp ne X, 0
711 if (IC.isKnownToBeAPowerOfTwo(Op0, /* OrZero */ true))
712 return CastInst::Create(Instruction::ZExt,
715 Ty);
716
717 // Add range attribute since known bits can't completely reflect what we know.
718 if (BitWidth != 1) {
719 ConstantRange OldRange =
720 II.getRange().value_or(ConstantRange::getFull(BitWidth));
721
722 unsigned Lower = Known.countMinPopulation();
723 unsigned Upper = Known.countMaxPopulation() + 1;
724
725 if (Lower == 0 && OldRange.contains(APInt::getZero(BitWidth)) &&
727 Lower = 1;
728
730 Range = Range.intersectWith(OldRange, ConstantRange::Unsigned);
731
732 if (Range != OldRange) {
733 II.addRangeRetAttr(Range);
734 return &II;
735 }
736 }
737
738 return nullptr;
739}
740
741/// Convert `tbl`/`tbx` intrinsics to shufflevector if the mask is constant, and
742/// at most two source operands are actually referenced.
744 bool IsExtension) {
745 // Bail out if the mask is not a constant.
746 auto *C = dyn_cast<Constant>(II.getArgOperand(II.arg_size() - 1));
747 if (!C)
748 return nullptr;
749
750 auto *RetTy = cast<FixedVectorType>(II.getType());
751 unsigned NumIndexes = RetTy->getNumElements();
752
753 // Only perform this transformation for <8 x i8> and <16 x i8> vector types.
754 if (!RetTy->getElementType()->isIntegerTy(8) ||
755 (NumIndexes != 8 && NumIndexes != 16))
756 return nullptr;
757
758 // For tbx instructions, the first argument is the "fallback" vector, which
759 // has the same length as the mask and return type.
760 unsigned int StartIndex = (unsigned)IsExtension;
761 auto *SourceTy =
762 cast<FixedVectorType>(II.getArgOperand(StartIndex)->getType());
763 // Note that the element count of each source vector does *not* need to be the
764 // same as the element count of the return type and mask! All source vectors
765 // must have the same element count as each other, though.
766 unsigned NumElementsPerSource = SourceTy->getNumElements();
767
768 // There are no tbl/tbx intrinsics for which the destination size exceeds the
769 // source size. However, our definitions of the intrinsics, at least in
770 // IntrinsicsAArch64.td, allow for arbitrary destination vector sizes, so it
771 // *could* technically happen.
772 if (NumIndexes > NumElementsPerSource)
773 return nullptr;
774
775 // The tbl/tbx intrinsics take several source operands followed by a mask
776 // operand.
777 unsigned int NumSourceOperands = II.arg_size() - 1 - (unsigned)IsExtension;
778
779 // Map input operands to shuffle indices. This also helpfully deduplicates the
780 // input arguments, in case the same value is passed as an argument multiple
781 // times.
782 SmallDenseMap<Value *, unsigned, 2> ValueToShuffleSlot;
783 Value *ShuffleOperands[2] = {PoisonValue::get(SourceTy),
784 PoisonValue::get(SourceTy)};
785
786 int Indexes[16];
787 for (unsigned I = 0; I < NumIndexes; ++I) {
788 Constant *COp = C->getAggregateElement(I);
789
790 if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
791 return nullptr;
792
793 if (isa<UndefValue>(COp)) {
794 Indexes[I] = -1;
795 continue;
796 }
797
798 uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
799 // The index of the input argument that this index references (0 = first
800 // source argument, etc).
801 unsigned SourceOperandIndex = Index / NumElementsPerSource;
802 // The index of the element at that source operand.
803 unsigned SourceOperandElementIndex = Index % NumElementsPerSource;
804
805 Value *SourceOperand;
806 if (SourceOperandIndex >= NumSourceOperands) {
807 // This index is out of bounds. Map it to index into either the fallback
808 // vector (tbx) or vector of zeroes (tbl).
809 SourceOperandIndex = NumSourceOperands;
810 if (IsExtension) {
811 // For out-of-bounds indices in tbx, choose the `I`th element of the
812 // fallback.
813 SourceOperand = II.getArgOperand(0);
814 SourceOperandElementIndex = I;
815 } else {
816 // Otherwise, choose some element from the dummy vector of zeroes (we'll
817 // always choose the first).
818 SourceOperand = Constant::getNullValue(SourceTy);
819 SourceOperandElementIndex = 0;
820 }
821 } else {
822 SourceOperand = II.getArgOperand(SourceOperandIndex + StartIndex);
823 }
824
825 // The source operand may be the fallback vector, which may not have the
826 // same number of elements as the source vector. In that case, we *could*
827 // choose to extend its length with another shufflevector, but it's simpler
828 // to just bail instead.
829 if (cast<FixedVectorType>(SourceOperand->getType())->getNumElements() !=
830 NumElementsPerSource)
831 return nullptr;
832
833 // We now know the source operand referenced by this index. Make it a
834 // shufflevector operand, if it isn't already.
835 unsigned NumSlots = ValueToShuffleSlot.size();
836 // This shuffle references more than two sources, and hence cannot be
837 // represented as a shufflevector.
838 if (NumSlots == 2 && !ValueToShuffleSlot.contains(SourceOperand))
839 return nullptr;
840
841 auto [It, Inserted] =
842 ValueToShuffleSlot.try_emplace(SourceOperand, NumSlots);
843 if (Inserted)
844 ShuffleOperands[It->getSecond()] = SourceOperand;
845
846 unsigned RemappedIndex =
847 (It->getSecond() * NumElementsPerSource) + SourceOperandElementIndex;
848 Indexes[I] = RemappedIndex;
849 }
850
852 ShuffleOperands[0], ShuffleOperands[1], ArrayRef(Indexes, NumIndexes));
853 return IC.replaceInstUsesWith(II, Shuf);
854}
855
856// Returns true iff the 2 intrinsics have the same operands, limiting the
857// comparison to the first NumOperands.
858static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E,
859 unsigned NumOperands) {
860 assert(I.arg_size() >= NumOperands && "Not enough operands");
861 assert(E.arg_size() >= NumOperands && "Not enough operands");
862 for (unsigned i = 0; i < NumOperands; i++)
863 if (I.getArgOperand(i) != E.getArgOperand(i))
864 return false;
865 return true;
866}
867
868// Remove trivially empty start/end intrinsic ranges, i.e. a start
869// immediately followed by an end (ignoring debuginfo or other
870// start/end intrinsics in between). As this handles only the most trivial
871// cases, tracking the nesting level is not needed:
872//
873// call @llvm.foo.start(i1 0)
874// call @llvm.foo.start(i1 0) ; This one won't be skipped: it will be removed
875// call @llvm.foo.end(i1 0)
876// call @llvm.foo.end(i1 0) ; &I
//
// IsStart is the caller-supplied predicate that recognizes a start intrinsic
// for EndI. Returns true once a start with the same operands as EndI has been
// found and EndI has been erased; returns false if the backwards scan stops
// before that happens.
// NOTE(review): embedded source lines 878 and 891 are not present in this
// listing, so the head of the parameter list and one statement of the erase
// step are not visible here.
877static bool
879 std::function<bool(const IntrinsicInst &)> IsStart) {
880 // We start from the end intrinsic and scan backwards, so that InstCombine
881 // has already processed (and potentially removed) all the instructions
882 // before the end intrinsic.
883 BasicBlock::reverse_iterator BI(EndI), BE(EndI.getParent()->rend());
884 for (; BI != BE; ++BI) {
885 if (auto *I = dyn_cast<IntrinsicInst>(&*BI)) {
 // Look through debug/pseudo instructions and through intrinsics with the
 // same ID as EndI (which includes EndI itself on the first iteration).
886 if (I->isDebugOrPseudoInst() ||
887 I->getIntrinsicID() == EndI.getIntrinsicID())
888 continue;
889 if (IsStart(*I)) {
 // All of EndI's arguments must be identical to the start's leading ones.
890 if (haveSameOperands(EndI, *I, EndI.arg_size())) {
892 IC.eraseInstFromFunction(EndI);
893 return true;
894 }
895 // Skip start intrinsics that don't pair with this end intrinsic.
896 continue;
897 }
898 }
 // Any other instruction terminates the scan: the range is not trivially
 // empty.
899 break;
900 }
901
902 return false;
903}
904
// NOTE(review): the declarator of this definition (embedded line 905) is not
// present in this listing.
// Ask removeTriviallyEmptyRange to drop this call when it directly follows a
// matching va_start, or a va_copy whose second argument differs from this
// call's first argument. The helper's boolean result is ignored and nullptr is
// always returned.
906 removeTriviallyEmptyRange(I, *this, [&I](const IntrinsicInst &II) {
907 // Bail out on the case where the source va_list of a va_copy is destroyed
908 // immediately by a follow-up va_end.
909 return II.getIntrinsicID() == Intrinsic::vastart ||
910 (II.getIntrinsicID() == Intrinsic::vacopy &&
911 I.getArgOperand(0) != II.getArgOperand(1));
912 });
913 return nullptr;
914}
915
// NOTE(review): the declarator of this definition (embedded line 916) is not
// present in this listing.
// If the first argument of Call is a Constant and the second is not, swap the
// two in place and return &Call; otherwise leave the call untouched and return
// nullptr.
917 assert(Call.arg_size() > 1 && "Need at least 2 args to swap");
918 Value *Arg0 = Call.getArgOperand(0), *Arg1 = Call.getArgOperand(1);
919 if (isa<Constant>(Arg0) && !isa<Constant>(Arg1)) {
920 Call.setArgOperand(0, Arg1);
921 Call.setArgOperand(1, Arg0);
922 return &Call;
923 }
924 return nullptr;
925}
926
927/// Creates a result tuple for an overflow intrinsic \p II with a given
928/// \p Result and a constant \p Overflow value.
///
/// The returned instruction is an insertvalue that places \p Result at index 0
/// of the constant struct {poison, \p Overflow} built with \p II's struct type.
/// NOTE(review): the first line of the declarator (embedded line 929) is not
/// present in this listing.
930 Constant *Overflow) {
 // Slot 0 is a poison placeholder; it is overwritten by the insertvalue below.
931 Constant *V[] = {PoisonValue::get(Result->getType()), Overflow};
932 StructType *ST = cast<StructType>(II->getType());
933 Constant *Struct = ConstantStruct::get(ST, V);
934 return InsertValueInst::Create(Struct, Result, 0);
935}
936
/// Simplify the with.overflow intrinsic \p II.
///
/// First defers to OptimizeOverflowCheck; if it yields a result value and an
/// overflow constant, they are packaged with createOverflowTuple. Otherwise
/// looks for a cached assumption whose condition is the negation of the
/// overflow bit (an extractvalue of index 1) and that is valid at \p II; if
/// found, the operation is rebuilt as a plain binary operator carrying nsw or
/// nuw and paired with a false overflow bit. Returns nullptr when neither
/// applies.
/// NOTE(review): the line preceding the declarator (embedded line 937) is not
/// present in this listing.
938InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
939 WithOverflowInst *WO = cast<WithOverflowInst>(II);
940 Value *OperationResult = nullptr;
941 Constant *OverflowResult = nullptr;
942 if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
943 WO->getRHS(), *WO, OperationResult, OverflowResult))
944 return createOverflowTuple(WO, OperationResult, OverflowResult);
945
946 // See whether we can optimize the overflow check with assumption information.
947 for (User *U : WO->users()) {
 // Only users that extract the overflow bit (index 1) are interesting.
948 if (!match(U, m_ExtractValue<1>(m_Value())))
949 continue;
950
951 for (auto &AssumeVH : AC.assumptionsFor(U)) {
 // Skip null handles.
952 if (!AssumeVH)
953 continue;
954 CallInst *I = cast<CallInst>(AssumeVH);
 // The assumption must state "not overflow bit".
955 if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
956 continue;
957 if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
958 /*AllowEphemerals=*/true))
959 continue;
960 Value *Result =
961 Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
962 Result->takeName(WO);
 // Record the no-wrap flag matching the intrinsic's signedness; the builder
 // may have produced a non-instruction, in which case there is nothing to
 // flag.
963 if (auto *Inst = dyn_cast<Instruction>(Result)) {
964 if (WO->isSigned())
965 Inst->setHasNoSignedWrap();
966 else
967 Inst->setHasNoUnsignedWrap();
968 }
969 return createOverflowTuple(WO, Result,
970 ConstantInt::getFalse(U->getType()));
971 }
972 }
973
974 return nullptr;
975}
976
977static bool inputDenormalIsIEEE(const Function &F, const Type *Ty) {
978 Ty = Ty->getScalarType();
979 return F.getDenormalMode(Ty->getFltSemantics()).Input == DenormalMode::IEEE;
980}
981
982static bool inputDenormalIsDAZ(const Function &F, const Type *Ty) {
983 Ty = Ty->getScalarType();
984 return F.getDenormalMode(Ty->getFltSemantics()).inputsAreZero();
985}
986
987/// \returns the compare predicate type if the test performed by
988/// llvm.is.fpclass(x, \p Mask) is equivalent to fcmp o__ x, 0.0 with the
989/// floating-point environment assumed for \p F for type \p Ty
///
/// Each mask is accepted only under one input-denormal mode: the masks that do
/// not mention subnormals separately require IEEE inputs, while their
/// counterparts that move the subnormal classes to the "zero" side require
/// denormal inputs to be treated as zero.
/// NOTE(review): embedded lines 990, 1005, 1009, 1017, 1021 and 1045 are not
/// present in this listing (presumably the head of the declarator, four case
/// labels and the fall-through return — confirm against the real source).
991 const Function &F, Type *Ty) {
992 switch (static_cast<unsigned>(Mask)) {
993 case fcZero:
994 if (inputDenormalIsIEEE(F, Ty))
995 return FCmpInst::FCMP_OEQ;
996 break;
997 case fcZero | fcSubnormal:
998 if (inputDenormalIsDAZ(F, Ty))
999 return FCmpInst::FCMP_OEQ;
1000 break;
1001 case fcPositive | fcNegZero:
1002 if (inputDenormalIsIEEE(F, Ty))
1003 return FCmpInst::FCMP_OGE;
1004 break;
1006 if (inputDenormalIsDAZ(F, Ty))
1007 return FCmpInst::FCMP_OGE;
1008 break;
1010 if (inputDenormalIsIEEE(F, Ty))
1011 return FCmpInst::FCMP_OGT;
1012 break;
1013 case fcNegative | fcPosZero:
1014 if (inputDenormalIsIEEE(F, Ty))
1015 return FCmpInst::FCMP_OLE;
1016 break;
1018 if (inputDenormalIsDAZ(F, Ty))
1019 return FCmpInst::FCMP_OLE;
1020 break;
1022 if (inputDenormalIsIEEE(F, Ty))
1023 return FCmpInst::FCMP_OLT;
1024 break;
1025 case fcPosNormal | fcPosInf:
1026 if (inputDenormalIsDAZ(F, Ty))
1027 return FCmpInst::FCMP_OGT;
1028 break;
1029 case fcNegNormal | fcNegInf:
1030 if (inputDenormalIsDAZ(F, Ty))
1031 return FCmpInst::FCMP_OLT;
1032 break;
1033 case ~fcZero & ~fcNan:
1034 if (inputDenormalIsIEEE(F, Ty))
1035 return FCmpInst::FCMP_ONE;
1036 break;
1037 case ~(fcZero | fcSubnormal) & ~fcNan:
1038 if (inputDenormalIsDAZ(F, Ty))
1039 return FCmpInst::FCMP_ONE;
1040 break;
1041 default:
1042 break;
1043 }
1044
1046}
1047
/// Fold llvm.is.fpclass(x, mask).
///
/// In order: pushes the test through fneg and fabs of the operand by adjusting
/// the mask; rewrites infinity, NaN and zero-relative masks as fcmp (never in
/// a strictfp function); then uses known FP class information to drop mask
/// bits that cannot be set or to fold the whole test to true. Returns nullptr
/// when nothing applies.
/// NOTE(review): embedded lines 1081, 1114, 1139, 1157 and 1158 are not present
/// in this listing.
1048Instruction *InstCombinerImpl::foldIntrinsicIsFPClass(IntrinsicInst &II) {
1049 Value *Src0 = II.getArgOperand(0);
1050 Value *Src1 = II.getArgOperand(1);
1051 const ConstantInt *CMask = cast<ConstantInt>(Src1);
1052 FPClassTest Mask = static_cast<FPClassTest>(CMask->getZExtValue());
 // IsUnordered: both NaN classes are tested. IsOrdered: neither is. A mask
 // with exactly one NaN class is neither, and is not converted to fcmp below.
1053 const bool IsUnordered = (Mask & fcNan) == fcNan;
1054 const bool IsOrdered = (Mask & fcNan) == fcNone;
 // The non-NaN classes in the mask, and the non-NaN classes absent from it.
1055 const FPClassTest OrderedMask = Mask & ~fcNan;
1056 const FPClassTest OrderedInvertedMask = ~OrderedMask & ~fcNan;
1057
1058 const bool IsStrict =
1059 II.getFunction()->getAttributes().hasFnAttr(Attribute::StrictFP);
1060
1061 Value *FNegSrc;
1062 if (match(Src0, m_FNeg(m_Value(FNegSrc)))) {
1063 // is.fpclass (fneg x), mask -> is.fpclass x, (fneg mask)
1064
1065 II.setArgOperand(1, ConstantInt::get(Src1->getType(), fneg(Mask)));
1066 return replaceOperand(II, 0, FNegSrc);
1067 }
1068
1069 Value *FAbsSrc;
1070 if (match(Src0, m_FAbs(m_Value(FAbsSrc)))) {
 // is.fpclass (fabs x), mask -> is.fpclass x, (inverse_fabs mask)
1071 II.setArgOperand(1, ConstantInt::get(Src1->getType(), inverse_fabs(Mask)));
1072 return replaceOperand(II, 0, FAbsSrc);
1073 }
1074
1075 if ((OrderedMask == fcInf || OrderedInvertedMask == fcInf) &&
1076 (IsOrdered || IsUnordered) && !IsStrict) {
1077 // is.fpclass(x, fcInf) -> fcmp oeq fabs(x), +inf
1078 // is.fpclass(x, ~(fcInf|fcNan)) -> fcmp one fabs(x), +inf
1079 // is.fpclass(x, fcInf|fcNan) -> fcmp ueq fabs(x), +inf
1080 // is.fpclass(x, ~fcInf) -> fcmp une fabs(x), +inf
1082 FCmpInst::Predicate Pred =
1083 IsUnordered ? FCmpInst::FCMP_UEQ : FCmpInst::FCMP_OEQ;
1084 if (OrderedInvertedMask == fcInf)
1085 Pred = IsUnordered ? FCmpInst::FCMP_UNE : FCmpInst::FCMP_ONE;
1086
1087 Value *Fabs = Builder.CreateFAbs(Src0);
1088 Value *CmpInf = Builder.CreateFCmp(Pred, Fabs, Inf);
1089 CmpInf->takeName(&II);
1090 return replaceInstUsesWith(II, CmpInf);
1091 }
1092
1093 if ((OrderedMask == fcPosInf || OrderedMask == fcNegInf) &&
1094 (IsOrdered || IsUnordered) && !IsStrict) {
1095 // is.fpclass(x, fcPosInf) -> fcmp oeq x, +inf
1096 // is.fpclass(x, fcNegInf) -> fcmp oeq x, -inf
1097 // is.fpclass(x, fcPosInf|fcNan) -> fcmp ueq x, +inf
1098 // is.fpclass(x, fcNegInf|fcNan) -> fcmp ueq x, -inf
1099 Constant *Inf =
1100 ConstantFP::getInfinity(Src0->getType(), OrderedMask == fcNegInf);
1101 Value *EqInf = IsUnordered ? Builder.CreateFCmpUEQ(Src0, Inf)
1102 : Builder.CreateFCmpOEQ(Src0, Inf);
1103
1104 EqInf->takeName(&II);
1105 return replaceInstUsesWith(II, EqInf);
1106 }
1107
1108 if ((OrderedInvertedMask == fcPosInf || OrderedInvertedMask == fcNegInf) &&
1109 (IsOrdered || IsUnordered) && !IsStrict) {
1110 // is.fpclass(x, ~fcPosInf & ~fcNan) -> fcmp one x, +inf
1111 // is.fpclass(x, ~fcNegInf & ~fcNan) -> fcmp one x, -inf
1112 // is.fpclass(x, ~fcPosInf) -> fcmp une x, +inf
1113 // is.fpclass(x, ~fcNegInf) -> fcmp une x, -inf
1115 OrderedInvertedMask == fcNegInf);
1116 Value *NeInf = IsUnordered ? Builder.CreateFCmpUNE(Src0, Inf)
1117 : Builder.CreateFCmpONE(Src0, Inf);
1118 NeInf->takeName(&II);
1119 return replaceInstUsesWith(II, NeInf);
1120 }
1121
1122 if (Mask == fcNan && !IsStrict) {
1123 // Equivalent of isnan. Replace with standard fcmp if we don't care about FP
1124 // exceptions.
1125 Value *IsNan =
1126 Builder.CreateFCmpUNO(Src0, ConstantFP::getZero(Src0->getType()));
1127 IsNan->takeName(&II);
1128 return replaceInstUsesWith(II, IsNan);
1129 }
1130
1131 if (Mask == (~fcNan & fcAllFlags) && !IsStrict) {
1132 // Equivalent of !isnan. Replace with standard fcmp.
1133 Value *FCmp =
1134 Builder.CreateFCmpORD(Src0, ConstantFP::getZero(Src0->getType()));
1135 FCmp->takeName(&II);
1136 return replaceInstUsesWith(II, FCmp);
1137 }
1138
1140
1141 // Try to replace with an fcmp with 0
1142 //
1143 // is.fpclass(x, fcZero) -> fcmp oeq x, 0.0
1144 // is.fpclass(x, fcZero | fcNan) -> fcmp ueq x, 0.0
1145 // is.fpclass(x, ~fcZero & ~fcNan) -> fcmp one x, 0.0
1146 // is.fpclass(x, ~fcZero) -> fcmp une x, 0.0
1147 //
1148 // is.fpclass(x, fcPosSubnormal | fcPosNormal | fcPosInf) -> fcmp ogt x, 0.0
1149 // is.fpclass(x, fcPositive | fcNegZero) -> fcmp oge x, 0.0
1150 //
1151 // is.fpclass(x, fcNegSubnormal | fcNegNormal | fcNegInf) -> fcmp olt x, 0.0
1152 // is.fpclass(x, fcNegative | fcPosZero) -> fcmp ole x, 0.0
1153 //
1154 if (!IsStrict && (IsOrdered || IsUnordered) &&
1155 (PredType = fpclassTestIsFCmp0(OrderedMask, *II.getFunction(),
1156 Src0->getType())) !=
1159 // Equivalent of == 0.
 // The helper reports an ordered predicate; switch to its unordered twin when
 // the mask also accepts NaN.
1160 Value *FCmp = Builder.CreateFCmp(
1161 IsUnordered ? FCmpInst::getUnorderedPredicate(PredType) : PredType,
1162 Src0, Zero);
1163
1164 FCmp->takeName(&II);
1165 return replaceInstUsesWith(II, FCmp);
1166 }
1167
1168 KnownFPClass Known =
1169 computeKnownFPClass(Src0, Mask, SQ.getWithInstruction(&II));
1170
1171 // Clear test bits we know must be false from the source value.
1172 // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
1173 // fp_class (ninf x), ninf|pinf|other -> fp_class (ninf x), other
1174 if ((Mask & Known.KnownFPClasses) != Mask) {
1175 II.setArgOperand(
1176 1, ConstantInt::get(Src1->getType(), Mask & Known.KnownFPClasses));
1177 return &II;
1178 }
1179
1180 // If none of the tests which can return false are possible, fold to true.
1181 // fp_class (nnan x), ~(qnan|snan) -> true
1182 // fp_class (ninf x), ~(ninf|pinf) -> true
1183 if (Mask == Known.KnownFPClasses)
1184 return replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
1185
1186 return nullptr;
1187}
1188
/// \returns the sign of \p Op when it can be determined: false if \p Op is
/// known non-negative, true if it is known negative, std::nullopt otherwise.
/// Known bits are consulted first; an nsw subtraction X - Y gets a second
/// chance.
/// NOTE(review): embedded line 1198 (the statement guarded by the nsw-sub
/// match) is not present in this listing.
1189static std::optional<bool> getKnownSign(Value *Op, const SimplifyQuery &SQ) {
1190 KnownBits Known = computeKnownBits(Op, SQ);
1191 if (Known.isNonNegative())
1192 return false;
1193 if (Known.isNegative())
1194 return true;
1195
1196 Value *X, *Y;
1197 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1199
1200 return std::nullopt;
1201}
1202
/// Like getKnownSign, which is tried first and whose answer is returned
/// unchanged when it has one. Otherwise an nsw subtraction X - Y gets a
/// further check before giving up with std::nullopt.
/// NOTE(review): embedded line 1210 (the statement guarded by the nsw-sub
/// match) is not present in this listing, so the exact extra condition is not
/// visible here.
1203static std::optional<bool> getKnownSignOrZero(Value *Op,
1204 const SimplifyQuery &SQ) {
1205 if (std::optional<bool> Sign = getKnownSign(Op, SQ))
1206 return Sign;
1207
1208 Value *X, *Y;
1209 if (match(Op, m_NSWSub(m_Value(X), m_Value(Y))))
1211
1212 return std::nullopt;
1213}
1214
1215/// Return true if two values \p Op0 and \p Op1 are known to have the same sign.
1216static bool signBitMustBeTheSame(Value *Op0, Value *Op1,
1217 const SimplifyQuery &SQ) {
1218 std::optional<bool> Known1 = getKnownSign(Op1, SQ);
1219 if (!Known1)
1220 return false;
1221 std::optional<bool> Known0 = getKnownSign(Op0, SQ);
1222 if (!Known0)
1223 return false;
1224 return *Known0 == *Known1;
1225}
1226
1227// Determines if ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b)) is safe.
1228//
1229// This is true if, when the add saturates, the resulting ldexp is guaranteed to
1230// produce 0 or inf.
//
// The check scales the smallest value of FpTy's format by the largest exponent
// representable in ExpTy (and the largest value by the smallest exponent) and
// requires the results to be infinity and zero respectively. Formats without
// an infinity are rejected up front.
// NOTE(review): embedded lines 1242 and 1244 (the tail of each scalbn call) are
// not present in this listing.
1231static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy) {
1232 const fltSemantics &FltSem = FpTy->getScalarType()->getFltSemantics();
1233 if (!APFloat::semanticsHasInf(FltSem))
1234 return false;
1235
1236 // Cap ExpBits at 32 because scalbn takes an int. This is sufficient for any
1237 // reasonable fp type (for example, `double` only has 11 exponent bits).
1238 unsigned ExpBits = std::min(ExpTy->getScalarSizeInBits(), 32u);
1239 int SignedMax = static_cast<int>(maxIntN(ExpBits));
1240 int SignedMin = static_cast<int>(minIntN(ExpBits));
1241 APFloat ScaledUp = scalbn(APFloat::getSmallest(FltSem), SignedMax,
1243 APFloat ScaledDown = scalbn(APFloat::getLargest(FltSem), SignedMin,
1245 return ScaledUp.isInfinity() && ScaledDown.isZero();
1246}
1247
1248/// Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0. This
1249/// can trigger other combines.
///
/// Requires operand 0 to be a one-use add of a constant carrying the no-wrap
/// flag that matches the signedness of the min/max (nsw for smin/smax, nuw for
/// umin/umax) and operand 1 to be a constant. Returns the new, not yet
/// inserted add, or nullptr if the pattern does not match.
/// NOTE(review): the first line of the declarator (embedded line 1250) is not
/// present in this listing.
1251 InstCombiner::BuilderTy &Builder) {
1252 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1253 assert((MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin ||
1254 MinMaxID == Intrinsic::umax || MinMaxID == Intrinsic::umin) &&
1255 "Expected a min or max intrinsic");
1256
1257 // TODO: Match vectors with undef elements, but undef may not propagate.
1258 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
1259 Value *X;
1260 const APInt *C0, *C1;
1261 if (!match(Op0, m_OneUse(m_Add(m_Value(X), m_APInt(C0)))) ||
1262 !match(Op1, m_APInt(C1)))
1263 return nullptr;
1264
1265 // Check for necessary no-wrap and overflow constraints.
1266 bool IsSigned = MinMaxID == Intrinsic::smax || MinMaxID == Intrinsic::smin;
1267 auto *Add = cast<BinaryOperator>(Op0);
1268 if ((IsSigned && !Add->hasNoSignedWrap()) ||
1269 (!IsSigned && !Add->hasNoUnsignedWrap()))
1270 return nullptr;
1271
1272 // If the constant difference overflows, then instsimplify should reduce the
1273 // min/max to the add or C1.
1274 bool Overflow;
1275 APInt CDiff =
1276 IsSigned ? C1->ssub_ov(*C0, Overflow) : C1->usub_ov(*C0, Overflow);
 // NOTE(review): overflow is only checked by this assert, so builds without
 // assertions proceed with the wrapped difference — confirm that the
 // simplification mentioned above always runs first.
1277 assert(!Overflow && "Expected simplify of min/max");
1278
1279 // min/max (add X, C0), C1 --> add (min/max X, C1 - C0), C0
1280 // Note: the "mismatched" no-overflow setting does not propagate.
1281 Constant *NewMinMaxC = ConstantInt::get(II->getType(), CDiff);
1282 Value *NewMinMax = Builder.CreateBinaryIntrinsic(MinMaxID, X, NewMinMaxC);
 // Reuse the add's original constant operand rather than rebuilding it.
1283 return IsSigned ? BinaryOperator::CreateNSWAdd(NewMinMax, Add->getOperand(1))
1284 : BinaryOperator::CreateNUWAdd(NewMinMax, Add->getOperand(1));
1285}
1286/// Match a sadd_sat or ssub_sat which is using min/max to clamp the value.
1287Instruction *InstCombinerImpl::matchSAddSubSat(IntrinsicInst &MinMax1) {
1288 Type *Ty = MinMax1.getType();
1289
1290 // We are looking for a tree of:
1291 // max(INT_MIN, min(INT_MAX, add(sext(A), sext(B))))
1292 // Where the min and max could be reversed
1293 Instruction *MinMax2;
1294 BinaryOperator *AddSub;
1295 const APInt *MinValue, *MaxValue;
1296 if (match(&MinMax1, m_SMin(m_Instruction(MinMax2), m_APInt(MaxValue)))) {
1297 if (!match(MinMax2, m_SMax(m_BinOp(AddSub), m_APInt(MinValue))))
1298 return nullptr;
1299 } else if (match(&MinMax1,
1300 m_SMax(m_Instruction(MinMax2), m_APInt(MinValue)))) {
1301 if (!match(MinMax2, m_SMin(m_BinOp(AddSub), m_APInt(MaxValue))))
1302 return nullptr;
1303 } else
1304 return nullptr;
1305
1306 // Check that the constants clamp a saturate, and that the new type would be
1307 // sensible to convert to.
1308 if (!(*MaxValue + 1).isPowerOf2() || -*MinValue != *MaxValue + 1)
1309 return nullptr;
1310 // In what bitwidth can this be treated as saturating arithmetics?
1311 unsigned NewBitWidth = (*MaxValue + 1).logBase2() + 1;
1312 // FIXME: This isn't quite right for vectors, but using the scalar type is a
1313 // good first approximation for what should be done there.
1314 if (!shouldChangeType(Ty->getScalarType()->getIntegerBitWidth(), NewBitWidth))
1315 return nullptr;
1316
1317 // Also make sure that the inner min/max and the add/sub have one use.
1318 if (!MinMax2->hasOneUse() || !AddSub->hasOneUse())
1319 return nullptr;
1320
1321 // Create the new type (which can be a vector type)
1322 Type *NewTy = Ty->getWithNewBitWidth(NewBitWidth);
1323
1324 Intrinsic::ID IntrinsicID;
1325 if (AddSub->getOpcode() == Instruction::Add)
1326 IntrinsicID = Intrinsic::sadd_sat;
1327 else if (AddSub->getOpcode() == Instruction::Sub)
1328 IntrinsicID = Intrinsic::ssub_sat;
1329 else
1330 return nullptr;
1331
1332 // The two operands of the add/sub must be nsw-truncatable to the NewTy. This
1333 // is usually achieved via a sext from a smaller type.
1334 if (ComputeMaxSignificantBits(AddSub->getOperand(0), AddSub) > NewBitWidth ||
1335 ComputeMaxSignificantBits(AddSub->getOperand(1), AddSub) > NewBitWidth)
1336 return nullptr;
1337
1338 // Finally create and return the sat intrinsic, truncated to the new type
1339 Value *AT = Builder.CreateTrunc(AddSub->getOperand(0), NewTy);
1340 Value *BT = Builder.CreateTrunc(AddSub->getOperand(1), NewTy);
1341 Value *Sat = Builder.CreateIntrinsic(IntrinsicID, NewTy, {AT, BT});
1342 return CastInst::Create(Instruction::SExt, Sat, Ty);
1343}
1344
1345
1346/// If we have a clamp pattern like max (min X, 42), 41 -- where the output
1347/// can only be one of two possible constant values -- turn that into a select
1348/// of constants.
///
/// Requires operand 1 to be a constant and operand 0 to be a one-use opposite
/// min/max whose constant is adjacent to it (differs by exactly one in the
/// direction shown in the examples below). Returns the new, not yet inserted
/// select, or nullptr.
/// NOTE(review): embedded lines 1349 (head of the declarator) and 1357 (the
/// declaration of Pred) are not present in this listing.
1350 InstCombiner::BuilderTy &Builder) {
1351 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
1352 Value *X;
1353 const APInt *C0, *C1;
1354 if (!match(I1, m_APInt(C1)) || !I0->hasOneUse())
1355 return nullptr;
1356
 // Pick the compare predicate for the outer op; Pred is left unchanged when
 // the inner op or the constants do not fit the pattern.
1358 switch (II->getIntrinsicID()) {
1359 case Intrinsic::smax:
1360 if (match(I0, m_SMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1361 Pred = ICmpInst::ICMP_SGT;
1362 break;
1363 case Intrinsic::smin:
1364 if (match(I0, m_SMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1365 Pred = ICmpInst::ICMP_SLT;
1366 break;
1367 case Intrinsic::umax:
1368 if (match(I0, m_UMin(m_Value(X), m_APInt(C0))) && *C0 == *C1 + 1)
1369 Pred = ICmpInst::ICMP_UGT;
1370 break;
1371 case Intrinsic::umin:
1372 if (match(I0, m_UMax(m_Value(X), m_APInt(C0))) && *C1 == *C0 + 1)
1373 Pred = ICmpInst::ICMP_ULT;
1374 break;
1375 default:
1376 llvm_unreachable("Expected min/max intrinsic");
1377 }
1378 if (Pred == CmpInst::BAD_ICMP_PREDICATE)
1379 return nullptr;
1380
1381 // max (min X, 42), 41 --> X > 41 ? 42 : 41
1382 // min (max X, 42), 43 --> X < 43 ? 42 : 43
1383 Value *Cmp = Builder.CreateICmp(Pred, X, I1);
1384 return SelectInst::Create(Cmp, ConstantInt::get(II->getType(), *C0), I1);
1385}
1386
1387/// If this min/max has a constant operand and an operand that is a matching
1388/// min/max with a constant operand, constant-fold the 2 constant operands.
///
/// Returns the replacement call created through \p Builder, or nullptr when
/// the pattern does not match. Note that the replacement uses the INNER
/// intrinsic's ID, which matters for the mixed signed/unsigned forms.
/// NOTE(review): embedded lines 1389 (head of the declarator) and 1413 (the
/// definition of Pred) are not present in this listing.
1390 IRBuilderBase &Builder,
1391 const SimplifyQuery &SQ) {
1392 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1393 auto *LHS = dyn_cast<MinMaxIntrinsic>(II->getArgOperand(0));
1394 if (!LHS)
1395 return nullptr;
1396
1397 Constant *C0, *C1;
1398 if (!match(LHS->getArgOperand(1), m_ImmConstant(C0)) ||
1399 !match(II->getArgOperand(1), m_ImmConstant(C1)))
1400 return nullptr;
1401
1402 // max (max X, C0), C1 --> max X, (max C0, C1)
1403 // min (min X, C0), C1 --> min X, (min C0, C1)
1404 // umax (smax X, nneg C0), nneg C1 --> smax X, (umax C0, C1)
1405 // smin (umin X, nneg C0), nneg C1 --> umin X, (smin C0, C1)
1406 Intrinsic::ID InnerMinMaxID = LHS->getIntrinsicID();
 // Differing IDs are only accepted for the two mixed-sign pairs above, and
 // only when both constants are known non-negative.
1407 if (InnerMinMaxID != MinMaxID &&
1408 !(((MinMaxID == Intrinsic::umax && InnerMinMaxID == Intrinsic::smax) ||
1409 (MinMaxID == Intrinsic::smin && InnerMinMaxID == Intrinsic::umin)) &&
1410 isKnownNonNegative(C0, SQ) && isKnownNonNegative(C1, SQ)))
1411 return nullptr;
1412
 // Fold the two constants as "C0 Pred C1 ? C0 : C1".
1414 Value *CondC = Builder.CreateICmp(Pred, C0, C1);
1415 Value *NewC = Builder.CreateSelect(CondC, C0, C1);
1416 return Builder.CreateIntrinsic(InnerMinMaxID, II->getType(),
1417 {LHS->getArgOperand(0), NewC});
1418}
1419
1420/// If this min/max has a matching min/max operand with a constant, try to push
1421/// the constant operand into this instruction. This can enable more folds.
///
/// Returns the new, not yet inserted outer call, or nullptr if the pattern
/// does not match.
/// NOTE(review): embedded lines 1423, 1429, 1431, 1439 and 1443 are not present
/// in this listing, so the head of the declarator and parts of the match and
/// of the bail-out condition are not visible here.
1422static Instruction *
1424 InstCombiner::BuilderTy &Builder) {
1425 // Match and capture a min/max operand candidate.
1426 Value *X, *Y;
1427 Constant *C;
1428 Instruction *Inner;
1430 m_Instruction(Inner),
1432 m_Value(Y))))
1433 return nullptr;
1434
1435 // The inner op must match. Check for constants to avoid infinite loops.
1436 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1437 auto *InnerMM = dyn_cast<IntrinsicInst>(Inner);
1438 if (!InnerMM || InnerMM->getIntrinsicID() != MinMaxID ||
1440 return nullptr;
1441
1442 // max (max X, C), Y --> max (max X, Y), C
1444 MinMaxID, II->getType());
1445 Value *NewInner = Builder.CreateBinaryIntrinsic(MinMaxID, X, Y);
 // The rebuilt inner call inherits the old inner call's name.
1446 NewInner->takeName(Inner);
1447 return CallInst::Create(MinMax, {NewInner, C});
1448}
1449
1450/// Reduce a sequence of min/max intrinsics with a common operand.
///
/// Both operands of \p II must be calls to the same min/max intrinsic as
/// \p II, at least one of them single-use, and they must share an operand.
/// The result reuses one inner call unchanged and combines it with the
/// non-shared operand of the other. Returns the new, not yet inserted call, or
/// nullptr.
/// NOTE(review): the declarator (embedded line 1451) is not present in this
/// listing.
1452 // Match 3 of the same min/max ops. Example: umin(umin(), umin()).
1453 auto *LHS = dyn_cast<IntrinsicInst>(II->getArgOperand(0));
1454 auto *RHS = dyn_cast<IntrinsicInst>(II->getArgOperand(1));
1455 Intrinsic::ID MinMaxID = II->getIntrinsicID();
1456 if (!LHS || !RHS || LHS->getIntrinsicID() != MinMaxID ||
1457 RHS->getIntrinsicID() != MinMaxID ||
1458 (!LHS->hasOneUse() && !RHS->hasOneUse()))
1459 return nullptr;
1460
1461 Value *A = LHS->getArgOperand(0);
1462 Value *B = LHS->getArgOperand(1);
1463 Value *C = RHS->getArgOperand(0);
1464 Value *D = RHS->getArgOperand(1);
1465
1466 // Look for a common operand.
1467 Value *MinMaxOp = nullptr;
1468 Value *ThirdOp = nullptr;
1469 if (LHS->hasOneUse()) {
1470 // If the LHS is only used in this chain and the RHS is used outside of it,
1471 // reuse the RHS min/max because that will eliminate the LHS.
1472 if (D == A || C == A) {
1473 // min(min(a, b), min(c, a)) --> min(min(c, a), b)
1474 // min(min(a, b), min(a, d)) --> min(min(a, d), b)
1475 MinMaxOp = RHS;
1476 ThirdOp = B;
1477 } else if (D == B || C == B) {
1478 // min(min(a, b), min(c, b)) --> min(min(c, b), a)
1479 // min(min(a, b), min(b, d)) --> min(min(b, d), a)
1480 MinMaxOp = RHS;
1481 ThirdOp = A;
1482 }
1483 } else {
1484 assert(RHS->hasOneUse() && "Expected one-use operand");
1485 // Reuse the LHS. This will eliminate the RHS.
1486 if (D == A || D == B) {
1487 // min(min(a, b), min(c, a)) --> min(min(a, b), c)
1488 // min(min(a, b), min(c, b)) --> min(min(a, b), c)
1489 MinMaxOp = LHS;
1490 ThirdOp = C;
1491 } else if (C == A || C == B) {
1492 // min(min(a, b), min(b, d)) --> min(min(a, b), d)
1493 // min(min(a, b), min(a, d)) --> min(min(a, b), d)
1494 MinMaxOp = LHS;
1495 ThirdOp = D;
1496 }
1497 }
1498
 // No shared operand was found.
1499 if (!MinMaxOp || !ThirdOp)
1500 return nullptr;
1501
1502 Module *Mod = II->getModule();
1503 Function *MinMax =
1504 Intrinsic::getOrInsertDeclaration(Mod, MinMaxID, II->getType());
1505 return CallInst::Create(MinMax, { MinMaxOp, ThirdOp });
1506}
1507
1508/// If all arguments of the intrinsic are unary shuffles with the same mask,
1509/// try to shuffle after the intrinsic.
///
/// Only applies to vector-typed, trivially vectorizable intrinsics whose
/// callee is speculatable. Arguments that the intrinsic takes as scalars are
/// passed through untouched; constant arguments are "unshuffled" so that
/// applying the mask afterwards reproduces them. Returns the new, not yet
/// inserted shuffle, or nullptr.
/// NOTE(review): embedded lines 1510-1511 (the declarator) and 1535 (the
/// declaration of NewArgs) are not present in this listing.
1512 if (!II->getType()->isVectorTy() ||
1513 !isTriviallyVectorizable(II->getIntrinsicID()) ||
1514 !II->getCalledFunction()->isSpeculatable())
1515 return nullptr;
1516
1517 Value *X;
1518 Constant *C;
1519 ArrayRef<int> Mask;
 // Use the first argument that is neither a constant nor a scalar operand to
 // establish the reference mask and source type.
 // NOTE(review): find_if_not yields an iterator, which equals the end of the
 // range (not null) when every argument is skipped; confirm that the
 // "!NonConstArg" test below covers that case as intended.
1520 auto *NonConstArg = find_if_not(II->args(), [&II](Use &Arg) {
1521 return isa<Constant>(Arg.get()) ||
1522 isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1523 Arg.getOperandNo(), nullptr);
1524 });
1525 if (!NonConstArg ||
1526 !match(NonConstArg, m_Shuffle(m_Value(X), m_Poison(), m_Mask(Mask))))
1527 return nullptr;
1528
1529 // At least 1 operand must be a shuffle with 1 use because we are creating 2
1530 // instructions.
1531 if (none_of(II->args(), match_fn(m_OneUse(m_Shuffle(m_Value(), m_Value())))))
1532 return nullptr;
1533
1534 // See if all arguments are shuffled with the same mask.
1536 Type *SrcTy = X->getType();
1537 for (Use &Arg : II->args()) {
1538 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1539 Arg.getOperandNo(), nullptr))
1540 NewArgs.push_back(Arg);
1541 else if (match(&Arg,
1542 m_Shuffle(m_Value(X), m_Poison(), m_SpecificMask(Mask))) &&
1543 X->getType() == SrcTy)
1544 NewArgs.push_back(X);
1545 else if (match(&Arg, m_ImmConstant(C))) {
1546 // If it's a constant, try find the constant that would be shuffled to C.
1547 if (Constant *ShuffledC =
1548 unshuffleConstant(Mask, C, cast<VectorType>(SrcTy)))
1549 NewArgs.push_back(ShuffledC);
1550 else
1551 return nullptr;
1552 } else
1553 return nullptr;
1554 }
1555
1556 // intrinsic (shuf X, M), (shuf Y, M), ... --> shuf (intrinsic X, Y, ...), M
 // FPI is passed to the builder only when II is a floating-point operation.
1557 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1558 // Result type might be a different vector width.
1559 // TODO: Check that the result type isn't widened?
1560 VectorType *ResTy =
1561 VectorType::get(II->getType()->getScalarType(), cast<VectorType>(SrcTy));
1562 Value *NewIntrinsic =
1563 Builder.CreateIntrinsic(ResTy, II->getIntrinsicID(), NewArgs, FPI);
1564 return new ShuffleVectorInst(NewIntrinsic, Mask);
1565}
1566
1567/// If all arguments of the intrinsic are reverses, try to pull the reverse
1568/// after the intrinsic.
///
/// Only applies to vector-typed, trivially vectorizable intrinsics. Arguments
/// the intrinsic takes as scalars and splat values are passed through
/// unchanged, reversed arguments are replaced by their source, and other
/// immediate constants are reversed. Returns the reverse of the new intrinsic
/// call, or nullptr.
/// NOTE(review): the declarator (embedded line 1569) is not present in this
/// listing.
1570 if (!II->getType()->isVectorTy() ||
1571 !isTriviallyVectorizable(II->getIntrinsicID()))
1572 return nullptr;
1573
1574 // At least 1 operand must be a reverse with 1 use because we are creating 2
1575 // instructions.
1576 if (none_of(II->args(), [](Value *V) {
1577 return match(V, m_OneUse(m_VecReverse(m_Value())));
1578 }))
1579 return nullptr;
1580
1581 Value *X;
1582 Constant *C;
1583 SmallVector<Value *> NewArgs;
1584 for (Use &Arg : II->args()) {
1585 if (isVectorIntrinsicWithScalarOpAtArg(II->getIntrinsicID(),
1586 Arg.getOperandNo(), nullptr))
1587 NewArgs.push_back(Arg);
1588 else if (match(&Arg, m_VecReverse(m_Value(X))))
1589 NewArgs.push_back(X);
 // A splat is passed through as-is.
1590 else if (isSplatValue(Arg))
1591 NewArgs.push_back(Arg);
1592 else if (match(&Arg, m_ImmConstant(C)))
1593 NewArgs.push_back(Builder.CreateVectorReverse(C));
1594 else
1595 return nullptr;
1596 }
1597
1598 // intrinsic (reverse X), (reverse Y), ... --> reverse (intrinsic X, Y, ...)
 // FPI is passed to the builder only when II is a floating-point operation.
1599 Instruction *FPI = isa<FPMathOperator>(II) ? II : nullptr;
1600 Instruction *NewIntrinsic = Builder.CreateIntrinsic(
1601 II->getType(), II->getIntrinsicID(), NewArgs, FPI);
1602 return Builder.CreateVectorReverse(NewIntrinsic);
1603}
1604
1605/// Fold the following cases and accepts bswap and bitreverse intrinsics:
1606/// bswap(logic_op(bswap(x), y)) --> logic_op(x, bswap(y))
1607/// bswap(logic_op(bswap(x), bswap(y))) --> logic_op(x, y) (ignores multiuse)
///
/// Returns the new, not yet inserted logic operation, or nullptr if none of
/// the patterns match.
/// NOTE(review): embedded lines 1609, 1617, 1618 and 1620 are not present in
/// this listing; they presumably hold the head of the declarator, the match
/// that binds X and Y, and the definition of Op — confirm against the real
/// source. The closing brace on line 1640 belongs to a construct opened on one
/// of those lines.
1608template <Intrinsic::ID IntrID>
1610 InstCombiner::BuilderTy &Builder) {
1611 static_assert(IntrID == Intrinsic::bswap || IntrID == Intrinsic::bitreverse,
1612 "This helper only supports BSWAP and BITREVERSE intrinsics");
1613
1614 Value *X, *Y;
1615 // Find bitwise logic op. Check that it is a BinaryOperator explicitly so we
1616 // don't match ConstantExpr that aren't meaningful for this transform.
1619 Value *OldReorderX, *OldReorderY;
1621
1622 // If both X and Y are bswap/bitreverse, the transform reduces the number
1623 // of instructions even if there's multiuse.
1624 // If only one operand is bswap/bitreverse, we need to ensure the operand
1625 // have only one use.
1626 if (match(X, m_Intrinsic<IntrID>(m_Value(OldReorderX))) &&
1627 match(Y, m_Intrinsic<IntrID>(m_Value(OldReorderY)))) {
1628 return BinaryOperator::Create(Op, OldReorderX, OldReorderY);
1629 }
1630
 // Only X is reordered: strip its reorder and apply a new one to Y.
1631 if (match(X, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderX))))) {
1632 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, Y);
1633 return BinaryOperator::Create(Op, OldReorderX, NewReorder);
1634 }
1635
 // Only Y is reordered: the mirror image of the case above.
1636 if (match(Y, m_OneUse(m_Intrinsic<IntrID>(m_Value(OldReorderY))))) {
1637 Value *NewReorder = Builder.CreateUnaryIntrinsic(IntrID, X);
1638 return BinaryOperator::Create(Op, NewReorder, OldReorderY);
1639 }
1640 }
1641 return nullptr;
1642}
1643
1644/// Helper to match idempotent binary intrinsics, namely, intrinsics where
1645/// `f(f(x, y), y) == f(x, y)` holds.
///
/// Returns true for the integer and floating-point min/max intrinsic IDs
/// listed in the switch, false for every other ID.
/// NOTE(review): the declarator (embedded line 1646) is not present in this
/// listing.
1647 switch (IID) {
1648 case Intrinsic::smax:
1649 case Intrinsic::smin:
1650 case Intrinsic::umax:
1651 case Intrinsic::umin:
1652 case Intrinsic::maximum:
1653 case Intrinsic::minimum:
1654 case Intrinsic::maximumnum:
1655 case Intrinsic::minimumnum:
1656 case Intrinsic::maxnum:
1657 case Intrinsic::minnum:
1658 return true;
1659 default:
1660 return false;
1661 }
1662}
1663
1664/// Attempt to simplify value-accumulating recurrences of kind:
1665/// %umax.acc = phi i8 [ %umax, %backedge ], [ %a, %entry ]
1666/// %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
1667/// And let the idempotent binary intrinsic be hoisted, when the operands are
1668/// known to be loop-invariant.
///
/// Returns the replacement call built as `IID(Init, OtherOp)`, with \p II's
/// fast-math flags copied onto it when it is a floating-point operation, or
/// nullptr when the pattern does not match.
/// NOTE(review): embedded lines 1669 (head of the declarator) and 1678 (one
/// clause of the bail-out condition, presumably the one that binds PN, Init
/// and OtherOp) are not present in this listing.
1670 IntrinsicInst *II) {
1671 PHINode *PN;
1672 Value *Init, *OtherOp;
1673
1674 // A binary intrinsic recurrence with loop-invariant operands is equivalent to
1675 // `call @llvm.binary.intrinsic(Init, OtherOp)`.
1676 auto IID = II->getIntrinsicID();
 // OtherOp must dominate the phi so that it is available outside the
 // recurrence.
1677 if (!isIdempotentBinaryIntrinsic(IID) ||
1679 !IC.getDominatorTree().dominates(OtherOp, PN))
1680 return nullptr;
1681
1682 auto *InvariantBinaryInst =
1683 IC.Builder.CreateBinaryIntrinsic(IID, Init, OtherOp);
1684 if (isa<FPMathOperator>(InvariantBinaryInst))
1685 cast<Instruction>(InvariantBinaryInst)->copyFastMathFlags(II);
1686 return InvariantBinaryInst;
1687}
1688
1689static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
1690 if (!CanReorderLanes)
1691 return nullptr;
1692
1693 Value *V;
1694 if (match(Arg, m_VecReverse(m_Value(V))))
1695 return V;
1696
1697 ArrayRef<int> Mask;
1698 if (!isa<FixedVectorType>(Arg->getType()) ||
1699 !match(Arg, m_Shuffle(m_Value(V), m_Undef(), m_Mask(Mask))) ||
1700 !cast<ShuffleVectorInst>(Arg)->isSingleSource())
1701 return nullptr;
1702
1703 int Sz = Mask.size();
1704 SmallBitVector UsedIndices(Sz);
1705 for (int Idx : Mask) {
1706 if (Idx == PoisonMaskElem || UsedIndices.test(Idx))
1707 return nullptr;
1708 UsedIndices.set(Idx);
1709 }
1710
1711 // Can remove shuffle iff just shuffled elements, no repeats, undefs, or
1712 // other changes.
1713 return UsedIndices.all() ? V : nullptr;
1714}
1715
1716/// Fold an unsigned minimum of trailing or leading zero bits counts:
1717/// umin(cttz(CtOp1, ZeroUndef), ConstOp) --> cttz(CtOp1 | (1 << ConstOp))
1718/// umin(ctlz(CtOp1, ZeroUndef), ConstOp) --> ctlz(CtOp1 | (SignedMin
1719/// >> ConstOp))
1720/// umin(cttz(CtOp1), cttz(CtOp2)) --> cttz(CtOp1 | CtOp2)
1721/// umin(ctlz(CtOp1), ctlz(CtOp2)) --> ctlz(CtOp1 | CtOp2)
///
/// \p I0 must be a one-use cttz/ctlz call. \p I1 is either another one-use
/// call of the same intrinsic or a constant whose elements are all below the
/// bit width. Returns the replacement count intrinsic, or nullptr.
/// NOTE(review): embedded lines 1724 (head of the declarator) and 1750 (the
/// start of the NewConst definition) are not present in this listing.
1722template <Intrinsic::ID IntrID>
1723static Value *
1725 const DataLayout &DL,
1726 InstCombiner::BuilderTy &Builder) {
1727 static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1728 "This helper only supports cttz and ctlz intrinsics");
1729
1730 Value *CtOp1, *CtOp2;
1731 Value *ZeroUndef1, *ZeroUndef2;
1732 if (!match(I0, m_OneUse(
1733 m_Intrinsic<IntrID>(m_Value(CtOp1), m_Value(ZeroUndef1)))))
1734 return nullptr;
1735
 // Two counts: count the OR of the inputs; the second argument of the new
 // call is the OR of the two original second arguments.
1736 if (match(I1,
1737 m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp2), m_Value(ZeroUndef2)))))
1738 return Builder.CreateBinaryIntrinsic(
1739 IntrID, Builder.CreateOr(CtOp1, CtOp2),
1740 Builder.CreateOr(ZeroUndef1, ZeroUndef2));
1741
1742 unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1743 auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1744 if (!match(I1, m_CheckedInt(LessBitWidth)))
1745 // We have a constant >= BitWidth (which can be handled by CVP)
1746 // or a non-splat vector with elements < and >= BitWidth
1747 return nullptr;
1748
1749 Type *Ty = I1->getType();
 // The bit to OR in: 1 shifted left for cttz, the sign bit shifted right for
 // ctlz, in both cases by the constant I1.
1751 IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1752 IntrID == Intrinsic::cttz
1753 ? ConstantInt::get(Ty, 1)
1754 : ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1755 cast<Constant>(I1), DL);
 // The new call's second argument is the constant true.
1756 return Builder.CreateBinaryIntrinsic(
1757 IntrID, Builder.CreateOr(CtOp1, NewConst),
1758 ConstantInt::getTrue(ZeroUndef1->getType()));
1759}
1760
1761/// Return whether "X LOp (Y ROp Z)" is always equal to
1762/// "(X LOp Y) ROp (X LOp Z)".
///
/// For unsigned min/max this holds for add and shl with nuw; for signed
/// min/max it holds for add with nsw. Every other combination yields false.
/// NOTE(review): the head of the declarator (embedded line 1763) is not
/// present in this listing.
1764 bool HasNSW, Intrinsic::ID ROp) {
1765 switch (ROp) {
1766 case Intrinsic::umax:
1767 case Intrinsic::umin:
1768 if (HasNUW && LOp == Instruction::Add)
1769 return true;
1770 if (HasNUW && LOp == Instruction::Shl)
1771 return true;
1772 return false;
1773 case Intrinsic::smax:
1774 case Intrinsic::smin:
1775 return HasNSW && LOp == Instruction::Add;
1776 default:
1777 return false;
1778 }
1779}
1780
1781/// Return whether "(X ROp Y) LOp Z" is always equal to
1782/// "(X LOp Z) ROp (Y LOp Z)".
1784 bool HasNSW, Intrinsic::ID ROp) {
1785 if (Instruction::isCommutative(LOp) || LOp == Instruction::Shl)
1786 return leftDistributesOverRight(LOp, HasNUW, HasNSW, ROp);
1787 switch (ROp) {
1788 case Intrinsic::umax:
1789 case Intrinsic::umin:
1790 return HasNUW && LOp == Instruction::Sub;
1791 case Intrinsic::smax:
1792 case Intrinsic::smin:
1793 return HasNSW && LOp == Instruction::Sub;
1794 default:
1795 return false;
1796 }
1797}
1798
1799// Attempts to factorise a common term
1800// in an instruction that has the form "(A op' B) op (C op' D)
1801// where op is an intrinsic and op' is a binop
1802static Value *
1804 InstCombiner::BuilderTy &Builder) {
1805 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1806 Intrinsic::ID TopLevelOpcode = II->getIntrinsicID();
1807
1810
1811 if (!Op0 || !Op1)
1812 return nullptr;
1813
1814 if (Op0->getOpcode() != Op1->getOpcode())
1815 return nullptr;
1816
1817 if (!Op0->hasOneUse() || !Op1->hasOneUse())
1818 return nullptr;
1819
1820 Instruction::BinaryOps InnerOpcode =
1821 static_cast<Instruction::BinaryOps>(Op0->getOpcode());
1822 bool HasNUW = Op0->hasNoUnsignedWrap() && Op1->hasNoUnsignedWrap();
1823 bool HasNSW = Op0->hasNoSignedWrap() && Op1->hasNoSignedWrap();
1824
1825 Value *A = Op0->getOperand(0);
1826 Value *B = Op0->getOperand(1);
1827 Value *C = Op1->getOperand(0);
1828 Value *D = Op1->getOperand(1);
1829
1830 // Attempts to swap variables such that A equals C or B equals D,
1831 // if the inner operation is commutative.
1832 if (Op0->isCommutative() && A != C && B != D) {
1833 if (A == D || B == C)
1834 std::swap(C, D);
1835 else
1836 return nullptr;
1837 }
1838
1839 BinaryOperator *NewBinop;
1840 if (A == C &&
1841 leftDistributesOverRight(InnerOpcode, HasNUW, HasNSW, TopLevelOpcode)) {
1842 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, B, D);
1843 NewBinop =
1844 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, A, NewIntrinsic));
1845 } else if (B == D && rightDistributesOverLeft(InnerOpcode, HasNUW, HasNSW,
1846 TopLevelOpcode)) {
1847 Value *NewIntrinsic = Builder.CreateBinaryIntrinsic(TopLevelOpcode, A, C);
1848 NewBinop =
1849 cast<BinaryOperator>(Builder.CreateBinOp(InnerOpcode, NewIntrinsic, B));
1850 } else {
1851 return nullptr;
1852 }
1853
1854 NewBinop->setHasNoUnsignedWrap(HasNUW);
1855 NewBinop->setHasNoSignedWrap(HasNSW);
1856
1857 return NewBinop;
1858}
1859
1861 Value *Arg0 = II->getArgOperand(0);
1862 auto *ShiftConst = dyn_cast<Constant>(II->getArgOperand(1));
1863 if (!ShiftConst)
1864 return nullptr;
1865
1866 int ElemBits = Arg0->getType()->getScalarSizeInBits();
1867 bool AllPositive = true;
1868 bool AllNegative = true;
1869
1870 auto Check = [&](Constant *C) -> bool {
1871 if (auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
1872 const APInt &V = CI->getValue();
1873 if (V.isNonNegative()) {
1874 AllNegative = false;
1875 return AllPositive && V.ult(ElemBits);
1876 }
1877 AllPositive = false;
1878 return AllNegative && V.sgt(-ElemBits);
1879 }
1880 return false;
1881 };
1882
1883 if (auto *VTy = dyn_cast<FixedVectorType>(Arg0->getType())) {
1884 for (unsigned I = 0, E = VTy->getNumElements(); I < E; ++I) {
1885 if (!Check(ShiftConst->getAggregateElement(I)))
1886 return nullptr;
1887 }
1888
1889 } else if (!Check(ShiftConst))
1890 return nullptr;
1891
1892 IRBuilderBase &B = IC.Builder;
1893 if (AllPositive)
1894 return IC.replaceInstUsesWith(*II, B.CreateShl(Arg0, ShiftConst));
1895
1896 Value *NegAmt = B.CreateNeg(ShiftConst);
1897 Intrinsic::ID IID = II->getIntrinsicID();
1898 const bool IsSigned =
1899 IID == Intrinsic::arm_neon_vshifts || IID == Intrinsic::aarch64_neon_sshl;
1900 Value *Result =
1901 IsSigned ? B.CreateAShr(Arg0, NegAmt) : B.CreateLShr(Arg0, NegAmt);
1902 return IC.replaceInstUsesWith(*II, Result);
1903}
1904
1905/// CallInst simplification. This mostly only handles folding of intrinsic
1906/// instructions. For normal calls, it allows visitCallBase to do the heavy
1907/// lifting.
1909 // Don't try to simplify calls without uses. It will not do anything useful,
1910 // but will result in the following folds being skipped.
1911 if (!CI.use_empty()) {
1912 SmallVector<Value *, 8> Args(CI.args());
1913 if (Value *V = simplifyCall(&CI, CI.getCalledOperand(), Args,
1914 SQ.getWithInstruction(&CI)))
1915 return replaceInstUsesWith(CI, V);
1916 }
1917
1918 if (Value *FreedOp = getFreedOperand(&CI, &TLI))
1919 return visitFree(CI, FreedOp);
1920
1921 // If the caller function (i.e. us, the function that contains this CallInst)
1922 // is nounwind, mark the call as nounwind, even if the callee isn't.
1923 if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
1924 CI.setDoesNotThrow();
1925 return &CI;
1926 }
1927
1929 if (!II)
1930 return visitCallBase(CI);
1931
1932 // Intrinsics cannot occur in an invoke or a callbr, so handle them here
1933 // instead of in visitCallBase.
1934 if (auto *MI = dyn_cast<AnyMemIntrinsic>(II)) {
1935 if (auto NumBytes = MI->getLengthInBytes()) {
1936 // memmove/cpy/set of zero bytes is a noop.
1937 if (NumBytes->isZero())
1938 return eraseInstFromFunction(CI);
1939
1940 // For atomic unordered mem intrinsics if len is not a positive or
1941 // not a multiple of element size then behavior is undefined.
1942 if (MI->isAtomic() &&
1943 (NumBytes->isNegative() ||
1944 (NumBytes->getZExtValue() % MI->getElementSizeInBytes() != 0))) {
1946 assert(MI->getType()->isVoidTy() &&
1947 "non void atomic unordered mem intrinsic");
1948 return eraseInstFromFunction(*MI);
1949 }
1950 }
1951
1952 // No other transformations apply to volatile transfers.
1953 if (MI->isVolatile())
1954 return nullptr;
1955
1957 // memmove(x,x,size) -> noop.
1958 if (MTI->getSource() == MTI->getDest())
1959 return eraseInstFromFunction(CI);
1960 }
1961
1962 auto IsPointerUndefined = [MI](Value *Ptr) {
1963 return isa<ConstantPointerNull>(Ptr) &&
1965 MI->getFunction(),
1966 cast<PointerType>(Ptr->getType())->getAddressSpace());
1967 };
1968 bool SrcIsUndefined = false;
1969 // If we can determine a pointer alignment that is bigger than currently
1970 // set, update the alignment.
1971 if (auto *MTI = dyn_cast<AnyMemTransferInst>(MI)) {
1973 return I;
1974 SrcIsUndefined = IsPointerUndefined(MTI->getRawSource());
1975 } else if (auto *MSI = dyn_cast<AnyMemSetInst>(MI)) {
1976 if (Instruction *I = SimplifyAnyMemSet(MSI))
1977 return I;
1978 }
1979
1980 // If src/dest is null, this memory intrinsic must be a noop.
1981 if (SrcIsUndefined || IsPointerUndefined(MI->getRawDest())) {
1982 Builder.CreateAssumption(Builder.CreateIsNull(MI->getLength()));
1983 return eraseInstFromFunction(CI);
1984 }
1985
1986 // If we have a memmove and the source operation is a constant global,
1987 // then the source and dest pointers can't alias, so we can change this
1988 // into a call to memcpy.
1989 if (auto *MMI = dyn_cast<AnyMemMoveInst>(MI)) {
1990 if (GlobalVariable *GVSrc = dyn_cast<GlobalVariable>(MMI->getSource()))
1991 if (GVSrc->isConstant()) {
1992 Module *M = CI.getModule();
1993 Intrinsic::ID MemCpyID =
1994 MMI->isAtomic()
1995 ? Intrinsic::memcpy_element_unordered_atomic
1996 : Intrinsic::memcpy;
1997 Type *Tys[3] = { CI.getArgOperand(0)->getType(),
1998 CI.getArgOperand(1)->getType(),
1999 CI.getArgOperand(2)->getType() };
2001 Intrinsic::getOrInsertDeclaration(M, MemCpyID, Tys));
2002 return II;
2003 }
2004 }
2005 }
2006
2007 // For fixed width vector result intrinsics, use the generic demanded vector
2008 // support.
2009 if (auto *IIFVTy = dyn_cast<FixedVectorType>(II->getType())) {
2010 auto VWidth = IIFVTy->getNumElements();
2011 APInt PoisonElts(VWidth, 0);
2012 APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
2013 if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, PoisonElts)) {
2014 if (V != II)
2015 return replaceInstUsesWith(*II, V);
2016 return II;
2017 }
2018 }
2019
2020 if (II->isCommutative()) {
2021 if (auto Pair = matchSymmetricPair(II->getOperand(0), II->getOperand(1))) {
2022 replaceOperand(*II, 0, Pair->first);
2023 replaceOperand(*II, 1, Pair->second);
2024 return II;
2025 }
2026
2027 if (CallInst *NewCall = canonicalizeConstantArg0ToArg1(CI))
2028 return NewCall;
2029 }
2030
2031 // Unused constrained FP intrinsic calls may have declared side effect, which
2032 // prevents it from being removed. In some cases however the side effect is
2033 // actually absent. To detect this case, call SimplifyConstrainedFPCall. If it
2034 // returns a replacement, the call may be removed.
2035 if (CI.use_empty() && isa<ConstrainedFPIntrinsic>(CI)) {
2036 if (simplifyConstrainedFPCall(&CI, SQ.getWithInstruction(&CI)))
2037 return eraseInstFromFunction(CI);
2038 }
2039
2040 Intrinsic::ID IID = II->getIntrinsicID();
2041 switch (IID) {
2042 case Intrinsic::objectsize: {
2043 SmallVector<Instruction *> InsertedInstructions;
2044 if (Value *V = lowerObjectSizeCall(II, DL, &TLI, AA, /*MustSucceed=*/false,
2045 &InsertedInstructions)) {
2046 for (Instruction *Inserted : InsertedInstructions)
2047 Worklist.add(Inserted);
2048 return replaceInstUsesWith(CI, V);
2049 }
2050 return nullptr;
2051 }
2052 case Intrinsic::abs: {
2053 Value *IIOperand = II->getArgOperand(0);
2054 bool IntMinIsPoison = cast<Constant>(II->getArgOperand(1))->isOneValue();
2055
2056 // abs(-x) -> abs(x)
2057 Value *X;
2058 if (match(IIOperand, m_Neg(m_Value(X)))) {
2059 if (cast<Instruction>(IIOperand)->hasNoSignedWrap() || IntMinIsPoison)
2060 replaceOperand(*II, 1, Builder.getTrue());
2061 return replaceOperand(*II, 0, X);
2062 }
2063 if (match(IIOperand, m_c_Select(m_Neg(m_Value(X)), m_Deferred(X))))
2064 return replaceOperand(*II, 0, X);
2065
2066 Value *Y;
2067 // abs(a * abs(b)) -> abs(a * b)
2068 if (match(IIOperand,
2071 bool NSW =
2072 cast<Instruction>(IIOperand)->hasNoSignedWrap() && IntMinIsPoison;
2073 auto *XY = NSW ? Builder.CreateNSWMul(X, Y) : Builder.CreateMul(X, Y);
2074 return replaceOperand(*II, 0, XY);
2075 }
2076
2077 if (std::optional<bool> Known =
2078 getKnownSignOrZero(IIOperand, SQ.getWithInstruction(II))) {
2079 // abs(x) -> x if x >= 0 (include abs(x-y) --> x - y where x >= y)
2080 // abs(x) -> x if x > 0 (include abs(x-y) --> x - y where x > y)
2081 if (!*Known)
2082 return replaceInstUsesWith(*II, IIOperand);
2083
2084 // abs(x) -> -x if x < 0
2085 // abs(x) -> -x if x < = 0 (include abs(x-y) --> y - x where x <= y)
2086 if (IntMinIsPoison)
2087 return BinaryOperator::CreateNSWNeg(IIOperand);
2088 return BinaryOperator::CreateNeg(IIOperand);
2089 }
2090
2091 // abs (sext X) --> zext (abs X*)
2092 // Clear the IsIntMin (nsw) bit on the abs to allow narrowing.
2093 if (match(IIOperand, m_OneUse(m_SExt(m_Value(X))))) {
2094 Value *NarrowAbs =
2095 Builder.CreateBinaryIntrinsic(Intrinsic::abs, X, Builder.getFalse());
2096 return CastInst::Create(Instruction::ZExt, NarrowAbs, II->getType());
2097 }
2098
2099 // Match a complicated way to check if a number is odd/even:
2100 // abs (srem X, 2) --> and X, 1
2101 const APInt *C;
2102 if (match(IIOperand, m_SRem(m_Value(X), m_APInt(C))) && *C == 2)
2103 return BinaryOperator::CreateAnd(X, ConstantInt::get(II->getType(), 1));
2104
2105 break;
2106 }
2107 case Intrinsic::umin: {
2108 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2109 // umin(x, 1) == zext(x != 0)
2110 if (match(I1, m_One())) {
2111 assert(II->getType()->getScalarSizeInBits() != 1 &&
2112 "Expected simplify of umin with max constant");
2113 Value *Zero = Constant::getNullValue(I0->getType());
2114 Value *Cmp = Builder.CreateICmpNE(I0, Zero);
2115 return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
2116 }
2117 // umin(cttz(x), const) --> cttz(x | (1 << const))
2118 if (Value *FoldedCttz =
2120 I0, I1, DL, Builder))
2121 return replaceInstUsesWith(*II, FoldedCttz);
2122 // umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
2123 if (Value *FoldedCtlz =
2125 I0, I1, DL, Builder))
2126 return replaceInstUsesWith(*II, FoldedCtlz);
2127 [[fallthrough]];
2128 }
2129 case Intrinsic::umax: {
2130 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2131 Value *X, *Y;
2132 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_ZExt(m_Value(Y))) &&
2133 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2134 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2135 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2136 }
2137 Constant *C;
2138 if (match(I0, m_ZExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2139 I0->hasOneUse()) {
2140 if (Constant *NarrowC = getLosslessUnsignedTrunc(C, X->getType(), DL)) {
2141 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2142 return CastInst::Create(Instruction::ZExt, NarrowMaxMin, II->getType());
2143 }
2144 }
2145 // If C is not 0:
2146 // umax(nuw_shl(x, C), x + 1) -> x == 0 ? 1 : nuw_shl(x, C)
2147 // If C is not 0 or 1:
2148 // umax(nuw_mul(x, C), x + 1) -> x == 0 ? 1 : nuw_mul(x, C)
2149 auto foldMaxMulShift = [&](Value *A, Value *B) -> Instruction * {
2150 const APInt *C;
2151 Value *X;
2152 if (!match(A, m_NUWShl(m_Value(X), m_APInt(C))) &&
2153 !(match(A, m_NUWMul(m_Value(X), m_APInt(C))) && !C->isOne()))
2154 return nullptr;
2155 if (C->isZero())
2156 return nullptr;
2157 if (!match(B, m_OneUse(m_Add(m_Specific(X), m_One()))))
2158 return nullptr;
2159
2160 Value *Cmp = Builder.CreateICmpEQ(X, ConstantInt::get(X->getType(), 0));
2161 Value *NewSelect = nullptr;
2162 NewSelect = Builder.CreateSelectWithUnknownProfile(
2163 Cmp, ConstantInt::get(X->getType(), 1), A, DEBUG_TYPE);
2164 return replaceInstUsesWith(*II, NewSelect);
2165 };
2166
2167 if (IID == Intrinsic::umax) {
2168 if (Instruction *I = foldMaxMulShift(I0, I1))
2169 return I;
2170 if (Instruction *I = foldMaxMulShift(I1, I0))
2171 return I;
2172 }
2173
2174 // If both operands of unsigned min/max are sign-extended, it is still ok
2175 // to narrow the operation.
2176 [[fallthrough]];
2177 }
2178 case Intrinsic::smax:
2179 case Intrinsic::smin: {
2180 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2181 Value *X, *Y;
2182 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_SExt(m_Value(Y))) &&
2183 (I0->hasOneUse() || I1->hasOneUse()) && X->getType() == Y->getType()) {
2184 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, Y);
2185 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2186 }
2187
2188 Constant *C;
2189 if (match(I0, m_SExt(m_Value(X))) && match(I1, m_Constant(C)) &&
2190 I0->hasOneUse()) {
2191 if (Constant *NarrowC = getLosslessSignedTrunc(C, X->getType(), DL)) {
2192 Value *NarrowMaxMin = Builder.CreateBinaryIntrinsic(IID, X, NarrowC);
2193 return CastInst::Create(Instruction::SExt, NarrowMaxMin, II->getType());
2194 }
2195 }
2196
2197 // smax(smin(X, MinC), MaxC) -> smin(smax(X, MaxC), MinC) if MinC s>= MaxC
2198 // umax(umin(X, MinC), MaxC) -> umin(umax(X, MaxC), MinC) if MinC u>= MaxC
2199 const APInt *MinC, *MaxC;
2200 auto CreateCanonicalClampForm = [&](bool IsSigned) {
2201 auto MaxIID = IsSigned ? Intrinsic::smax : Intrinsic::umax;
2202 auto MinIID = IsSigned ? Intrinsic::smin : Intrinsic::umin;
2203 Value *NewMax = Builder.CreateBinaryIntrinsic(
2204 MaxIID, X, ConstantInt::get(X->getType(), *MaxC));
2205 return replaceInstUsesWith(
2206 *II, Builder.CreateBinaryIntrinsic(
2207 MinIID, NewMax, ConstantInt::get(X->getType(), *MinC)));
2208 };
2209 if (IID == Intrinsic::smax &&
2211 m_APInt(MinC)))) &&
2212 match(I1, m_APInt(MaxC)) && MinC->sgt(*MaxC))
2213 return CreateCanonicalClampForm(true);
2214 if (IID == Intrinsic::umax &&
2216 m_APInt(MinC)))) &&
2217 match(I1, m_APInt(MaxC)) && MinC->ugt(*MaxC))
2218 return CreateCanonicalClampForm(false);
2219
2220 // umin(i1 X, i1 Y) -> and i1 X, Y
2221 // smax(i1 X, i1 Y) -> and i1 X, Y
2222 if ((IID == Intrinsic::umin || IID == Intrinsic::smax) &&
2223 II->getType()->isIntOrIntVectorTy(1)) {
2224 return BinaryOperator::CreateAnd(I0, I1);
2225 }
2226
2227 // umax(i1 X, i1 Y) -> or i1 X, Y
2228 // smin(i1 X, i1 Y) -> or i1 X, Y
2229 if ((IID == Intrinsic::umax || IID == Intrinsic::smin) &&
2230 II->getType()->isIntOrIntVectorTy(1)) {
2231 return BinaryOperator::CreateOr(I0, I1);
2232 }
2233
2234 // smin(smax(X, -1), 1) -> scmp(X, 0)
2235 // smax(smin(X, 1), -1) -> scmp(X, 0)
2236 // At this point, smax(smin(X, 1), -1) is changed to smin(smax(X, -1)
2237 // And i1's have been changed to and/ors
2238 // So we only need to check for smin
2239 if (IID == Intrinsic::smin) {
2240 if (match(I0, m_OneUse(m_SMax(m_Value(X), m_AllOnes()))) &&
2241 match(I1, m_One())) {
2242 Value *Zero = ConstantInt::get(X->getType(), 0);
2243 return replaceInstUsesWith(
2244 CI,
2245 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {X, Zero}));
2246 }
2247 }
2248
2249 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2250 // smax (neg nsw X), (neg nsw Y) --> neg nsw (smin X, Y)
2251 // smin (neg nsw X), (neg nsw Y) --> neg nsw (smax X, Y)
2252 // TODO: Canonicalize neg after min/max if I1 is constant.
2253 if (match(I0, m_NSWNeg(m_Value(X))) && match(I1, m_NSWNeg(m_Value(Y))) &&
2254 (I0->hasOneUse() || I1->hasOneUse())) {
2256 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, X, Y);
2257 return BinaryOperator::CreateNSWNeg(InvMaxMin);
2258 }
2259 }
2260
2261 // (umax X, (xor X, Pow2))
2262 // -> (or X, Pow2)
2263 // (umin X, (xor X, Pow2))
2264 // -> (and X, ~Pow2)
2265 // (smax X, (xor X, Pos_Pow2))
2266 // -> (or X, Pos_Pow2)
2267 // (smin X, (xor X, Pos_Pow2))
2268 // -> (and X, ~Pos_Pow2)
2269 // (smax X, (xor X, Neg_Pow2))
2270 // -> (and X, ~Neg_Pow2)
2271 // (smin X, (xor X, Neg_Pow2))
2272 // -> (or X, Neg_Pow2)
2273 if ((match(I0, m_c_Xor(m_Specific(I1), m_Value(X))) ||
2274 match(I1, m_c_Xor(m_Specific(I0), m_Value(X)))) &&
2275 isKnownToBeAPowerOfTwo(X, /* OrZero */ true)) {
2276 bool UseOr = IID == Intrinsic::smax || IID == Intrinsic::umax;
2277 bool UseAndN = IID == Intrinsic::smin || IID == Intrinsic::umin;
2278
2279 if (IID == Intrinsic::smax || IID == Intrinsic::smin) {
2280 auto KnownSign = getKnownSign(X, SQ.getWithInstruction(II));
2281 if (KnownSign == std::nullopt) {
2282 UseOr = false;
2283 UseAndN = false;
2284 } else if (*KnownSign /* true is Signed. */) {
2285 UseOr ^= true;
2286 UseAndN ^= true;
2287 Type *Ty = I0->getType();
2288 // Negative power of 2 must be IntMin. It's possible to be able to
2289 // prove negative / power of 2 without actually having known bits, so
2290 // just get the value by hand.
2292 Ty, APInt::getSignedMinValue(Ty->getScalarSizeInBits()));
2293 }
2294 }
2295 if (UseOr)
2296 return BinaryOperator::CreateOr(I0, X);
2297 else if (UseAndN)
2298 return BinaryOperator::CreateAnd(I0, Builder.CreateNot(X));
2299 }
2300
2301 // If we can eliminate ~A and Y is free to invert:
2302 // max ~A, Y --> ~(min A, ~Y)
2303 //
2304 // Examples:
2305 // max ~A, ~Y --> ~(min A, Y)
2306 // max ~A, C --> ~(min A, ~C)
2307 // max ~A, (max ~Y, ~Z) --> ~min( A, (min Y, Z))
2308 auto moveNotAfterMinMax = [&](Value *X, Value *Y) -> Instruction * {
2309 Value *A;
2310 if (match(X, m_OneUse(m_Not(m_Value(A)))) &&
2311 !isFreeToInvert(A, A->hasOneUse())) {
2312 if (Value *NotY = getFreelyInverted(Y, Y->hasOneUse(), &Builder)) {
2314 Value *InvMaxMin = Builder.CreateBinaryIntrinsic(InvID, A, NotY);
2315 return BinaryOperator::CreateNot(InvMaxMin);
2316 }
2317 }
2318 return nullptr;
2319 };
2320
2321 if (Instruction *I = moveNotAfterMinMax(I0, I1))
2322 return I;
2323 if (Instruction *I = moveNotAfterMinMax(I1, I0))
2324 return I;
2325
2327 return I;
2328
2329 // minmax (X & NegPow2C, Y & NegPow2C) --> minmax(X, Y) & NegPow2C
2330 const APInt *RHSC;
2331 if (match(I0, m_OneUse(m_And(m_Value(X), m_NegatedPower2(RHSC)))) &&
2332 match(I1, m_OneUse(m_And(m_Value(Y), m_SpecificInt(*RHSC)))))
2333 return BinaryOperator::CreateAnd(Builder.CreateBinaryIntrinsic(IID, X, Y),
2334 ConstantInt::get(II->getType(), *RHSC));
2335
2336 // smax(X, -X) --> abs(X)
2337 // smin(X, -X) --> -abs(X)
2338 // umax(X, -X) --> -abs(X)
2339 // umin(X, -X) --> abs(X)
2340 if (isKnownNegation(I0, I1)) {
2341 // We can choose either operand as the input to abs(), but if we can
2342 // eliminate the only use of a value, that's better for subsequent
2343 // transforms/analysis.
2344 if (I0->hasOneUse() && !I1->hasOneUse())
2345 std::swap(I0, I1);
2346
2347 // This is some variant of abs(). See if we can propagate 'nsw' to the abs
2348 // operation and potentially its negation.
2349 bool IntMinIsPoison = isKnownNegation(I0, I1, /* NeedNSW */ true);
2350 Value *Abs = Builder.CreateBinaryIntrinsic(
2351 Intrinsic::abs, I0,
2352 ConstantInt::getBool(II->getContext(), IntMinIsPoison));
2353
2354 // We don't have a "nabs" intrinsic, so negate if needed based on the
2355 // max/min operation.
2356 if (IID == Intrinsic::smin || IID == Intrinsic::umax)
2357 Abs = Builder.CreateNeg(Abs, "nabs", IntMinIsPoison);
2358 return replaceInstUsesWith(CI, Abs);
2359 }
2360
2362 return Sel;
2363
2364 if (Instruction *SAdd = matchSAddSubSat(*II))
2365 return SAdd;
2366
2367 if (Value *NewMinMax = reassociateMinMaxWithConstants(II, Builder, SQ))
2368 return replaceInstUsesWith(*II, NewMinMax);
2369
2371 return R;
2372
2373 if (Instruction *NewMinMax = factorizeMinMaxTree(II))
2374 return NewMinMax;
2375
2376 // Try to fold minmax with constant RHS based on range information
2377 if (match(I1, m_APIntAllowPoison(RHSC))) {
2378 ICmpInst::Predicate Pred =
2380 bool IsSigned = MinMaxIntrinsic::isSigned(IID);
2382 I0, IsSigned, SQ.getWithInstruction(II));
2383 if (!LHS_CR.isFullSet()) {
2384 if (LHS_CR.icmp(Pred, *RHSC))
2385 return replaceInstUsesWith(*II, I0);
2386 if (LHS_CR.icmp(ICmpInst::getSwappedPredicate(Pred), *RHSC))
2387 return replaceInstUsesWith(*II,
2388 ConstantInt::get(II->getType(), *RHSC));
2389 }
2390 }
2391
2393 return replaceInstUsesWith(*II, V);
2394
2395 break;
2396 }
2397 case Intrinsic::scmp: {
2398 Value *I0 = II->getArgOperand(0), *I1 = II->getArgOperand(1);
2399 Value *LHS, *RHS;
2400 if (match(I0, m_NSWSub(m_Value(LHS), m_Value(RHS))) && match(I1, m_Zero()))
2401 return replaceInstUsesWith(
2402 CI,
2403 Builder.CreateIntrinsic(II->getType(), Intrinsic::scmp, {LHS, RHS}));
2404 break;
2405 }
2406 case Intrinsic::bitreverse: {
2407 Value *IIOperand = II->getArgOperand(0);
2408 // bitrev (zext i1 X to ?) --> X ? SignBitC : 0
2409 Value *X;
2410 if (match(IIOperand, m_ZExt(m_Value(X))) &&
2411 X->getType()->isIntOrIntVectorTy(1)) {
2412 Type *Ty = II->getType();
2413 APInt SignBit = APInt::getSignMask(Ty->getScalarSizeInBits());
2414 return SelectInst::Create(X, ConstantInt::get(Ty, SignBit),
2416 }
2417
2418 if (Instruction *crossLogicOpFold =
2420 return crossLogicOpFold;
2421
2422 break;
2423 }
2424 case Intrinsic::bswap: {
2425 Value *IIOperand = II->getArgOperand(0);
2426
2427 // Try to canonicalize bswap-of-logical-shift-by-8-bit-multiple as
2428 // inverse-shift-of-bswap:
2429 // bswap (shl X, Y) --> lshr (bswap X), Y
2430 // bswap (lshr X, Y) --> shl (bswap X), Y
2431 Value *X, *Y;
2432 if (match(IIOperand, m_OneUse(m_LogicalShift(m_Value(X), m_Value(Y))))) {
2433 unsigned BitWidth = IIOperand->getType()->getScalarSizeInBits();
2435 Value *NewSwap = Builder.CreateUnaryIntrinsic(Intrinsic::bswap, X);
2436 BinaryOperator::BinaryOps InverseShift =
2437 cast<BinaryOperator>(IIOperand)->getOpcode() == Instruction::Shl
2438 ? Instruction::LShr
2439 : Instruction::Shl;
2440 return BinaryOperator::Create(InverseShift, NewSwap, Y);
2441 }
2442 }
2443
2444 KnownBits Known = computeKnownBits(IIOperand, II);
2445 uint64_t LZ = alignDown(Known.countMinLeadingZeros(), 8);
2446 uint64_t TZ = alignDown(Known.countMinTrailingZeros(), 8);
2447 unsigned BW = Known.getBitWidth();
2448
2449 // bswap(x) -> shift(x) if x has exactly one "active byte"
2450 if (BW - LZ - TZ == 8) {
2451 assert(LZ != TZ && "active byte cannot be in the middle");
2452 if (LZ > TZ) // -> shl(x) if the "active byte" is in the low part of x
2453 return BinaryOperator::CreateNUWShl(
2454 IIOperand, ConstantInt::get(IIOperand->getType(), LZ - TZ));
2455 // -> lshr(x) if the "active byte" is in the high part of x
2456 return BinaryOperator::CreateExactLShr(
2457 IIOperand, ConstantInt::get(IIOperand->getType(), TZ - LZ));
2458 }
2459
2460 // bswap(trunc(bswap(x))) -> trunc(lshr(x, c))
2461 if (match(IIOperand, m_Trunc(m_BSwap(m_Value(X))))) {
2462 unsigned C = X->getType()->getScalarSizeInBits() - BW;
2463 Value *CV = ConstantInt::get(X->getType(), C);
2464 Value *V = Builder.CreateLShr(X, CV);
2465 return new TruncInst(V, IIOperand->getType());
2466 }
2467
2468 if (Instruction *crossLogicOpFold =
2470 return crossLogicOpFold;
2471 }
2472
2473 // Try to fold into bitreverse if bswap is the root of the expression tree.
2474 if (Instruction *BitOp = matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ false,
2475 /*MatchBitReversals*/ true))
2476 return BitOp;
2477 break;
2478 }
2479 case Intrinsic::masked_load:
2480 if (Value *SimplifiedMaskedOp = simplifyMaskedLoad(*II))
2481 return replaceInstUsesWith(CI, SimplifiedMaskedOp);
2482 break;
2483 case Intrinsic::masked_store:
2484 return simplifyMaskedStore(*II);
2485 case Intrinsic::masked_gather:
2486 return simplifyMaskedGather(*II);
2487 case Intrinsic::masked_scatter:
2488 return simplifyMaskedScatter(*II);
2489 case Intrinsic::launder_invariant_group:
2490 case Intrinsic::strip_invariant_group:
2491 if (auto *SkippedBarrier = simplifyInvariantGroupIntrinsic(*II, *this))
2492 return replaceInstUsesWith(*II, SkippedBarrier);
2493 break;
2494 case Intrinsic::powi:
2495 if (ConstantInt *Power = dyn_cast<ConstantInt>(II->getArgOperand(1))) {
2496 // 0 and 1 are handled in instsimplify
2497 // powi(x, -1) -> 1/x
2498 if (Power->isMinusOne())
2499 return BinaryOperator::CreateFDivFMF(ConstantFP::get(CI.getType(), 1.0),
2500 II->getArgOperand(0), II);
2501 // powi(x, 2) -> x*x
2502 if (Power->equalsInt(2))
2503 return BinaryOperator::CreateFMulFMF(II->getArgOperand(0),
2504 II->getArgOperand(0), II);
2505
2506 if (!Power->getValue()[0]) {
2507 Value *X;
2508 // If power is even:
2509 // powi(-x, p) -> powi(x, p)
2510 // powi(fabs(x), p) -> powi(x, p)
2511 // powi(copysign(x, y), p) -> powi(x, p)
2512 if (match(II->getArgOperand(0), m_FNeg(m_Value(X))) ||
2513 match(II->getArgOperand(0), m_FAbs(m_Value(X))) ||
2514 match(II->getArgOperand(0),
2516 return replaceOperand(*II, 0, X);
2517 }
2518 }
2519 break;
2520
2521 case Intrinsic::cttz:
2522 case Intrinsic::ctlz:
2523 if (auto *I = foldCttzCtlz(*II, *this))
2524 return I;
2525 break;
2526
2527 case Intrinsic::ctpop:
2528 if (auto *I = foldCtpop(*II, *this))
2529 return I;
2530 break;
2531
2532 case Intrinsic::fshl:
2533 case Intrinsic::fshr: {
2534 Value *Op0 = II->getArgOperand(0), *Op1 = II->getArgOperand(1);
2535 Type *Ty = II->getType();
2536 unsigned BitWidth = Ty->getScalarSizeInBits();
2537 Constant *ShAmtC;
2538 if (match(II->getArgOperand(2), m_ImmConstant(ShAmtC))) {
2539 // Canonicalize a shift amount constant operand to modulo the bit-width.
2540 Constant *WidthC = ConstantInt::get(Ty, BitWidth);
2541 Constant *ModuloC =
2542 ConstantFoldBinaryOpOperands(Instruction::URem, ShAmtC, WidthC, DL);
2543 if (!ModuloC)
2544 return nullptr;
2545 if (ModuloC != ShAmtC)
2546 return replaceOperand(*II, 2, ModuloC);
2547
2549 ShAmtC, DL),
2550 m_One()) &&
2551 "Shift amount expected to be modulo bitwidth");
2552
2553 // Canonicalize funnel shift right by constant to funnel shift left. This
2554 // is not entirely arbitrary. For historical reasons, the backend may
2555 // recognize rotate left patterns but miss rotate right patterns.
2556 if (IID == Intrinsic::fshr) {
2557 // fshr X, Y, C --> fshl X, Y, (BitWidth - C) if C is not zero.
2558 if (!isKnownNonZero(ShAmtC, SQ.getWithInstruction(II)))
2559 return nullptr;
2560
2561 Constant *LeftShiftC = ConstantExpr::getSub(WidthC, ShAmtC);
2562 Module *Mod = II->getModule();
2563 Function *Fshl =
2564 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::fshl, Ty);
2565 return CallInst::Create(Fshl, { Op0, Op1, LeftShiftC });
2566 }
2567 assert(IID == Intrinsic::fshl &&
2568 "All funnel shifts by simple constants should go left");
2569
2570 // fshl(X, 0, C) --> shl X, C
2571 // fshl(X, undef, C) --> shl X, C
2572 if (match(Op1, m_ZeroInt()) || match(Op1, m_Undef()))
2573 return BinaryOperator::CreateShl(Op0, ShAmtC);
2574
2575 // fshl(0, X, C) --> lshr X, (BW-C)
2576 // fshl(undef, X, C) --> lshr X, (BW-C)
2577 if (match(Op0, m_ZeroInt()) || match(Op0, m_Undef()))
2578 return BinaryOperator::CreateLShr(Op1,
2579 ConstantExpr::getSub(WidthC, ShAmtC));
2580
2581 // fshl i16 X, X, 8 --> bswap i16 X (reduce to more-specific form)
2582 if (Op0 == Op1 && BitWidth == 16 && match(ShAmtC, m_SpecificInt(8))) {
2583 Module *Mod = II->getModule();
2584 Function *Bswap =
2585 Intrinsic::getOrInsertDeclaration(Mod, Intrinsic::bswap, Ty);
2586 return CallInst::Create(Bswap, { Op0 });
2587 }
2588 if (Instruction *BitOp =
2589 matchBSwapOrBitReverse(*II, /*MatchBSwaps*/ true,
2590 /*MatchBitReversals*/ true))
2591 return BitOp;
2592
2593 // R = fshl(X, X, C2)
2594 // fshl(R, R, C1) --> fshl(X, X, (C1 + C2) % bitsize)
2595 Value *InnerOp;
2596 const APInt *ShAmtInnerC, *ShAmtOuterC;
2597 if (match(Op0, m_FShl(m_Value(InnerOp), m_Deferred(InnerOp),
2598 m_APInt(ShAmtInnerC))) &&
2599 match(ShAmtC, m_APInt(ShAmtOuterC)) && Op0 == Op1) {
2600 APInt Sum = *ShAmtOuterC + *ShAmtInnerC;
2601 APInt Modulo = Sum.urem(APInt(Sum.getBitWidth(), BitWidth));
2602 if (Modulo.isZero())
2603 return replaceInstUsesWith(*II, InnerOp);
2604 Constant *ModuloC = ConstantInt::get(Ty, Modulo);
2606 {InnerOp, InnerOp, ModuloC});
2607 }
2608 }
2609
2610 // fshl(X, X, Neg(Y)) --> fshr(X, X, Y)
2611 // fshr(X, X, Neg(Y)) --> fshl(X, X, Y)
2612 // if BitWidth is a power-of-2
2613 Value *Y;
2614 if (Op0 == Op1 && isPowerOf2_32(BitWidth) &&
2615 match(II->getArgOperand(2), m_Neg(m_Value(Y)))) {
2616 Module *Mod = II->getModule();
2618 Mod, IID == Intrinsic::fshl ? Intrinsic::fshr : Intrinsic::fshl, Ty);
2619 return CallInst::Create(OppositeShift, {Op0, Op1, Y});
2620 }
2621
2622 // fshl(X, 0, Y) --> shl(X, and(Y, BitWidth - 1)) if bitwidth is a
2623 // power-of-2
2624 if (IID == Intrinsic::fshl && isPowerOf2_32(BitWidth) &&
2625 match(Op1, m_ZeroInt())) {
2626 Value *Op2 = II->getArgOperand(2);
2627 Value *And = Builder.CreateAnd(Op2, ConstantInt::get(Ty, BitWidth - 1));
2628 return BinaryOperator::CreateShl(Op0, And);
2629 }
2630
2631 // Left or right might be masked.
2633 return &CI;
2634
2635 // The shift amount (operand 2) of a funnel shift is modulo the bitwidth,
2636 // so only the low bits of the shift amount are demanded if the bitwidth is
2637 // a power-of-2.
2638 if (!isPowerOf2_32(BitWidth))
2639 break;
2641 KnownBits Op2Known(BitWidth);
2642 if (SimplifyDemandedBits(II, 2, Op2Demanded, Op2Known))
2643 return &CI;
2644 break;
2645 }
2646 case Intrinsic::ptrmask: {
2647 unsigned BitWidth = DL.getPointerTypeSizeInBits(II->getType());
2648 KnownBits Known(BitWidth);
2650 return II;
2651
2652 Value *InnerPtr, *InnerMask;
2653 bool Changed = false;
2654 // Combine:
2655 // (ptrmask (ptrmask p, A), B)
2656 // -> (ptrmask p, (and A, B))
2657 if (match(II->getArgOperand(0),
2659 m_Value(InnerMask))))) {
2660 assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
2661 "Mask types must match");
2662 // TODO: If InnerMask == Op1, we could copy attributes from inner
2663 // callsite -> outer callsite.
2664 Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
2665 replaceOperand(CI, 0, InnerPtr);
2666 replaceOperand(CI, 1, NewMask);
2667 Changed = true;
2668 }
2669
2670 // See if we can deduce non-null.
2671 if (!CI.hasRetAttr(Attribute::NonNull) &&
2672 (Known.isNonZero() ||
2673 isKnownNonZero(II, getSimplifyQuery().getWithInstruction(II)))) {
2674 CI.addRetAttr(Attribute::NonNull);
2675 Changed = true;
2676 }
2677
2678 unsigned NewAlignmentLog =
2680 std::min(BitWidth - 1, Known.countMinTrailingZeros()));
2681 // Known bits will capture if we had alignment information associated with
2682 // the pointer argument.
2683 if (NewAlignmentLog > Log2(CI.getRetAlign().valueOrOne())) {
2685 CI.getContext(), Align(uint64_t(1) << NewAlignmentLog)));
2686 Changed = true;
2687 }
2688 if (Changed)
2689 return &CI;
2690 break;
2691 }
2692 case Intrinsic::uadd_with_overflow:
2693 case Intrinsic::sadd_with_overflow: {
2694 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2695 return I;
2696
2697 // Given 2 constant operands whose sum does not overflow:
2698 // uaddo (X +nuw C0), C1 -> uaddo X, C0 + C1
2699 // saddo (X +nsw C0), C1 -> saddo X, C0 + C1
2700 Value *X;
2701 const APInt *C0, *C1;
2702 Value *Arg0 = II->getArgOperand(0);
2703 Value *Arg1 = II->getArgOperand(1);
2704 bool IsSigned = IID == Intrinsic::sadd_with_overflow;
2705 bool HasNWAdd = IsSigned
2706 ? match(Arg0, m_NSWAddLike(m_Value(X), m_APInt(C0)))
2707 : match(Arg0, m_NUWAddLike(m_Value(X), m_APInt(C0)));
2708 if (HasNWAdd && match(Arg1, m_APInt(C1))) {
2709 bool Overflow;
2710 APInt NewC =
2711 IsSigned ? C1->sadd_ov(*C0, Overflow) : C1->uadd_ov(*C0, Overflow);
2712 if (!Overflow)
2713 return replaceInstUsesWith(
2714 *II, Builder.CreateBinaryIntrinsic(
2715 IID, X, ConstantInt::get(Arg1->getType(), NewC)));
2716 }
2717 break;
2718 }
2719
2720 case Intrinsic::umul_with_overflow:
2721 case Intrinsic::smul_with_overflow:
2722 case Intrinsic::usub_with_overflow:
2723 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2724 return I;
2725 break;
2726
2727 case Intrinsic::ssub_with_overflow: {
2728 if (Instruction *I = foldIntrinsicWithOverflowCommon(II))
2729 return I;
2730
2731 Constant *C;
2732 Value *Arg0 = II->getArgOperand(0);
2733 Value *Arg1 = II->getArgOperand(1);
2734 // Given a constant C that is not the minimum signed value
2735 // for an integer of a given bit width:
2736 //
2737 // ssubo X, C -> saddo X, -C
2738 if (match(Arg1, m_Constant(C)) && C->isNotMinSignedValue()) {
2739 Value *NegVal = ConstantExpr::getNeg(C);
2740 // Build a saddo call that is equivalent to the discovered
2741 // ssubo call.
2742 return replaceInstUsesWith(
2743 *II, Builder.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow,
2744 Arg0, NegVal));
2745 }
2746
2747 break;
2748 }
2749
2750 case Intrinsic::uadd_sat:
2751 case Intrinsic::sadd_sat:
2752 case Intrinsic::usub_sat:
2753 case Intrinsic::ssub_sat: {
2755 Type *Ty = SI->getType();
2756 Value *Arg0 = SI->getLHS();
2757 Value *Arg1 = SI->getRHS();
2758
2759 // Make use of known overflow information.
2760 OverflowResult OR = computeOverflow(SI->getBinaryOp(), SI->isSigned(),
2761 Arg0, Arg1, SI);
2762 switch (OR) {
2764 break;
2766 if (SI->isSigned())
2767 return BinaryOperator::CreateNSW(SI->getBinaryOp(), Arg0, Arg1);
2768 else
2769 return BinaryOperator::CreateNUW(SI->getBinaryOp(), Arg0, Arg1);
2771 unsigned BitWidth = Ty->getScalarSizeInBits();
2772 APInt Min = APSInt::getMinValue(BitWidth, !SI->isSigned());
2773 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Min));
2774 }
2776 unsigned BitWidth = Ty->getScalarSizeInBits();
2777 APInt Max = APSInt::getMaxValue(BitWidth, !SI->isSigned());
2778 return replaceInstUsesWith(*SI, ConstantInt::get(Ty, Max));
2779 }
2780 }
2781
2782 // usub_sat((sub nuw C, A), C1) -> usub_sat(usub_sat(C, C1), A)
2783 // which after that:
2784 // usub_sat((sub nuw C, A), C1) -> usub_sat(C - C1, A) if C1 u< C
2785 // usub_sat((sub nuw C, A), C1) -> 0 otherwise
2786 Constant *C, *C1;
2787 Value *A;
2788 if (IID == Intrinsic::usub_sat &&
2789 match(Arg0, m_NUWSub(m_ImmConstant(C), m_Value(A))) &&
2790 match(Arg1, m_ImmConstant(C1))) {
2791 auto *NewC = Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, C, C1);
2792 auto *NewSub =
2793 Builder.CreateBinaryIntrinsic(Intrinsic::usub_sat, NewC, A);
2794 return replaceInstUsesWith(*SI, NewSub);
2795 }
2796
2797 // ssub.sat(X, C) -> sadd.sat(X, -C) if C != MIN
2798 if (IID == Intrinsic::ssub_sat && match(Arg1, m_Constant(C)) &&
2799 C->isNotMinSignedValue()) {
2800 Value *NegVal = ConstantExpr::getNeg(C);
2801 return replaceInstUsesWith(
2802 *II, Builder.CreateBinaryIntrinsic(
2803 Intrinsic::sadd_sat, Arg0, NegVal));
2804 }
2805
2806 // sat(sat(X + Val2) + Val) -> sat(X + (Val+Val2))
2807 // sat(sat(X - Val2) - Val) -> sat(X - (Val+Val2))
2808 // if Val and Val2 have the same sign
2809 if (auto *Other = dyn_cast<IntrinsicInst>(Arg0)) {
2810 Value *X;
2811 const APInt *Val, *Val2;
2812 APInt NewVal;
2813 bool IsUnsigned =
2814 IID == Intrinsic::uadd_sat || IID == Intrinsic::usub_sat;
2815 if (Other->getIntrinsicID() == IID &&
2816 match(Arg1, m_APInt(Val)) &&
2817 match(Other->getArgOperand(0), m_Value(X)) &&
2818 match(Other->getArgOperand(1), m_APInt(Val2))) {
2819 if (IsUnsigned)
2820 NewVal = Val->uadd_sat(*Val2);
2821 else if (Val->isNonNegative() == Val2->isNonNegative()) {
2822 bool Overflow;
2823 NewVal = Val->sadd_ov(*Val2, Overflow);
2824 if (Overflow) {
2825 // Both adds together may add more than SignedMaxValue
2826 // without saturating the final result.
2827 break;
2828 }
2829 } else {
2830 // Cannot fold saturated addition with different signs.
2831 break;
2832 }
2833
2834 return replaceInstUsesWith(
2835 *II, Builder.CreateBinaryIntrinsic(
2836 IID, X, ConstantInt::get(II->getType(), NewVal)));
2837 }
2838 }
2839 break;
2840 }
2841
2842 case Intrinsic::minnum:
2843 case Intrinsic::maxnum:
2844 case Intrinsic::minimumnum:
2845 case Intrinsic::maximumnum:
2846 case Intrinsic::minimum:
2847 case Intrinsic::maximum: {
2848 Value *Arg0 = II->getArgOperand(0);
2849 Value *Arg1 = II->getArgOperand(1);
2850 Value *X, *Y;
2851 if (match(Arg0, m_FNeg(m_Value(X))) && match(Arg1, m_FNeg(m_Value(Y))) &&
2852 (Arg0->hasOneUse() || Arg1->hasOneUse())) {
2853 // If both operands are negated, invert the call and negate the result:
2854 // min(-X, -Y) --> -(max(X, Y))
2855 // max(-X, -Y) --> -(min(X, Y))
2856 Intrinsic::ID NewIID;
2857 switch (IID) {
2858 case Intrinsic::maxnum:
2859 NewIID = Intrinsic::minnum;
2860 break;
2861 case Intrinsic::minnum:
2862 NewIID = Intrinsic::maxnum;
2863 break;
2864 case Intrinsic::maximumnum:
2865 NewIID = Intrinsic::minimumnum;
2866 break;
2867 case Intrinsic::minimumnum:
2868 NewIID = Intrinsic::maximumnum;
2869 break;
2870 case Intrinsic::maximum:
2871 NewIID = Intrinsic::minimum;
2872 break;
2873 case Intrinsic::minimum:
2874 NewIID = Intrinsic::maximum;
2875 break;
2876 default:
2877 llvm_unreachable("unexpected intrinsic ID");
2878 }
2879 Value *NewCall = Builder.CreateBinaryIntrinsic(NewIID, X, Y, II);
2880 Instruction *FNeg = UnaryOperator::CreateFNeg(NewCall);
2881 FNeg->copyIRFlags(II);
2882 return FNeg;
2883 }
2884
2885 // m(m(X, C2), C1) -> m(X, C)
2886 const APFloat *C1, *C2;
2887 if (auto *M = dyn_cast<IntrinsicInst>(Arg0)) {
2888 if (M->getIntrinsicID() == IID && match(Arg1, m_APFloat(C1)) &&
2889 ((match(M->getArgOperand(0), m_Value(X)) &&
2890 match(M->getArgOperand(1), m_APFloat(C2))) ||
2891 (match(M->getArgOperand(1), m_Value(X)) &&
2892 match(M->getArgOperand(0), m_APFloat(C2))))) {
2893 APFloat Res(0.0);
2894 switch (IID) {
2895 case Intrinsic::maxnum:
2896 Res = maxnum(*C1, *C2);
2897 break;
2898 case Intrinsic::minnum:
2899 Res = minnum(*C1, *C2);
2900 break;
2901 case Intrinsic::maximumnum:
2902 Res = maximumnum(*C1, *C2);
2903 break;
2904 case Intrinsic::minimumnum:
2905 Res = minimumnum(*C1, *C2);
2906 break;
2907 case Intrinsic::maximum:
2908 Res = maximum(*C1, *C2);
2909 break;
2910 case Intrinsic::minimum:
2911 Res = minimum(*C1, *C2);
2912 break;
2913 default:
2914 llvm_unreachable("unexpected intrinsic ID");
2915 }
2916 // TODO: Conservatively intersecting FMF. If Res == C2, the transform
2917 // was a simplification (so Arg0 and its original flags could
2918 // propagate?)
2919 Value *V = Builder.CreateBinaryIntrinsic(
2920 IID, X, ConstantFP::get(Arg0->getType(), Res),
2922 return replaceInstUsesWith(*II, V);
2923 }
2924 }
2925
2926 // m((fpext X), (fpext Y)) -> fpext (m(X, Y))
2927 if (match(Arg0, m_FPExt(m_Value(X))) && match(Arg1, m_FPExt(m_Value(Y))) &&
2928 (Arg0->hasOneUse() || Arg1->hasOneUse()) &&
2929 X->getType() == Y->getType()) {
2930 Value *NewCall =
2931 Builder.CreateBinaryIntrinsic(IID, X, Y, II, II->getName());
2932 return new FPExtInst(NewCall, II->getType());
2933 }
2934
2935 // m(fpext X, C) -> fpext m(X, TruncC) if C can be losslessly truncated.
2936 Constant *C;
2937 if (match(Arg0, m_OneUse(m_FPExt(m_Value(X)))) &&
2938 match(Arg1, m_ImmConstant(C))) {
2939 if (Constant *TruncC =
2940 getLosslessInvCast(C, X->getType(), Instruction::FPExt, DL)) {
2941 Value *NewCall =
2942 Builder.CreateBinaryIntrinsic(IID, X, TruncC, II, II->getName());
2943 return new FPExtInst(NewCall, II->getType());
2944 }
2945 }
2946
2947 // max X, -X --> fabs X
2948 // min X, -X --> -(fabs X)
2949 // TODO: Remove one-use limitation? That is obviously better for max,
2950 // hence why we don't check for one-use for that. However,
2951 // it would be an extra instruction for min (fnabs), but
2952 // that is still likely better for analysis and codegen.
// Predicate over an operand pair: true when Op0 is `fneg X` and Op1 is that
// same X, subject to the use-count rule below. X is captured by reference, so
// the match writes the negated operand into the enclosing X for later use.
2953 auto IsMinMaxOrXNegX = [IID, &X](Value *Op0, Value *Op1) {
2954 if (match(Op0, m_FNeg(m_Value(X))) && match(Op1, m_Specific(X)))
// For the min-style intrinsics (minimum, minnum, minimumnum) the fneg must
// have a single use; for any other IID the pattern is accepted regardless
// of how many uses the fneg has.
2955 return Op0->hasOneUse() ||
2956 (IID != Intrinsic::minimum && IID != Intrinsic::minnum &&
2957 IID != Intrinsic::minimumnum);
// Operands are not of the form (fneg X, X).
2958 return false;
2959 };
2960
2961 if (IsMinMaxOrXNegX(Arg0, Arg1) || IsMinMaxOrXNegX(Arg1, Arg0)) {
2962 Value *R = Builder.CreateFAbs(X, II);
2963 if (IID == Intrinsic::minimum || IID == Intrinsic::minnum ||
2964 IID == Intrinsic::minimumnum)
2965 R = Builder.CreateFNegFMF(R, II);
2966 return replaceInstUsesWith(*II, R);
2967 }
2968
2969 break;
2970 }
2971 case Intrinsic::matrix_multiply: {
2972 // Optimize negation in matrix multiplication.
2973
2974 // -A * -B -> A * B
2975 Value *A, *B;
2976 if (match(II->getArgOperand(0), m_FNeg(m_Value(A))) &&
2977 match(II->getArgOperand(1), m_FNeg(m_Value(B)))) {
2978 replaceOperand(*II, 0, A);
2979 replaceOperand(*II, 1, B);
2980 return II;
2981 }
2982
2983 Value *Op0 = II->getOperand(0);
2984 Value *Op1 = II->getOperand(1);
2985 Value *OpNotNeg, *NegatedOp;
2986 unsigned NegatedOpArg, OtherOpArg;
2987 if (match(Op0, m_FNeg(m_Value(OpNotNeg)))) {
2988 NegatedOp = Op0;
2989 NegatedOpArg = 0;
2990 OtherOpArg = 1;
2991 } else if (match(Op1, m_FNeg(m_Value(OpNotNeg)))) {
2992 NegatedOp = Op1;
2993 NegatedOpArg = 1;
2994 OtherOpArg = 0;
2995 } else
2996 // Multiplication doesn't have a negated operand.
2997 break;
2998
2999 // Only optimize if the negated operand has only one use.
3000 if (!NegatedOp->hasOneUse())
3001 break;
3002
3003 Value *OtherOp = II->getOperand(OtherOpArg);
3004 VectorType *RetTy = cast<VectorType>(II->getType());
3005 VectorType *NegatedOpTy = cast<VectorType>(NegatedOp->getType());
3006 VectorType *OtherOpTy = cast<VectorType>(OtherOp->getType());
3007 ElementCount NegatedCount = NegatedOpTy->getElementCount();
3008 ElementCount OtherCount = OtherOpTy->getElementCount();
3009 ElementCount RetCount = RetTy->getElementCount();
3010 // (-A) * B -> A * (-B), if it is cheaper to negate B and vice versa.
3011 if (ElementCount::isKnownGT(NegatedCount, OtherCount) &&
3012 ElementCount::isKnownLT(OtherCount, RetCount)) {
3013 Value *InverseOtherOp = Builder.CreateFNeg(OtherOp);
3014 replaceOperand(*II, NegatedOpArg, OpNotNeg);
3015 replaceOperand(*II, OtherOpArg, InverseOtherOp);
3016 return II;
3017 }
3018 // (-A) * B -> -(A * B), if it is cheaper to negate the result
3019 if (ElementCount::isKnownGT(NegatedCount, RetCount)) {
3020 SmallVector<Value *, 5> NewArgs(II->args());
3021 NewArgs[NegatedOpArg] = OpNotNeg;
3022 Instruction *NewMul =
3023 Builder.CreateIntrinsic(II->getType(), IID, NewArgs, II);
3024 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(NewMul, II));
3025 }
3026 break;
3027 }
3028 case Intrinsic::fmuladd: {
3029 // Try to simplify the underlying FMul.
3030 if (Value *V =
3031 simplifyFMulInst(II->getArgOperand(0), II->getArgOperand(1),
3032 II->getFastMathFlags(), SQ.getWithInstruction(II)))
3033 return BinaryOperator::CreateFAddFMF(V, II->getArgOperand(2),
3034 II->getFastMathFlags());
3035
3036 [[fallthrough]];
3037 }
3038 case Intrinsic::fma: {
3039 // fma fneg(x), fneg(y), z -> fma x, y, z
3040 Value *Src0 = II->getArgOperand(0);
3041 Value *Src1 = II->getArgOperand(1);
3042 Value *Src2 = II->getArgOperand(2);
3043 Value *X, *Y;
3044 if (match(Src0, m_FNeg(m_Value(X))) && match(Src1, m_FNeg(m_Value(Y)))) {
3045 replaceOperand(*II, 0, X);
3046 replaceOperand(*II, 1, Y);
3047 return II;
3048 }
3049
3050 // fma fabs(x), fabs(x), z -> fma x, x, z
3051 if (match(Src0, m_FAbs(m_Value(X))) &&
3052 match(Src1, m_FAbs(m_Specific(X)))) {
3053 replaceOperand(*II, 0, X);
3054 replaceOperand(*II, 1, X);
3055 return II;
3056 }
3057
3058 // Try to simplify the underlying FMul. We can only apply simplifications
3059 // that do not require rounding.
3060 if (Value *V = simplifyFMAFMul(Src0, Src1, II->getFastMathFlags(),
3061 SQ.getWithInstruction(II)))
3062 return BinaryOperator::CreateFAddFMF(V, Src2, II->getFastMathFlags());
3063
3064 // fma x, y, 0 -> fmul x, y
3065 // This is always valid for -0.0, but requires nsz for +0.0 as
3066 // -0.0 + 0.0 = 0.0, which would not be the same as the fmul on its own.
3067 if (match(Src2, m_NegZeroFP()) ||
3068 (match(Src2, m_PosZeroFP()) && II->getFastMathFlags().noSignedZeros()))
3069 return BinaryOperator::CreateFMulFMF(Src0, Src1, II);
3070
3071 // fma x, -1.0, y -> fsub y, x
3072 if (match(Src1, m_SpecificFP(-1.0)))
3073 return BinaryOperator::CreateFSubFMF(Src2, Src0, II);
3074
3075 break;
3076 }
3077 case Intrinsic::copysign: {
3078 Value *Mag = II->getArgOperand(0), *Sign = II->getArgOperand(1);
3079 if (std::optional<bool> KnownSignBit = computeKnownFPSignBit(
3080 Sign, getSimplifyQuery().getWithInstruction(II))) {
3081 if (*KnownSignBit) {
3082 // If we know that the sign argument is negative, reduce to FNABS:
3083 // copysign Mag, -Sign --> fneg (fabs Mag)
3084 Value *Fabs = Builder.CreateFAbs(Mag, II);
3085 return replaceInstUsesWith(*II, Builder.CreateFNegFMF(Fabs, II));
3086 }
3087
3088 // If we know that the sign argument is positive, reduce to FABS:
3089 // copysign Mag, +Sign --> fabs Mag
3090 Value *Fabs = Builder.CreateFAbs(Mag, II);
3091 return replaceInstUsesWith(*II, Fabs);
3092 }
3093
3094 // Propagate sign argument through nested calls:
3095 // copysign Mag, (copysign ?, X) --> copysign Mag, X
3096 Value *X;
3098 Value *CopySign =
3099 Builder.CreateCopySign(Mag, X, FMFSource::intersect(II, Sign));
3100 return replaceInstUsesWith(*II, CopySign);
3101 }
3102
3103 // Clear sign-bit of constant magnitude:
3104 // copysign -MagC, X --> copysign MagC, X
3105 // TODO: Support constant folding for fabs
3106 const APFloat *MagC;
3107 if (match(Mag, m_APFloat(MagC)) && MagC->isNegative()) {
3108 APFloat PosMagC = *MagC;
3109 PosMagC.clearSign();
3110 return replaceOperand(*II, 0, ConstantFP::get(Mag->getType(), PosMagC));
3111 }
3112
3113 // Peek through changes of magnitude's sign-bit. This call rewrites those:
3114 // copysign (fabs X), Sign --> copysign X, Sign
3115 // copysign (fneg X), Sign --> copysign X, Sign
3116 if (match(Mag, m_FAbs(m_Value(X))) || match(Mag, m_FNeg(m_Value(X))))
3117 return replaceOperand(*II, 0, X);
3118
3119 Type *SignEltTy = Sign->getType()->getScalarType();
3120
3121 Value *CastSrc;
3122 if (match(Sign,
3124 CastSrc->getType()->isIntOrIntVectorTy() &&
3126 KnownBits Known(SignEltTy->getPrimitiveSizeInBits());
3128 APInt::getSignMask(Known.getBitWidth()), Known,
3129 SQ))
3130 return II;
3131 }
3132
3133 break;
3134 }
3135 case Intrinsic::fabs: {
3136 Value *Cond, *TVal, *FVal;
3137 Value *Arg = II->getArgOperand(0);
3138 Value *X;
3139 // fabs (-X) --> fabs (X)
3140 if (match(Arg, m_FNeg(m_Value(X)))) {
3141 Value *Fabs = Builder.CreateFAbs(X, II);
3142 return replaceInstUsesWith(CI, Fabs);
3143 }
3144
3145 if (match(Arg, m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))) {
3146 // fabs (select Cond, TrueC, FalseC) --> select Cond, AbsT, AbsF
3147 if (Arg->hasOneUse() ? (isa<Constant>(TVal) || isa<Constant>(FVal))
3148 : (isa<Constant>(TVal) && isa<Constant>(FVal))) {
3149 CallInst *AbsT = Builder.CreateCall(II->getCalledFunction(), {TVal});
3150 CallInst *AbsF = Builder.CreateCall(II->getCalledFunction(), {FVal});
3151 SelectInst *SI = SelectInst::Create(Cond, AbsT, AbsF);
3152 SI->setFastMathFlags(II->getFastMathFlags() |
3153 cast<SelectInst>(Arg)->getFastMathFlags());
3154 // Can't copy nsz to select, as even with the nsz flag the fabs result
3155 // always has the sign bit unset.
3156 SI->setHasNoSignedZeros(false);
3157 return SI;
3158 }
3159 // fabs (select Cond, -FVal, FVal) --> fabs FVal
3160 if (match(TVal, m_FNeg(m_Specific(FVal))))
3161 return replaceOperand(*II, 0, FVal);
3162 // fabs (select Cond, TVal, -TVal) --> fabs TVal
3163 if (match(FVal, m_FNeg(m_Specific(TVal))))
3164 return replaceOperand(*II, 0, TVal);
3165 }
3166
3167 Value *Magnitude, *Sign;
3168 if (match(II->getArgOperand(0),
3169 m_CopySign(m_Value(Magnitude), m_Value(Sign)))) {
3170 // fabs (copysign x, y) -> (fabs x)
3171 Value *AbsSign = Builder.CreateFAbs(Magnitude, II);
3172 return replaceInstUsesWith(*II, AbsSign);
3173 }
3174
3175 [[fallthrough]];
3176 }
3177 case Intrinsic::ceil:
3178 case Intrinsic::floor:
3179 case Intrinsic::round:
3180 case Intrinsic::roundeven:
3181 case Intrinsic::nearbyint:
3182 case Intrinsic::rint:
3183 case Intrinsic::trunc: {
3184 Value *ExtSrc;
3185 if (match(II->getArgOperand(0), m_OneUse(m_FPExt(m_Value(ExtSrc))))) {
3186 // Narrow the call: intrinsic (fpext x) -> fpext (intrinsic x)
3187 Value *NarrowII = Builder.CreateUnaryIntrinsic(IID, ExtSrc, II);
3188 return new FPExtInst(NarrowII, II->getType());
3189 }
3190 break;
3191 }
3192 case Intrinsic::cos:
3193 case Intrinsic::amdgcn_cos:
3194 case Intrinsic::cosh: {
3195 Value *X, *Sign;
3196 Value *Src = II->getArgOperand(0);
3197 if (match(Src, m_FNeg(m_Value(X))) || match(Src, m_FAbs(m_Value(X))) ||
3198 match(Src, m_CopySign(m_Value(X), m_Value(Sign)))) {
3199 // f(-x) --> f(x)
3200 // f(fabs(x)) --> f(x)
3201 // f(copysign(x, y)) --> f(x)
3202 // for f in {cos, cosh}
3203 return replaceOperand(*II, 0, X);
3204 }
3205 break;
3206 }
3207 case Intrinsic::sin:
3208 case Intrinsic::amdgcn_sin:
3209 case Intrinsic::sinh:
3210 case Intrinsic::tan:
3211 case Intrinsic::tanh: {
3212 Value *X;
3213 if (match(II->getArgOperand(0), m_OneUse(m_FNeg(m_Value(X))))) {
3214 // f(-x) --> -f(x)
3215 // for f in {sin, sinh, tan, tanh}
3216 Value *NewFunc = Builder.CreateUnaryIntrinsic(IID, X, II);
3217 return UnaryOperator::CreateFNegFMF(NewFunc, II);
3218 }
3219 break;
3220 }
3221 case Intrinsic::ldexp: {
3222 Value *Src = II->getArgOperand(0);
3223 Value *Exp = II->getArgOperand(1);
3224
3225 // ldexp(x, K) -> fmul x, 2^K
3226 uint64_t ConstExp;
3227 if (match(Exp, m_ConstantInt(ConstExp))) {
3228 const fltSemantics &FPTy =
3229 Src->getType()->getScalarType()->getFltSemantics();
3230
3231 APFloat Scaled = scalbn(APFloat::getOne(FPTy), static_cast<int>(ConstExp),
3233 if (!Scaled.isZero() && !Scaled.isInfinity()) {
3234 // Skip overflow and underflow cases.
3235 Constant *FPConst = ConstantFP::get(Src->getType(), Scaled);
3236 return BinaryOperator::CreateFMulFMF(Src, FPConst, II);
3237 }
3238 }
3239
3240 // ldexp(ldexp(x, a), b) -> ldexp(x, sadd.sat(a, b))
3241 //
3242 // A danger is if the first ldexp would overflow to infinity or underflow to
3243 // zero, but the combined exponent avoids it.
3244 //
3245 // We ignore this with reassoc, or if we know both exponents have the same
3246 // sign (since then we'd just double down on the over/underflow which would
3247 // occur anyway).
3248 //
3249 // ldexp can take arbitrary integer types, so we also need to ensure that
3250 // our exponent type is wide enough so that if sadd.sat(a, b) saturates,
3251 // then ldexp at the saturated exponent saturates to inf or zero as well.
3252 //
3253 // TODO: Could do better if we had range tracking for the input value
3254 // exponent. Also could broaden sign check to cover == 0 case.
3255 Value *InnerSrc;
3256 Value *InnerExp;
3258 m_Value(InnerSrc), m_Value(InnerExp)))) &&
3259 Exp->getType() == InnerExp->getType()) {
3260 FastMathFlags FMF = II->getFastMathFlags();
3261 FastMathFlags InnerFlags = cast<FPMathOperator>(Src)->getFastMathFlags();
3262
3263 if (ldexpSaturatingAddIsSafe(II->getType(), Exp->getType()) &&
3264 ((FMF.allowReassoc() && InnerFlags.allowReassoc()) ||
3265 signBitMustBeTheSame(Exp, InnerExp, SQ.getWithInstruction(II)))) {
3266 Value *NewExp =
3267 Builder.CreateBinaryIntrinsic(Intrinsic::sadd_sat, InnerExp, Exp);
3268 II->setArgOperand(1, NewExp);
3269 II->setFastMathFlags(InnerFlags); // Or the inner flags.
3270 return replaceOperand(*II, 0, InnerSrc);
3271 }
3272 }
3273
3274 // ldexp(x, zext(i1 y)) -> fmul x, (select y, 2.0, 1.0)
3275 // ldexp(x, sext(i1 y)) -> fmul x, (select y, 0.5, 1.0)
3276 Value *ExtSrc;
3277 if (match(Exp, m_ZExt(m_Value(ExtSrc))) &&
3278 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3279 Value *Select =
3280 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 2.0),
3281 ConstantFP::get(II->getType(), 1.0));
3283 }
3284 if (match(Exp, m_SExt(m_Value(ExtSrc))) &&
3285 ExtSrc->getType()->getScalarSizeInBits() == 1) {
3286 Value *Select =
3287 Builder.CreateSelect(ExtSrc, ConstantFP::get(II->getType(), 0.5),
3288 ConstantFP::get(II->getType(), 1.0));
3290 }
3291
3292 // ldexp(x, c ? exp : 0) -> c ? ldexp(x, exp) : x
3293 // ldexp(x, c ? 0 : exp) -> c ? x : ldexp(x, exp)
3294 //
3295 // TODO: If we cared, should insert a canonicalize for x
3296 Value *SelectCond, *SelectLHS, *SelectRHS;
3297 if (match(II->getArgOperand(1),
3298 m_OneUse(m_Select(m_Value(SelectCond), m_Value(SelectLHS),
3299 m_Value(SelectRHS))))) {
3300 Value *NewLdexp = nullptr;
3301 Value *Select = nullptr;
3302 if (match(SelectRHS, m_ZeroInt())) {
3303 NewLdexp = Builder.CreateLdexp(Src, SelectLHS, II);
3304 Select = Builder.CreateSelect(SelectCond, NewLdexp, Src);
3305 } else if (match(SelectLHS, m_ZeroInt())) {
3306 NewLdexp = Builder.CreateLdexp(Src, SelectRHS, II);
3307 Select = Builder.CreateSelect(SelectCond, Src, NewLdexp);
3308 }
3309
3310 if (NewLdexp) {
3311 Select->takeName(II);
3312 return replaceInstUsesWith(*II, Select);
3313 }
3314 }
3315
3316 break;
3317 }
3318 case Intrinsic::ptrauth_auth:
3319 case Intrinsic::ptrauth_resign: {
3320 // (sign|resign) + (auth|resign) can be folded by omitting the middle
3321 // sign+auth component if the key and discriminator match.
3322 bool NeedSign = II->getIntrinsicID() == Intrinsic::ptrauth_resign;
3323 Value *Ptr = II->getArgOperand(0);
3324 Value *Key = II->getArgOperand(1);
3325 Value *Disc = II->getArgOperand(2);
3326 Value *DS = nullptr;
3327 if (auto Bundle = II->getOperandBundle(LLVMContext::OB_deactivation_symbol))
3328 DS = Bundle->Inputs[0];
3329
3330 // AuthKey will be the key we need to end up authenticating against in
3331 // whatever we replace this sequence with.
3332 Value *AuthKey = nullptr, *AuthDisc = nullptr, *BasePtr;
3333 if (const auto *CI = dyn_cast<CallBase>(Ptr)) {
3334 Value *OtherDS = nullptr;
3335 if (auto Bundle =
3337 OtherDS = Bundle->Inputs[0];
3338 if (DS != OtherDS)
3339 break;
3340
3341 if (CI->getIntrinsicID() == Intrinsic::ptrauth_sign) {
3342 if (CI->getArgOperand(1) != Key || CI->getArgOperand(2) != Disc)
3343 break;
3344 } else if (CI->getIntrinsicID() == Intrinsic::ptrauth_resign) {
3345 // The resign intrinsic does not support deactivation symbols.
3346 assert(!DS);
3347 if (CI->getArgOperand(3) != Key || CI->getArgOperand(4) != Disc)
3348 break;
3349 AuthKey = CI->getArgOperand(1);
3350 AuthDisc = CI->getArgOperand(2);
3351 } else
3352 break;
3353 BasePtr = CI->getArgOperand(0);
3354 } else if (const auto *PtrToInt = dyn_cast<PtrToIntOperator>(Ptr)) {
3355 // ptrauth constants are equivalent to a call to @llvm.ptrauth.sign for
3356 // our purposes, so check for that too.
3357 const auto *CPA = dyn_cast<ConstantPtrAuth>(PtrToInt->getOperand(0));
3358 if (!CPA || DS || !CPA->isKnownCompatibleWith(Key, Disc, DL))
3359 break;
3360
3361 // resign(ptrauth(p,ks,ds),ks,ds,kr,dr) -> ptrauth(p,kr,dr)
3362 if (NeedSign && isa<ConstantInt>(II->getArgOperand(4))) {
3363 auto *SignKey = cast<ConstantInt>(II->getArgOperand(3));
3364 auto *SignDisc = cast<ConstantInt>(II->getArgOperand(4));
3365 auto *Null = ConstantPointerNull::get(Builder.getPtrTy());
3366 auto *NewCPA = ConstantPtrAuth::get(CPA->getPointer(), SignKey,
3367 SignDisc, /*AddrDisc=*/Null,
3368 /*DeactivationSymbol=*/Null);
3370 *II, ConstantExpr::getPointerCast(NewCPA, II->getType()));
3371 return eraseInstFromFunction(*II);
3372 }
3373
3374 // auth(ptrauth(p,k,d),k,d) -> p
3375 BasePtr = Builder.CreatePtrToInt(CPA->getPointer(), II->getType());
3376 } else
3377 break;
3378
3379 unsigned NewIntrin;
3380 if (AuthKey && NeedSign) {
3381 // resign(0,1) + resign(1,2) = resign(0, 2)
3382 NewIntrin = Intrinsic::ptrauth_resign;
3383 } else if (AuthKey) {
3384 // resign(0,1) + auth(1) = auth(0)
3385 NewIntrin = Intrinsic::ptrauth_auth;
3386 } else if (NeedSign) {
3387 // sign(0) + resign(0, 1) = sign(1)
3388 NewIntrin = Intrinsic::ptrauth_sign;
3389 } else {
3390 // sign(0) + auth(0) = nop
3391 replaceInstUsesWith(*II, BasePtr);
3392 return eraseInstFromFunction(*II);
3393 }
3394
3395 SmallVector<Value *, 4> CallArgs;
3396 CallArgs.push_back(BasePtr);
3397 if (AuthKey) {
3398 CallArgs.push_back(AuthKey);
3399 CallArgs.push_back(AuthDisc);
3400 }
3401
3402 if (NeedSign) {
3403 CallArgs.push_back(II->getArgOperand(3));
3404 CallArgs.push_back(II->getArgOperand(4));
3405 }
3406
3407 std::vector<OperandBundleDef> Bundles;
3408 if (DS)
3409 Bundles.push_back(OperandBundleDef("deactivation-symbol", DS));
3410
3411 Function *NewFn =
3412 Intrinsic::getOrInsertDeclaration(II->getModule(), NewIntrin);
3413 return CallInst::Create(NewFn, CallArgs, Bundles);
3414 }
3415 case Intrinsic::arm_neon_vtbl1:
3416 case Intrinsic::arm_neon_vtbl2:
3417 case Intrinsic::arm_neon_vtbl3:
3418 case Intrinsic::arm_neon_vtbl4:
3419 case Intrinsic::aarch64_neon_tbl1:
3420 case Intrinsic::aarch64_neon_tbl2:
3421 case Intrinsic::aarch64_neon_tbl3:
3422 case Intrinsic::aarch64_neon_tbl4:
3423 return simplifyNeonTbl(*II, *this, /*IsExtension=*/false);
3424 case Intrinsic::arm_neon_vtbx1:
3425 case Intrinsic::arm_neon_vtbx2:
3426 case Intrinsic::arm_neon_vtbx3:
3427 case Intrinsic::arm_neon_vtbx4:
3428 case Intrinsic::aarch64_neon_tbx1:
3429 case Intrinsic::aarch64_neon_tbx2:
3430 case Intrinsic::aarch64_neon_tbx3:
3431 case Intrinsic::aarch64_neon_tbx4:
3432 return simplifyNeonTbl(*II, *this, /*IsExtension=*/true);
3433
3434 case Intrinsic::arm_neon_vmulls:
3435 case Intrinsic::arm_neon_vmullu:
3436 case Intrinsic::aarch64_neon_smull:
3437 case Intrinsic::aarch64_neon_umull: {
3438 Value *Arg0 = II->getArgOperand(0);
3439 Value *Arg1 = II->getArgOperand(1);
3440
3441 // Handle mul by zero first:
3443 return replaceInstUsesWith(CI, ConstantAggregateZero::get(II->getType()));
3444 }
3445
3446 // Check for constant LHS & RHS - in this case we just simplify.
3447 bool Zext = (IID == Intrinsic::arm_neon_vmullu ||
3448 IID == Intrinsic::aarch64_neon_umull);
3449 VectorType *NewVT = cast<VectorType>(II->getType());
3450 if (Constant *CV0 = dyn_cast<Constant>(Arg0)) {
3451 if (Constant *CV1 = dyn_cast<Constant>(Arg1)) {
3452 Value *V0 = Builder.CreateIntCast(CV0, NewVT, /*isSigned=*/!Zext);
3453 Value *V1 = Builder.CreateIntCast(CV1, NewVT, /*isSigned=*/!Zext);
3454 return replaceInstUsesWith(CI, Builder.CreateMul(V0, V1));
3455 }
3456
3457 // Couldn't simplify - canonicalize constant to the RHS.
3458 std::swap(Arg0, Arg1);
3459 }
3460
3461 // Handle mul by one:
3462 if (Constant *CV1 = dyn_cast<Constant>(Arg1))
3463 if (ConstantInt *Splat =
3464 dyn_cast_or_null<ConstantInt>(CV1->getSplatValue()))
3465 if (Splat->isOne())
3466 return CastInst::CreateIntegerCast(Arg0, II->getType(),
3467 /*isSigned=*/!Zext);
3468
3469 break;
3470 }
3471 case Intrinsic::arm_neon_aesd:
3472 case Intrinsic::arm_neon_aese:
3473 case Intrinsic::aarch64_crypto_aesd:
3474 case Intrinsic::aarch64_crypto_aese:
3475 case Intrinsic::aarch64_sve_aesd:
3476 case Intrinsic::aarch64_sve_aese: {
3477 Value *DataArg = II->getArgOperand(0);
3478 Value *KeyArg = II->getArgOperand(1);
3479
3480 // Accept zero on either operand.
3481 if (!match(KeyArg, m_ZeroInt()))
3482 std::swap(KeyArg, DataArg);
3483
3484 // Try to use the builtin XOR in AESE and AESD to eliminate a prior XOR
3485 Value *Data, *Key;
3486 if (match(KeyArg, m_ZeroInt()) &&
3487 match(DataArg, m_Xor(m_Value(Data), m_Value(Key)))) {
3488 replaceOperand(*II, 0, Data);
3489 replaceOperand(*II, 1, Key);
3490 return II;
3491 }
3492 break;
3493 }
3494 case Intrinsic::arm_neon_vshifts:
3495 case Intrinsic::arm_neon_vshiftu:
3496 case Intrinsic::aarch64_neon_sshl:
3497 case Intrinsic::aarch64_neon_ushl:
3498 return foldNeonShift(II, *this);
3499 case Intrinsic::hexagon_V6_vandvrt:
3500 case Intrinsic::hexagon_V6_vandvrt_128B: {
3501 // Simplify Q -> V -> Q conversion.
3502 if (auto Op0 = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3503 Intrinsic::ID ID0 = Op0->getIntrinsicID();
3504 if (ID0 != Intrinsic::hexagon_V6_vandqrt &&
3505 ID0 != Intrinsic::hexagon_V6_vandqrt_128B)
3506 break;
3507 Value *Bytes = Op0->getArgOperand(1), *Mask = II->getArgOperand(1);
3508 uint64_t Bytes1 = computeKnownBits(Bytes, Op0).One.getZExtValue();
3509 uint64_t Mask1 = computeKnownBits(Mask, II).One.getZExtValue();
3510 // Check if every byte has common bits in Bytes and Mask.
3511 uint64_t C = Bytes1 & Mask1;
3512 if ((C & 0xFF) && (C & 0xFF00) && (C & 0xFF0000) && (C & 0xFF000000))
3513 return replaceInstUsesWith(*II, Op0->getArgOperand(0));
3514 }
3515 break;
3516 }
3517 case Intrinsic::stackrestore: {
// Categories that the Classify lambda below assigns to an instruction.
3518 enum class ClassifyResult {
// Anything that does not fall into one of the other categories.
3519 None,
// The instruction is an AllocaInst.
3520 Alloca,
// The instruction is a call to the stackrestore intrinsic.
3521 StackRestore,
// A non-intrinsic call, or an intrinsic call that reports
// mayHaveSideEffects().
3522 CallWithSideEffects,
3523 };
// Maps an instruction to a ClassifyResult. The checks run in a fixed order:
// alloca first, then (for CallInst only) stackrestore, then side effects.
3524 auto Classify = [](const Instruction *I) {
3525 if (isa<AllocaInst>(I))
3526 return ClassifyResult::Alloca;
3527
3528 if (auto *CI = dyn_cast<CallInst>(I)) {
3529 if (auto *II = dyn_cast<IntrinsicInst>(CI)) {
// stackrestore is tested before the side-effect query, so it is always
// reported as StackRestore rather than CallWithSideEffects.
3530 if (II->getIntrinsicID() == Intrinsic::stackrestore)
3531 return ClassifyResult::StackRestore;
3532
3533 if (II->mayHaveSideEffects())
3534 return ClassifyResult::CallWithSideEffects;
// An intrinsic without side effects falls through to None below.
3535 } else {
3536 // Consider all non-intrinsic calls to be side effects
3537 return ClassifyResult::CallWithSideEffects;
3538 }
3539 }
3540
// Reached for instructions that are neither an alloca nor a CallInst, and
// for intrinsic calls that are not stackrestore and have no side effects.
3541 return ClassifyResult::None;
3542 };
3543
3544 // If the stacksave and the stackrestore are in the same BB, and there is
3545 // no intervening call, alloca, or stackrestore of a different stacksave,
3546 // remove the restore. This can happen when variable allocas are DCE'd.
3547 if (IntrinsicInst *SS = dyn_cast<IntrinsicInst>(II->getArgOperand(0))) {
3548 if (SS->getIntrinsicID() == Intrinsic::stacksave &&
3549 SS->getParent() == II->getParent()) {
3550 BasicBlock::iterator BI(SS);
3551 bool CannotRemove = false;
3552 for (++BI; &*BI != II; ++BI) {
3553 switch (Classify(&*BI)) {
3554 case ClassifyResult::None:
3555 // So far so good, look at next instructions.
3556 break;
3557
3558 case ClassifyResult::StackRestore:
3559 // If we found an intervening stackrestore for a different
3560 // stacksave, we can't remove the stackrestore. Otherwise, continue.
3561 if (cast<IntrinsicInst>(*BI).getArgOperand(0) != SS)
3562 CannotRemove = true;
3563 break;
3564
3565 case ClassifyResult::Alloca:
3566 case ClassifyResult::CallWithSideEffects:
3567 // If we found an alloca, a non-intrinsic call, or an intrinsic
3568 // call with side effects, we can't remove the stackrestore.
3569 CannotRemove = true;
3570 break;
3571 }
3572 if (CannotRemove)
3573 break;
3574 }
3575
3576 if (!CannotRemove)
3577 return eraseInstFromFunction(CI);
3578 }
3579 }
3580
3581 // Scan down this block to see if there is another stack restore in the
3582 // same block without an intervening call/alloca.
3584 Instruction *TI = II->getParent()->getTerminator();
3585 bool CannotRemove = false;
3586 for (++BI; &*BI != TI; ++BI) {
3587 switch (Classify(&*BI)) {
3588 case ClassifyResult::None:
3589 // So far so good, look at next instructions.
3590 break;
3591
3592 case ClassifyResult::StackRestore:
3593 // If there is a stackrestore below this one, remove this one.
3594 return eraseInstFromFunction(CI);
3595
3596 case ClassifyResult::Alloca:
3597 case ClassifyResult::CallWithSideEffects:
3598 // If we found an alloca, a non-intrinsic call, or an intrinsic call
3599 // with side effects (such as llvm.stacksave and llvm.read_register),
3600 // we can't remove the stack restore.
3601 CannotRemove = true;
3602 break;
3603 }
3604 if (CannotRemove)
3605 break;
3606 }
3607
3608 // If the stack restore is in a return, resume, or unwind block and if there
3609 // are no allocas or calls between the restore and the return, nuke the
3610 // restore.
3611 if (!CannotRemove && (isa<ReturnInst>(TI) || isa<ResumeInst>(TI)))
3612 return eraseInstFromFunction(CI);
3613 break;
3614 }
3615 case Intrinsic::lifetime_end:
3616 // Asan needs to poison memory to detect invalid access which is possible
3617 // even for empty lifetime range.
3618 if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress) ||
3619 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemory) ||
3620 II->getFunction()->hasFnAttribute(Attribute::SanitizeHWAddress) ||
3621 II->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag))
3622 break;
3623
3624 if (removeTriviallyEmptyRange(*II, *this, [](const IntrinsicInst &I) {
3625 return I.getIntrinsicID() == Intrinsic::lifetime_start;
3626 }))
3627 return nullptr;
3628 break;
3629 case Intrinsic::assume: {
3630 Value *IIOperand = II->getArgOperand(0);
3631
3632 // Canonicalize assume(a && b) -> assume(a); assume(b);
3633 // Note: New assumption intrinsics created here are registered by
3634 // the InstCombineIRInserter object.
3635 Value *A, *B;
3636 if (match(IIOperand, m_LogicalAnd(m_Value(A), m_Value(B)))) {
3637 Builder.CreateAssumption(A);
3638 Builder.CreateAssumption(B);
3639 return eraseInstFromFunction(*II);
3640 }
3641 // assume(!(a || b)) -> assume(!a); assume(!b);
3642 if (match(IIOperand, m_Not(m_LogicalOr(m_Value(A), m_Value(B))))) {
3643 Builder.CreateAssumption(Builder.CreateNot(A));
3644 Builder.CreateAssumption(Builder.CreateNot(B));
3645 return eraseInstFromFunction(*II);
3646 }
3647
3648 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3649 OperandBundleUse OBU = II->getOperandBundleAt(Idx);
3650
3651 // Separate storage assumptions apply to the underlying allocations, not
3652 // any particular pointer within them. When evaluating the hints for AA
3653 // purposes we getUnderlyingObject them; by precomputing the answers here
3654 // we can avoid having to do so repeatedly there.
3655 if (OBU.getTagName() == "separate_storage") {
3656 assert(OBU.Inputs.size() == 2);
3657 auto MaybeSimplifyHint = [&](const Use &U) {
3658 Value *Hint = U.get();
3659 // Not having a limit is safe because InstCombine removes unreachable
3660 // code.
3661 Value *UnderlyingObject = getUnderlyingObject(Hint, /*MaxLookup*/ 0);
3662 if (Hint != UnderlyingObject)
3663 replaceUse(const_cast<Use &>(U), UnderlyingObject);
3664 };
3665 MaybeSimplifyHint(OBU.Inputs[0]);
3666 MaybeSimplifyHint(OBU.Inputs[1]);
3667 }
3668
3669 // Try to remove redundant alignment assumptions.
3670 if (OBU.getTagName() == "align" && OBU.Inputs.size() == 2) {
3672 *cast<AssumeInst>(II), II->arg_size() + Idx);
3673 if (!RK || RK.AttrKind != Attribute::Alignment ||
3675 continue;
3676
3677 // Remove align 1 bundles; they don't add any useful information.
3678 if (RK.ArgValue == 1)
3680
3681 // Don't try to remove align assumptions for pointers derived from
3682 // arguments. We might lose information if the function gets inlined and
3683 // the align argument attribute disappears.
3685 if (!UO || isa<Argument>(UO))
3686 continue;
3687
3688 // Compute known bits for the pointer and drop the assume if the
3689 // known alignment isn't increased by it.
3690 if ((1ULL << computeKnownBits(RK.WasOn, II).countMinTrailingZeros()) <
3691 RK.ArgValue)
3692 continue;
3694 }
3695
3696 if (OBU.getTagName() == "nonnull" && OBU.Inputs.size() == 1) {
3698 *cast<AssumeInst>(II), II->arg_size() + Idx);
3699 if (!RK || RK.AttrKind != Attribute::NonNull)
3700 continue;
3701
3702 // Drop assume if we can prove nonnull without it
3703 if (isKnownNonZero(RK.WasOn, getSimplifyQuery().getWithInstruction(II)))
3705
3706 // Fold the assume into metadata if it's valid at the load
3707 if (auto *LI = dyn_cast<LoadInst>(RK.WasOn);
3708 LI &&
3709 isValidAssumeForContext(II, LI, &DT, /*AllowEphemerals=*/true)) {
3710 MDNode *MD = MDNode::get(II->getContext(), {});
3711 LI->setMetadata(LLVMContext::MD_nonnull, MD);
3712 LI->setMetadata(LLVMContext::MD_noundef, MD);
3714 }
3715
3716 // TODO: apply nonnull return attributes to calls and invokes
3717 }
3718 }
3719
3720 // Convert nonnull assume like:
3721 // %A = icmp ne i32* %PTR, null
3722 // call void @llvm.assume(i1 %A)
3723 // into
3724 // call void @llvm.assume(i1 true) [ "nonnull"(i32* %PTR) ]
3725 if (match(IIOperand,
3727 A->getType()->isPointerTy()) {
3728 Builder.CreateNonnullAssumption(A);
3729 return eraseInstFromFunction(*II);
3730 }
3731
3732 // Convert alignment assume like:
3733 // %B = ptrtoint i32* %A to i64
3734 // %C = and i64 %B, Constant
3735 // %D = icmp eq i64 %C, 0
3736 // call void @llvm.assume(i1 %D)
3737 // into
3738 // call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 Constant + 1)]
3739 uint64_t AlignMask = 1;
3740 if ((match(IIOperand, m_Not(m_Trunc(m_Value(A)))) ||
3741 match(IIOperand,
3743 m_And(m_Value(A), m_ConstantInt(AlignMask)),
3744 m_Zero())))) {
3745 if (isPowerOf2_64(AlignMask + 1)) {
3746 uint64_t Offset = 0;
3748 if (match(A, m_PtrToIntOrAddr(m_Value(A)))) {
3749 /// Note: this doesn't preserve the offset information but merges
3750 /// offset and alignment.
3751 /// TODO: we can generate a GEP instead of merging the alignment with
3752 /// the offset.
3753 Builder.CreateAlignmentAssumption(getDataLayout(), A,
3754 MinAlign(Offset, AlignMask + 1));
3755 return eraseInstFromFunction(*II);
3756 }
3757 }
3758 }
3759
3760 /// Canonicalize Knowledge in operand bundles.
3761 if (EnableKnowledgeRetention && II->hasOperandBundles()) {
3762 for (unsigned Idx = 0; Idx < II->getNumOperandBundles(); Idx++) {
3763 auto &BOI = II->bundle_op_info_begin()[Idx];
3766 if (BOI.End - BOI.Begin > 2)
3767 continue; // Prevent reducing knowledge in an align with offset since
3768 // extracting a RetainedKnowledge from them loses offset
3769 // information
3770 RetainedKnowledge CanonRK =
3773 &getDominatorTree());
3774 if (CanonRK == RK)
3775 continue;
3776 if (!CanonRK) {
3777 if (BOI.End - BOI.Begin > 0) {
3778 Worklist.pushValue(II->op_begin()[BOI.Begin]);
3779 Value::dropDroppableUse(II->op_begin()[BOI.Begin]);
3780 }
3781 continue;
3782 }
3783 assert(RK.AttrKind == CanonRK.AttrKind);
3784 if (BOI.End - BOI.Begin > 0)
3785 II->op_begin()[BOI.Begin].set(CanonRK.WasOn);
3786 if (BOI.End - BOI.Begin > 1)
3787 II->op_begin()[BOI.Begin + 1].set(ConstantInt::get(
3788 Type::getInt64Ty(II->getContext()), CanonRK.ArgValue));
3789 if (RK.WasOn)
3790 Worklist.pushValue(RK.WasOn);
3791 return II;
3792 }
3793 }
3794
3795 // If there is a dominating assume with the same condition as this one,
3796 // then this one is redundant, and should be removed.
3797 KnownBits Known(1);
3798 computeKnownBits(IIOperand, Known, II);
3800 return eraseInstFromFunction(*II);
3801
3802 // assume(false) is unreachable.
3803 if (match(IIOperand, m_CombineOr(m_Zero(), m_Undef()))) {
3805 return eraseInstFromFunction(*II);
3806 }
3807
3808 // Update the cache of affected values for this assumption (we might be
3809 // here because we just simplified the condition).
3810 AC.updateAffectedValues(cast<AssumeInst>(II));
3811 break;
3812 }
3813 case Intrinsic::experimental_guard: {
3814 // Is this guard followed by another guard? We scan forward over a small
3815 // fixed window of instructions to handle common cases with conditions
3816 // computed between guards.
3817 Instruction *NextInst = II->getNextNode();
3818 for (unsigned i = 0; i < GuardWideningWindow; i++) {
3819 // Note: Using context-free form to avoid compile time blow up
3820 if (!isSafeToSpeculativelyExecute(NextInst))
3821 break;
3822 NextInst = NextInst->getNextNode();
3823 }
3824 Value *NextCond = nullptr;
3825 if (match(NextInst,
3827 Value *CurrCond = II->getArgOperand(0);
3828
3829 // Remove a guard that is immediately preceded by an identical guard.
3830 // Otherwise canonicalize guard(a); guard(b) -> guard(a & b).
3831 if (CurrCond != NextCond) {
3832 Instruction *MoveI = II->getNextNode();
3833 while (MoveI != NextInst) {
3834 auto *Temp = MoveI;
3835 MoveI = MoveI->getNextNode();
3836 Temp->moveBefore(II->getIterator());
3837 }
3838 replaceOperand(*II, 0, Builder.CreateAnd(CurrCond, NextCond));
3839 }
3840 eraseInstFromFunction(*NextInst);
3841 return II;
3842 }
3843 break;
3844 }
3845 case Intrinsic::vector_insert: {
3846 Value *Vec = II->getArgOperand(0);
3847 Value *SubVec = II->getArgOperand(1);
3848 Value *Idx = II->getArgOperand(2);
3849 auto *DstTy = dyn_cast<FixedVectorType>(II->getType());
3850 auto *VecTy = dyn_cast<FixedVectorType>(Vec->getType());
3851 auto *SubVecTy = dyn_cast<FixedVectorType>(SubVec->getType());
3852
3853 // Only canonicalize if the destination vector, Vec, and SubVec are all
3854 // fixed vectors.
3855 if (DstTy && VecTy && SubVecTy) {
3856 unsigned DstNumElts = DstTy->getNumElements();
3857 unsigned VecNumElts = VecTy->getNumElements();
3858 unsigned SubVecNumElts = SubVecTy->getNumElements();
3859 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3860
3861 // An insert that entirely overwrites Vec with SubVec is a nop.
3862 if (VecNumElts == SubVecNumElts)
3863 return replaceInstUsesWith(CI, SubVec);
3864
3865 // Widen SubVec into a vector of the same width as Vec, since
3866 // shufflevector requires the two input vectors to be the same width.
3867 // Elements beyond the bounds of SubVec within the widened vector are
3868 // undefined.
3869 SmallVector<int, 8> WidenMask;
3870 unsigned i;
3871 for (i = 0; i != SubVecNumElts; ++i)
3872 WidenMask.push_back(i);
3873 for (; i != VecNumElts; ++i)
3874 WidenMask.push_back(PoisonMaskElem);
3875
3876 Value *WidenShuffle = Builder.CreateShuffleVector(SubVec, WidenMask);
3877
3879 for (unsigned i = 0; i != IdxN; ++i)
3880 Mask.push_back(i);
3881 for (unsigned i = DstNumElts; i != DstNumElts + SubVecNumElts; ++i)
3882 Mask.push_back(i);
3883 for (unsigned i = IdxN + SubVecNumElts; i != DstNumElts; ++i)
3884 Mask.push_back(i);
3885
3886 Value *Shuffle = Builder.CreateShuffleVector(Vec, WidenShuffle, Mask);
3887 return replaceInstUsesWith(CI, Shuffle);
3888 }
3889 break;
3890 }
3891 case Intrinsic::vector_extract: {
3892 Value *Vec = II->getArgOperand(0);
3893 Value *Idx = II->getArgOperand(1);
3894
3895 Type *ReturnType = II->getType();
3896 // (extract_vector (insert_vector InsertTuple, InsertValue, InsertIdx),
3897 // ExtractIdx)
3898 unsigned ExtractIdx = cast<ConstantInt>(Idx)->getZExtValue();
3899 Value *InsertTuple, *InsertIdx, *InsertValue;
3901 m_Value(InsertValue),
3902 m_Value(InsertIdx))) &&
3903 InsertValue->getType() == ReturnType) {
3904 unsigned Index = cast<ConstantInt>(InsertIdx)->getZExtValue();
3905 // Case where we get the same index right after setting it.
3906 // extract.vector(insert.vector(InsertTuple, InsertValue, Idx), Idx) -->
3907 // InsertValue
3908 if (ExtractIdx == Index)
3909 return replaceInstUsesWith(CI, InsertValue);
3910 // If we are getting a different index than what was set in the
3911 // insert.vector intrinsic. We can just set the input tuple to the one up
3912 // in the chain. extract.vector(insert.vector(InsertTuple, InsertValue,
3913 // InsertIndex), ExtractIndex)
3914 // --> extract.vector(InsertTuple, ExtractIndex)
3915 else
3916 return replaceOperand(CI, 0, InsertTuple);
3917 }
3918
3919 ConstantInt *ALMUpperBound;
3921 m_Value(), m_ConstantInt(ALMUpperBound)))) {
3922 const auto &Attrs = II->getFunction()->getAttributes().getFnAttrs();
3923 unsigned VScaleMin = Attrs.getVScaleRangeMin();
3924 unsigned ScaleFactor =
3925 cast<VectorType>(ReturnType)->isScalableTy() ? VScaleMin : 1;
3926 if (ExtractIdx * ScaleFactor >= ALMUpperBound->getZExtValue())
3927 return replaceInstUsesWith(CI,
3928 ConstantVector::getNullValue(ReturnType));
3929 }
3930
3931 auto *DstTy = dyn_cast<VectorType>(ReturnType);
3932 auto *VecTy = dyn_cast<VectorType>(Vec->getType());
3933
3934 if (DstTy && VecTy) {
3935 auto DstEltCnt = DstTy->getElementCount();
3936 auto VecEltCnt = VecTy->getElementCount();
3937 unsigned IdxN = cast<ConstantInt>(Idx)->getZExtValue();
3938
3939 // Extracting the entirety of Vec is a nop.
3940 if (DstEltCnt == VecTy->getElementCount()) {
3941 replaceInstUsesWith(CI, Vec);
3942 return eraseInstFromFunction(CI);
3943 }
3944
3945 // Only canonicalize to shufflevector if the destination vector and
3946 // Vec are fixed vectors.
3947 if (VecEltCnt.isScalable() || DstEltCnt.isScalable())
3948 break;
3949
3951 for (unsigned i = 0; i != DstEltCnt.getKnownMinValue(); ++i)
3952 Mask.push_back(IdxN + i);
3953
3954 Value *Shuffle = Builder.CreateShuffleVector(Vec, Mask);
3955 return replaceInstUsesWith(CI, Shuffle);
3956 }
3957 break;
3958 }
3959 case Intrinsic::vp_load: {
3960 auto *VPI = cast<VPIntrinsic>(II);
3961 // Fold away bit casts of the loaded value by loading the desired type,
3962 // if the mask is all-ones.
3963 Value *Mask = VPI->getMaskParam();
3964 Value *EVL = VPI->getVectorLengthParam();
3965 if (!isa<Constant>(Mask) || !cast<Constant>(Mask)->isAllOnesValue() ||
3966 !II->hasOneUse())
3967 break;
3968
3969 const DataLayout &DL = II->getDataLayout();
3970 auto *Cast = dyn_cast<CastInst>(II->user_back());
3971 if (!Cast || !Cast->isNoopCast(DL) || !isa<VectorType>(Cast->getDestTy()))
3972 break;
3973 VectorType *OrigVecTy = cast<VectorType>(II->getType());
3974 Align OrigAlign =
3975 DL.getValueOrABITypeAlignment(VPI->getPointerAlignment(), OrigVecTy);
3976 ElementCount OrigVecCnt = OrigVecTy->getElementCount();
3977 VectorType *NewVecTy = cast<VectorType>(Cast->getDestTy());
3978 ElementCount NewVecCnt = NewVecTy->getElementCount();
3979
3980 // Right now we only support cases where the NewVec is longer, because for
3981 // cases where it's shorter, we have to be sure that EVL can be exactly
3982 // divided, otherwise it might yield incorrect results or even page faults
3983 // (if we round-up during the division).
3984 if (OrigVecCnt.isScalable() == NewVecCnt.isScalable() &&
3985 NewVecCnt.hasKnownScalarFactor(OrigVecCnt)) {
3986 unsigned Factor = NewVecCnt.getKnownScalarFactor(OrigVecCnt);
3987 Value *NewEVL = Builder.CreateNUWMul(EVL, Builder.getInt32(Factor));
3988 Value *NewMask = Builder.CreateVectorSplat(NewVecCnt, Builder.getTrue());
3989 CallInst *NewVP = Builder.CreateIntrinsic(
3990 NewVecTy, Intrinsic::vp_load,
3991 {VPI->getMemoryPointerParam(), NewMask, NewEVL});
3992 // Preserve the original alignment.
3993 NewVP->addParamAttrs(
3994 0, AttrBuilder(VPI->getContext()).addAlignmentAttr(OrigAlign));
3995 replaceInstUsesWith(*Cast, NewVP);
3996 return eraseInstFromFunction(*Cast);
3997 }
3998 break;
3999 }
4000 case Intrinsic::experimental_vp_reverse: {
4001 Value *X;
4002 Value *Vec = II->getArgOperand(0);
4003 Value *Mask = II->getArgOperand(1);
4004 if (!match(Mask, m_AllOnes()))
4005 break;
4006 Value *EVL = II->getArgOperand(2);
4007 // TODO: Canonicalize experimental.vp.reverse after unop/binops?
4008 // rev(unop rev(X)) --> unop X
4009 if (match(Vec,
4011 m_Value(X), m_AllOnes(), m_Specific(EVL)))))) {
4012 auto *OldUnOp = cast<UnaryOperator>(Vec);
4014 OldUnOp->getOpcode(), X, OldUnOp, OldUnOp->getName(),
4015 II->getIterator());
4016 return replaceInstUsesWith(CI, NewUnOp);
4017 }
4018 break;
4019 }
4020 case Intrinsic::vector_reduce_or:
4021 case Intrinsic::vector_reduce_and: {
4022 // Canonicalize logical or/and reductions:
4023 // Or reduction for i1 is represented as:
4024 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4025 // %res = cmp ne iReduxWidth %val, 0
4026 // And reduction for i1 is represented as:
4027 // %val = bitcast <ReduxWidth x i1> to iReduxWidth
4028 // %res = cmp eq iReduxWidth %val, 11111
4029 Value *Arg = II->getArgOperand(0);
4030 Value *Vect;
4031
4032 if (Value *NewOp =
4033 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4034 replaceUse(II->getOperandUse(0), NewOp);
4035 return II;
4036 }
4037
4038 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4039 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4040 if (FTy->getElementType() == Builder.getInt1Ty()) {
4041 Value *Res = Builder.CreateBitCast(
4042 Vect, Builder.getIntNTy(FTy->getNumElements()));
4043 if (IID == Intrinsic::vector_reduce_and) {
4044 Res = Builder.CreateICmpEQ(
4046 } else {
4047 assert(IID == Intrinsic::vector_reduce_or &&
4048 "Expected or reduction.");
4049 Res = Builder.CreateIsNotNull(Res);
4050 }
4051 if (Arg != Vect)
4052 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4053 II->getType());
4054 return replaceInstUsesWith(CI, Res);
4055 }
4056 }
4057 [[fallthrough]];
4058 }
4059 case Intrinsic::vector_reduce_add: {
4060 if (IID == Intrinsic::vector_reduce_add) {
4061 // Convert vector_reduce_add(ZExt(<n x i1>)) to
4062 // ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4063 // Convert vector_reduce_add(SExt(<n x i1>)) to
4064 // -ZExtOrTrunc(ctpop(bitcast <n x i1> to in)).
4065 // Convert vector_reduce_add(<n x i1>) to
4066 // Trunc(ctpop(bitcast <n x i1> to in)).
4067 Value *Arg = II->getArgOperand(0);
4068 Value *Vect;
4069
4070 if (Value *NewOp =
4071 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4072 replaceUse(II->getOperandUse(0), NewOp);
4073 return II;
4074 }
4075
4076 // vector.reduce.add.vNiM(splat(%x)) -> mul(%x, N)
4077 if (Value *Splat = getSplatValue(Arg)) {
4078 ElementCount VecToReduceCount =
4079 cast<VectorType>(Arg->getType())->getElementCount();
4080 if (VecToReduceCount.isFixed()) {
4081 unsigned VectorSize = VecToReduceCount.getFixedValue();
4082 return BinaryOperator::CreateMul(
4083 Splat,
4084 ConstantInt::get(Splat->getType(), VectorSize, /*IsSigned=*/false,
4085 /*ImplicitTrunc=*/true));
4086 }
4087 }
4088
4089 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4090 if (auto *FTy = dyn_cast<FixedVectorType>(Vect->getType()))
4091 if (FTy->getElementType() == Builder.getInt1Ty()) {
4092 Value *V = Builder.CreateBitCast(
4093 Vect, Builder.getIntNTy(FTy->getNumElements()));
4094 Value *Res = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, V);
4095 Res = Builder.CreateZExtOrTrunc(Res, II->getType());
4096 if (Arg != Vect &&
4097 cast<Instruction>(Arg)->getOpcode() == Instruction::SExt)
4098 Res = Builder.CreateNeg(Res);
4099 return replaceInstUsesWith(CI, Res);
4100 }
4101 }
4102 }
4103 [[fallthrough]];
4104 }
4105 case Intrinsic::vector_reduce_xor: {
4106 if (IID == Intrinsic::vector_reduce_xor) {
4107 // Exclusive disjunction reduction over the vector with
4108 // (potentially-extended) i1 element type is actually a
4109 // (potentially-extended) arithmetic `add` reduction over the original
4110 // non-extended value:
4111 // vector_reduce_xor(?ext(<n x i1>))
4112 // -->
4113 // ?ext(vector_reduce_add(<n x i1>))
4114 Value *Arg = II->getArgOperand(0);
4115 Value *Vect;
4116
4117 if (Value *NewOp =
4118 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4119 replaceUse(II->getOperandUse(0), NewOp);
4120 return II;
4121 }
4122
4123 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4124 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4125 if (VTy->getElementType() == Builder.getInt1Ty()) {
4126 Value *Res = Builder.CreateAddReduce(Vect);
4127 if (Arg != Vect)
4128 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4129 II->getType());
4130 return replaceInstUsesWith(CI, Res);
4131 }
4132 }
4133 }
4134 [[fallthrough]];
4135 }
4136 case Intrinsic::vector_reduce_mul: {
4137 if (IID == Intrinsic::vector_reduce_mul) {
4138 Value *Arg = II->getArgOperand(0);
4139
4140 if (Value *NewOp =
4141 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4142 replaceUse(II->getOperandUse(0), NewOp);
4143 return II;
4144 }
4145
4146 // vector_reduce_mul(zext(<n x i1>)), or
4147 // vector_reduce_mul(sext(<n x i1>)) (if n is even) -->
4148 // zext(vector_reduce_and(<n x i1>)).
4149 // (The sext case doesn't work if n is odd because multiplying an odd
4150 // number of -1's produces -1, not 1.)
4151 Value *Vect;
4152 bool IsZext = match(Arg, m_ZExt(m_Value(Vect))) &&
4153 Vect->getType()->isIntOrIntVectorTy(1);
4154 bool IsSext =
4155 match(Arg, m_SExt(m_Value(Vect))) &&
4156 Vect->getType()->isIntOrIntVectorTy(1) &&
4157 cast<VectorType>(Vect->getType())->getElementCount().isKnownEven();
4158 if (IsZext || IsSext) {
4159 Value *Res = Builder.CreateAndReduce(Vect);
4160 return CastInst::Create(Instruction::ZExt, Res, II->getType());
4161 }
4162
4163 // vector_reduce_mul(<n x i1>) --> vector_reduce_and(<n x i1>)
4164 if (Arg->getType()->isIntOrIntVectorTy(1))
4165 return replaceInstUsesWith(CI, Builder.CreateAndReduce(Arg));
4166 }
4167 [[fallthrough]];
4168 }
4169 case Intrinsic::vector_reduce_umin:
4170 case Intrinsic::vector_reduce_umax: {
4171 if (IID == Intrinsic::vector_reduce_umin ||
4172 IID == Intrinsic::vector_reduce_umax) {
4173 // UMin/UMax reduction over the vector with (potentially-extended)
4174 // i1 element type is actually a (potentially-extended)
4175 // logical `and`/`or` reduction over the original non-extended value:
4176 // vector_reduce_u{min,max}(?ext(<n x i1>))
4177 // -->
4178 // ?ext(vector_reduce_{and,or}(<n x i1>))
4179 Value *Arg = II->getArgOperand(0);
4180 Value *Vect;
4181
4182 if (Value *NewOp =
4183 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4184 replaceUse(II->getOperandUse(0), NewOp);
4185 return II;
4186 }
4187
4188 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4189 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4190 if (VTy->getElementType() == Builder.getInt1Ty()) {
4191 Value *Res = IID == Intrinsic::vector_reduce_umin
4192 ? Builder.CreateAndReduce(Vect)
4193 : Builder.CreateOrReduce(Vect);
4194 if (Arg != Vect)
4195 Res = Builder.CreateCast(cast<CastInst>(Arg)->getOpcode(), Res,
4196 II->getType());
4197 return replaceInstUsesWith(CI, Res);
4198 }
4199 }
4200 }
4201 [[fallthrough]];
4202 }
4203 case Intrinsic::vector_reduce_smin:
4204 case Intrinsic::vector_reduce_smax: {
4205 if (IID == Intrinsic::vector_reduce_smin ||
4206 IID == Intrinsic::vector_reduce_smax) {
4207 // SMin/SMax reduction over the vector with (potentially-extended)
4208 // i1 element type is actually a (potentially-extended)
4209 // logical `and`/`or` reduction over the original non-extended value:
4210 // vector_reduce_s{min,max}(<n x i1>)
4211 // -->
4212 // vector_reduce_{or,and}(<n x i1>)
4213 // and
4214 // vector_reduce_s{min,max}(sext(<n x i1>))
4215 // -->
4216 // sext(vector_reduce_{or,and}(<n x i1>))
4217 // and
4218 // vector_reduce_s{min,max}(zext(<n x i1>))
4219 // -->
4220 // zext(vector_reduce_{and,or}(<n x i1>))
4221 Value *Arg = II->getArgOperand(0);
4222 Value *Vect;
4223
4224 if (Value *NewOp =
4225 simplifyReductionOperand(Arg, /*CanReorderLanes=*/true)) {
4226 replaceUse(II->getOperandUse(0), NewOp);
4227 return II;
4228 }
4229
4230 if (match(Arg, m_ZExtOrSExtOrSelf(m_Value(Vect)))) {
4231 if (auto *VTy = dyn_cast<VectorType>(Vect->getType()))
4232 if (VTy->getElementType() == Builder.getInt1Ty()) {
4233 Instruction::CastOps ExtOpc = Instruction::CastOps::CastOpsEnd;
4234 if (Arg != Vect)
4235 ExtOpc = cast<CastInst>(Arg)->getOpcode();
4236 Value *Res = ((IID == Intrinsic::vector_reduce_smin) ==
4237 (ExtOpc == Instruction::CastOps::ZExt))
4238 ? Builder.CreateAndReduce(Vect)
4239 : Builder.CreateOrReduce(Vect);
4240 if (Arg != Vect)
4241 Res = Builder.CreateCast(ExtOpc, Res, II->getType());
4242 return replaceInstUsesWith(CI, Res);
4243 }
4244 }
4245 }
4246 [[fallthrough]];
4247 }
4248 case Intrinsic::vector_reduce_fmax:
4249 case Intrinsic::vector_reduce_fmin:
4250 case Intrinsic::vector_reduce_fadd:
4251 case Intrinsic::vector_reduce_fmul: {
4252 bool CanReorderLanes = (IID != Intrinsic::vector_reduce_fadd &&
4253 IID != Intrinsic::vector_reduce_fmul) ||
4254 II->hasAllowReassoc();
4255 const unsigned ArgIdx = (IID == Intrinsic::vector_reduce_fadd ||
4256 IID == Intrinsic::vector_reduce_fmul)
4257 ? 1
4258 : 0;
4259 Value *Arg = II->getArgOperand(ArgIdx);
4260 if (Value *NewOp = simplifyReductionOperand(Arg, CanReorderLanes)) {
4261 replaceUse(II->getOperandUse(ArgIdx), NewOp);
4262 return nullptr;
4263 }
4264 break;
4265 }
4266 case Intrinsic::is_fpclass: {
4267 if (Instruction *I = foldIntrinsicIsFPClass(*II))
4268 return I;
4269 break;
4270 }
4271 case Intrinsic::threadlocal_address: {
4272 Align MinAlign = getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
4273 MaybeAlign Align = II->getRetAlign();
4274 if (MinAlign > Align.valueOrOne()) {
4275 II->addRetAttr(Attribute::getWithAlignment(II->getContext(), MinAlign));
4276 return II;
4277 }
4278 break;
4279 }
4280 case Intrinsic::fptoui_sat:
4281 case Intrinsic::fptosi_sat:
4282 if (Instruction *I = foldItoFPtoI(*II))
4283 return I;
4284 break;
4285 case Intrinsic::frexp: {
4286 Value *X;
4287 // The first result is idempotent with the added complication of the struct
4288 // return, and the second result is zero because the value is already
4289 // normalized.
4290 if (match(II->getArgOperand(0), m_ExtractValue<0>(m_Value(X)))) {
4292 X = Builder.CreateInsertValue(
4293 X, Constant::getNullValue(II->getType()->getStructElementType(1)),
4294 1);
4295 return replaceInstUsesWith(*II, X);
4296 }
4297 }
4298 break;
4299 }
4300 case Intrinsic::get_active_lane_mask: {
4301 const APInt *Op0, *Op1;
4302 if (match(II->getOperand(0), m_StrictlyPositive(Op0)) &&
4303 match(II->getOperand(1), m_APInt(Op1))) {
4304 Type *OpTy = II->getOperand(0)->getType();
4305 return replaceInstUsesWith(
4306 *II, Builder.CreateIntrinsic(
4307 II->getType(), Intrinsic::get_active_lane_mask,
4308 {Constant::getNullValue(OpTy),
4309 ConstantInt::get(OpTy, Op1->usub_sat(*Op0))}));
4310 }
4311 break;
4312 }
4313 case Intrinsic::experimental_get_vector_length: {
4314 // get.vector.length(Cnt, MaxLanes) --> Cnt when Cnt <= MaxLanes
4315 unsigned BitWidth =
4316 std::max(II->getArgOperand(0)->getType()->getScalarSizeInBits(),
4317 II->getType()->getScalarSizeInBits());
4318 ConstantRange Cnt =
4319 computeConstantRangeIncludingKnownBits(II->getArgOperand(0), false,
4320 SQ.getWithInstruction(II))
4322 ConstantRange MaxLanes = cast<ConstantInt>(II->getArgOperand(1))
4323 ->getValue()
4324 .zextOrTrunc(Cnt.getBitWidth());
4325 if (cast<ConstantInt>(II->getArgOperand(2))->isOne())
4326 MaxLanes = MaxLanes.multiply(
4327 getVScaleRange(II->getFunction(), Cnt.getBitWidth()));
4328
4329 if (Cnt.icmp(CmpInst::ICMP_ULE, MaxLanes))
4330 return replaceInstUsesWith(
4331 *II, Builder.CreateZExtOrTrunc(II->getArgOperand(0), II->getType()));
4332 return nullptr;
4333 }
4334 default: {
4335 // Handle target specific intrinsics
4336 std::optional<Instruction *> V = targetInstCombineIntrinsic(*II);
4337 if (V)
4338 return *V;
4339 break;
4340 }
4341 }
4342
4343 // Try to fold intrinsic into select/phi operands. This is legal if:
4344 // * The intrinsic is speculatable.
4345 // * The operand is one of the following:
4346 // - a phi.
4347 // - a select with a scalar condition.
4348 // - a select with a vector condition and II is not a cross lane operation.
4350 for (Value *Op : II->args()) {
4351 if (auto *Sel = dyn_cast<SelectInst>(Op)) {
4352 bool IsVectorCond = Sel->getCondition()->getType()->isVectorTy();
4353 if (IsVectorCond &&
4354 (!isNotCrossLaneOperation(II) || !II->getType()->isVectorTy()))
4355 continue;
4356 // Don't replace a scalar select with a more expensive vector select if
4357 // we can't simplify both arms of the select.
4358 bool SimplifyBothArms =
4359 !Op->getType()->isVectorTy() && II->getType()->isVectorTy();
4361 *II, Sel, /*FoldWithMultiUse=*/false, SimplifyBothArms))
4362 return R;
4363 }
4364 if (auto *Phi = dyn_cast<PHINode>(Op))
4365 if (Instruction *R = foldOpIntoPhi(*II, Phi))
4366 return R;
4367 }
4368 }
4369
4371 return Shuf;
4372
4374 return replaceInstUsesWith(*II, Reverse);
4375
4377 return replaceInstUsesWith(*II, Res);
4378
4379 // Some intrinsics (like experimental_gc_statepoint) can be used in invoke
4380 // context, so it is handled in visitCallBase and we should trigger it.
4381 return visitCallBase(*II);
4382}
4383
4384// Fence instruction simplification
4386 auto *NFI = dyn_cast<FenceInst>(FI.getNextNode());
4387 // This check is solely here to handle arbitrary target-dependent syncscopes.
4388 // TODO: Can remove if does not matter in practice.
4389 if (NFI && FI.isIdenticalTo(NFI))
4390 return eraseInstFromFunction(FI);
4391
4392 // Returns true if FI1 is identical or stronger fence than FI2.
4393 auto isIdenticalOrStrongerFence = [](FenceInst *FI1, FenceInst *FI2) {
4394 auto FI1SyncScope = FI1->getSyncScopeID();
4395 // Consider same scope, where scope is global or single-thread.
4396 if (FI1SyncScope != FI2->getSyncScopeID() ||
4397 (FI1SyncScope != SyncScope::System &&
4398 FI1SyncScope != SyncScope::SingleThread))
4399 return false;
4400
4401 return isAtLeastOrStrongerThan(FI1->getOrdering(), FI2->getOrdering());
4402 };
4403 if (NFI && isIdenticalOrStrongerFence(NFI, &FI))
4404 return eraseInstFromFunction(FI);
4405
4406 if (auto *PFI = dyn_cast_or_null<FenceInst>(FI.getPrevNode()))
4407 if (isIdenticalOrStrongerFence(PFI, &FI))
4408 return eraseInstFromFunction(FI);
4409 return nullptr;
4410}
4411
4412// InvokeInst simplification
4414 return visitCallBase(II);
4415}
4416
4417// CallBrInst simplification
4419 return visitCallBase(CBI);
4420}
4421
// Rewrites a call carrying the "modular-format" string function attribute so
// that it targets the function named by the attribute and references only the
// implementation aspects this particular call needs.
// Returns the replacement call, or nullptr when the attribute is absent, the
// first-argument index is 0, there are no aspects, or every aspect is needed.
// NOTE(review): the function header and the declarations of `Args` (listing
// line 4426) and `AllAspects` (listing line 4438) are missing from this
// excerpt; `CI` is the call being rewritten and `B` the builder used for
// insertion -- confirm the exact types against the real source.
4423 if (!CI->hasFnAttr("modular-format"))
4424 return nullptr;
4425
// The attribute value is split on ','; fields 2, 3 and 4 are consumed below.
4427 llvm::split(CI->getFnAttr("modular-format").getValueAsString(), ','));
4428 // TODO: Make use of the first two arguments
4429 unsigned FirstArgIdx;
4430 [[maybe_unused]] bool Error;
// Field 2 is parsed as a base-10 index. A value of 0 means there is nothing
// to inspect; otherwise it is decremented to a zero-based argument offset.
4431 Error = Args[2].getAsInteger(10, FirstArgIdx);
4432 assert(!Error && "invalid first arg index");
4433 if (FirstArgIdx == 0)
4434 return nullptr;
4435 --FirstArgIdx;
4436 StringRef FnName = Args[3];
4437 StringRef ImplName = Args[4];
4439
4440 if (AllAspects.empty())
4441 return nullptr;
4442
// Decide, aspect by aspect, whether this call site needs it.
4443 SmallVector<StringRef> NeededAspects;
4444 for (StringRef Aspect : AllAspects) {
4445 if (Aspect == "float") {
// "float" is needed only if some call argument at or after FirstArgIdx has a
// floating-point type.
4446 if (llvm::any_of(
4447 llvm::make_range(std::next(CI->arg_begin(), FirstArgIdx),
4448 CI->arg_end()),
4449 [](Value *V) { return V->getType()->isFloatingPointTy(); }))
4450 NeededAspects.push_back("float");
4451 } else {
4452 // Unknown aspects are always considered to be needed.
4453 NeededAspects.push_back(Aspect);
4454 }
4455 }
4456
// Nothing can be dropped, so leave the original call untouched.
4457 if (NeededAspects.size() == AllAspects.size())
4458 return nullptr;
4459
// Clone the call, retarget the clone at FnName (declared with the callee's
// function type and its attributes minus "modular-format"), and insert it.
4460 Module *M = CI->getModule();
4461 LLVMContext &Ctx = M->getContext();
4462 Function *Callee = CI->getCalledFunction();
4463 FunctionCallee ModularFn = M->getOrInsertFunction(
4464 FnName, Callee->getFunctionType(),
4465 Callee->getAttributes().removeFnAttribute(Ctx, "modular-format"));
4466 CallInst *New = cast<CallInst>(CI->clone());
4467 New->setCalledFunction(ModularFn);
4468 New->removeFnAttr("modular-format");
4469 B.Insert(New);
4470
// Emits a call to the reloc_none intrinsic whose metadata-string operand is
// "<ImplName>_<Aspect>".
4471 const auto ReferenceAspect = [&](StringRef Aspect) {
4472 SmallString<20> Name = ImplName;
4473 Name += '_';
4474 Name += Aspect;
4475 Function *RelocNoneFn =
4476 Intrinsic::getOrInsertDeclaration(M, Intrinsic::reloc_none);
4477 B.CreateCall(RelocNoneFn,
4478 {MetadataAsValue::get(Ctx, MDString::get(Ctx, Name))});
4479 };
4480
// Emit one reference per needed aspect, in sorted order.
4481 llvm::sort(NeededAspects);
4482 for (StringRef Request : NeededAspects)
4483 ReferenceAspect(Request);
4484
4485 return New;
4486}
4487
/// Tries to simplify a direct call, first through LibCallSimplifier and then
/// through optimizeModularFormat. On success NumSimplified is incremented and
/// the result is either \p CI itself (when it has no uses) or whatever
/// replaceInstUsesWith returns; nullptr is returned when nothing applied.
// NOTE(review): listing line 4501 (the body of the InstCombineErase lambda) is
// missing from this excerpt.
4488Instruction *InstCombinerImpl::tryOptimizeCall(CallInst *CI) {
// Only direct calls are handled.
4489 if (!CI->getCalledFunction()) return nullptr;
4490
4491 // Skip optimizing notail and musttail calls so
4492 // LibCallSimplifier::optimizeCall doesn't have to preserve those invariants.
4493 // LibCallSimplifier::optimizeCall should try to preserve tail calls though.
4494 if (CI->isMustTailCall() || CI->isNoTailCall())
4495 return nullptr;
4496
// Callbacks handed to LibCallSimplifier so that replacements and erasures go
// through this InstCombiner instance.
4497 auto InstCombineRAUW = [this](Instruction *From, Value *With) {
4498 replaceInstUsesWith(*From, With);
4499 };
4500 auto InstCombineErase = [this](Instruction *I) {
4502 };
4503 LibCallSimplifier Simplifier(DL, &TLI, &DT, &DC, &AC, ORE, BFI, PSI,
4504 InstCombineRAUW, InstCombineErase);
4505 if (Value *With = Simplifier.optimizeCall(CI, Builder)) {
4506 ++NumSimplified;
4507 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4508 }
// Fall back to the "modular-format" rewrite when the library simplifier did
// not produce a replacement.
4509 if (Value *With = optimizeModularFormat(CI, Builder)) {
4510 ++NumSimplified;
4511 return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
4512 }
4513
4514 return nullptr;
4515}
4516
// Looks for the unique init_trampoline intrinsic that initialises TrampMem,
// requiring TrampMem to be (a single-use cast of) an alloca whose only users
// are trampoline intrinsics. Returns that intrinsic, or nullptr if any of
// the checks below fails.
// NOTE(review): the function header (listing line 4517) and the statement on
// line 4529 that introduces `II` are missing from this excerpt.
4518 // Strip off at most one level of pointer casts, looking for an alloca. This
4519 // is good enough in practice and simpler than handling any number of casts.
4520 Value *Underlying = TrampMem->stripPointerCasts();
// If a cast was stripped, the underlying value must be used only by TrampMem.
4521 if (Underlying != TrampMem &&
4522 (!Underlying->hasOneUse() || Underlying->user_back() != TrampMem))
4523 return nullptr;
4524 if (!isa<AllocaInst>(Underlying))
4525 return nullptr;
4526
4527 IntrinsicInst *InitTrampoline = nullptr;
4528 for (User *U : TrampMem->users()) {
4530 if (!II)
4531 return nullptr;
4532 if (II->getIntrinsicID() == Intrinsic::init_trampoline) {
4533 if (InitTrampoline)
4534 // More than one init_trampoline writes to this value. Give up.
4535 return nullptr;
4536 InitTrampoline = II;
4537 continue;
4538 }
4539 if (II->getIntrinsicID() == Intrinsic::adjust_trampoline)
4540 // Allow any number of calls to adjust.trampoline.
4541 continue;
// Any other intrinsic user disqualifies the pattern.
4542 return nullptr;
4543 }
4544
4545 // No call to init.trampoline found.
4546 if (!InitTrampoline)
4547 return nullptr;
4548
4549 // Check that the alloca is being used in the expected way.
4550 if (InitTrampoline->getOperand(0) != TrampMem)
4551 return nullptr;
4552
4553 return InitTrampoline;
4554}
4555
4557 Value *TrampMem) {
// Scans backwards from AdjustTramp within its basic block for an
// init_trampoline intrinsic whose operand 0 is TrampMem and returns it.
// Returns nullptr if an instruction that may write to memory, or the start of
// the block, is reached first.
// NOTE(review): the first line of the signature (listing line 4556) and the
// statement on line 4564 that introduces `II` are missing from this excerpt.
4558 // Visit all the previous instructions in the basic block, and try to find a
4559 // init.trampoline which has a direct path to the adjust.trampoline.
4560 for (BasicBlock::iterator I = AdjustTramp->getIterator(),
4561 E = AdjustTramp->getParent()->begin();
4562 I != E;) {
// Step to the previous instruction before examining it.
4563 Instruction *Inst = &*--I;
4565 if (II->getIntrinsicID() == Intrinsic::init_trampoline &&
4566 II->getOperand(0) == TrampMem)
4567 return II;
4568 if (Inst->mayWriteToMemory())
4569 return nullptr;
4570 }
4571 return nullptr;
4572}
4573
4574// Given a call to llvm.adjust.trampoline, find and return the corresponding
4575// call to llvm.init.trampoline if the call to the trampoline can be optimized
4576// to a direct call to a function. Otherwise return NULL.
// NOTE(review): the function header (listing line 4577) and the first lookup
// on line 4586 (which binds `IT` for the `return IT;` on line 4587) are
// missing from this excerpt.
// Pointer casts on the callee are looked through before matching.
4578 Callee = Callee->stripPointerCasts();
4579 IntrinsicInst *AdjustTramp = dyn_cast<IntrinsicInst>(Callee);
4580 if (!AdjustTramp ||
4581 AdjustTramp->getIntrinsicID() != Intrinsic::adjust_trampoline)
4582 return nullptr;
4583
// Operand 0 of adjust_trampoline is the trampoline memory to search from.
4584 Value *TrampMem = AdjustTramp->getOperand(0);
4585
4587 return IT;
// Otherwise search backwards through the adjust_trampoline's own block.
4588 if (IntrinsicInst *IT = findInitTrampolineFromBB(AdjustTramp, TrampMem))
4589 return IT;
4590 return nullptr;
4591}
4592
/// Folds a call whose callee is a no-op inttoptr of a ptrauth_sign or
/// ptrauth_resign intrinsic, provided the call's ptrauth operand bundle
/// matches the intrinsic's key and discriminator operands. Returns the call
/// produced by CallBase::Create with the rebuilt bundle list and the new
/// callee, or nullptr when the pattern does not match.
// NOTE(review): listing line 4609, which declares `NewBundles`, is missing
// from this excerpt.
4593Instruction *InstCombinerImpl::foldPtrAuthIntrinsicCallee(CallBase &Call) {
4594 const Value *Callee = Call.getCalledOperand();
4595 const auto *IPC = dyn_cast<IntToPtrInst>(Callee);
4596 if (!IPC || !IPC->isNoopCast(DL))
4597 return nullptr;
4598
4599 const auto *II = dyn_cast<IntrinsicInst>(IPC->getOperand(0));
4600 if (!II)
4601 return nullptr;
4602
4603 Intrinsic::ID IIID = II->getIntrinsicID();
4604 if (IIID != Intrinsic::ptrauth_resign && IIID != Intrinsic::ptrauth_sign)
4605 return nullptr;
4606
4607 // Isolate the ptrauth bundle from the others.
4608 std::optional<OperandBundleUse> PtrAuthBundleOrNone;
4610 for (unsigned BI = 0, BE = Call.getNumOperandBundles(); BI != BE; ++BI) {
4611 OperandBundleUse Bundle = Call.getOperandBundleAt(BI);
4612 if (Bundle.getTagID() == LLVMContext::OB_ptrauth)
4613 PtrAuthBundleOrNone = Bundle;
4614 else
4615 NewBundles.emplace_back(Bundle);
4616 }
4617
// Without a ptrauth bundle on the call there is nothing to fold.
4618 if (!PtrAuthBundleOrNone)
4619 return nullptr;
4620
4621 Value *NewCallee = nullptr;
4622 switch (IIID) {
4623 // call(ptrauth.resign(p)), ["ptrauth"()] -> call p, ["ptrauth"()]
4624 // assuming the call bundle and the sign operands match.
4625 case Intrinsic::ptrauth_resign: {
4626 // Resign result key should match bundle.
4627 if (II->getOperand(3) != PtrAuthBundleOrNone->Inputs[0])
4628 return nullptr;
4629 // Resign result discriminator should match bundle.
4630 if (II->getOperand(4) != PtrAuthBundleOrNone->Inputs[1])
4631 return nullptr;
4632
4633 // Resign input (auth) key should also match: we can't change the key on
4634 // the new call we're generating, because we don't know what keys are valid.
4635 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4636 return nullptr;
4637
// The new call authenticates with the resign's input key and discriminator
// (operands 1 and 2) and calls the resign's input pointer (operand 0).
4638 Value *NewBundleOps[] = {II->getOperand(1), II->getOperand(2)};
4639 NewBundles.emplace_back("ptrauth", NewBundleOps);
4640 NewCallee = II->getOperand(0);
4641 break;
4642 }
4643
4644 // call(ptrauth.sign(p)), ["ptrauth"()] -> call p
4645 // assuming the call bundle and the sign operands match.
4646 // Non-ptrauth indirect calls are undesirable, but so is ptrauth.sign.
4647 case Intrinsic::ptrauth_sign: {
4648 // Sign key should match bundle.
4649 if (II->getOperand(1) != PtrAuthBundleOrNone->Inputs[0])
4650 return nullptr;
4651 // Sign discriminator should match bundle.
4652 if (II->getOperand(2) != PtrAuthBundleOrNone->Inputs[1])
4653 return nullptr;
// No ptrauth bundle is re-added in this case; the raw pointer is called.
4654 NewCallee = II->getOperand(0);
4655 break;
4656 }
4657 default:
4658 llvm_unreachable("unexpected intrinsic ID");
4659 }
4660
4661 if (!NewCallee)
4662 return nullptr;
4663
// Cast the new callee back to the original callee's type before building the
// replacement call from the retained/rebuilt bundles.
4664 NewCallee = Builder.CreateBitOrPointerCast(NewCallee, Callee->getType());
4665 CallBase *NewCall = CallBase::Create(&Call, NewBundles);
4666 NewCall->setCalledOperand(NewCallee);
4667 return NewCall;
4668}
4669
/// Turns a call through a ptrauth constant that wraps a Function into a direct
/// call to that function, when the call's ptrauth bundle is known to be
/// compatible with the constant. Returns the new call, or nullptr if any
/// check fails.
// NOTE(review): listing lines 4671, 4681 and 4693 (which introduce `CPA`,
// `PAB` and `NewCall` respectively) are missing from this excerpt, so how
// those values are obtained cannot be confirmed from here.
4670Instruction *InstCombinerImpl::foldPtrAuthConstantCallee(CallBase &Call) {
4672 if (!CPA)
4673 return nullptr;
4674
4675 auto *CalleeF = dyn_cast<Function>(CPA->getPointer());
4676 // If the ptrauth constant isn't based on a function pointer, bail out.
4677 if (!CalleeF)
4678 return nullptr;
4679
4680 // Inspect the call ptrauth bundle to check it matches the ptrauth constant.
4682 if (!PAB)
4683 return nullptr;
4684
// Bundle input 0 is the key (required to be a ConstantInt by the cast below);
// input 1 is the discriminator.
4685 auto *Key = cast<ConstantInt>(PAB->Inputs[0]);
4686 Value *Discriminator = PAB->Inputs[1];
4687
4688 // If the bundle doesn't match, this is probably going to fail to auth.
4689 if (!CPA->isKnownCompatibleWith(Key, Discriminator, DL))
4690 return nullptr;
4691
4692 // If the bundle matches the constant, proceed in making this a direct call.
4694 NewCall->setCalledOperand(CalleeF);
4695 return NewCall;
4696}
4697
/// Annotates the return value of a pointer-returning allocation call using
/// what getAllocSize and getAllocAlignment report for it. Returns true if
/// the call was changed.
// NOTE(review): listing lines 4715, 4719 and 4736-4737 are missing from this
// excerpt. The surviving argument lists suggest they add the dereferenceable,
// dereferenceable_or_null and align return attributes -- confirm against the
// real source.
4698bool InstCombinerImpl::annotateAnyAllocSite(CallBase &Call,
4699 const TargetLibraryInfo *TLI) {
4700 // Note: We only handle cases which can't be driven from generic attributes
4701 // here. So, for example, nonnull and noalias (which are common properties
4702 // of some allocation functions) are expected to be handled via annotation
4703 // of the respective allocator declaration with generic attributes.
4704 bool Changed = false;
4705
4706 if (!Call.getType()->isPointerTy())
4707 return Changed;
4708
// Only a known, non-zero allocation size is used.
4709 std::optional<APInt> Size = getAllocSize(&Call, TLI);
4710 if (Size && *Size != 0) {
4711 // TODO: We really should just emit deref_or_null here and then
4712 // let the generic inference code combine that with nonnull.
4713 if (Call.hasRetAttr(Attribute::NonNull)) {
// Changed is set only when the attribute kind was not already present.
4714 Changed = !Call.hasRetAttr(Attribute::Dereferenceable);
4716 Call.getContext(), Size->getLimitedValue()));
4717 } else {
4718 Changed = !Call.hasRetAttr(Attribute::DereferenceableOrNull);
4720 Call.getContext(), Size->getLimitedValue()));
4721 }
4722 }
4723
4724 // Add alignment attribute if alignment is a power of two constant.
4725 Value *Alignment = getAllocAlignment(&Call, TLI);
4726 if (!Alignment)
4727 return Changed;
4728
// The alignment must be a constant strictly below Value::MaximumAlignment.
4729 ConstantInt *AlignOpC = dyn_cast<ConstantInt>(Alignment);
4730 if (AlignOpC && AlignOpC->getValue().ult(llvm::Value::MaximumAlignment)) {
4731 uint64_t AlignmentVal = AlignOpC->getZExtValue();
4732 if (llvm::isPowerOf2_64(AlignmentVal)) {
// Only act when the new alignment is stronger than the existing return
// alignment (an absent one is treated as 1).
4733 Align ExistingAlign = Call.getRetAlign().valueOrOne();
4734 Align NewAlign = Align(AlignmentVal);
4735 if (NewAlign > ExistingAlign) {
4738 Changed = true;
4739 }
4740 }
4741 }
4742 return Changed;
4743}
4744
4745/// Improvements for call, callbr and invoke instructions.
/// Returns \p Call itself when it was modified in place, the result of a
/// nested fold or erase when one applied, or nullptr when nothing changed or
/// the call was already replaced by a helper.
// NOTE(review): this excerpt omits listing lines 4762, 4789, 4801, 4810-4812,
// 4818, 4838, 4843, 4851-4852, 4871, 4904, 4923, 4939, 4948-4949, 4959-4960
// and 4990; the statements on those lines (including the declarations of
// `Callee` and the kcfi `Bundle`) are not visible here.
4746Instruction *InstCombinerImpl::visitCallBase(CallBase &Call) {
4747 bool Changed = annotateAnyAllocSite(Call, &TLI);
4748
4749 // Mark any parameters that are known to be non-null with the nonnull
4750 // attribute. This is helpful for inlining calls to functions with null
4751 // checks on their arguments.
4752 SmallVector<unsigned, 4> ArgNos;
4753 unsigned ArgNo = 0;
4754
4755 for (Value *V : Call.args()) {
4756 if (V->getType()->isPointerTy()) {
4757 // Simplify the nonnull operand if the parameter is known to be nonnull.
4758 // Otherwise, try to infer nonnull for it.
4759 bool HasDereferenceable = Call.getParamDereferenceableBytes(ArgNo) > 0;
4760 if (Call.paramHasAttr(ArgNo, Attribute::NonNull) ||
4761 (HasDereferenceable &&
4763 V->getType()->getPointerAddressSpace()))) {
4764 if (Value *Res = simplifyNonNullOperand(V, HasDereferenceable)) {
4765 replaceOperand(Call, ArgNo, Res);
4766 Changed = true;
4767 }
4768 } else if (isKnownNonZero(V,
4769 getSimplifyQuery().getWithInstruction(&Call))) {
// Collected here; the nonnull attribute is attached in one batch below.
4770 ArgNos.push_back(ArgNo);
4771 }
4772 }
4773 ArgNo++;
4774 }
4775
4776 assert(ArgNo == Call.arg_size() && "Call arguments not processed correctly.");
4777
4778 if (!ArgNos.empty()) {
4779 AttributeList AS = Call.getAttributes();
4780 LLVMContext &Ctx = Call.getContext();
4781 AS = AS.addParamAttribute(Ctx, ArgNos,
4782 Attribute::get(Ctx, Attribute::NonNull));
4783 Call.setAttributes(AS);
4784 Changed = true;
4785 }
4786
4787 // If the callee is a pointer to a function, attempt to move any casts to the
4788 // arguments of the call/callbr/invoke.
4790 Function *CalleeF = dyn_cast<Function>(Callee);
// The cast transform is attempted only when the callee is not a Function or
// its function type differs from the call's. nullptr is returned on success;
// presumably the helper has already replaced and erased the call -- confirm.
4791 if ((!CalleeF || CalleeF->getFunctionType() != Call.getFunctionType()) &&
4792 transformConstExprCastCall(Call))
4793 return nullptr;
4794
4795 if (CalleeF) {
4796 // Remove the convergent attr on calls when the callee is not convergent.
4797 if (Call.isConvergent() && !CalleeF->isConvergent() &&
4798 !CalleeF->isIntrinsic()) {
4799 LLVM_DEBUG(dbgs() << "Removing convergent attr from instr " << Call
4800 << "\n");
4802 return &Call;
4803 }
4804
4805 // If the call and callee calling conventions don't match, and neither one
4806 // of the calling conventions is compatible with C calling convention
4807 // this call must be unreachable, as the call is undefined.
4808 if ((CalleeF->getCallingConv() != Call.getCallingConv() &&
4809 !(CalleeF->getCallingConv() == llvm::CallingConv::C &&
4813 // Only do this for calls to a function with a body. A prototype may
4814 // not actually end up matching the implementation's calling conv for a
4815 // variety of reasons (e.g. it may be written in assembly).
4816 !CalleeF->isDeclaration()) {
4817 Instruction *OldCall = &Call;
4819 // If OldCall does not return void then replaceInstUsesWith poison.
4820 // This allows ValueHandlers and custom metadata to adjust itself.
4821 if (!OldCall->getType()->isVoidTy())
4822 replaceInstUsesWith(*OldCall, PoisonValue::get(OldCall->getType()));
4823 if (isa<CallInst>(OldCall))
4824 return eraseInstFromFunction(*OldCall);
4825
4826 // We cannot remove an invoke or a callbr, because it would change the
4827 // CFG, just change the callee to a null pointer.
4828 cast<CallBase>(OldCall)->setCalledFunction(
4829 CalleeF->getFunctionType(),
4830 Constant::getNullValue(CalleeF->getType()));
4831 return nullptr;
4832 }
4833 }
4834
4835 // Calling a null function pointer is undefined if a null address isn't
4836 // dereferenceable.
4837 if ((isa<ConstantPointerNull>(Callee) &&
4839 isa<UndefValue>(Callee)) {
4840 // If Call does not return void then replaceInstUsesWith poison.
4841 // This allows ValueHandlers and custom metadata to adjust itself.
4842 if (!Call.getType()->isVoidTy())
4844
4845 if (Call.isTerminator()) {
4846 // Can't remove an invoke or callbr because we cannot change the CFG.
4847 return nullptr;
4848 }
4849
4850 // This instruction is not reachable, just remove it.
4853 }
4854
// Calls through an adjust_trampoline/init_trampoline pair are rewritten by
// transformCallThroughTrampoline.
4855 if (IntrinsicInst *II = findInitTrampoline(Callee))
4856 return transformCallThroughTrampoline(Call, *II);
4857
4858 // Combine calls involving pointer authentication intrinsics.
4859 if (Instruction *NewCall = foldPtrAuthIntrinsicCallee(Call))
4860 return NewCall;
4861
4862 // Combine calls to ptrauth constants.
4863 if (Instruction *NewCall = foldPtrAuthConstantCallee(Call))
4864 return NewCall;
4865
4866 if (isa<InlineAsm>(Callee) && !Call.doesNotThrow()) {
4867 InlineAsm *IA = cast<InlineAsm>(Callee);
4868 if (!IA->canThrow()) {
4869 // Normal inline asm calls cannot throw - mark them
4870 // 'nounwind'.
4872 Changed = true;
4873 }
4874 }
4875
4876 // Try to optimize the call if possible, we require DataLayout for most of
4877 // this. None of these calls are seen as possibly dead so go ahead and
4878 // delete the instruction now.
4879 if (CallInst *CI = dyn_cast<CallInst>(&Call)) {
4880 Instruction *I = tryOptimizeCall(CI);
4881 // If we changed something return the result, etc. Otherwise let
4882 // the fallthrough check.
4883 if (I) return eraseInstFromFunction(*I);
4884 }
4885
// A call with a returned argument operand has its uses forwarded to that
// argument (cast to the call's type); skipped for musttail calls.
4886 if (!Call.use_empty() && !Call.isMustTailCall())
4887 if (Value *ReturnedArg = Call.getReturnedArgOperand()) {
4888 Type *CallTy = Call.getType();
4889 Type *RetArgTy = ReturnedArg->getType();
4890 if (RetArgTy->canLosslesslyBitCastTo(CallTy))
4891 return replaceInstUsesWith(
4892 Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy));
4893 }
4894
4895 // Drop unnecessary callee_type metadata from calls that were converted
4896 // into direct calls.
4897 if (Call.getMetadata(LLVMContext::MD_callee_type) && !Call.isIndirectCall()) {
4898 Call.setMetadata(LLVMContext::MD_callee_type, nullptr);
4899 Changed = true;
4900 }
4901
4902 // Drop unnecessary kcfi operand bundles from calls that were converted
4903 // into direct calls.
4905 if (Bundle && !Call.isIndirectCall()) {
// Debug-only diagnostic: report when the callee's kcfi_type metadata differs
// from the type id in the bundle.
4906 DEBUG_WITH_TYPE(DEBUG_TYPE "-kcfi", {
4907 if (CalleeF) {
4908 ConstantInt *FunctionType = nullptr;
4909 ConstantInt *ExpectedType = cast<ConstantInt>(Bundle->Inputs[0]);
4910
4911 if (MDNode *MD = CalleeF->getMetadata(LLVMContext::MD_kcfi_type))
4912 FunctionType = mdconst::extract<ConstantInt>(MD->getOperand(0));
4913
4914 if (FunctionType &&
4915 FunctionType->getZExtValue() != ExpectedType->getZExtValue())
4916 dbgs() << Call.getModule()->getName()
4917 << ": warning: kcfi: " << Call.getCaller()->getName()
4918 << ": call to " << CalleeF->getName()
4919 << " using a mismatching function pointer type\n";
4920 }
4921 });
4922
4924 }
4925
4926 if (isRemovableAlloc(&Call, &TLI))
4927 return visitAllocSite(Call);
4928
4929 // Handle intrinsics which can be used in both call and invoke context.
4930 switch (Call.getIntrinsicID()) {
4931 case Intrinsic::experimental_gc_statepoint: {
4932 GCStatepointInst &GCSP = *cast<GCStatepointInst>(&Call);
// Base and derived pointers of every relocate that survives the loop below.
4933 SmallPtrSet<Value *, 32> LiveGcValues;
4934 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
4935 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
4936
4937 // Remove the relocation if unused.
4938 if (GCR.use_empty()) {
4940 continue;
4941 }
4942
4943 Value *DerivedPtr = GCR.getDerivedPtr();
4944 Value *BasePtr = GCR.getBasePtr();
4945
4946 // Undef is undef, even after relocation.
4947 if (isa<UndefValue>(DerivedPtr) || isa<UndefValue>(BasePtr)) {
4950 continue;
4951 }
4952
4953 if (auto *PT = dyn_cast<PointerType>(GCR.getType())) {
4954 // The relocation of null will be null for most any collector.
4955 // TODO: provide a hook for this in GCStrategy. There might be some
4956 // weird collector this property does not hold for.
4957 if (isa<ConstantPointerNull>(DerivedPtr)) {
4958 // Use null-pointer of gc_relocate's type to replace it.
4961 continue;
4962 }
4963
4964 // isKnownNonNull -> nonnull attribute
4965 if (!GCR.hasRetAttr(Attribute::NonNull) &&
4966 isKnownNonZero(DerivedPtr,
4967 getSimplifyQuery().getWithInstruction(&Call))) {
4968 GCR.addRetAttr(Attribute::NonNull);
4969 // We discovered new fact, re-check users.
4970 Worklist.pushUsersToWorkList(GCR);
4971 }
4972 }
4973
4974 // If we have two copies of the same pointer in the statepoint argument
4975 // list, canonicalize to one. This may let us common gc.relocates.
4976 if (GCR.getBasePtr() == GCR.getDerivedPtr() &&
4977 GCR.getBasePtrIndex() != GCR.getDerivedPtrIndex()) {
4978 auto *OpIntTy = GCR.getOperand(2)->getType();
4979 GCR.setOperand(2, ConstantInt::get(OpIntTy, GCR.getBasePtrIndex()));
4980 }
4981
4982 // TODO: bitcast(relocate(p)) -> relocate(bitcast(p))
4983 // Canonicalize on the type from the uses to the defs
4984
4985 // TODO: relocate((gep p, C, C2, ...)) -> gep(relocate(p), C, C2, ...)
4986 LiveGcValues.insert(BasePtr);
4987 LiveGcValues.insert(DerivedPtr);
4988 }
4989 std::optional<OperandBundleUse> Bundle =
4991 unsigned NumOfGCLives = LiveGcValues.size();
// Nothing to shrink if the bundle is absent or already holds exactly the live
// values.
4992 if (!Bundle || NumOfGCLives == Bundle->Inputs.size())
4993 break;
4994 // We can reduce the size of gc live bundle.
// Val2Idx maps each bundle input to its index in the shrunken list;
// NumOfGCLives is used as the marker for values that are dropped.
4995 DenseMap<Value *, unsigned> Val2Idx;
4996 std::vector<Value *> NewLiveGc;
4997 for (Value *V : Bundle->Inputs) {
4998 auto [It, Inserted] = Val2Idx.try_emplace(V);
4999 if (!Inserted)
5000 continue;
5001 if (LiveGcValues.count(V)) {
5002 It->second = NewLiveGc.size();
5003 NewLiveGc.push_back(V);
5004 } else
5005 It->second = NumOfGCLives;
5006 }
5007 // Update all gc.relocates
5008 for (const GCRelocateInst *Reloc : GCSP.getGCRelocates()) {
5009 GCRelocateInst &GCR = *const_cast<GCRelocateInst *>(Reloc);
5010 Value *BasePtr = GCR.getBasePtr();
5011 assert(Val2Idx.count(BasePtr) && Val2Idx[BasePtr] != NumOfGCLives &&
5012 "Missed live gc for base pointer");
5013 auto *OpIntTy1 = GCR.getOperand(1)->getType();
5014 GCR.setOperand(1, ConstantInt::get(OpIntTy1, Val2Idx[BasePtr]));
5015 Value *DerivedPtr = GCR.getDerivedPtr();
5016 assert(Val2Idx.count(DerivedPtr) && Val2Idx[DerivedPtr] != NumOfGCLives &&
5017 "Missed live gc for derived pointer");
5018 auto *OpIntTy2 = GCR.getOperand(2)->getType();
5019 GCR.setOperand(2, ConstantInt::get(OpIntTy2, Val2Idx[DerivedPtr]));
5020 }
5021 // Create new statepoint instruction.
5022 OperandBundleDef NewBundle("gc-live", std::move(NewLiveGc));
5023 return CallBase::Create(&Call, NewBundle);
5024 }
5025 default: { break; }
5026 }
5027
5028 return Changed ? &Call : nullptr;
5029}
5030
5031/// If the callee is a constexpr cast of a function, attempt to move the cast to
5032/// the arguments of the call/invoke.
5033/// CallBrInst is not supported.
/// Returns true when the call was replaced by a direct call (the original is
/// erased at the end), and false when any legality check below rejects the
/// transform.
// NOTE(review): this excerpt omits listing lines 5036, 5040, 5068, 5168,
// 5237, 5250, 5261, 5274 and 5278; among others the initialiser of `Callee`
// and the declarations of `Caller`, `ArgAttrs` and `OpBundles` are not
// visible here.
5034bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) {
5035 auto *Callee =
5037 if (!Callee)
5038 return false;
5039
5041 "CallBr's don't have a single point after a def to insert at");
5042
5043 // Don't perform the transform for declarations, which may not be fully
5044 // accurate. For example, void @foo() is commonly used as a placeholder for
5045 // unknown prototypes.
5046 if (Callee->isDeclaration())
5047 return false;
5048
5049 // If this is a call to a thunk function, don't remove the cast. Thunks are
5050 // used to transparently forward all incoming parameters and outgoing return
5051 // values, so it's important to leave the cast in place.
5052 if (Callee->hasFnAttribute("thunk"))
5053 return false;
5054
5055 // If this is a call to a naked function, the assembly might be
5056 // using an argument, or otherwise rely on the frame layout,
5057 // the function prototype will mismatch.
5058 if (Callee->hasFnAttribute(Attribute::Naked))
5059 return false;
5060
5061 // If this is a musttail call, the callee's prototype must match the caller's
5062 // prototype with the exception of pointee types. The code below doesn't
5063 // implement that, so we can't do this transform.
5064 // TODO: Do the transform if it only requires adding pointer casts.
5065 if (Call.isMustTailCall())
5066 return false;
5067
5069 const AttributeList &CallerPAL = Call.getAttributes();
5070
5071 // Okay, this is a cast from a function to a different type. Unless doing so
5072 // would cause a type conversion of one of our arguments, change this call to
5073 // be a direct call with arguments casted to the appropriate types.
5074 FunctionType *FT = Callee->getFunctionType();
5075 Type *OldRetTy = Caller->getType();
5076 Type *NewRetTy = FT->getReturnType();
5077
5078 // Check to see if we are changing the return type...
5079 if (OldRetTy != NewRetTy) {
5080
5081 if (NewRetTy->isStructTy())
5082 return false; // TODO: Handle multiple return values.
5083
// A non-castable return type is acceptable only when the result is unused.
5084 if (!CastInst::isBitOrNoopPointerCastable(NewRetTy, OldRetTy, DL)) {
5085 if (!Caller->use_empty())
5086 return false; // Cannot transform this return value.
5087 }
5088
5089 if (!CallerPAL.isEmpty() && !Caller->use_empty()) {
5090 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5091 if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(
5092 NewRetTy, CallerPAL.getRetAttrs())))
5093 return false; // Attribute not compatible with transformed value.
5094 }
5095
5096 // If the callbase is an invoke instruction, and the return value is
5097 // used by a PHI node in a successor, we cannot change the return type of
5098 // the call because there is no place to put the cast instruction (without
5099 // breaking the critical edge). Bail out in this case.
5100 if (!Caller->use_empty()) {
5101 BasicBlock *PhisNotSupportedBlock = nullptr;
5102 if (auto *II = dyn_cast<InvokeInst>(Caller))
5103 PhisNotSupportedBlock = II->getNormalDest();
5104 if (PhisNotSupportedBlock)
5105 for (User *U : Caller->users())
5106 if (PHINode *PN = dyn_cast<PHINode>(U))
5107 if (PN->getParent() == PhisNotSupportedBlock)
5108 return false;
5109 }
5110 }
5111
// Arguments present both in the call and in the callee's fixed parameter list.
5112 unsigned NumActualArgs = Call.arg_size();
5113 unsigned NumCommonArgs = std::min(FT->getNumParams(), NumActualArgs);
5114
5115 // Prevent us turning:
5116 // declare void @takes_i32_inalloca(i32* inalloca)
5117 // call void bitcast (void (i32*)* @takes_i32_inalloca to void (i32)*)(i32 0)
5118 //
5119 // into:
5120 // call void @takes_i32_inalloca(i32* null)
5121 //
5122 // Similarly, avoid folding away bitcasts of byval calls.
5123 if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
5124 Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated))
5125 return false;
5126
// First pass: legality checks only; nothing is modified until all pass.
5127 auto AI = Call.arg_begin();
5128 for (unsigned i = 0, e = NumCommonArgs; i != e; ++i, ++AI) {
5129 Type *ParamTy = FT->getParamType(i);
5130 Type *ActTy = (*AI)->getType();
5131
5132 if (!CastInst::isBitOrNoopPointerCastable(ActTy, ParamTy, DL))
5133 return false; // Cannot transform this parameter value.
5134
5135 // Check if there are any incompatible attributes we cannot drop safely.
5136 if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i))
5137 .overlaps(AttributeFuncs::typeIncompatible(
5138 ParamTy, CallerPAL.getParamAttrs(i),
5139 AttributeFuncs::ASK_UNSAFE_TO_DROP)))
5140 return false; // Attribute not compatible with transformed value.
5141
5142 if (Call.isInAllocaArgument(i) ||
5143 CallerPAL.hasParamAttr(i, Attribute::Preallocated))
5144 return false; // Cannot transform to and from inalloca/preallocated.
5145
5146 if (CallerPAL.hasParamAttr(i, Attribute::SwiftError))
5147 return false;
5148
5149 if (CallerPAL.hasParamAttr(i, Attribute::ByVal) !=
5150 Callee->getAttributes().hasParamAttr(i, Attribute::ByVal))
5151 return false; // Cannot transform to or from byval.
5152 }
5153
5154 if (FT->getNumParams() < NumActualArgs && FT->isVarArg() &&
5155 !CallerPAL.isEmpty()) {
5156 // In this case we have more arguments than the new function type, but we
5157 // won't be dropping them. Check that these extra arguments have attributes
5158 // that are compatible with being a vararg call argument.
5159 unsigned SRetIdx;
5160 if (CallerPAL.hasAttrSomewhere(Attribute::StructRet, &SRetIdx) &&
5161 SRetIdx - AttributeList::FirstArgIndex >= FT->getNumParams())
5162 return false;
5163 }
5164
5165 // Okay, we decided that this is a safe thing to do: go ahead and start
5166 // inserting cast instructions as necessary.
5167 SmallVector<Value *, 8> Args;
5169 Args.reserve(NumActualArgs);
5170 ArgAttrs.reserve(NumActualArgs);
5171
5172 // Get any return attributes.
5173 AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs());
5174
5175 // If the return value is not being used, the type may not be compatible
5176 // with the existing attributes. Wipe out any problematic attributes.
5177 RAttrs.remove(
5178 AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs()));
5179
// Second pass: build the new argument list, casting where the types differ.
5180 LLVMContext &Ctx = Call.getContext();
5181 AI = Call.arg_begin();
5182 for (unsigned i = 0; i != NumCommonArgs; ++i, ++AI) {
5183 Type *ParamTy = FT->getParamType(i);
5184
5185 Value *NewArg = *AI;
5186 if ((*AI)->getType() != ParamTy)
5187 NewArg = Builder.CreateBitOrPointerCast(*AI, ParamTy);
5188 Args.push_back(NewArg);
5189
5190 // Add any parameter attributes except the ones incompatible with the new
5191 // type. Note that we made sure all incompatible ones are safe to drop.
5192 AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(
5193 ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP);
5194 ArgAttrs.push_back(
5195 CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs));
5196 }
5197
5198 // If the function takes more arguments than the call was taking, add them
5199 // now.
// The extra parameters are filled with null values and carry no attributes.
5200 for (unsigned i = NumCommonArgs; i != FT->getNumParams(); ++i) {
5201 Args.push_back(Constant::getNullValue(FT->getParamType(i)));
5202 ArgAttrs.push_back(AttributeSet());
5203 }
5204
5205 // If we are removing arguments to the function, emit an obnoxious warning.
5206 if (FT->getNumParams() < NumActualArgs) {
5207 // TODO: if (!FT->isVarArg()) this call may be unreachable. PR14722
5208 if (FT->isVarArg()) {
5209 // Add all of the arguments in their promoted form to the arg list.
5210 for (unsigned i = FT->getNumParams(); i != NumActualArgs; ++i, ++AI) {
5211 Type *PTy = getPromotedType((*AI)->getType());
5212 Value *NewArg = *AI;
5213 if (PTy != (*AI)->getType()) {
5214 // Must promote to pass through va_arg area!
5215 Instruction::CastOps opcode =
5216 CastInst::getCastOpcode(*AI, false, PTy, false);
5217 NewArg = Builder.CreateCast(opcode, *AI, PTy);
5218 }
5219 Args.push_back(NewArg);
5220
5221 // Add any parameter attributes.
5222 ArgAttrs.push_back(CallerPAL.getParamAttrs(i));
5223 }
5224 }
5225 }
5226
5227 AttributeSet FnAttrs = CallerPAL.getFnAttrs();
5228
5229 if (NewRetTy->isVoidTy())
5230 Caller->setName(""); // Void type should not have a name.
5231
5232 assert((ArgAttrs.size() == FT->getNumParams() || FT->isVarArg()) &&
5233 "missing argument attributes");
5234 AttributeList NewCallerPAL = AttributeList::get(
5235 Ctx, FnAttrs, AttributeSet::get(Ctx, RAttrs), ArgAttrs);
5236
5238 Call.getOperandBundlesAsDefs(OpBundles);
5239
// Emit the replacement as an invoke or a call, matching the original; the
// tail-call kind is carried over for plain calls.
5240 CallBase *NewCall;
5241 if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
5242 NewCall = Builder.CreateInvoke(Callee, II->getNormalDest(),
5243 II->getUnwindDest(), Args, OpBundles);
5244 } else {
5245 NewCall = Builder.CreateCall(Callee, Args, OpBundles);
5246 cast<CallInst>(NewCall)->setTailCallKind(
5247 cast<CallInst>(Caller)->getTailCallKind());
5248 }
5249 NewCall->takeName(Caller);
5251 NewCall->setAttributes(NewCallerPAL);
5252
5253 // Preserve prof metadata if any.
5254 NewCall->copyMetadata(*Caller, {LLVMContext::MD_prof});
5255
5256 // Insert a cast of the return type as necessary.
5257 Instruction *NC = NewCall;
5258 Value *NV = NC;
5259 if (OldRetTy != NV->getType() && !Caller->use_empty()) {
5260 assert(!NV->getType()->isVoidTy());
5262 NC->setDebugLoc(Caller->getDebugLoc());
5263
5264 auto OptInsertPt = NewCall->getInsertionPointAfterDef();
5265 assert(OptInsertPt && "No place to insert cast");
5266 InsertNewInstBefore(NC, *OptInsertPt);
5267 Worklist.pushUsersToWorkList(*Caller);
5268 }
5269
5270 if (!Caller->use_empty())
5271 replaceInstUsesWith(*Caller, NV);
5272 else if (Caller->hasValueHandle()) {
5273 if (OldRetTy == NV->getType())
5275 else
5276 // We cannot call ValueIsRAUWd with a different type, and the
5277 // actual tracked value will disappear.
5279 }
5280
5281 eraseInstFromFunction(*Caller);
5282 return true;
5283}
5284
5285/// Turn a call to a function created by init_trampoline / adjust_trampoline
5286/// intrinsic pair into a direct call to the underlying function.
/// Returns nullptr when the call already carries a 'nest' attribute, a newly
/// created call/invoke/callbr when a 'nest' parameter had to be spliced in,
/// and \p Call itself (retargeted in place) when the target has no 'nest'
/// parameter.
// NOTE(review): this excerpt omits listing lines 5287 (the return type), 5298
// (the declaration of `NestF`) and 5392 (the declaration of `OpBundles`).
5288InstCombinerImpl::transformCallThroughTrampoline(CallBase &Call,
5289 IntrinsicInst &Tramp) {
5290 FunctionType *FTy = Call.getFunctionType();
5291 AttributeList Attrs = Call.getAttributes();
5292
5293 // If the call already has the 'nest' attribute somewhere then give up -
5294 // otherwise 'nest' would occur twice after splicing in the chain.
5295 if (Attrs.hasAttrSomewhere(Attribute::Nest))
5296 return nullptr;
5297
5299 FunctionType *NestFTy = NestF->getFunctionType();
5300
5301 AttributeList NestAttrs = NestF->getAttributes();
5302 if (!NestAttrs.isEmpty()) {
5303 unsigned NestArgNo = 0;
5304 Type *NestTy = nullptr;
5305 AttributeSet NestAttr;
5306
5307 // Look for a parameter marked with the 'nest' attribute.
5308 for (FunctionType::param_iterator I = NestFTy->param_begin(),
5309 E = NestFTy->param_end();
5310 I != E; ++NestArgNo, ++I) {
5311 AttributeSet AS = NestAttrs.getParamAttrs(NestArgNo);
5312 if (AS.hasAttribute(Attribute::Nest)) {
5313 // Record the parameter type and any other attributes.
5314 NestTy = *I;
5315 NestAttr = AS;
5316 break;
5317 }
5318 }
5319
5320 if (NestTy) {
5321 std::vector<Value*> NewArgs;
5322 std::vector<AttributeSet> NewArgAttrs;
5323 NewArgs.reserve(Call.arg_size() + 1);
// NOTE(review): NewArgAttrs receives one entry per original argument plus one
// for the nest argument when it is spliced in, i.e. one more than is reserved
// here; reserving arg_size() + 1 (as for NewArgs) would avoid a regrowth.
5324 NewArgAttrs.reserve(Call.arg_size());
5325
5326 // Insert the nest argument into the call argument list, which may
5327 // mean appending it. Likewise for attributes.
5328
5329 {
5330 unsigned ArgNo = 0;
5331 auto I = Call.arg_begin(), E = Call.arg_end();
// The nest check runs before the end-of-arguments check so that a nest
// position equal to the argument count appends the chain value.
5332 do {
5333 if (ArgNo == NestArgNo) {
5334 // Add the chain argument and attributes.
5335 Value *NestVal = Tramp.getArgOperand(2);
5336 if (NestVal->getType() != NestTy)
5337 NestVal = Builder.CreateBitCast(NestVal, NestTy, "nest");
5338 NewArgs.push_back(NestVal);
5339 NewArgAttrs.push_back(NestAttr);
5340 }
5341
5342 if (I == E)
5343 break;
5344
5345 // Add the original argument and attributes.
5346 NewArgs.push_back(*I);
5347 NewArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));
5348
5349 ++ArgNo;
5350 ++I;
5351 } while (true);
5352 }
5353
5354 // The trampoline may have been bitcast to a bogus type (FTy).
5355 // Handle this by synthesizing a new function type, equal to FTy
5356 // with the chain parameter inserted.
5357
5358 std::vector<Type*> NewTypes;
5359 NewTypes.reserve(FTy->getNumParams()+1);
5360
5361 // Insert the chain's type into the list of parameter types, which may
5362 // mean appending it.
5363 {
5364 unsigned ArgNo = 0;
5365 FunctionType::param_iterator I = FTy->param_begin(),
5366 E = FTy->param_end();
5367
5368 do {
5369 if (ArgNo == NestArgNo)
5370 // Add the chain's type.
5371 NewTypes.push_back(NestTy);
5372
5373 if (I == E)
5374 break;
5375
5376 // Add the original type.
5377 NewTypes.push_back(*I);
5378
5379 ++ArgNo;
5380 ++I;
5381 } while (true);
5382 }
5383
5384 // Replace the trampoline call with a direct call. Let the generic
5385 // code sort out any function type mismatches.
5386 FunctionType *NewFTy =
5387 FunctionType::get(FTy->getReturnType(), NewTypes, FTy->isVarArg());
5388 AttributeList NewPAL =
5389 AttributeList::get(FTy->getContext(), Attrs.getFnAttrs(),
5390 Attrs.getRetAttrs(), NewArgAttrs);
5391
5393 Call.getOperandBundlesAsDefs(OpBundles);
5394
// Recreate the same kind of instruction as the original, carrying over its
// calling convention (and tail-call kind for plain calls).
5395 Instruction *NewCaller;
5396 if (InvokeInst *II = dyn_cast<InvokeInst>(&Call)) {
5397 NewCaller = InvokeInst::Create(NewFTy, NestF, II->getNormalDest(),
5398 II->getUnwindDest(), NewArgs, OpBundles);
5399 cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
5400 cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
5401 } else if (CallBrInst *CBI = dyn_cast<CallBrInst>(&Call)) {
5402 NewCaller =
5403 CallBrInst::Create(NewFTy, NestF, CBI->getDefaultDest(),
5404 CBI->getIndirectDests(), NewArgs, OpBundles);
5405 cast<CallBrInst>(NewCaller)->setCallingConv(CBI->getCallingConv());
5406 cast<CallBrInst>(NewCaller)->setAttributes(NewPAL);
5407 } else {
5408 NewCaller = CallInst::Create(NewFTy, NestF, NewArgs, OpBundles);
5409 cast<CallInst>(NewCaller)->setTailCallKind(
5410 cast<CallInst>(Call).getTailCallKind());
5411 cast<CallInst>(NewCaller)->setCallingConv(
5412 cast<CallInst>(Call).getCallingConv());
5413 cast<CallInst>(NewCaller)->setAttributes(NewPAL);
5414 }
5415 NewCaller->setDebugLoc(Call.getDebugLoc());
5416
5417 return NewCaller;
5418 }
5419 }
5420
5421 // Replace the trampoline call with a direct call. Since there is no 'nest'
5422 // parameter, there is no need to adjust the argument list. Let the generic
5423 // code sort out any function type mismatches.
5424 Call.setCalledFunction(FTy, NestF);
5425 return &Call;
5426}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
@ Scaled
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
BitTracker BT
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static SDValue foldBitOrderCrossLogicOp(SDNode *N, SelectionDAG &DAG)
#define Check(C,...)
#define DEBUG_TYPE
IRTranslator LLVM IR MI
static Type * getPromotedType(Type *Ty)
Return the specified type promoted as it would be to pass through a va_arg area.
static Instruction * createOverflowTuple(IntrinsicInst *II, Value *Result, Constant *Overflow)
Creates a result tuple for an overflow intrinsic II with a given Result and a constant Overflow value...
static IntrinsicInst * findInitTrampolineFromAlloca(Value *TrampMem)
static bool removeTriviallyEmptyRange(IntrinsicInst &EndI, InstCombinerImpl &IC, std::function< bool(const IntrinsicInst &)> IsStart)
static bool inputDenormalIsDAZ(const Function &F, const Type *Ty)
static Instruction * reassociateMinMaxWithConstantInOperand(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If this min/max has a matching min/max operand with a constant, try to push the constant operand into...
static bool isIdempotentBinaryIntrinsic(Intrinsic::ID IID)
Helper to match idempotent binary intrinsics, namely, intrinsics where f(f(x, y), y) == f(x,...
static bool signBitMustBeTheSame(Value *Op0, Value *Op1, const SimplifyQuery &SQ)
Return true if two values Op0 and Op1 are known to have the same sign.
static Value * optimizeModularFormat(CallInst *CI, IRBuilderBase &B)
static Instruction * moveAddAfterMinMax(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
Try to canonicalize min/max(X + C0, C1) as min/max(X, C1 - C0) + C0.
static Instruction * simplifyInvariantGroupIntrinsic(IntrinsicInst &II, InstCombinerImpl &IC)
This function transforms launder.invariant.group and strip.invariant.group like: launder(launder(x)) ...
static bool haveSameOperands(const IntrinsicInst &I, const IntrinsicInst &E, unsigned NumOperands)
static std::optional< bool > getKnownSign(Value *Op, const SimplifyQuery &SQ)
static cl::opt< unsigned > GuardWideningWindow("instcombine-guard-widening-window", cl::init(3), cl::desc("How wide an instruction window to bypass looking for " "another guard"))
static bool hasUndefSource(AnyMemTransferInst *MI)
Recognize a memcpy/memmove from a trivially otherwise unused alloca.
static Instruction * factorizeMinMaxTree(IntrinsicInst *II)
Reduce a sequence of min/max intrinsics with a common operand.
static Instruction * foldClampRangeOfTwo(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
If we have a clamp pattern like max (min X, 42), 41 – where the output can only be one of two possibl...
static Value * simplifyReductionOperand(Value *Arg, bool CanReorderLanes)
static IntrinsicInst * findInitTrampolineFromBB(IntrinsicInst *AdjustTramp, Value *TrampMem)
static Value * foldIntrinsicUsingDistributiveLaws(IntrinsicInst *II, InstCombiner::BuilderTy &Builder)
static std::optional< bool > getKnownSignOrZero(Value *Op, const SimplifyQuery &SQ)
static Value * foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1, const DataLayout &DL, InstCombiner::BuilderTy &Builder)
Fold an unsigned minimum of trailing or leading zero bits counts: umin(cttz(CtOp1,...
static bool rightDistributesOverLeft(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "(X ROp Y) LOp Z" is always equal to "(X LOp Z) ROp (Y LOp Z)".
static Value * foldIdempotentBinaryIntrinsicRecurrence(InstCombinerImpl &IC, IntrinsicInst *II)
Attempt to simplify value-accumulating recurrences of kind: umax.acc = phi i8 [ umax,...
static bool ldexpSaturatingAddIsSafe(Type *FpTy, Type *ExpTy)
static Instruction * foldCtpop(IntrinsicInst &II, InstCombinerImpl &IC)
static Instruction * simplifyNeonTbl(IntrinsicInst &II, InstCombiner &IC, bool IsExtension)
Convert tbl/tbx intrinsics to shufflevector if the mask is constant, and at most two source operands ...
static Instruction * foldCttzCtlz(IntrinsicInst &II, InstCombinerImpl &IC)
static IntrinsicInst * findInitTrampoline(Value *Callee)
static FCmpInst::Predicate fpclassTestIsFCmp0(FPClassTest Mask, const Function &F, Type *Ty)
static bool leftDistributesOverRight(Instruction::BinaryOps LOp, bool HasNUW, bool HasNSW, Intrinsic::ID ROp)
Return whether "X LOp (Y ROp Z)" is always equal to "(X LOp Y) ROp (X LOp Z)".
static Value * reassociateMinMaxWithConstants(IntrinsicInst *II, IRBuilderBase &Builder, const SimplifyQuery &SQ)
If this min/max has a constant operand and an operand that is a matching min/max with a constant oper...
static CallInst * canonicalizeConstantArg0ToArg1(CallInst &Call)
static Instruction * foldNeonShift(IntrinsicInst *II, InstCombinerImpl &IC)
This file provides internal interfaces used to implement the InstCombine.
This file provides the interface for the instcombine pass implementation.
static bool hasNoSignedWrap(BinaryOperator &I)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
static bool inputDenormalIsIEEE(DenormalMode Mode)
Return true if it's possible to assume IEEE treatment of input denormals in F for Val.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
static const Function * getCalledFunction(const Value *V)
This file contains the declarations for metadata subclasses.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file implements the SmallBitVector class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:119
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
static LLVM_ABI bool semanticsHasInf(const fltSemantics &)
Definition APFloat.cpp:260
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static LLVM_ABI bool hasSignBitInMSB(const fltSemantics &)
Definition APFloat.cpp:273
bool isNegative() const
Definition APFloat.h:1538
void clearSign()
Definition APFloat.h:1357
static APFloat getOne(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative One.
Definition APFloat.h:1147
bool isZero() const
Definition APFloat.h:1534
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1197
static APFloat getSmallest(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) finite number in the given semantics.
Definition APFloat.h:1207
bool isInfinity() const
Definition APFloat.h:1535
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
LLVM_ABI APInt usub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:2000
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1709
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition APInt.h:1118
LLVM_ABI APInt sadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1980
LLVM_ABI APInt uadd_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1987
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:652
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt uadd_sat(const APInt &RHS) const
Definition APInt.cpp:2088
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition APInt.h:335
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1993
static APSInt getMinValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the minimum integer value with the given bit width and signedness.
Definition APSInt.h:310
static APSInt getMaxValue(uint32_t numBits, bool Unsigned)
Return the APSInt representing the maximum integer value with the given bit width and signedness.
Definition APSInt.h:302
This class represents any memset intrinsic.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
static LLVM_ABI AttributeSet get(LLVMContext &C, const AttrBuilder &B)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
static LLVM_ABI Attribute getWithDereferenceableBytes(LLVMContext &Context, uint64_t Bytes)
static LLVM_ABI Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, uint64_t Bytes)
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI bool isSigned() const
Whether the intrinsic is signed or unsigned.
LLVM_ABI Instruction::BinaryOps getBinaryOp() const
Returns the binary operation underlying the intrinsic.
static BinaryOperator * CreateFAddFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:271
static LLVM_ABI BinaryOperator * CreateNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Helper functions to construct and inspect unary operations (NEG and NOT) via binary operators SUB and...
static BinaryOperator * CreateNSW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:314
static LLVM_ABI BinaryOperator * CreateNot(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
static BinaryOperator * CreateNUW(BinaryOps Opc, Value *V1, Value *V2, const Twine &Name="")
Definition InstrTypes.h:329
static BinaryOperator * CreateFMulFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:279
static BinaryOperator * CreateFDivFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:283
static BinaryOperator * CreateFSubFMF(Value *V1, Value *V2, FastMathFlags FMF, const Twine &Name="")
Definition InstrTypes.h:275
static LLVM_ABI BinaryOperator * CreateNSWNeg(Value *Op, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
void setCallingConv(CallingConv::ID CC)
void setDoesNotThrow()
MaybeAlign getRetAlign() const
Extract the alignment of the return value.
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
OperandBundleUse getOperandBundleAt(unsigned Index) const
Return the operand bundle at a specific index.
std::optional< OperandBundleUse > getOperandBundle(StringRef Name) const
Return an operand bundle by name, if present.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isInAllocaArgument(unsigned ArgNo) const
Determine whether this argument is passed in an alloca.
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
bool hasRetAttr(Attribute::AttrKind Kind) const
Determine whether the return value has the given attribute.
unsigned getNumOperandBundles() const
Return the number of operand bundles associated with this User.
uint64_t getParamDereferenceableBytes(unsigned i) const
Extract the number of dereferenceable bytes for a call or parameter (0=unknown).
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
static LLVM_ABI CallBase * removeOperandBundleAt(CallBase *CB, size_t Offset, InsertPosition InsertPtr=nullptr)
void setNotConvergent()
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Attribute getFnAttr(StringRef Kind) const
Get the attribute of a given kind for the function.
bool doesNotThrow() const
Determine if the call cannot unwind.
void addRetAttr(Attribute::AttrKind Kind)
Adds the attribute to the return value.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
bool isConvergent() const
Determine if the invoke is convergent.
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
Value * getReturnedArgOperand() const
If one of the arguments has the 'returned' attribute, returns its operand value.
static LLVM_ABI CallBase * Create(CallBase *CB, ArrayRef< OperandBundleDef > Bundles, InsertPosition InsertPt=nullptr)
Create a clone of CB with a different set of operand bundles and insert it before InsertPt.
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
void addParamAttrs(unsigned ArgNo, const AttrBuilder &B)
Adds attributes to the indicated argument.
static LLVM_ABI CallBase * removeOperandBundle(CallBase *CB, uint32_t ID, InsertPosition InsertPt=nullptr)
Create a clone of CB with operand bundle ID removed.
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
CallBr instruction, tracking function calls that may not return control but instead transfer it to a ...
static CallBrInst * Create(FunctionType *Ty, Value *Func, BasicBlock *DefaultDest, ArrayRef< BasicBlock * > IndirectDests, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This class represents a function call, abstracting a target machine's calling convention.
bool isNoTailCall() const
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
bool isMustTailCall() const
static LLVM_ABI Instruction::CastOps getCastOpcode(const Value *Val, bool SrcIsSigned, Type *Ty, bool DstIsSigned)
Returns the opcode necessary to cast Val into Ty using usual casting rules.
static LLVM_ABI CastInst * CreateIntegerCast(Value *S, Type *Ty, bool isSigned, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a ZExt, BitCast, or Trunc for int -> int casts.
static LLVM_ABI bool isBitOrNoopPointerCastable(Type *SrcTy, Type *DestTy, const DataLayout &DL)
Check whether a bitcast, inttoptr, or ptrtoint cast between these types is valid and a no-op.
static LLVM_ABI CastInst * CreateBitOrPointerCast(Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Create a BitCast, a PtrToInt, or an IntToPtr cast instruction.
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:743
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:746
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:748
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:751
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:747
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:756
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getNonStrictPredicate() const
For example, SGT -> SGE, SLT -> SLE, ULT -> ULE, UGT -> UGE.
Definition InstrTypes.h:934
Predicate getUnorderedPredicate() const
Definition InstrTypes.h:874
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getSub(Constant *C1, Constant *C2, bool HasNUW=false, bool HasNSW=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
static LLVM_ABI ConstantFP * getInfinity(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI ConstantInt * getBool(LLVMContext &Context, bool V)
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI ConstantPtrAuth * get(Constant *Ptr, ConstantInt *Key, ConstantInt *Disc, Constant *AddrDisc, Constant *DeactivationSymbol)
Return a pointer signed with the specified parameters.
This class represents a range of values.
LLVM_ABI ConstantRange zextOrTrunc(uint32_t BitWidth) const
Make this range have the bit width given by BitWidth.
LLVM_ABI bool isFullSet() const
Return true if this set contains all of the elements possible for this data-type.
LLVM_ABI bool icmp(CmpInst::Predicate Pred, const ConstantRange &Other) const
Does the predicate Pred hold between ranges this and Other?
LLVM_ABI ConstantRange multiply(const ConstantRange &Other, unsigned NoWrapKind=0) const
Return a new range representing the possible values resulting from a multiplication of a value in thi...
LLVM_ABI bool contains(const APInt &Val) const
Return true if the specified value is in the set.
uint32_t getBitWidth() const
Get the bit width of this ConstantRange.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Record of a variable value-assignment, aka a non instruction representation of the dbg....
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
unsigned size() const
Definition DenseMap.h:114
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static FMFSource intersect(Value *A, Value *B)
Intersect the FMF from two instructions.
Definition IRBuilder.h:107
This class represents an extension of floating point types.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
bool allowReassoc() const
Flag queries.
Definition FMF.h:64
An instruction for ordering other memory operations.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this fence instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this fence instruction.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type::subtype_iterator param_iterator
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool isConvergent() const
Determine if the call is convergent.
Definition Function.h:618
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:354
bool doesNotThrow() const
Determine if the function cannot unwind.
Definition Function.h:602
bool isIntrinsic() const
isIntrinsic - Returns true if the function's name starts with "llvm.".
Definition Function.h:251
LLVM_ABI Value * getBasePtr() const
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
LLVM_ABI Value * getDerivedPtr() const
unsigned getDerivedPtrIndex() const
The index into the associated statepoint's argument list which contains the pointer whose relocation t...
std::vector< const GCRelocateInst * > getGCRelocates() const
Get list of all gc relocates linked to this statepoint. May contain several relocations for the same b...
Definition Statepoint.h:206
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this GlobalObject.
LLVM_ABI bool isDeclaration() const
Return true if the primary definition of this global value is outside of the current translation unit...
Definition Globals.cpp:392
PointerType * getType() const
Global values are always pointers.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI Value * CreateLaunderInvariantGroup(Value *Ptr)
Create a launder.invariant.group intrinsic call.
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:509
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > OverloadTypes, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="", ArrayRef< OperandBundleDef > OpBundles={})
Create a call to intrinsic ID with Args, mangled using OverloadTypes.
LLVM_ABI Value * CreateBinaryIntrinsic(Intrinsic::ID ID, Value *LHS, Value *RHS, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 2 operands which is mangled on the first type.
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1461
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2120
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2649
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:514
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2484
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2247
LLVM_ABI Value * CreateUnaryIntrinsic(Intrinsic::ID ID, Value *Op, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with 1 operand which is mangled on its type.
LLVM_ABI Value * CreateStripInvariantGroup(Value *Ptr)
Create a strip.invariant.group intrinsic call.
static InsertValueInst * Create(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Instruction * foldOpIntoPhi(Instruction &I, PHINode *PN, bool AllowMultipleUses=false)
Given a binary operator, cast instruction, or select which has a PHI node as operand #0,...
Value * SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, APInt &PoisonElts, unsigned Depth=0, bool AllowMultipleUsers=false) override
The specified value produces a vector with any number of elements.
bool SimplifyDemandedBits(Instruction *I, unsigned Op, const APInt &DemandedMask, KnownBits &Known, const SimplifyQuery &Q, unsigned Depth=0) override
This form of SimplifyDemandedBits simplifies the specified instruction operand if possible,...
Instruction * FoldOpIntoSelect(Instruction &Op, SelectInst *SI, bool FoldWithMultiUse=false, bool SimplifyBothArms=false)
Given an instruction with a select as one operand and a constant as the other operand,...
Instruction * SimplifyAnyMemSet(AnyMemSetInst *MI)
Instruction * foldItoFPtoI(FPToIntTy &FI)
fpto{s/u}i.sat --> X or zext(X) or sext(X) or trunc(X) This is safe if the intermediate type has enou...
Instruction * visitFree(CallInst &FI, Value *FreedOp)
Instruction * visitCallBrInst(CallBrInst &CBI)
Instruction * eraseInstFromFunction(Instruction &I) override
Combiner aware instruction erasure.
Value * foldReversedIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are reverses, try to pull the reverse after the intrinsic.
Value * tryGetLog2(Value *Op, bool AssumeNonZero)
Instruction * visitFenceInst(FenceInst &FI)
Instruction * foldShuffledIntrinsicOperands(IntrinsicInst *II)
If all arguments of the intrinsic are unary shuffles with the same mask, try to shuffle after the int...
Instruction * visitInvokeInst(InvokeInst &II)
bool SimplifyDemandedInstructionBits(Instruction &Inst)
Tries to simplify operands to an integer instruction based on its demanded bits.
void CreateNonTerminatorUnreachable(Instruction *InsertAt)
Create and insert the idiom we use to indicate a block is unreachable without having to rewrite the C...
Instruction * visitVAEndInst(VAEndInst &I)
Instruction * matchBSwapOrBitReverse(Instruction &I, bool MatchBSwaps, bool MatchBitReversals)
Given an initial instruction, check to see if it is the root of a bswap/bitreverse idiom.
Constant * unshuffleConstant(ArrayRef< int > ShMask, Constant *C, VectorType *NewCTy)
Find a constant NewC that has property: shuffle(NewC, ShMask) = C Returns nullptr if such a constant ...
Instruction * visitAllocSite(Instruction &FI)
Instruction * SimplifyAnyMemTransfer(AnyMemTransferInst *MI)
OverflowResult computeOverflow(Instruction::BinaryOps BinaryOp, bool IsSigned, Value *LHS, Value *RHS, Instruction *CxtI) const
Instruction * visitCallInst(CallInst &CI)
CallInst simplification.
The core instruction combiner logic.
SimplifyQuery SQ
const DataLayout & getDataLayout() const
unsigned ComputeMaxSignificantBits(const Value *Op, const Instruction *CxtI=nullptr, unsigned Depth=0) const
IRBuilder< TargetFolder, IRBuilderCallbackInserter > BuilderTy
An IRBuilder that automatically inserts new instructions into the worklist.
bool isFreeToInvert(Value *V, bool WillInvertAllUses, bool &DoesConsume)
Return true if the specified value is free to invert (apply ~ to).
DominatorTree & getDominatorTree() const
BlockFrequencyInfo * BFI
TargetLibraryInfo & TLI
Instruction * InsertNewInstBefore(Instruction *New, BasicBlock::iterator Old)
Inserts an instruction New before instruction Old.
Instruction * replaceInstUsesWith(Instruction &I, Value *V)
A combiner-aware RAUW-like routine.
void replaceUse(Use &U, Value *NewValue)
Replace use and add the previously used value to the worklist.
InstructionWorklist & Worklist
A worklist of the instructions that need to be simplified.
const DataLayout & DL
DomConditionCache DC
void computeKnownBits(const Value *V, KnownBits &Known, const Instruction *CxtI, unsigned Depth=0) const
LLVM_ABI std::optional< Instruction * > targetInstCombineIntrinsic(IntrinsicInst &II)
AssumptionCache & AC
Instruction * replaceOperand(Instruction &I, unsigned OpNum, Value *V)
Replace operand of instruction and add old operand to the worklist.
bool MaskedValueIsZero(const Value *V, const APInt &Mask, const Instruction *CxtI=nullptr, unsigned Depth=0) const
DominatorTree & DT
ProfileSummaryInfo * PSI
BuilderTy & Builder
AssumptionCache & getAssumptionCache() const
OptimizationRemarkEmitter & ORE
Value * getFreelyInverted(Value *V, bool WillInvertAllUses, BuilderTy *Builder, bool &DoesConsume)
const SimplifyQuery & getSimplifyQuery() const
bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero=false, const Instruction *CxtI=nullptr, unsigned Depth=0)
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
LLVM_ABI bool mayWriteToMemory() const LLVM_READONLY
Return true if this instruction may modify memory.
LLVM_ABI void copyIRFlags(const Value *V, bool IncludeWrapFlags=true)
Convenience method to copy supported exact, fast-math, and (optionally) wrapping flags from V to this...
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI bool isCommutative() const LLVM_READONLY
Return true if the instruction is commutative:
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
bool isTerminator() const
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI std::optional< InstListType::iterator > getInsertionPointAfterDef()
Get the first insertion point at which the result of this instruction is defined.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:350
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Invoke instruction.
static InvokeInst * Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal, BasicBlock *IfException, ArrayRef< Value * > Args, const Twine &NameStr, InsertPosition InsertBefore=nullptr)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
Metadata node.
Definition Metadata.h:1080
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
static ICmpInst::Predicate getPredicate(Intrinsic::ID ID)
Returns the comparison predicate underlying the intrinsic.
ICmpInst::Predicate getPredicate() const
Returns the comparison predicate underlying the intrinsic.
bool isSigned() const
Whether the intrinsic is signed or unsigned.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
StringRef getName() const
Get a short "name" for the module.
Definition Module.h:272
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
Definition Operator.h:43
Utility class for integer operators which may exhibit overflow - Add, Sub, Mul, and Shl.
Definition Operator.h:78
bool hasNoSignedWrap() const
Test whether this operation is known to never undergo signed overflow, aka the nsw property.
Definition Operator.h:113
bool hasNoUnsignedWrap() const
Test whether this operation is known to never undergo unsigned overflow, aka the nuw property.
Definition Operator.h:107
bool isCommutative() const
Return true if the instruction is commutative.
Definition Operator.h:130
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Represents a saturating add/sub intrinsic.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
This instruction constructs a fixed permutation of two input vectors.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
SmallBitVector & set()
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setOrdering(AtomicOrdering Ordering)
Sets the ordering constraint of this store instruction.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
Class to represent struct types.
static LLVM_ABI bool isCallingConvCCompatible(CallBase *CI)
Returns true if call site / callee has cdecl-compatible calling conventions.
Provides information about what library functions are available for the current target.
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition Type.h:263
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI bool canLosslesslyBitCastTo(Type *Ty) const
Return true if this type could be converted with a lossless BitCast to type 'Ty'.
Definition Type.cpp:153
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
bool isStructTy() const
True if this is an instance of StructType.
Definition Type.h:276
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
LLVM_ABI const fltSemantics & getFltSemantics() const
Definition Type.cpp:106
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
static UnaryOperator * CreateWithCopiedFlags(UnaryOps Opc, Value *V, Instruction *CopyO, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:148
static UnaryOperator * CreateFNegFMF(Value *Op, Instruction *FMFSource, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Definition InstrTypes.h:156
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
void setOperand(unsigned i, Value *Val)
Definition User.h:212
Value * getOperand(unsigned i) const
Definition User.h:207
This represents the llvm.va_end intrinsic.
static LLVM_ABI void ValueIsDeleted(Value *V)
Definition Value.cpp:1233
static LLVM_ABI void ValueIsRAUWd(Value *Old, Value *New)
Definition Value.cpp:1286
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:798
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
static LLVM_ABI void dropDroppableUse(Use &U)
Remove the droppable use U.
Definition Value.cpp:222
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
static constexpr unsigned MaxAlignmentExponent
The maximum alignment for instructions.
Definition Value.h:797
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
Definition TypeSize.h:269
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
Definition TypeSize.h:277
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
Definition TypeSize.h:171
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
match_combine_and< Ty... > m_CombineAnd(const Ty &...Ps)
Combine pattern matchers matching all of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
auto m_PtrToIntOrAddr(const OpTy &Op)
Matches PtrToInt or PtrToAddr.
m_Intrinsic_Ty< Opnd0 >::Ty m_BitReverse(const Opnd0 &Op0)
auto m_Poison()
Match an arbitrary poison constant.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWSub(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
match_deferred< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
ap_match< APFloat > m_APFloat(const APFloat *&Res)
Match a ConstantFP or splatted ConstantVector, binding the specified pointer to the contained APFloat...
auto match_fn(const Pattern &P)
A match functor that can be used as a UnaryPredicate in functional algorithms like all_of.
OverflowingBinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub, OverflowingBinaryOperator::NoSignedWrap > m_NSWNeg(const ValTy &V)
Matches a 'Neg' as 'sub nsw 0, V'.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cstfp_pred_ty< is_neg_zero_fp > m_NegZeroFP()
Match a floating-point negative zero.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
specific_fpval m_SpecificFP(double V)
Match a specific floating point value or vector with all elements equal to the value.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
BinOpPred_match< LHS, RHS, is_logical_shift_op > m_LogicalShift(const LHS &L, const RHS &R)
Matches logical shift operations.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
auto m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor, true > m_c_Xor(const LHS &L, const RHS &R)
Matches an Xor with LHS and RHS in either order.
auto m_Constant()
Match an arbitrary Constant and ignore it.
match_combine_or< match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > >, OpTy > m_ZExtOrSExtOrSelf(const OpTy &Op)
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
cst_pred_ty< is_strictlypositive > m_StrictlyPositive()
Match an integer or vector of strictly positive values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
CastInst_match< OpTy, FPExtInst > m_FPExt(const OpTy &Op)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Mul, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWMul(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, umax_pred_ty > m_UMax(const LHS &L, const RHS &R)
cst_pred_ty< is_negated_power2 > m_NegatedPower2()
Match an integer or vector negated power-of-2.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
cst_pred_ty< custom_checkfn< APInt > > m_CheckedInt(function_ref< bool(const APInt &)> CheckFn)
Match an integer or vector where CheckFn(ele) for each element is true.
auto m_MaxOrMin(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
auto m_c_MaxOrMin(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Sub, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWSub(const LHS &L, const RHS &R)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap >, DisjointOr_match< LHS, RHS > > m_NSWAddLike(const LHS &L, const RHS &R)
Match either "add nsw" or "or disjoint".
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Exact_match< T > m_Exact(const T &SubPattern)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
BinOpPred_match< LHS, RHS, is_shift_op > m_Shift(const LHS &L, const RHS &R)
Matches shift operations.
cstfp_pred_ty< is_pos_zero_fp > m_PosZeroFP()
Match a floating-point positive zero.
auto m_UnOp()
Match an arbitrary unary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
m_Intrinsic_Ty< Opnd0 >::Ty m_VecReverse(const Opnd0 &Op0)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
m_Intrinsic_Ty< Opnd0, Opnd1, Opnd2 >::Ty m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2)
BinaryOp_match< LHS, RHS, Instruction::SRem > m_SRem(const LHS &L, const RHS &R)
auto m_Undef()
Match an arbitrary undef constant.
m_Intrinsic_Ty< Opnd0 >::Ty m_BSwap(const Opnd0 &Op0)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
match_combine_or< OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap >, DisjointOr_match< LHS, RHS > > m_NUWAddLike(const LHS &L, const RHS &R)
Match either "add nuw" or "or disjoint".
BinOpPred_match< LHS, RHS, is_bitwiselogic_op > m_BitwiseLogic(const LHS &L, const RHS &R)
Matches bitwise logic operations.
ElementWiseBitCast_match< OpTy > m_ElementWiseBitCast(const OpTy &Op)
m_Intrinsic_Ty< Opnd0 >::Ty m_FAbs(const Opnd0 &Op0)
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
m_Intrinsic_Ty< Opnd0, Opnd1 >::Ty m_CopySign(const Opnd0 &Op0, const Opnd1 &Op1)
MaxMin_match< ICmpInst, LHS, RHS, umin_pred_ty > m_UMin(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition LLVMContext.h:55
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
initializer< Ty > init(const Ty &Val)
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
constexpr double e
DiagnosticInfoOptimizationBase::Argument NV
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
LLVM_ABI cl::opt< bool > EnableKnowledgeRetention
LLVM_ABI Intrinsic::ID getInverseMinMaxIntrinsic(Intrinsic::ID MinMaxID)
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
@ NeverOverflows
Never overflows.
@ AlwaysOverflowsHigh
Always overflows in the direction of signed/unsigned max value.
@ AlwaysOverflowsLow
Always overflows in the direction of signed/unsigned min value.
@ MayOverflow
May or may not overflow.
LLVM_ABI KnownFPClass computeKnownFPClass(const Value *V, const APInt &DemandedElts, FPClassTest InterestedClasses, const SimplifyQuery &SQ, unsigned Depth=0)
Determine which floating-point classes are valid for V, and return them in KnownFPClass bit sets.
LLVM_ABI Value * simplifyFMulInst(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for an FMul, fold the result or return null.
LLVM_ABI bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI, const DominatorTree *DT=nullptr, bool AllowEphemerals=false)
Return true if it is valid to use the assumptions provided by an assume intrinsic,...
LLVM_ABI APInt possiblyDemandedEltsInMask(Value *Mask)
Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y) for each lane which may be ...
LLVM_ABI RetainedKnowledge simplifyRetainedKnowledge(AssumeInst *Assume, RetainedKnowledge RK, AssumptionCache *AC, DominatorTree *DT)
Canonicalize the RetainedKnowledge RK.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isRemovableAlloc(const CallBase *V, const TargetLibraryInfo *TLI)
Return true if this is a call to an allocation function that does not have side effects that we are r...
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI Value * lowerObjectSizeCall(IntrinsicInst *ObjectSize, const DataLayout &DL, const TargetLibraryInfo *TLI, bool MustSucceed)
Try to turn a call to @llvm.objectsize into an integer value of the given Type.
LLVM_ABI Value * getAllocAlignment(const CallBase *V, const TargetLibraryInfo *TLI)
Gets the alignment argument for an aligned_alloc-like function, using either built-in knowledge based...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI RetainedKnowledge getKnowledgeFromOperandInAssume(AssumeInst &Assume, unsigned Idx)
Retrieve the information held by Assume on the operand at index Idx.
LLVM_READONLY APFloat maximum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximum semantics.
Definition APFloat.h:1740
LLVM_ABI Value * simplifyCall(CallBase *Call, Value *Callee, ArrayRef< Value * > Args, const SimplifyQuery &Q)
Given a callsite, callee, and arguments, fold the result or return null.
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
constexpr T alignDown(U Value, V Align, W Skew=0)
Returns the largest unsigned integer less than or equal to Value that is congruent to Skew mod Align.
Definition MathExtras.h:546
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
LLVM_ABI bool isAssumeWithEmptyBundle(const AssumeInst &Assume)
Return true iff the operand bundles of the provided llvm.assume don't contain any valuable informat...
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
LLVM_ABI RetainedKnowledge getKnowledgeFromBundle(AssumeInst &Assume, const CallBase::BundleOpInfo &BOI)
This extracts the Knowledge from an element of an operand bundle.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 maxNum semantics.
Definition APFloat.h:1695
LLVM_ABI FPClassTest fneg(FPClassTest Mask)
Return the test mask which returns true if the value's sign bit is flipped.
SelectPatternFlavor
Specific patterns of select instructions we can match.
@ SPF_ABS
Absolute value.
@ SPF_NABS
Negative absolute value.
LLVM_ABI Constant * getLosslessUnsignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_READONLY APFloat minimumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimumNumber semantics.
Definition APFloat.h:1726
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM)
Returns: X * 2^Exp for integral exponents.
Definition APFloat.h:1640
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
LLVM_ABI bool matchSimpleBinaryIntrinsicRecurrence(const IntrinsicInst *I, PHINode *&P, Value *&Init, Value *&OtherOp)
Attempt to match a simple value-accumulating recurrence of the form: llvm.intrinsic....
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
auto find_if_not(R &&Range, UnaryPredicate P)
Definition STLExtras.h:1776
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
bool isAtLeastOrStrongerThan(AtomicOrdering AO, AtomicOrdering Other)
LLVM_ABI Constant * getLosslessSignedTrunc(Constant *C, Type *DestTy, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
LLVM_ABI FPClassTest inverse_fabs(FPClassTest Mask)
Return the test mask which returns true after fabs is applied to the value.
LLVM_ABI ConstantRange getVScaleRange(const Function *F, unsigned BitWidth)
Determine the possible constant range of vscale with the given bit width, based on the vscale_range f...
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI bool isNotCrossLaneOperation(const Instruction *I)
Return true if the instruction doesn't potentially cross vector lanes.
LLVM_ABI bool maskIsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, Return true if all of the elements of this predicate mask are known to be ...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI Constant * ConstantFoldBinaryOpOperands(unsigned Opcode, Constant *LHS, Constant *RHS, const DataLayout &DL)
Attempt to constant fold a binary operation with the specified operands.
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
constexpr int PoisonMaskElem
@ Mod
The access may modify the value stored in memory.
Definition ModRef.h:34
LLVM_ABI Value * simplifyFMAFMul(Value *LHS, Value *RHS, FastMathFlags FMF, const SimplifyQuery &Q, fp::ExceptionBehavior ExBehavior=fp::ebIgnore, RoundingMode Rounding=RoundingMode::NearestTiesToEven)
Given operands for the multiplication of a FMA, fold the result or return null.
@ Other
Any other memory.
Definition ModRef.h:68
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
LLVM_ABI Value * simplifyConstrainedFPCall(CallBase *Call, const SimplifyQuery &Q)
Given a constrained FP intrinsic call, tries to compute its simplified version.
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE-754 2008 minNum semantics.
Definition APFloat.h:1676
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
@ Add
Sum of integers.
LLVM_ABI bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx, const TargetTransformInfo *TTI)
Identifies if the vector form of the intrinsic has a scalar operand.
LLVM_ABI ConstantRange computeConstantRangeIncludingKnownBits(const WithCache< const Value * > &V, bool ForSigned, const SimplifyQuery &SQ)
Combine constant ranges from computeConstantRange() and computeKnownBits().
DWARFExpression::Operation Op
bool isSafeToSpeculativelyExecuteWithVariableReplaced(const Instruction *I, bool IgnoreUBImplyingAttrs=true)
Don't use information from its non-constant operands.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI Value * getFreedOperand(const CallBase *CB, const TargetLibraryInfo *TLI)
If this is a call to a free function, return the freed operand.
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
constexpr unsigned BitWidth
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if this is always a dereferenceable pointer.
Definition Loads.cpp:250
LLVM_ABI Constant * getLosslessInvCast(Constant *C, Type *InvCastTo, unsigned CastOp, const DataLayout &DL, PreservedCastFlags *Flags=nullptr)
Try to cast C to InvC losslessly, satisfying CastOp(InvC) equals C, or CastOp(InvC) is a refined valu...
LLVM_ABI bool maskIsAllZeroOrUndef(Value *Mask)
Given a mask vector of i1, return true if all of the elements of this predicate mask are known to be ...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
LLVM_ABI std::optional< APInt > getAllocSize(const CallBase *CB, const TargetLibraryInfo *TLI, function_ref< const Value *(const Value *)> Mapper=[](const Value *V) { return V;})
Return the size of the requested allocation.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
LLVM_ABI bool maskContainsAllOneOrUndef(Value *Mask)
Given a mask vector of i1, return true if any of the elements of this predicate mask are known to be ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
LLVM_READONLY APFloat minimum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 minimum semantics.
Definition APFloat.h:1713
LLVM_ABI bool isKnownNegation(const Value *X, const Value *Y, bool NeedNSW=false, bool AllowPoison=true)
Return true if the two given values are negations of each other.
LLVM_READONLY APFloat maximumnum(const APFloat &A, const APFloat &B)
Implements IEEE 754-2019 maximumNumber semantics.
Definition APFloat.h:1753
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI bool isKnownNonNegative(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Returns true if the given value is known to be non-negative.
LLVM_ABI bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
LLVM_ABI std::optional< bool > computeKnownFPSignBit(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return false if we can prove that the specified FP value's sign bit is 0.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define NC
Definition regutils.h:42
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
@ IEEE
IEEE-754 denormal numbers preserved.
bool isNonNegative() const
Returns true if this value is known to be non-negative.
Definition KnownBits.h:106
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.
Definition KnownBits.h:256
unsigned countMaxTrailingZeros() const
Returns the maximum number of trailing zero bits possible.
Definition KnownBits.h:288
unsigned countMaxPopulation() const
Returns the maximum number of bits that could be one.
Definition KnownBits.h:303
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
bool isNonZero() const
Returns true if this value is known to be non-zero.
Definition KnownBits.h:109
unsigned countMinLeadingZeros() const
Returns the minimum number of leading zero bits.
Definition KnownBits.h:262
bool isNegative() const
Returns true if this value is known to be negative.
Definition KnownBits.h:103
unsigned countMaxLeadingZeros() const
Returns the maximum number of leading zero bits possible.
Definition KnownBits.h:294
unsigned countMinPopulation() const
Returns the number of bits known to be one.
Definition KnownBits.h:300
bool isAllOnes() const
Returns true if value is all one bits.
Definition KnownBits.h:81
FPClassTest KnownFPClasses
Floating-point classes the value could be one of.
Matching combinators.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition Alignment.h:130
A lightweight accessor for an operand bundle meant to be passed around by value.
StringRef getTagName() const
Return the tag of this operand bundle as a string.
uint32_t getTagID() const
Return the tag of this operand bundle as an integer.
ArrayRef< Use > Inputs
Represents one piece of information held inside an operand bundle of an llvm.assume.
Attribute::AttrKind AttrKind
SelectPatternFlavor Flavor
const DataLayout & DL
const Instruction * CxtI
SimplifyQuery getWithInstruction(const Instruction *I) const