42#define DEBUG_TYPE "bypass-slow-division"
50 QuotRemPair(
Value *InQuotient,
Value *InRemainder)
51 : Quotient(InQuotient), Remainder(InRemainder) {}
59 Value *Quotient =
nullptr;
60 Value *Remainder =
nullptr;
77class FastDivInsertionTask {
78 bool IsValidTask =
false;
85 bool isHashLikeValue(
Value *V, VisitedSetTy &Visited);
86 ValueRange getValueRange(
Value *
Op, VisitedSetTy &Visited);
89 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &
LHS, QuotRemWithBB &
RHS,
92 std::optional<QuotRemPair> insertFastDivAndRem();
95 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
96 SlowDivOrRem->
getOpcode() == Instruction::SRem;
100 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
101 SlowDivOrRem->
getOpcode() == Instruction::UDiv;
104 Type *getSlowType() {
return SlowDivOrRem->
getType(); }
107 FastDivInsertionTask(
Instruction *
I,
const BypassWidthsTy &BypassWidths,
110 Value *getReplacement(DivCacheTy &Cache);
115FastDivInsertionTask::FastDivInsertionTask(
Instruction *
I,
116 const BypassWidthsTy &BypassWidths,
119 switch (
I->getOpcode()) {
120 case Instruction::UDiv:
121 case Instruction::SDiv:
122 case Instruction::URem:
123 case Instruction::SRem:
137 auto BI = BypassWidths.find(SlowType->getBitWidth());
138 if (BI == BypassWidths.end())
146 MainBB =
I->getParent();
156Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
165 auto CacheI = Cache.find(
Key);
167 if (CacheI == Cache.end()) {
169 std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
173 CacheI = Cache.insert({
Key, *OptResult}).first;
176 QuotRemPair &
Value = CacheI->second;
177 return isDivisionOp() ?
Value.Quotient :
Value.Remainder;
195bool FastDivInsertionTask::isHashLikeValue(
Value *V, VisitedSetTy &Visited) {
200 switch (
I->getOpcode()) {
201 case Instruction::Xor:
203 case Instruction::Mul: {
208 Value *Op1 =
I->getOperand(1);
212 return C &&
C->getValue().getSignificantBits() > BypassType->
getBitWidth();
214 case Instruction::PHI:
217 if (Visited.size() >= 16)
221 if (!Visited.insert(
I).second)
226 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
235ValueRange FastDivInsertionTask::getValueRange(
Value *V,
236 VisitedSetTy &Visited) {
238 unsigned LongLen =
V->getType()->getIntegerBitWidth();
240 assert(LongLen > ShortLen &&
"Value type must be wider than BypassType");
241 unsigned HiBits = LongLen - ShortLen;
244 KnownBits Known(LongLen);
248 if (Known.countMinLeadingZeros() >= HiBits)
249 return VALRNG_KNOWN_SHORT;
251 if (Known.countMaxLeadingZeros() < HiBits)
252 return VALRNG_LIKELY_LONG;
258 if (isHashLikeValue(V, Visited))
259 return VALRNG_LIKELY_LONG;
261 return VALRNG_UNKNOWN;
266QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
267 QuotRemWithBB DivRemPair;
271 Builder.SetCurrentDebugLocation(SlowDivOrRem->
getDebugLoc());
277 DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
278 DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
280 DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
281 DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
284 Builder.CreateBr(SuccessorBB);
290QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
291 QuotRemWithBB DivRemPair;
295 Builder.SetCurrentDebugLocation(SlowDivOrRem->
getDebugLoc());
299 Value *ShortDivisorV =
300 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
301 Value *ShortDividendV =
302 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
305 Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
306 Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
307 DivRemPair.Quotient =
308 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
309 DivRemPair.Remainder =
310 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
311 Builder.CreateBr(SuccessorBB);
317QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &
LHS,
321 Builder.SetCurrentDebugLocation(SlowDivOrRem->
getDebugLoc());
322 PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
325 PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
328 return QuotRemPair(QuoPhi, RemPhi);
335Value *FastDivInsertionTask::insertOperandRuntimeCheck(
Value *Op1,
Value *Op2) {
336 assert((Op1 || Op2) &&
"Nothing to check");
338 Builder.SetCurrentDebugLocation(SlowDivOrRem->
getDebugLoc());
342 OrV = Builder.CreateOr(Op1, Op2);
344 OrV = Op1 ? Op1 : Op2;
347 Value *AndV = Builder.CreateAnd(
353 return Builder.CreateICmpEQ(AndV, ZeroV);
358std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
363 ValueRange DividendRange = getValueRange(Dividend, SetL);
364 if (DividendRange == VALRNG_LIKELY_LONG)
368 ValueRange DivisorRange = getValueRange(Divisor, SetR);
369 if (DivisorRange == VALRNG_LIKELY_LONG)
372 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
373 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
375 if (DividendShort && DivisorShort) {
382 Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
383 Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
384 Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
385 Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
386 Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
387 Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
388 return QuotRemPair(ExtDiv, ExtRem);
403 if (BCI->getParent() == SlowDivOrRem->
getParent() &&
408 Builder.SetCurrentDebugLocation(SlowDivOrRem->
getDebugLoc());
428 Long.Quotient = ConstantInt::get(getSlowType(), 0);
429 Long.Remainder = Dividend;
430 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
431 QuotRemPair
Result = createDivRemPhiNodes(
Fast, Long, SuccessorBB);
432 Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
433 Builder.CreateCondBr(CmpV,
Fast.BB, SuccessorBB);
437 {DominatorTree::Insert,
Fast.BB, SuccessorBB}});
440 L->addBasicBlockToLoop(
Fast.BB, *LI);
453 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
454 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
455 QuotRemPair
Result = createDivRemPhiNodes(
Fast, Slow, SuccessorBB);
456 Value *CmpV = insertOperandRuntimeCheck(DividendShort ?
nullptr : Dividend,
457 DivisorShort ?
nullptr : Divisor);
458 Builder.CreateCondBr(CmpV,
Fast.BB, Slow.BB);
461 {DominatorTree::Insert, MainBB, Slow.BB},
462 {DominatorTree::Insert,
Fast.BB, SuccessorBB},
463 {DominatorTree::Insert, Slow.BB, SuccessorBB},
464 {DominatorTree::Delete, MainBB, SuccessorBB}});
467 L->addBasicBlockToLoop(
Fast.BB, *LI);
468 L->addBasicBlockToLoop(Slow.BB, *LI);
477 const BypassWidthsTy &BypassWidths,
478 DomTreeUpdater *DTU, LoopInfo *LI) {
479 DivCacheTy PerBBDivCache;
481 bool MadeChange =
false;
483 while (
Next !=
nullptr) {
493 FastDivInsertionTask Task(
I, BypassWidths, DTU, LI);
494 if (
Value *Replacement = Task.getReplacement(PerBBDivCache)) {
495 I->replaceAllUsesWith(Replacement);
496 I->eraseFromParent();
504 for (
auto &KV : PerBBDivCache)
505 for (
Value *V : {KV.second.Quotient, KV.second.Remainder})
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static int isSignedOp(ISD::CondCode Opcode)
For an integer comparison, return 1 if the comparison is a signed operation and 2 if the result is an...
This file defines the SmallPtrSet class.
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
const Instruction & back() const
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const ParentTy * getParent() const
@ Fast
Attempts to make calls as fast as possible (e.g.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
friend class Instruction
Iterator for Instructions in a `BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.