42 #define DEBUG_TYPE "bypass-slow-division"
50 QuotRemPair(
Value *InQuotient,
Value *InRemainder)
51 : Quotient(InQuotient), Remainder(InRemainder) {}
57 struct QuotRemWithBB {
59 Value *Quotient =
nullptr;
60 Value *Remainder =
nullptr;
77 class FastDivInsertionTask {
78 bool IsValidTask =
false;
83 bool isHashLikeValue(
Value *V, VisitedSetTy &Visited);
84 ValueRange getValueRange(
Value *
Op, VisitedSetTy &Visited);
87 QuotRemPair createDivRemPhiNodes(QuotRemWithBB &
LHS, QuotRemWithBB &
RHS,
93 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
94 SlowDivOrRem->
getOpcode() == Instruction::SRem;
98 return SlowDivOrRem->
getOpcode() == Instruction::SDiv ||
99 SlowDivOrRem->
getOpcode() == Instruction::UDiv;
102 Type *getSlowType() {
return SlowDivOrRem->
getType(); }
105 FastDivInsertionTask(
Instruction *
I,
const BypassWidthsTy &BypassWidths);
107 Value *getReplacement(DivCacheTy &Cache);
112 FastDivInsertionTask::FastDivInsertionTask(
Instruction *
I,
113 const BypassWidthsTy &BypassWidths) {
114 switch (
I->getOpcode()) {
115 case Instruction::UDiv:
116 case Instruction::SDiv:
117 case Instruction::URem:
118 case Instruction::SRem:
127 IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
132 auto BI = BypassWidths.find(SlowType->
getBitWidth());
133 if (BI == BypassWidths.end())
141 MainBB =
I->getParent();
151 Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
157 Value *Dividend = SlowDivOrRem->getOperand(0);
158 Value *Divisor = SlowDivOrRem->getOperand(1);
160 auto CacheI = Cache.find(
Key);
162 if (CacheI == Cache.end()) {
168 CacheI = Cache.insert({
Key, *OptResult}).first;
171 QuotRemPair &
Value = CacheI->second;
172 return isDivisionOp() ?
Value.Quotient :
Value.Remainder;
190 bool FastDivInsertionTask::isHashLikeValue(
Value *V, VisitedSetTy &Visited) {
195 switch (
I->getOpcode()) {
196 case Instruction::Xor:
203 Value *Op1 =
I->getOperand(1);
205 if (!
C && isa<BitCastInst>(Op1))
206 C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
207 return C &&
C->getValue().getMinSignedBits() > BypassType->getBitWidth();
209 case Instruction::PHI:
212 if (Visited.size() >= 16)
216 if (!Visited.insert(
I).second)
221 return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
230 ValueRange FastDivInsertionTask::getValueRange(
Value *V,
231 VisitedSetTy &Visited) {
232 unsigned ShortLen = BypassType->getBitWidth();
235 assert(LongLen > ShortLen &&
"Value type must be wider than BypassType");
236 unsigned HiBits = LongLen - ShortLen;
238 const DataLayout &
DL = SlowDivOrRem->getModule()->getDataLayout();
243 if (Known.countMinLeadingZeros() >= HiBits)
244 return VALRNG_KNOWN_SHORT;
246 if (Known.countMaxLeadingZeros() < HiBits)
247 return VALRNG_LIKELY_LONG;
253 if (isHashLikeValue(V, Visited))
254 return VALRNG_LIKELY_LONG;
256 return VALRNG_UNKNOWN;
261 QuotRemWithBB FastDivInsertionTask::createSlowBB(
BasicBlock *SuccessorBB) {
262 QuotRemWithBB DivRemPair;
264 MainBB->getParent(), SuccessorBB);
266 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
268 Value *Dividend = SlowDivOrRem->getOperand(0);
269 Value *Divisor = SlowDivOrRem->getOperand(1);
272 DivRemPair.Quotient =
Builder.CreateSDiv(Dividend, Divisor);
273 DivRemPair.Remainder =
Builder.CreateSRem(Dividend, Divisor);
275 DivRemPair.Quotient =
Builder.CreateUDiv(Dividend, Divisor);
276 DivRemPair.Remainder =
Builder.CreateURem(Dividend, Divisor);
285 QuotRemWithBB FastDivInsertionTask::createFastBB(
BasicBlock *SuccessorBB) {
286 QuotRemWithBB DivRemPair;
288 MainBB->getParent(), SuccessorBB);
290 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
292 Value *Dividend = SlowDivOrRem->getOperand(0);
293 Value *Divisor = SlowDivOrRem->getOperand(1);
294 Value *ShortDivisorV =
295 Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
296 Value *ShortDividendV =
297 Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
300 Value *ShortQV =
Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
301 Value *ShortRV =
Builder.CreateURem(ShortDividendV, ShortDivisorV);
302 DivRemPair.Quotient =
303 Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
304 DivRemPair.Remainder =
305 Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
312 QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &
LHS,
316 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
323 return QuotRemPair(QuoPhi, RemPhi);
330 Value *FastDivInsertionTask::insertOperandRuntimeCheck(
Value *Op1,
Value *Op2) {
331 assert((Op1 || Op2) &&
"Nothing to check");
333 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
337 OrV =
Builder.CreateOr(Op1, Op2);
339 OrV = Op1 ? Op1 : Op2;
343 uint64_t BitMask = ~BypassType->getBitMask();
348 return Builder.CreateICmpEQ(AndV, ZeroV);
354 Value *Dividend = SlowDivOrRem->getOperand(0);
355 Value *Divisor = SlowDivOrRem->getOperand(1);
358 ValueRange DividendRange = getValueRange(Dividend, SetL);
359 if (DividendRange == VALRNG_LIKELY_LONG)
363 ValueRange DivisorRange = getValueRange(Divisor, SetR);
364 if (DivisorRange == VALRNG_LIKELY_LONG)
367 bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
368 bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
370 if (DividendShort && DivisorShort) {
377 Value *TruncDividend =
Builder.CreateTrunc(Dividend, BypassType);
378 Value *TruncDivisor =
Builder.CreateTrunc(Divisor, BypassType);
379 Value *TruncDiv =
Builder.CreateUDiv(TruncDividend, TruncDivisor);
380 Value *TruncRem =
Builder.CreateURem(TruncDividend, TruncDivisor);
381 Value *ExtDiv =
Builder.CreateZExt(TruncDiv, getSlowType());
382 Value *ExtRem =
Builder.CreateZExt(TruncRem, getSlowType());
383 return QuotRemPair(ExtDiv, ExtRem);
386 if (isa<ConstantInt>(Divisor)) {
397 if (
auto *BCI = dyn_cast<BitCastInst>(Divisor))
398 if (BCI->getParent() == SlowDivOrRem->getParent() &&
399 isa<ConstantInt>(BCI->getOperand(0)))
403 Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
424 Long.Remainder = Dividend;
425 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
426 QuotRemPair
Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
438 QuotRemWithBB
Fast = createFastBB(SuccessorBB);
439 QuotRemWithBB Slow = createSlowBB(SuccessorBB);
440 QuotRemPair
Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
441 Value *CmpV = insertOperandRuntimeCheck(DividendShort ?
nullptr : Dividend,
442 DivisorShort ?
nullptr : Divisor);
451 const BypassWidthsTy &BypassWidths) {
452 DivCacheTy PerBBDivCache;
454 bool MadeChange =
false;
456 while (Next !=
nullptr) {
466 FastDivInsertionTask Task(
I, BypassWidths);
467 if (
Value *Replacement = Task.getReplacement(PerBBDivCache)) {
468 I->replaceAllUsesWith(Replacement);
469 I->eraseFromParent();
477 for (
auto &KV : PerBBDivCache)
478 for (
Value *V : {KV.second.Quotient, KV.second.Remainder})