1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugInfo.h"
53#include "llvm/IR/Dominators.h"
54#include "llvm/IR/Function.h"
56#include "llvm/IR/GlobalValue.h"
58#include "llvm/IR/IRBuilder.h"
59#include "llvm/IR/InlineAsm.h"
60#include "llvm/IR/InstrTypes.h"
61#include "llvm/IR/Instruction.h"
64#include "llvm/IR/Intrinsics.h"
65#include "llvm/IR/IntrinsicsAArch64.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/MDBuilder.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
72#include "llvm/IR/Statepoint.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/User.h"
76#include "llvm/IR/Value.h"
77#include "llvm/IR/ValueHandle.h"
78#include "llvm/IR/ValueMap.h"
80#include "llvm/Pass.h"
86#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of selects created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In a profiling mode like sampleFDO, if a function doesn't have a "
187 "profile, we cannot tell for sure that the function is cold, because "
188 "it may be a newly added function that has never been sampled. "
189 "With the flag enabled, the compiler can put such profile-unknown "
190 "functions into a special section, so the runtime system can choose "
191 "to handle them in a different way than the .text section, to save "
192 "RAM for example."));
193
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Minimum number of basic blocks for a function to be considered huge."));
268
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290 NotModifyDT, // Do not modify any dominator tree.
291 ModifyBBDT,  // Modify the Basic Block Dominator Tree.
292 ModifyInstDT // Modify instruction dominance within a Basic Block.
293 // This usually means we moved/deleted/inserted an instruction
294 // in a Basic Block, so we should re-iterate the instructions
295 // in that Basic Block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the original types of instructions before their
335 /// promotion for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP base after splitting the GEPs having large offset.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map serial numbers to Large offset GEPs.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of SExt promoted.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare(){};
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
373 /// If we encounter a huge function, we need to limit the build time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs is like a worklist; it collects the updated BBs which need
377 /// to be optimized again.
378 /// Note: To limit the build time of this pass, when a BB is updated we need
379 /// to insert that BB into FreshBBs for huge functions.
381
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
392
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeInlineAsmInst(CallInst *CS);
435 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436 bool optimizeExt(Instruction *&I);
437 bool optimizeExtUses(Instruction *I);
438 bool optimizeLoadExt(LoadInst *Load);
439 bool optimizeShiftInst(BinaryOperator *BO);
440 bool optimizeFunnelShift(IntrinsicInst *Fsh);
441 bool optimizeSelectInst(SelectInst *SI);
442 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443 bool optimizeSwitchType(SwitchInst *SI);
444 bool optimizeSwitchPhiConstants(SwitchInst *SI);
445 bool optimizeSwitchInst(SwitchInst *SI);
446 bool optimizeExtractElementInst(Instruction *Inst);
447 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool fixupDbgValue(Instruction *I);
449 bool fixupDbgVariableRecord(DbgVariableRecord &I);
450 bool fixupDbgVariableRecordsOnInst(Instruction &I);
451 bool placeDbgValues(Function &F);
452 bool placePseudoProbes(Function &F);
453 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
454 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
455 bool tryToPromoteExts(TypePromotionTransaction &TPT,
457 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
458 unsigned CreatedInstsCost = 0);
459 bool mergeSExts(Function &F);
460 bool splitLargeGEPOffsets();
461 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
462 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
463 bool optimizePhiTypes(Function &F);
464 bool performAddressTypePromotion(
465 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
466 bool HasPromoted, TypePromotionTransaction &TPT,
467 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
468 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
469 bool simplifyOffsetableRelocate(GCStatepointInst &I);
470
471 bool tryToSinkFreeOperands(Instruction *I);
472 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
473 CmpInst *Cmp, Intrinsic::ID IID);
474 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
475 bool optimizeURem(Instruction *Rem);
476 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
478 void verifyBFIUpdates(Function &F);
479 bool _run(Function &F);
480};
481
482class CodeGenPrepareLegacyPass : public FunctionPass {
483public:
484 static char ID; // Pass identification, replacement for typeid
485
486 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
488 }
489
490 bool runOnFunction(Function &F) override;
491
492 StringRef getPassName() const override { return "CodeGen Prepare"; }
493
494 void getAnalysisUsage(AnalysisUsage &AU) const override {
495 // FIXME: When we can selectively preserve passes, preserve the domtree.
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
522 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
541
543 return new CodeGenPrepareLegacyPass();
544}
545
548 CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
558 return PA;
559}
560
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
569 BPI.reset(new BranchProbabilityInfo(F, *LI));
570 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 BBSectionsProfileReader =
575 return _run(F);
576}
577
578bool CodeGenPrepare::_run(Function &F) {
579 bool EverMadeChange = false;
580
581 OptSize = F.hasOptSize();
582 // Use the basic-block-sections profile to promote hot functions to .text.hot
583 // if requested.
584 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
585 BBSectionsProfileReader->isFunctionHot(F.getName())) {
586 F.setSectionPrefix("hot");
587 } else if (ProfileGuidedSectionPrefix) {
588 // The hot attribute overrides profile-count-based hotness, while
589 // profile-count-based hotness overrides the cold attribute.
590 // This is conservative behavior.
591 if (F.hasFnAttribute(Attribute::Hot) ||
592 PSI->isFunctionHotInCallGraph(&F, *BFI))
593 F.setSectionPrefix("hot");
594 // If PSI shows this function is not hot, we will place the function
595 // into the unlikely section if (1) PSI shows this is a cold function, or
596 // (2) the function has the cold attribute.
597 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
598 F.hasFnAttribute(Attribute::Cold))
599 F.setSectionPrefix("unlikely");
600 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
601 PSI->isFunctionHotnessUnknown(F))
602 F.setSectionPrefix("unknown");
603 }
604
605 /// This optimization identifies DIV instructions that can be
606 /// profitably bypassed and carried out with a shorter, faster divide.
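// For example (illustrative sketch of what bypassSlowDivision does; names are
// hypothetical): on a target that reports 64-bit division as slow, a
// 'udiv i64 %a, %b' can be guarded by a runtime check that both operands fit
// in 32 bits; if they do, a cheaper 32-bit divide is used, otherwise the
// original 64-bit divide is executed.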
607 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
608 const DenseMap<unsigned int, unsigned int> &BypassWidths =
610 BasicBlock *BB = &*F.begin();
611 while (BB != nullptr) {
612 // bypassSlowDivision may create new BBs, but we don't want to reapply the
613 // optimization to those blocks.
614 BasicBlock *Next = BB->getNextNode();
615 // F.hasOptSize is already checked in the outer if statement.
616 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
617 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
618 BB = Next;
619 }
620 }
621
622 // Get rid of @llvm.assume builtins before attempting to eliminate empty
623 // blocks, since there might be blocks that only contain @llvm.assume calls
624 // (plus arguments that we can get rid of).
625 EverMadeChange |= eliminateAssumptions(F);
626
627 // Eliminate blocks that contain only PHI nodes and an
628 // unconditional branch.
629 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
630
631 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
633 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
634
635 // Split some critical edges where one of the sources is an indirect branch,
636 // to help generate sane code for PHIs involving such edges.
637 EverMadeChange |=
638 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
639
640 // If we are optimizing a huge function, we need to consider the build time,
641 // because the basic algorithm's complexity is near O(N!).
642 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
643
644 // Transformations above may invalidate dominator tree and/or loop info.
645 DT.reset();
646 LI->releaseMemory();
647 LI->analyze(getDT(F));
648
649 bool MadeChange = true;
650 bool FuncIterated = false;
651 while (MadeChange) {
652 MadeChange = false;
653
655 if (FuncIterated && !FreshBBs.contains(&BB))
656 continue;
657
658 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
659 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
660
661 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
662 DT.reset();
663
664 MadeChange |= Changed;
665 if (IsHugeFunc) {
666 // If the BB is updated, it may still have a chance to be optimized.
667 // This usually happens during sink optimizations.
668 // For example:
669 //
670 // bb0:
671 // %and = and i32 %a, 4
672 // %cmp = icmp eq i32 %and, 0
673 //
674 // If the %cmp sinks to another BB, the %and will have a chance to sink too.
675 if (Changed)
676 FreshBBs.insert(&BB);
677 else if (FuncIterated)
678 FreshBBs.erase(&BB);
679 } else {
680 // For small/normal functions, we restart BB iteration if the dominator
681 // tree of the Function was changed.
682 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
683 break;
684 }
685 }
686 // We have iterated over all the BBs in the function (this only matters for huge functions).
687 FuncIterated = IsHugeFunc;
688
689 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
690 MadeChange |= mergeSExts(F);
691 if (!LargeOffsetGEPMap.empty())
692 MadeChange |= splitLargeGEPOffsets();
693 MadeChange |= optimizePhiTypes(F);
694
695 if (MadeChange)
696 eliminateFallThrough(F, DT.get());
697
698#ifndef NDEBUG
699 if (MadeChange && VerifyLoopInfo)
700 LI->verify(getDT(F));
701#endif
702
703 // Really free removed instructions during promotion.
704 for (Instruction *I : RemovedInsts)
705 I->deleteValue();
706
707 EverMadeChange |= MadeChange;
708 SeenChainsForSExt.clear();
709 ValToSExtendedUses.clear();
710 RemovedInsts.clear();
711 LargeOffsetGEPMap.clear();
712 LargeOffsetGEPID.clear();
713 }
714
715 NewGEPBases.clear();
716 SunkAddrs.clear();
717
718 if (!DisableBranchOpts) {
719 MadeChange = false;
720 // Use a set vector to get deterministic iteration order. The order the
721 // blocks are removed may affect whether or not PHI nodes in successors
722 // are removed.
724 for (BasicBlock &BB : F) {
726 MadeChange |= ConstantFoldTerminator(&BB, true);
727 if (!MadeChange)
728 continue;
729
730 for (BasicBlock *Succ : Successors)
731 if (pred_empty(Succ))
732 WorkList.insert(Succ);
733 }
734
735 // Delete the dead blocks and any of their dead successors.
736 MadeChange |= !WorkList.empty();
737 while (!WorkList.empty()) {
738 BasicBlock *BB = WorkList.pop_back_val();
740
741 DeleteDeadBlock(BB);
742
743 for (BasicBlock *Succ : Successors)
744 if (pred_empty(Succ))
745 WorkList.insert(Succ);
746 }
747
748 // Merge pairs of basic blocks with unconditional branches, connected by
749 // a single edge.
750 if (EverMadeChange || MadeChange)
751 MadeChange |= eliminateFallThrough(F);
752
753 EverMadeChange |= MadeChange;
754 }
755
756 if (!DisableGCOpts) {
758 for (BasicBlock &BB : F)
759 for (Instruction &I : BB)
760 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
761 Statepoints.push_back(SP);
762 for (auto &I : Statepoints)
763 EverMadeChange |= simplifyOffsetableRelocate(*I);
764 }
765
766 // Do this last to clean up use-before-def scenarios introduced by other
767 // preparatory transforms.
768 EverMadeChange |= placeDbgValues(F);
769 EverMadeChange |= placePseudoProbes(F);
770
771#ifndef NDEBUG
773 verifyBFIUpdates(F);
774#endif
775
776 return EverMadeChange;
777}
778
779bool CodeGenPrepare::eliminateAssumptions(Function &F) {
780 bool MadeChange = false;
781 for (BasicBlock &BB : F) {
782 CurInstIterator = BB.begin();
783 while (CurInstIterator != BB.end()) {
784 Instruction *I = &*(CurInstIterator++);
785 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
786 MadeChange = true;
787 Value *Operand = Assume->getOperand(0);
788 Assume->eraseFromParent();
789
790 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
791 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
792 });
793 }
794 }
795 }
796 return MadeChange;
797}
798
799/// An instruction is about to be deleted, so remove all references to it in our
800/// GEP-tracking data structures.
801void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
802 LargeOffsetGEPMap.erase(V);
803 NewGEPBases.erase(V);
804
805 auto GEP = dyn_cast<GetElementPtrInst>(V);
806 if (!GEP)
807 return;
808
809 LargeOffsetGEPID.erase(GEP);
810
811 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
812 if (VecI == LargeOffsetGEPMap.end())
813 return;
814
815 auto &GEPVector = VecI->second;
816 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
817
818 if (GEPVector.empty())
819 LargeOffsetGEPMap.erase(VecI);
820}
821
822// Verify BFI has been updated correctly by recomputing BFI and comparing them.
823void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
824 DominatorTree NewDT(F);
825 LoopInfo NewLI(NewDT);
826 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
827 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
828 NewBFI.verifyMatch(*BFI);
829}
830
831/// Merge basic blocks which are connected by a single edge, where one of the
832/// basic blocks has a single successor pointing to the other basic block,
833/// which has a single predecessor.
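// For example (illustrative, block names are hypothetical): if %bb1 ends in
// 'br label %bb2', %bb2 has no other predecessors, and %bb2's address is not
// taken, then %bb2 is folded into %bb1 and the unconditional branch goes away.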
834bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
835 bool Changed = false;
836 // Scan all of the blocks in the function, except for the entry block.
837 // Use a temporary array to avoid iterator being invalidated when
838 // deleting blocks.
840 for (auto &Block : llvm::drop_begin(F))
841 Blocks.push_back(&Block);
842
844 for (auto &Block : Blocks) {
845 auto *BB = cast_or_null<BasicBlock>(Block);
846 if (!BB)
847 continue;
848 // If the destination block has a single pred, then this is a trivial
849 // edge, just collapse it.
850 BasicBlock *SinglePred = BB->getSinglePredecessor();
851
852 // Don't merge if BB's address is taken.
853 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
854 continue;
855
856 // Make an effort to skip unreachable blocks.
857 if (DT && !DT->isReachableFromEntry(BB))
858 continue;
859
860 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
861 if (Term && !Term->isConditional()) {
862 Changed = true;
863 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
864
865 // Merge BB into SinglePred and delete it.
866 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
867 /* MemDep */ nullptr,
868 /* PredecessorWithTwoSuccessors */ false, DT);
869 Preds.insert(SinglePred);
870
871 if (IsHugeFunc) {
872 // Update FreshBBs to optimize the merged BB.
873 FreshBBs.insert(SinglePred);
874 FreshBBs.erase(BB);
875 }
876 }
877 }
878
879 // (Repeatedly) merging blocks into their predecessors can create redundant
880 // debug intrinsics.
881 for (const auto &Pred : Preds)
882 if (auto *BB = cast_or_null<BasicBlock>(Pred))
884
885 return Changed;
886}
887
888/// Find a destination block from BB if BB is a mergeable empty block.
889BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
890 // If this block doesn't end with an uncond branch, ignore it.
891 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
892 if (!BI || !BI->isUnconditional())
893 return nullptr;
894
895 // If the instruction before the branch (skipping debug info) isn't a phi
896 // node, then other stuff is happening here.
898 if (BBI != BB->begin()) {
899 --BBI;
900 while (isa<DbgInfoIntrinsic>(BBI)) {
901 if (BBI == BB->begin())
902 break;
903 --BBI;
904 }
905 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
906 return nullptr;
907 }
908
909 // Do not break infinite loops.
910 BasicBlock *DestBB = BI->getSuccessor(0);
911 if (DestBB == BB)
912 return nullptr;
913
914 if (!canMergeBlocks(BB, DestBB))
915 DestBB = nullptr;
916
917 return DestBB;
918}
919
920/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
921/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
922/// edges in ways that are non-optimal for isel. Start by eliminating these
923/// blocks so we can split them the way we want them.
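// For example (illustrative, block names are hypothetical): a block that
// contains only PHI nodes, debug intrinsics, and 'br label %dest' can be
// removed by rerouting its predecessors directly to %dest and folding its PHI
// values into %dest's PHIs, when the merge is deemed profitable.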
924bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
926 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
927 while (!LoopList.empty()) {
928 Loop *L = LoopList.pop_back_val();
929 llvm::append_range(LoopList, *L);
930 if (BasicBlock *Preheader = L->getLoopPreheader())
931 Preheaders.insert(Preheader);
932 }
933
934 bool MadeChange = false;
935 // Copy blocks into a temporary array to avoid iterator invalidation issues
936 // as we remove them.
937 // Note that this intentionally skips the entry block.
939 for (auto &Block : llvm::drop_begin(F)) {
940 // Delete phi nodes that could block deleting other empty blocks.
942 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
943 Blocks.push_back(&Block);
944 }
945
946 for (auto &Block : Blocks) {
947 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
948 if (!BB)
949 continue;
950 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
951 if (!DestBB ||
952 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
953 continue;
954
955 eliminateMostlyEmptyBlock(BB);
956 MadeChange = true;
957 }
958 return MadeChange;
959}
960
961bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
962 BasicBlock *DestBB,
963 bool isPreheader) {
964 // Do not delete loop preheaders if doing so would create a critical edge.
965 // Loop preheaders can be good locations to spill registers. If the
966 // preheader is deleted and we create a critical edge, registers may be
967 // spilled in the loop body instead.
968 if (!DisablePreheaderProtect && isPreheader &&
969 !(BB->getSinglePredecessor() &&
971 return false;
972
973 // Skip merging if the block's successor is also a successor to any callbr
974 // that leads to this block.
975 // FIXME: Is this really needed? Is this a correctness issue?
976 for (BasicBlock *Pred : predecessors(BB)) {
977 if (isa<CallBrInst>(Pred->getTerminator()) &&
978 llvm::is_contained(successors(Pred), DestBB))
979 return false;
980 }
981
982 // Try to skip merging if the unique predecessor of BB is terminated by a
983 // switch or indirect branch instruction, and BB is used as an incoming block
984 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
985 // add COPY instructions in the predecessor of BB instead of BB (if it is not
986 // merged). Note that the critical edge created by merging such blocks won't be
987 // split in MachineSink because the jump table is not analyzable. By keeping
988 // such empty block (BB), ISel will place COPY instructions in BB, not in the
989 // predecessor of BB.
990 BasicBlock *Pred = BB->getUniquePredecessor();
991 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
992 isa<IndirectBrInst>(Pred->getTerminator())))
993 return true;
994
995 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
996 return true;
997
998 // We use a simple cost heuristic which determines that skipping merging is
999 // profitable if the cost of skipping merging is less than the cost of
1000 // merging : Cost(skipping merging) < Cost(merging BB), where the
1001 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1002 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1003 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1004 // Freq(Pred) / Freq(BB) > 2.
1005 // Note that if there are multiple empty blocks sharing the same incoming
1006 // value for the PHIs in the DestBB, we consider them together. In such
1007 // case, Cost(merging BB) will be the sum of their frequencies.
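  // Worked example (illustrative numbers): with the default
  // cgp-freq-ratio-to-skip-merge of 2, if Freq(Pred) == 300 and
  // Freq(BB) == 100, then Freq(Pred) > 2 * Freq(BB), so merging is considered
  // unprofitable and the empty block BB is kept.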
1008
1009 if (!isa<PHINode>(DestBB->begin()))
1010 return true;
1011
1012 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1013
1014 // Find all other incoming blocks from which incoming values of all PHIs in
1015 // DestBB are the same as the ones from BB.
1016 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1017 if (DestBBPred == BB)
1018 continue;
1019
1020 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1021 return DestPN.getIncomingValueForBlock(BB) ==
1022 DestPN.getIncomingValueForBlock(DestBBPred);
1023 }))
1024 SameIncomingValueBBs.insert(DestBBPred);
1025 }
1026
1027 // See if all of BB's incoming values are the same as the value from Pred. In this
1028 // case, there is no reason to skip merging because COPYs are expected to be placed in
1029 // Pred already.
1030 if (SameIncomingValueBBs.count(Pred))
1031 return true;
1032
1033 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1034 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1035
1036 for (auto *SameValueBB : SameIncomingValueBBs)
1037 if (SameValueBB->getUniquePredecessor() == Pred &&
1038 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1039 BBFreq += BFI->getBlockFreq(SameValueBB);
1040
1041 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1042 return !Limit || PredFreq <= *Limit;
1043}
1044
1045/// Return true if we can merge BB into DestBB if there is a single
1046/// unconditional branch between them, and BB contains no other non-phi
1047/// instructions.
1048bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1049 const BasicBlock *DestBB) const {
1050 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1051 // the successor. If there are more complex conditions (e.g. preheaders),
1052 // don't mess around with them.
1053 for (const PHINode &PN : BB->phis()) {
1054 for (const User *U : PN.users()) {
1055 const Instruction *UI = cast<Instruction>(U);
1056 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1057 return false;
1058 // If User is inside DestBB block and it is a PHINode then check
1059 // incoming value. If incoming value is not from BB then this is
1060 // a complex condition (e.g. preheaders) we want to avoid here.
1061 if (UI->getParent() == DestBB) {
1062 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1063 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1064 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1065 if (Insn && Insn->getParent() == BB &&
1066 Insn->getParent() != UPN->getIncomingBlock(I))
1067 return false;
1068 }
1069 }
1070 }
1071 }
1072
1073 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1074 // and DestBB may have conflicting incoming values for the block. If so, we
1075 // can't merge the block.
1076 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1077 if (!DestBBPN)
1078 return true; // no conflict.
1079
1080 // Collect the preds of BB.
1082 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1083 // It is faster to get preds from a PHI than with pred_iterator.
1084 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1085 BBPreds.insert(BBPN->getIncomingBlock(i));
1086 } else {
1087 BBPreds.insert(pred_begin(BB), pred_end(BB));
1088 }
1089
1090 // Walk the preds of DestBB.
1091 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1092 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1093 if (BBPreds.count(Pred)) { // Common predecessor?
1094 for (const PHINode &PN : DestBB->phis()) {
1095 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1096 const Value *V2 = PN.getIncomingValueForBlock(BB);
1097
1098 // If V2 is a phi node in BB, look up what the mapped value will be.
1099 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1100 if (V2PN->getParent() == BB)
1101 V2 = V2PN->getIncomingValueForBlock(Pred);
1102
1103 // If there is a conflict, bail out.
1104 if (V1 != V2)
1105 return false;
1106 }
1107 }
1108 }
1109
1110 return true;
1111}
1112
1113/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1114static void replaceAllUsesWith(Value *Old, Value *New,
1116 bool IsHuge) {
1117 auto *OldI = dyn_cast<Instruction>(Old);
1118 if (OldI) {
1119 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1120 UI != E; ++UI) {
1121 Instruction *User = cast<Instruction>(*UI);
1122 if (IsHuge)
1123 FreshBBs.insert(User->getParent());
1124 }
1125 }
1126 Old->replaceAllUsesWith(New);
1127}
1128
1129/// Eliminate a basic block that has only phi's and an unconditional branch in
1130/// it.
1131void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1132 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1133 BasicBlock *DestBB = BI->getSuccessor(0);
1134
1135 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1136 << *BB << *DestBB);
1137
1138 // If the destination block has a single pred, then this is a trivial edge,
1139 // just collapse it.
1140 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1141 if (SinglePred != DestBB) {
1142 assert(SinglePred == BB &&
1143 "Single predecessor not the same as predecessor");
1144 // Merge DestBB into SinglePred/BB and delete it.
1146 // Note: BB(=SinglePred) will not be deleted on this path.
1147 // DestBB(=its single successor) is the one that was deleted.
1148 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1149
1150 if (IsHugeFunc) {
1151 // Update FreshBBs to optimize the merged BB.
1152 FreshBBs.insert(SinglePred);
1153 FreshBBs.erase(DestBB);
1154 }
1155 return;
1156 }
1157 }
1158
1159 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1160 // to handle the new incoming edges it is about to have.
1161 for (PHINode &PN : DestBB->phis()) {
1162 // Remove the incoming value for BB, and remember it.
1163 Value *InVal = PN.removeIncomingValue(BB, false);
1164
1165 // Two options: either the InVal is a phi node defined in BB or it is some
1166 // value that dominates BB.
1167 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1168 if (InValPhi && InValPhi->getParent() == BB) {
1169 // Add all of the input values of the input PHI as inputs of this phi.
1170 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1171 PN.addIncoming(InValPhi->getIncomingValue(i),
1172 InValPhi->getIncomingBlock(i));
1173 } else {
1174 // Otherwise, add one instance of the dominating value for each edge that
1175 // we will be adding.
1176 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1177 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1178 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1179 } else {
1180 for (BasicBlock *Pred : predecessors(BB))
1181 PN.addIncoming(InVal, Pred);
1182 }
1183 }
1184 }
1185
1186 // The PHIs are now updated, change everything that refers to BB to use
1187 // DestBB and remove BB.
1188 BB->replaceAllUsesWith(DestBB);
1189 BB->eraseFromParent();
1190 ++NumBlocksElim;
1191
1192 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1193}
1194
1195// Computes a map of base pointer relocation instructions to corresponding
1196// derived pointer relocation instructions given a vector of all relocate calls
1198 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1200 &RelocateInstMap) {
1201 // Collect information in two maps: one primarily for locating the base object
1202 // while filling the second map; the second map is the final structure holding
1203 // a mapping between Base and corresponding Derived relocate calls
1205 for (auto *ThisRelocate : AllRelocateCalls) {
1206 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1207 ThisRelocate->getDerivedPtrIndex());
1208 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1209 }
1210 for (auto &Item : RelocateIdxMap) {
1211 std::pair<unsigned, unsigned> Key = Item.first;
1212 if (Key.first == Key.second)
1213 // Base relocation: nothing to insert
1214 continue;
1215
1216 GCRelocateInst *I = Item.second;
1217 auto BaseKey = std::make_pair(Key.first, Key.first);
1218
1219 // We're iterating over RelocateIdxMap so we cannot modify it.
1220 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1221 if (MaybeBase == RelocateIdxMap.end())
1222 // TODO: We might want to insert a new base object relocate and gep off
1223 // that, if there are enough derived object relocates.
1224 continue;
1225
1226 RelocateInstMap[MaybeBase->second].push_back(I);
1227 }
1228}
1229
1230// Accepts a GEP and extracts the operands into a vector provided they're all
1231// small integer constants
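// For example (illustrative, names are hypothetical): for
// 'getelementptr %struct.S, ptr %base, i32 0, i32 3' the collected offsets are
// [i32 0, i32 3]; an index constant larger than 20, or any non-constant index,
// makes the routine bail out.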
1233 SmallVectorImpl<Value *> &OffsetV) {
1234 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1235 // Only accept small constant integer operands
1236 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1237 if (!Op || Op->getZExtValue() > 20)
1238 return false;
1239 }
1240
1241 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1242 OffsetV.push_back(GEP->getOperand(i));
1243 return true;
1244}
1245
1246// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1247// replace, computes a replacement, and applies it.
1248static bool
1250 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1251 bool MadeChange = false;
1252 // We must ensure that the relocation of a derived pointer is defined after the
1253 // relocation of its base pointer. If we find a relocation that shares this base
1254 // and is defined earlier than the relocation of the base, then we move the
1255 // relocation of the base right before the found relocation. We consider only
1256 // relocations in the same basic block as the relocation of the base; relocations
1257 // from other basic blocks will be skipped by the optimization.
1258 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1259 &*R != RelocatedBase; ++R)
1260 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1261 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1262 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1263 RelocatedBase->moveBefore(RI);
1264 MadeChange = true;
1265 break;
1266 }
1267
1268 for (GCRelocateInst *ToReplace : Targets) {
1269 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1270 "Not relocating a derived object of the original base object");
1271 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1272 // A duplicate relocate call. TODO: coalesce duplicates.
1273 continue;
1274 }
1275
1276 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1277 // Base and derived relocates are in different basic blocks.
1278 // In this case transform is only valid when base dominates derived
1279 // relocate. However it would be too expensive to check dominance
1280 // for each such relocate, so we skip the whole transformation.
1281 continue;
1282 }
1283
1284 Value *Base = ToReplace->getBasePtr();
1285 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1286 if (!Derived || Derived->getPointerOperand() != Base)
1287 continue;
1288
1290 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1291 continue;
1292
1293 // Create a Builder and replace the target callsite with a gep
1294 assert(RelocatedBase->getNextNode() &&
1295 "Should always have one since it's not a terminator");
1296
1297 // Insert after RelocatedBase
1298 IRBuilder<> Builder(RelocatedBase->getNextNode());
1299 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1300
1301 // If gc_relocate does not match the actual type, cast it to the right type.
1302 // In theory, there must be a bitcast after gc_relocate if the type does not
1303 // match, and we should reuse it to get the derived pointer. But it could be
1304 // cases like this:
1305 // bb1:
1306 // ...
1307 // %g1 = call coldcc i8 addrspace(1)*
1308 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1309 //
1310 // bb2:
1311 // ...
1312 // %g2 = call coldcc i8 addrspace(1)*
1313 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1314 //
1315 // merge:
1316 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1317 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1318 //
1319 // In this case, we cannot find the bitcast anymore. So we insert a new
1320 // bitcast whether there is already one or not. In this way, we can handle
1321 // all cases, and the extra bitcast should be optimized away in later
1322 // passes.
1323 Value *ActualRelocatedBase = RelocatedBase;
1324 if (RelocatedBase->getType() != Base->getType()) {
1325 ActualRelocatedBase =
1326 Builder.CreateBitCast(RelocatedBase, Base->getType());
1327 }
1328 Value *Replacement =
1329 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1330 ArrayRef(OffsetV));
1331 Replacement->takeName(ToReplace);
1332 // If the newly generated derived pointer's type does not match the original
1333 // derived pointer's type, cast the new derived pointer to match it. Same
1334 // reasoning as above.
1335 Value *ActualReplacement = Replacement;
1336 if (Replacement->getType() != ToReplace->getType()) {
1337 ActualReplacement =
1338 Builder.CreateBitCast(Replacement, ToReplace->getType());
1339 }
1340 ToReplace->replaceAllUsesWith(ActualReplacement);
1341 ToReplace->eraseFromParent();
1342
1343 MadeChange = true;
1344 }
1345 return MadeChange;
1346}
1347
1348// Turns this:
1349//
1350// %base = ...
1351// %ptr = gep %base + 15
1352// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1353// %base' = relocate(%tok, i32 4, i32 4)
1354// %ptr' = relocate(%tok, i32 4, i32 5)
1355// %val = load %ptr'
1356//
1357// into this:
1358//
1359// %base = ...
1360// %ptr = gep %base + 15
1361// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1362// %base' = gc.relocate(%tok, i32 4, i32 4)
1363// %ptr' = gep %base' + 15
1364// %val = load %ptr'
1365bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1366 bool MadeChange = false;
1367 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1368 for (auto *U : I.users())
1369 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1370 // Collect all the relocate calls associated with a statepoint
1371 AllRelocateCalls.push_back(Relocate);
1372
1373 // We need at least one base pointer relocation + one derived pointer
1374 // relocation to mangle
1375 if (AllRelocateCalls.size() < 2)
1376 return false;
1377
1378 // RelocateInstMap is a mapping from the base relocate instruction to the
1379 // corresponding derived relocate instructions
1381 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1382 if (RelocateInstMap.empty())
1383 return false;
1384
1385 for (auto &Item : RelocateInstMap)
1386 // Item.first is the RelocatedBase to offset against
1387 // Item.second is the vector of Targets to replace
1388 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1389 return MadeChange;
1390}
1391
1392/// Sink the specified cast instruction into its user blocks.
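// For example (illustrative, names are hypothetical): if '%z = zext i32 %x to
// i64' is defined in %bb0 but only used in %bb1 and %bb2, a copy of the zext
// is placed in each user block and the original, now dead, cast is erased,
// shortening the value's live range across blocks.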
1393static bool SinkCast(CastInst *CI) {
1394 BasicBlock *DefBB = CI->getParent();
1395
1396 /// InsertedCasts - Only insert a cast in each block once.
1398
1399 bool MadeChange = false;
1400 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1401 UI != E;) {
1402 Use &TheUse = UI.getUse();
1403 Instruction *User = cast<Instruction>(*UI);
1404
1405 // Figure out which BB this cast is used in. For PHI's this is the
1406 // appropriate predecessor block.
1407 BasicBlock *UserBB = User->getParent();
1408 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1409 UserBB = PN->getIncomingBlock(TheUse);
1410 }
1411
1412 // Preincrement use iterator so we don't invalidate it.
1413 ++UI;
1414
1415 // The first insertion point of a block containing an EH pad is after the
1416 // pad. If the pad is the user, we cannot sink the cast past the pad.
1417 if (User->isEHPad())
1418 continue;
1419
1420 // If the block selected to receive the cast is an EH pad that does not
1421 // allow non-PHI instructions before the terminator, we can't sink the
1422 // cast.
1423 if (UserBB->getTerminator()->isEHPad())
1424 continue;
1425
1426 // If this user is in the same block as the cast, don't change the cast.
1427 if (UserBB == DefBB)
1428 continue;
1429
1430 // If we have already inserted a cast into this block, use it.
1431 CastInst *&InsertedCast = InsertedCasts[UserBB];
1432
1433 if (!InsertedCast) {
1434 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1435 assert(InsertPt != UserBB->end());
1436 InsertedCast = cast<CastInst>(CI->clone());
1437 InsertedCast->insertBefore(*UserBB, InsertPt);
1438 }
1439
1440 // Replace a use of the cast with a use of the new cast.
1441 TheUse = InsertedCast;
1442 MadeChange = true;
1443 ++NumCastUses;
1444 }
1445
1446 // If we removed all uses, nuke the cast.
1447 if (CI->use_empty()) {
1448 salvageDebugInfo(*CI);
1449 CI->eraseFromParent();
1450 MadeChange = true;
1451 }
1452
1453 return MadeChange;
1454}
1455
1456/// If the specified cast instruction is a noop copy (e.g. it's casting from
1457/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1458/// reduce the number of virtual registers that must be created and coalesced.
1459///
1460/// Return true if any changes are made.
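// For example (illustrative): on a target where i32 is promoted to i64, a
// 'trunc i64 %x to i32' becomes a no-op copy after type legalization, so it is
// sunk into its user blocks via SinkCast above.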
1462 const DataLayout &DL) {
1463 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1464 // than sinking only nop casts, but is helpful on some platforms.
1465 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1466 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1467 ASC->getDestAddressSpace()))
1468 return false;
1469 }
1470
1471 // If this is a noop copy,
1472 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1473 EVT DstVT = TLI.getValueType(DL, CI->getType());
1474
1475 // This is an fp<->int conversion?
1476 if (SrcVT.isInteger() != DstVT.isInteger())
1477 return false;
1478
1479 // If this is an extension, it will be a zero or sign extension, which
1480 // isn't a noop.
1481 if (SrcVT.bitsLT(DstVT))
1482 return false;
1483
1484 // If these values will be promoted, find out what they will be promoted
1485 // to. This helps us consider truncates on PPC as noop copies when they
1486 // are.
1487 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1489 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1490 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1492 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1493
1494 // If, after promotion, these are the same types, this is a noop copy.
1495 if (SrcVT != DstVT)
1496 return false;
1497
1498 return SinkCast(CI);
1499}
1500
1501// Match a simple increment by constant operation. Note that if a sub is
1502// matched, the step is negated (as if the step had been canonicalized to
1503// an add, even though we leave the instruction alone.)
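// For example (illustrative, names are hypothetical): '%next = sub i32 %iv, 4'
// matches with LHS == %iv and Step == i32 -4, i.e. the subtracted constant is
// reported negated.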
1504static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1505 Constant *&Step) {
1506 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1507 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1508 m_Instruction(LHS), m_Constant(Step)))))
1509 return true;
1510 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1511 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1512 m_Instruction(LHS), m_Constant(Step))))) {
1513 Step = ConstantExpr::getNeg(Step);
1514 return true;
1515 }
1516 return false;
1517}
1518
1519/// If given \p PN is an inductive variable with value IVInc coming from the
1520/// backedge, and on each iteration it gets increased by Step, return pair
1521/// <IVInc, Step>. Otherwise, return std::nullopt.
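// For example (illustrative, names are hypothetical): for a loop header PHI
// '%iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]' with
// '%iv.next = add i64 %iv, 1' in the latch, this returns the pair
// (%iv.next, i64 1).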
1522static std::optional<std::pair<Instruction *, Constant *>>
1523getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1524 const Loop *L = LI->getLoopFor(PN->getParent());
1525 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1526 return std::nullopt;
1527 auto *IVInc =
1528 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1529 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1530 return std::nullopt;
1531 Instruction *LHS = nullptr;
1532 Constant *Step = nullptr;
1533 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1534 return std::make_pair(IVInc, Step);
1535 return std::nullopt;
1536}
1537
1538static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1539 auto *I = dyn_cast<Instruction>(V);
1540 if (!I)
1541 return false;
1542 Instruction *LHS = nullptr;
1543 Constant *Step = nullptr;
1544 if (!matchIncrement(I, LHS, Step))
1545 return false;
1546 if (auto *PN = dyn_cast<PHINode>(LHS))
1547 if (auto IVInc = getIVIncrement(PN, LI))
1548 return IVInc->first == I;
1549 return false;
1550}
1551
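// Descriptive comment for the function below: fold a binary math op and its
// companion compare into a single call to an overflow intrinsic (e.g.
// llvm.uadd.with.overflow), replacing uses of the math result with the
// intrinsic's extracted value and uses of the compare with its extracted
// overflow bit.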
1552bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1553 Value *Arg0, Value *Arg1,
1554 CmpInst *Cmp,
1555 Intrinsic::ID IID) {
1556 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1557 if (!isIVIncrement(BO, LI))
1558 return false;
1559 const Loop *L = LI->getLoopFor(BO->getParent());
1560 assert(L && "L should not be null after isIVIncrement()");
1561 // Do not risk moving the increment into a child loop.
1562 if (LI->getLoopFor(Cmp->getParent()) != L)
1563 return false;
1564
1565 // Finally, we need to ensure that the insert point will dominate all
1566 // existing uses of the increment.
1567
1568 auto &DT = getDT(*BO->getParent()->getParent());
1569 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1570 // If we're moving up the dom tree, all uses are trivially dominated.
1571 // (This is the common case for code produced by LSR.)
1572 return true;
1573
1574 // Otherwise, special case the single use in the phi recurrence.
1575 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1576 };
1577 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1578 // We used to use a dominator tree here to allow multi-block optimization.
1579 // But that was problematic because:
1580 // 1. It could cause a perf regression by hoisting the math op into the
1581 // critical path.
1582 // 2. It could cause a perf regression by creating a value that was live
1583 // across multiple blocks and increasing register pressure.
1584 // 3. Use of a dominator tree could cause large compile-time regression.
1585 // This is because we recompute the DT on every change in the main CGP
1586 // run-loop. The recomputing is probably unnecessary in many cases, so if
1587 // that was fixed, using a DT here would be ok.
1588 //
1589 // There is one important particular case we still want to handle: if BO is
1590 // the IV increment. Important properties that make it profitable:
1591 // - We can speculate IV increment anywhere in the loop (as long as the
1592 // indvar Phi is its only user);
1593 // - Upon computing Cmp, we effectively compute something equivalent to the
1594 // IV increment (even though it is expressed differently in the IR). So moving it up
1595 // to the cmp point does not really increase register pressure.
1596 return false;
1597 }
1598
1599 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1600 if (BO->getOpcode() == Instruction::Add &&
1601 IID == Intrinsic::usub_with_overflow) {
1602 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1603 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1604 }
1605
1606 // Insert at the first instruction of the pair.
1607 Instruction *InsertPt = nullptr;
1608 for (Instruction &Iter : *Cmp->getParent()) {
1609 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1610 // the overflow intrinsic are defined.
1611 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1612 InsertPt = &Iter;
1613 break;
1614 }
1615 }
1616 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1617
1618 IRBuilder<> Builder(InsertPt);
1619 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1620 if (BO->getOpcode() != Instruction::Xor) {
1621 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1622 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1623 } else
1624 assert(BO->hasOneUse() &&
1625 "Patterns with XOr should use the BO only in the compare");
1626 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1627 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1628 Cmp->eraseFromParent();
1629 BO->eraseFromParent();
1630 return true;
1631}
1632
1633/// Match special-case patterns that check for unsigned add overflow.
1634static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1635 BinaryOperator *&Add) {
1636 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1637 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1638 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1639
1640 // We are not expecting non-canonical/degenerate code. Just bail out.
1641 if (isa<Constant>(A))
1642 return false;
1643
1644 ICmpInst::Predicate Pred = Cmp->getPredicate();
1645 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1646 B = ConstantInt::get(B->getType(), 1);
1647 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1648 B = Constant::getAllOnesValue(B->getType());
1649 else
1650 return false;
1651
1652 // Check the users of the variable operand of the compare looking for an add
1653 // with the adjusted constant.
1654 for (User *U : A->users()) {
1655 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1656 Add = cast<BinaryOperator>(U);
1657 return true;
1658 }
1659 }
1660 return false;
1661}
1662
1663/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1664/// intrinsic. Return true if any changes were made.
1665bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1666 ModifyDT &ModifiedDT) {
1667 bool EdgeCase = false;
1668 Value *A, *B;
1669 BinaryOperator *Add;
1670 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1671 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1672 return false;
1673 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1674 A = Add->getOperand(0);
1675 B = Add->getOperand(1);
1676 EdgeCase = true;
1677 }
1678
1679 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1680 TLI->getValueType(*DL, Add->getType()),
1681 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1682 return false;
1683
1684 // We don't want to move around uses of condition values this late, so we
1685 // check if it is legal to create the call to the intrinsic in the basic
1686 // block containing the icmp.
1687 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1688 return false;
1689
1690 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1691 Intrinsic::uadd_with_overflow))
1692 return false;
1693
1694 // Reset callers - do not crash by iterating over a dead instruction.
1695 ModifiedDT = ModifyDT::ModifyInstDT;
1696 return true;
1697}
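// Illustrative sketch (not part of the original source; value names are made
// up): the kind of IR this combine rewrites, assuming i32 operands %a and %b
// in the same block:
//
//   %add = add i32 %a, %b
//   %ov  = icmp ult i32 %add, %a        ; overflow check of the addition
// =>
//   %res = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %add = extractvalue { i32, i1 } %res, 0
//   %ov  = extractvalue { i32, i1 } %res, 1
//
// The constant edge case 'add %a, 1' + 'icmp eq %a, -1' is matched by
// matchUAddWithOverflowConstantEdgeCases and rewritten the same way.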
1698
1699bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1700 ModifyDT &ModifiedDT) {
1701 // We are not expecting non-canonical/degenerate code. Just bail out.
1702 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1703 if (isa<Constant>(A) && isa<Constant>(B))
1704 return false;
1705
1706 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1707 ICmpInst::Predicate Pred = Cmp->getPredicate();
1708 if (Pred == ICmpInst::ICMP_UGT) {
1709 std::swap(A, B);
1710 Pred = ICmpInst::ICMP_ULT;
1711 }
1712 // Convert special-case: (A == 0) is the same as (A u< 1).
1713 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1714 B = ConstantInt::get(B->getType(), 1);
1715 Pred = ICmpInst::ICMP_ULT;
1716 }
1717 // Convert special-case: (A != 0) is the same as (0 u< A).
1718 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1719 std::swap(A, B);
1720 Pred = ICmpInst::ICMP_ULT;
1721 }
1722 if (Pred != ICmpInst::ICMP_ULT)
1723 return false;
1724
1725 // Walk the users of a variable operand of a compare looking for a subtract or
1726 // add with that same operand. Also match the 2nd operand of the compare to
1727 // the add/sub, but that may be a negated constant operand of an add.
1728 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1729 BinaryOperator *Sub = nullptr;
1730 for (User *U : CmpVariableOperand->users()) {
1731 // A - B, A u< B --> usubo(A, B)
1732 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1733 Sub = cast<BinaryOperator>(U);
1734 break;
1735 }
1736
1737 // A + (-C), A u< C (canonicalized form of (sub A, C))
1738 const APInt *CmpC, *AddC;
1739 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1740 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1741 Sub = cast<BinaryOperator>(U);
1742 break;
1743 }
1744 }
1745 if (!Sub)
1746 return false;
1747
1748 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1749 TLI->getValueType(*DL, Sub->getType()),
1750 Sub->hasNUsesOrMore(1)))
1751 return false;
1752
1753 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1754 Cmp, Intrinsic::usub_with_overflow))
1755 return false;
1756
1757 // Reset callers - do not crash by iterating over a dead instruction.
1758 ModifiedDT = ModifyDT::ModifyInstDT;
1759 return true;
1760}
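// Illustrative sketch (not part of the original source; value names are made
// up): a subtract/compare pair over the same i32 operands that this combine
// turns into usub.with.overflow:
//
//   %sub = sub i32 %a, %b
//   %ov  = icmp ult i32 %a, %b          ; borrow check of the subtraction
// =>
//   %res = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
//   %sub = extractvalue { i32, i1 } %res, 0
//   %ov  = extractvalue { i32, i1 } %res, 1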
1761
1762/// Sink the given CmpInst into user blocks to reduce the number of virtual
1763/// registers that must be created and coalesced. This is a clear win except on
1764/// targets with multiple condition code registers (PowerPC), where it might
1765/// lose; some adjustment may be wanted there.
1766///
1767/// Return true if any changes are made.
1768static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1769 if (TLI.hasMultipleConditionRegisters())
1770 return false;
1771
1772 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1773 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1774 return false;
1775
1776 // Only insert a cmp in each block once.
1777 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1778
1779 bool MadeChange = false;
1780 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1781 UI != E;) {
1782 Use &TheUse = UI.getUse();
1783 Instruction *User = cast<Instruction>(*UI);
1784
1785 // Preincrement use iterator so we don't invalidate it.
1786 ++UI;
1787
1788 // Don't bother for PHI nodes.
1789 if (isa<PHINode>(User))
1790 continue;
1791
1792 // Figure out which BB this cmp is used in.
1793 BasicBlock *UserBB = User->getParent();
1794 BasicBlock *DefBB = Cmp->getParent();
1795
1796 // If this user is in the same block as the cmp, don't change the cmp.
1797 if (UserBB == DefBB)
1798 continue;
1799
1800 // If we have already inserted a cmp into this block, use it.
1801 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1802
1803 if (!InsertedCmp) {
1804 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1805 assert(InsertPt != UserBB->end());
1806 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1807 Cmp->getOperand(0), Cmp->getOperand(1), "");
1808 InsertedCmp->insertBefore(*UserBB, InsertPt);
1809 // Propagate the debug info.
1810 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1811 }
1812
1813 // Replace a use of the cmp with a use of the new cmp.
1814 TheUse = InsertedCmp;
1815 MadeChange = true;
1816 ++NumCmpUses;
1817 }
1818
1819 // If we removed all uses, nuke the cmp.
1820 if (Cmp->use_empty()) {
1821 Cmp->eraseFromParent();
1822 MadeChange = true;
1823 }
1824
1825 return MadeChange;
1826}
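// Illustrative sketch (not part of the original source; block and value names
// are made up): a cmp whose only users live in other blocks gets re-created in
// each user block, and the original is erased once it has no uses left:
//
//   def:
//     %c = icmp eq i32 %x, %y
//   use1:
//     br i1 %c, label %t, label %f
// =>
//   use1:
//     %c.sunk = icmp eq i32 %x, %y
//     br i1 %c.sunk, label %t, label %f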
1827
1828/// For pattern like:
1829///
1830/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1831/// ...
1832/// DomBB:
1833/// ...
1834/// br DomCond, TrueBB, CmpBB
1835/// CmpBB: (with DomBB being the single predecessor)
1836/// ...
1837/// Cmp = icmp eq CmpOp0, CmpOp1
1838/// ...
1839///
1840/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1841/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1842/// 'Cmp = icmp eq CmpOp0, CmpOp1' into 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1843/// After that, DomCond and Cmp can share the same comparison, eliminating one
1844/// comparison.
1845///
1846/// Return true if any changes are made.
1847static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1848 const TargetLowering &TLI) {
1849 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1850 return false;
1851
1852 ICmpInst::Predicate Pred = Cmp->getPredicate();
1853 if (Pred != ICmpInst::ICMP_EQ)
1854 return false;
1855
1856 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1857 // icmp slt/sgt would introduce more redundant LLVM IR.
1858 for (User *U : Cmp->users()) {
1859 if (isa<BranchInst>(U))
1860 continue;
1861 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1862 continue;
1863 return false;
1864 }
1865
1866 // This is a cheap/incomplete check for dominance - just match a single
1867 // predecessor with a conditional branch.
1868 BasicBlock *CmpBB = Cmp->getParent();
1869 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1870 if (!DomBB)
1871 return false;
1872
1873 // We want to ensure that the only way control gets to the comparison of
1874 // interest is that a less/greater than comparison on the same operands is
1875 // false.
1876 Value *DomCond;
1877 BasicBlock *TrueBB, *FalseBB;
1878 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1879 return false;
1880 if (CmpBB != FalseBB)
1881 return false;
1882
1883 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1884 ICmpInst::Predicate DomPred;
1885 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1886 return false;
1887 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1888 return false;
1889
1890 // Convert the equality comparison to the opposite of the dominating
1891 // comparison and swap the direction for all branch/select users.
1892 // We have conceptually converted:
1893 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1894 // to
1895 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1896 // And similarly for branches.
1897 for (User *U : Cmp->users()) {
1898 if (auto *BI = dyn_cast<BranchInst>(U)) {
1899 assert(BI->isConditional() && "Must be conditional");
1900 BI->swapSuccessors();
1901 continue;
1902 }
1903 if (auto *SI = dyn_cast<SelectInst>(U)) {
1904 // Swap operands
1905 SI->swapValues();
1906 SI->swapProfMetadata();
1907 continue;
1908 }
1909 llvm_unreachable("Must be a branch or a select");
1910 }
1911 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1912 return true;
1913}
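// Illustrative sketch (not part of the original source; block and value names
// are made up): with a dominating signed compare on the same operands, the
// equality compare is rewritten to the swapped predicate and its branch
// successors are exchanged:
//
//   DomBB:
//     %dom = icmp slt i32 %a, %b
//     br i1 %dom, label %TrueBB, label %CmpBB
//   CmpBB:
//     %eq = icmp eq i32 %a, %b
//     br i1 %eq, label %EqBB, label %NeBB
// =>
//   CmpBB:
//     %eq = icmp sgt i32 %a, %b            ; swapped predicate of the slt above
//     br i1 %eq, label %NeBB, label %EqBB  ; successors swapped to compensate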
1914
1915/// Many architectures use the same instruction for both subtract and cmp. Try
1916/// to swap cmp operands to match subtract operations to allow for CSE.
1917static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1918 Value *Op0 = Cmp->getOperand(0);
1919 Value *Op1 = Cmp->getOperand(1);
1920 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1921 isa<Constant>(Op1) || Op0 == Op1)
1922 return false;
1923
1924 // If a subtract already has the same operands as a compare, swapping would be
1925 // bad. If a subtract has the same operands as a compare but in reverse order,
1926 // then swapping is good.
1927 int GoodToSwap = 0;
1928 unsigned NumInspected = 0;
1929 for (const User *U : Op0->users()) {
1930 // Avoid walking many users.
1931 if (++NumInspected > 128)
1932 return false;
1933 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
1934 GoodToSwap++;
1935 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
1936 GoodToSwap--;
1937 }
1938
1939 if (GoodToSwap > 0) {
1940 Cmp->swapOperands();
1941 return true;
1942 }
1943 return false;
1944}
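// Illustrative sketch (not part of the original source; value names are made
// up): if a subtract uses the operands in the opposite order, swapping the
// compare lets the backend reuse the subtract for the comparison:
//
//   %d = sub i32 %b, %a
//   %c = icmp sgt i32 %a, %b
// =>
//   %d = sub i32 %b, %a
//   %c = icmp slt i32 %b, %a               ; same result, operand order now
//                                          ; matches the sub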
1945
1946static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1947 const DataLayout &DL) {
1948 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1949 if (!FCmp)
1950 return false;
1951
1952 // Don't fold if the target offers free fabs and the predicate is legal.
1953 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
1954 if (TLI.isFAbsFree(VT) &&
1955 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
1956 VT.getSimpleVT()))
1957 return false;
1958
1959 // Reverse the canonicalization if it is an FP class test.
1960 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
1961 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
1962 };
1963 auto [ClassVal, ClassTest] =
1964 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
1965 FCmp->getOperand(0), FCmp->getOperand(1));
1966 if (!ClassVal)
1967 return false;
1968
1969 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1970 return false;
1971
1972 IRBuilder<> Builder(Cmp);
1973 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
1974 Cmp->replaceAllUsesWith(IsFPClass);
1975 Cmp->eraseFromParent();
1976 return true;
1977}
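// Illustrative sketch (not part of the original source; value names are made
// up): an infinity check written as fabs + fcmp is turned back into an
// llvm.is.fpclass test:
//
//   %fabs = call double @llvm.fabs.f64(double %x)
//   %cmp  = fcmp oeq double %fabs, 0x7FF0000000000000
// =>
//   %cmp  = call i1 @llvm.is.fpclass.f64(double %x, i32 516)   ; 516 == fcInf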
1978
1979static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
1980 const LoopInfo *LI,
1981 Value *&RemAmtOut,
1982 PHINode *&LoopIncrPNOut) {
1983 Value *Incr, *RemAmt;
1984 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1985 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
1986 return false;
1987
1988 // Find out loop increment PHI.
1989 auto *PN = dyn_cast<PHINode>(Incr);
1990 if (!PN)
1991 return false;
1992
1993 // This isn't strictly necessary; what we really need is one increment and any
1994 // number of initial values, all being the same.
1995 if (PN->getNumIncomingValues() != 2)
1996 return false;
1997
1998 // Only trivially analyzable loops.
1999 Loop *L = LI->getLoopFor(PN->getParent());
2000 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2001 return false;
2002
2003 // Require that the remainder is in the loop.
2004 if (!L->contains(Rem))
2005 return false;
2006
2007 // This only works if the remainder amount is a loop invariant.
2008 if (!L->isLoopInvariant(RemAmt))
2009 return false;
2010
2011 // Is the PHI a loop increment?
2012 auto LoopIncrInfo = getIVIncrement(PN, LI);
2013 if (!LoopIncrInfo)
2014 return false;
2015
2016 // We need remainder_amount % increment_amount to be zero. Increment of one
2017 // satisfies that without any special logic and is overwhelmingly the common
2018 // case.
2019 if (!match(LoopIncrInfo->second, m_One()))
2020 return false;
2021
2022 // Need the increment to not overflow.
2023 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2024 return false;
2025
2026 // Set output variables.
2027 RemAmtOut = RemAmt;
2028 LoopIncrPNOut = PN;
2029
2030 return true;
2031}
2032
2033// Try to transform:
2034//
2035// for(i = Start; i < End; ++i)
2036// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2037//
2038// ->
2039//
2040// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2041// for(i = Start; i < End; ++i, ++rem)
2042// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2043//
2044// Currently only implemented for `IncrLoopInvariant` being zero.
2045static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2046 const LoopInfo *LI,
2047 SmallSet<BasicBlock *, 32> &FreshBBs,
2048 bool IsHuge) {
2049 Value *RemAmt;
2050 PHINode *LoopIncrPN;
2051 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, LoopIncrPN))
2052 return false;
2053
2054 // Only handle a non-constant remainder amount, as the extra IV is probably
2055 // not profitable in the constant case.
2056 //
2057 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2058 // we can rule out register pressure and ensure this `urem` is executed each
2059 // iteration, it's probably profitable to handle the const case as well.
2060 //
2061 // Potential TODO(2): Should we have a check for how "nested" this remainder
2062 // operation is? The new code runs every iteration so if the remainder is
2063 // guarded behind unlikely conditions this might not be worth it.
2064 if (match(RemAmt, m_ImmConstant()))
2065 return false;
2066
2067 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2068 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2069 // If we can't fully optimize out the `rem`, skip this transform.
2070 Start = simplifyURemInst(Start, RemAmt, *DL);
2071 if (!Start)
2072 return false;
2073
2074 // Create new remainder with induction variable.
2075 Type *Ty = Rem->getType();
2076 IRBuilder<> Builder(Rem->getContext());
2077
2078 Builder.SetInsertPoint(LoopIncrPN);
2079 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2080
2081 Builder.SetInsertPoint(cast<Instruction>(
2082 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2083 // `(add (urem x, y), 1)` is always nuw.
2084 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2085 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2086 Value *RemSel =
2087 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2088
2089 NewRem->addIncoming(Start, L->getLoopPreheader());
2090 NewRem->addIncoming(RemSel, L->getLoopLatch());
2091
2092 // Insert all touched BBs.
2093 FreshBBs.insert(LoopIncrPN->getParent());
2094 FreshBBs.insert(L->getLoopLatch());
2095 FreshBBs.insert(Rem->getParent());
2096
2097 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2098 Rem->eraseFromParent();
2099 return true;
2100}
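// Illustrative sketch (not part of the original source; block and value names
// are made up): for a loop counter %i starting at 0 and stepping by 1, the
// urem above is replaced by a second recurrence that wraps back to 0 whenever
// it reaches the (loop-invariant) remainder amount %n:
//
//   loop:
//     %i   = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
//     %rem = phi i32 [ 0, %preheader ], [ %rem.sel, %latch ]   ; replaces urem
//     ...
//   latch:
//     %rem.inc = add nuw i32 %rem, 1
//     %rem.cmp = icmp eq i32 %rem.inc, %n
//     %rem.sel = select i1 %rem.cmp, i32 0, i32 %rem.inc
//     %i.next  = add nuw i32 %i, 1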
2101
2102bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2103 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2104 return true;
2105 return false;
2106}
2107
2108bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2109 if (sinkCmpExpression(Cmp, *TLI))
2110 return true;
2111
2112 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2113 return true;
2114
2115 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2116 return true;
2117
2118 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2119 return true;
2120
2121 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2122 return true;
2123
2124 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2125 return true;
2126
2127 return false;
2128}
2129
2130/// Duplicate and sink the given 'and' instruction into user blocks where it is
2131/// used in a compare to allow isel to generate better code for targets where
2132/// this operation can be combined.
2133///
2134/// Return true if any changes are made.
2135static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2136 SetOfInstrs &InsertedInsts) {
2137 // Double-check that we're not trying to optimize an instruction that was
2138 // already optimized by some other part of this pass.
2139 assert(!InsertedInsts.count(AndI) &&
2140 "Attempting to optimize already optimized and instruction");
2141 (void)InsertedInsts;
2142
2143 // Nothing to do for single use in same basic block.
2144 if (AndI->hasOneUse() &&
2145 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2146 return false;
2147
2148 // Try to avoid cases where sinking/duplicating is likely to increase register
2149 // pressure.
2150 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2151 !isa<ConstantInt>(AndI->getOperand(1)) &&
2152 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2153 return false;
2154
2155 for (auto *U : AndI->users()) {
2156 Instruction *User = cast<Instruction>(U);
2157
2158 // Only sink 'and' feeding icmp with 0.
2159 if (!isa<ICmpInst>(User))
2160 return false;
2161
2162 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2163 if (!CmpC || !CmpC->isZero())
2164 return false;
2165 }
2166
2167 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2168 return false;
2169
2170 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2171 LLVM_DEBUG(AndI->getParent()->dump());
2172
2173 // Push the 'and' into the same block as the icmp 0. There should only be
2174 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2175 // others, so we don't need to keep track of which BBs we insert into.
2176 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2177 UI != E;) {
2178 Use &TheUse = UI.getUse();
2179 Instruction *User = cast<Instruction>(*UI);
2180
2181 // Preincrement use iterator so we don't invalidate it.
2182 ++UI;
2183
2184 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2185
2186 // Keep the 'and' in the same place if the use is already in the same block.
2187 Instruction *InsertPt =
2188 User->getParent() == AndI->getParent() ? AndI : User;
2189 Instruction *InsertedAnd = BinaryOperator::Create(
2190 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2191 InsertPt->getIterator());
2192 // Propagate the debug info.
2193 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2194
2195 // Replace a use of the 'and' with a use of the new 'and'.
2196 TheUse = InsertedAnd;
2197 ++NumAndUses;
2198 LLVM_DEBUG(User->getParent()->dump());
2199 }
2200
2201 // We removed all uses, nuke the and.
2202 AndI->eraseFromParent();
2203 return true;
2204}
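// Illustrative sketch (not part of the original source; block and value names
// are made up): an 'and' that only feeds 'icmp eq ..., 0' users in other
// blocks is duplicated next to each compare so isel can fold the pair (e.g.
// into a test-under-mask instruction):
//
//   bb0:
//     %m = and i64 %x, 255
//     br ...
//   bb1:
//     %c = icmp eq i64 %m, 0
// =>
//   bb1:
//     %m.sunk = and i64 %x, 255
//     %c = icmp eq i64 %m.sunk, 0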
2205
2206/// Check if the candidates could be combined with a shift instruction, which
2207/// includes:
2208/// 1. Truncate instruction
2209/// 2. And instruction and the imm is a mask of the low bits:
2210/// imm & (imm+1) == 0
2211static bool isExtractBitsCandidateUse(Instruction *User) {
2212 if (!isa<TruncInst>(User)) {
2213 if (User->getOpcode() != Instruction::And ||
2214 !isa<ConstantInt>(User->getOperand(1)))
2215 return false;
2216
2217 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2218
2219 if ((Cimm & (Cimm + 1)).getBoolValue())
2220 return false;
2221 }
2222 return true;
2223}
2224
2225/// Sink both shift and truncate instruction to the use of truncate's BB.
2226static bool
2227SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2228 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2229 const TargetLowering &TLI, const DataLayout &DL) {
2230 BasicBlock *UserBB = User->getParent();
2231 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2232 auto *TruncI = cast<TruncInst>(User);
2233 bool MadeChange = false;
2234
2235 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2236 TruncE = TruncI->user_end();
2237 TruncUI != TruncE;) {
2238
2239 Use &TruncTheUse = TruncUI.getUse();
2240 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2241 // Preincrement use iterator so we don't invalidate it.
2242
2243 ++TruncUI;
2244
2245 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2246 if (!ISDOpcode)
2247 continue;
2248
2249 // If the use is actually a legal node, there will not be an
2250 // implicit truncate.
2251 // FIXME: always querying the result type is just an
2252 // approximation; some nodes' legality is determined by the
2253 // operand or other means. There's no good way to find out though.
2254 if (TLI.isOperationLegalOrCustom(
2255 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2256 continue;
2257
2258 // Don't bother for PHI nodes.
2259 if (isa<PHINode>(TruncUser))
2260 continue;
2261
2262 BasicBlock *TruncUserBB = TruncUser->getParent();
2263
2264 if (UserBB == TruncUserBB)
2265 continue;
2266
2267 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2268 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2269
2270 if (!InsertedShift && !InsertedTrunc) {
2271 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2272 assert(InsertPt != TruncUserBB->end());
2273 // Sink the shift
2274 if (ShiftI->getOpcode() == Instruction::AShr)
2275 InsertedShift =
2276 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2277 else
2278 InsertedShift =
2279 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2280 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2281 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2282
2283 // Sink the trunc
2284 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2285 TruncInsertPt++;
2286 // It will go ahead of any debug-info.
2287 TruncInsertPt.setHeadBit(true);
2288 assert(TruncInsertPt != TruncUserBB->end());
2289
2290 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2291 TruncI->getType(), "");
2292 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2293 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2294
2295 MadeChange = true;
2296
2297 TruncTheUse = InsertedTrunc;
2298 }
2299 }
2300 return MadeChange;
2301}
2302
2303/// Sink the shift *right* instruction into user blocks if the uses could
2304/// potentially be combined with this shift instruction to generate a BitExtract
2305/// instruction. It will only be applied if the architecture supports the
2306/// BitExtract instruction. Here is an example:
2307/// BB1:
2308/// %x.extract.shift = lshr i64 %arg1, 32
2309/// BB2:
2310/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2311/// ==>
2312///
2313/// BB2:
2314/// %x.extract.shift.1 = lshr i64 %arg1, 32
2315/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2316///
2317/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2318/// instruction.
2319/// Return true if any changes are made.
2320static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2321 const TargetLowering &TLI,
2322 const DataLayout &DL) {
2323 BasicBlock *DefBB = ShiftI->getParent();
2324
2325 /// Only insert instructions in each block once.
2326 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2327
2328 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2329
2330 bool MadeChange = false;
2331 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2332 UI != E;) {
2333 Use &TheUse = UI.getUse();
2334 Instruction *User = cast<Instruction>(*UI);
2335 // Preincrement use iterator so we don't invalidate it.
2336 ++UI;
2337
2338 // Don't bother for PHI nodes.
2339 if (isa<PHINode>(User))
2340 continue;
2341
2342 if (!isExtractBitsCandidateUse(User))
2343 continue;
2344
2345 BasicBlock *UserBB = User->getParent();
2346
2347 if (UserBB == DefBB) {
2348 // If the shift and truncate instructions are in the same BB, the use of
2349 // the truncate (TruncUse) may still introduce another truncate if it is not
2350 // legal. In this case, we would like to sink both the shift and the truncate
2351 // instructions into the BB of TruncUse.
2352 // for example:
2353 // BB1:
2354 // i64 shift.result = lshr i64 opnd, imm
2355 // trunc.result = trunc shift.result to i16
2356 //
2357 // BB2:
2358 // ----> We will have an implicit truncate here if the architecture does
2359 // not have i16 compare.
2360 // cmp i16 trunc.result, opnd2
2361 //
2362 if (isa<TruncInst>(User) &&
2363 shiftIsLegal
2364 // If the type of the truncate is legal, no truncate will be
2365 // introduced in other basic blocks.
2366 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2367 MadeChange =
2368 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2369
2370 continue;
2371 }
2372 // If we have already inserted a shift into this block, use it.
2373 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2374
2375 if (!InsertedShift) {
2376 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2377 assert(InsertPt != UserBB->end());
2378
2379 if (ShiftI->getOpcode() == Instruction::AShr)
2380 InsertedShift =
2381 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2382 else
2383 InsertedShift =
2384 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2385 InsertedShift->insertBefore(*UserBB, InsertPt);
2386 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2387
2388 MadeChange = true;
2389 }
2390
2391 // Replace a use of the shift with a use of the new shift.
2392 TheUse = InsertedShift;
2393 }
2394
2395 // If we removed all uses, or there are none, nuke the shift.
2396 if (ShiftI->use_empty()) {
2397 salvageDebugInfo(*ShiftI);
2398 ShiftI->eraseFromParent();
2399 MadeChange = true;
2400 }
2401
2402 return MadeChange;
2403}
2404
2405/// If counting leading or trailing zeros is an expensive operation and a zero
2406/// input is defined, add a check for zero to avoid calling the intrinsic.
2407///
2408/// We want to transform:
2409/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2410///
2411/// into:
2412/// entry:
2413/// %cmpz = icmp eq i64 %A, 0
2414/// br i1 %cmpz, label %cond.end, label %cond.false
2415/// cond.false:
2416/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2417/// br label %cond.end
2418/// cond.end:
2419/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2420///
2421/// If the transform is performed, return true and set ModifiedDT to true.
2422static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2423 LoopInfo &LI,
2424 const TargetLowering *TLI,
2425 const DataLayout *DL, ModifyDT &ModifiedDT,
2426 SmallSet<BasicBlock *, 32> &FreshBBs,
2427 bool IsHugeFunc) {
2428 // If a zero input is undefined, it doesn't make sense to despeculate that.
2429 if (match(CountZeros->getOperand(1), m_One()))
2430 return false;
2431
2432 // If it's cheap to speculate, there's nothing to do.
2433 Type *Ty = CountZeros->getType();
2434 auto IntrinsicID = CountZeros->getIntrinsicID();
2435 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2436 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2437 return false;
2438
2439 // Only handle legal scalar cases. Anything else requires too much work.
2440 unsigned SizeInBits = Ty->getScalarSizeInBits();
2441 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2442 return false;
2443
2444 // Bail if the value is never zero.
2445 Use &Op = CountZeros->getOperandUse(0);
2446 if (isKnownNonZero(Op, *DL))
2447 return false;
2448
2449 // The intrinsic will be sunk behind a compare against zero and branch.
2450 BasicBlock *StartBlock = CountZeros->getParent();
2451 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2452 if (IsHugeFunc)
2453 FreshBBs.insert(CallBlock);
2454
2455 // Create another block after the count zero intrinsic. A PHI will be added
2456 // in this block to select the result of the intrinsic or the bit-width
2457 // constant if the input to the intrinsic is zero.
2458 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2459 // Any debug-info after CountZeros should not be included.
2460 SplitPt.setHeadBit(true);
2461 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2462 if (IsHugeFunc)
2463 FreshBBs.insert(EndBlock);
2464
2465 // Update the LoopInfo. The new blocks are in the same loop as the start
2466 // block.
2467 if (Loop *L = LI.getLoopFor(StartBlock)) {
2468 L->addBasicBlockToLoop(CallBlock, LI);
2469 L->addBasicBlockToLoop(EndBlock, LI);
2470 }
2471
2472 // Set up a builder to create a compare, conditional branch, and PHI.
2473 IRBuilder<> Builder(CountZeros->getContext());
2474 Builder.SetInsertPoint(StartBlock->getTerminator());
2475 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2476
2477 // Replace the unconditional branch that was created by the first split with
2478 // a compare against zero and a conditional branch.
2479 Value *Zero = Constant::getNullValue(Ty);
2480 // Avoid introducing branch on poison. This also replaces the ctz operand.
2481 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2482 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2483 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2484 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2485 StartBlock->getTerminator()->eraseFromParent();
2486
2487 // Create a PHI in the end block to select either the output of the intrinsic
2488 // or the bit width of the operand.
2489 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2490 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2491 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2492 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2493 PN->addIncoming(BitWidth, StartBlock);
2494 PN->addIncoming(CountZeros, CallBlock);
2495
2496 // We are explicitly handling the zero case, so we can set the intrinsic's
2497 // undefined zero argument to 'true'. This will also prevent reprocessing the
2498 // intrinsic; we only despeculate when a zero input is defined.
2499 CountZeros->setArgOperand(1, Builder.getTrue());
2500 ModifiedDT = ModifyDT::ModifyBBDT;
2501 return true;
2502}
2503
2504bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2505 BasicBlock *BB = CI->getParent();
2506
2507 // Lower inline assembly if we can.
2508 // If we found an inline asm expression, and if the target knows how to
2509 // lower it to normal LLVM code, do so now.
2510 if (CI->isInlineAsm()) {
2511 if (TLI->ExpandInlineAsm(CI)) {
2512 // Avoid invalidating the iterator.
2513 CurInstIterator = BB->begin();
2514 // Avoid processing instructions out of order, which could cause
2515 // reuse before a value is defined.
2516 SunkAddrs.clear();
2517 return true;
2518 }
2519 // Sink address computing for memory operands into the block.
2520 if (optimizeInlineAsmInst(CI))
2521 return true;
2522 }
2523
2524 // Align the pointer arguments to this call if the target thinks it's a good
2525 // idea
2526 unsigned MinSize;
2527 Align PrefAlign;
2528 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2529 for (auto &Arg : CI->args()) {
2530 // We want to align both objects whose address is used directly and
2531 // objects whose address is used in casts and GEPs, though it only makes
2532 // sense for GEPs if the offset is a multiple of the desired alignment and
2533 // if size - offset meets the size threshold.
2534 if (!Arg->getType()->isPointerTy())
2535 continue;
2536 APInt Offset(DL->getIndexSizeInBits(
2537 cast<PointerType>(Arg->getType())->getAddressSpace()),
2538 0);
2539 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2540 uint64_t Offset2 = Offset.getLimitedValue();
2541 if (!isAligned(PrefAlign, Offset2))
2542 continue;
2543 AllocaInst *AI;
2544 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2545 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2546 AI->setAlignment(PrefAlign);
2547 // Global variables can only be aligned if they are defined in this
2548 // object (i.e. they are uniquely initialized in this object), and
2549 // over-aligning global variables that have an explicit section is
2550 // forbidden.
2551 GlobalVariable *GV;
2552 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2553 GV->getPointerAlignment(*DL) < PrefAlign &&
2554 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2555 GV->setAlignment(PrefAlign);
2556 }
2557 }
2558 // If this is a memcpy (or similar) then we may be able to improve the
2559 // alignment.
2560 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2561 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2562 MaybeAlign MIDestAlign = MI->getDestAlign();
2563 if (!MIDestAlign || DestAlign > *MIDestAlign)
2564 MI->setDestAlignment(DestAlign);
2565 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2566 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2567 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2568 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2569 MTI->setSourceAlignment(SrcAlign);
2570 }
2571 }
2572
2573 // If we have a cold call site, try to sink addressing computation into the
2574 // cold block. This interacts with our handling for loads and stores to
2575 // ensure that we can fold all uses of a potential addressing computation
2576 // into their uses. TODO: generalize this to work over profiling data
2577 if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
2578 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2579 for (auto &Arg : CI->args()) {
2580 if (!Arg->getType()->isPointerTy())
2581 continue;
2582 unsigned AS = Arg->getType()->getPointerAddressSpace();
2583 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2584 return true;
2585 }
2586
2587 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2588 if (II) {
2589 switch (II->getIntrinsicID()) {
2590 default:
2591 break;
2592 case Intrinsic::assume:
2593 llvm_unreachable("llvm.assume should have been removed already");
2594 case Intrinsic::allow_runtime_check:
2595 case Intrinsic::allow_ubsan_check:
2596 case Intrinsic::experimental_widenable_condition: {
2597 // Give up on future widening opportunities so that we can fold away dead
2598 // paths and merge blocks before going into block-local instruction
2599 // selection.
2600 if (II->use_empty()) {
2601 II->eraseFromParent();
2602 return true;
2603 }
2604 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2605 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2606 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2607 });
2608 return true;
2609 }
2610 case Intrinsic::objectsize:
2611 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2612 case Intrinsic::is_constant:
2613 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2614 case Intrinsic::aarch64_stlxr:
2615 case Intrinsic::aarch64_stxr: {
2616 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2617 if (!ExtVal || !ExtVal->hasOneUse() ||
2618 ExtVal->getParent() == CI->getParent())
2619 return false;
2620 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2621 ExtVal->moveBefore(CI);
2622 // Mark this instruction as "inserted by CGP", so that other
2623 // optimizations don't touch it.
2624 InsertedInsts.insert(ExtVal);
2625 return true;
2626 }
2627
2628 case Intrinsic::launder_invariant_group:
2629 case Intrinsic::strip_invariant_group: {
2630 Value *ArgVal = II->getArgOperand(0);
2631 auto it = LargeOffsetGEPMap.find(II);
2632 if (it != LargeOffsetGEPMap.end()) {
2633 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2634 // Make sure not to have to deal with iterator invalidation
2635 // after possibly adding ArgVal to LargeOffsetGEPMap.
2636 auto GEPs = std::move(it->second);
2637 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2638 LargeOffsetGEPMap.erase(II);
2639 }
2640
2641 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2642 II->eraseFromParent();
2643 return true;
2644 }
2645 case Intrinsic::cttz:
2646 case Intrinsic::ctlz:
2647 // If counting zeros is expensive, try to avoid it.
2648 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2649 IsHugeFunc);
2650 case Intrinsic::fshl:
2651 case Intrinsic::fshr:
2652 return optimizeFunnelShift(II);
2653 case Intrinsic::dbg_assign:
2654 case Intrinsic::dbg_value:
2655 return fixupDbgValue(II);
2656 case Intrinsic::masked_gather:
2657 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2658 case Intrinsic::masked_scatter:
2659 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2660 }
2661
2662 SmallVector<Value *, 2> PtrOps;
2663 Type *AccessTy;
2664 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2665 while (!PtrOps.empty()) {
2666 Value *PtrVal = PtrOps.pop_back_val();
2667 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2668 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2669 return true;
2670 }
2671 }
2672
2673 // From here on out we're working with named functions.
2674 if (!CI->getCalledFunction())
2675 return false;
2676
2677 // Lower all default uses of _chk calls. This is very similar
2678 // to what InstCombineCalls does, but here we are only lowering calls
2679 // to fortified library functions (e.g. __memcpy_chk) that have the default
2680 // "don't know" as the objectsize. Anything else should be left alone.
2681 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2682 IRBuilder<> Builder(CI);
2683 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2684 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2685 CI->eraseFromParent();
2686 return true;
2687 }
2688
2689 return false;
2690}
2691
2692static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2693 const CallInst *CI) {
2694 assert(CI && CI->use_empty());
2695
2696 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2697 switch (II->getIntrinsicID()) {
2698 case Intrinsic::memset:
2699 case Intrinsic::memcpy:
2700 case Intrinsic::memmove:
2701 return true;
2702 default:
2703 return false;
2704 }
2705
2706 LibFunc LF;
2707 Function *Callee = CI->getCalledFunction();
2708 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2709 switch (LF) {
2710 case LibFunc_strcpy:
2711 case LibFunc_strncpy:
2712 case LibFunc_strcat:
2713 case LibFunc_strncat:
2714 return true;
2715 default:
2716 return false;
2717 }
2718
2719 return false;
2720}
2721
2722/// Look for opportunities to duplicate return instructions to the predecessor
2723/// to enable tail call optimizations. The case it is currently looking for is
2724/// the following one. Known intrinsics or library functions that may be tail
2725/// called are taken into account as well.
2726/// @code
2727/// bb0:
2728/// %tmp0 = tail call i32 @f0()
2729/// br label %return
2730/// bb1:
2731/// %tmp1 = tail call i32 @f1()
2732/// br label %return
2733/// bb2:
2734/// %tmp2 = tail call i32 @f2()
2735/// br label %return
2736/// return:
2737/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2738/// ret i32 %retval
2739/// @endcode
2740///
2741/// =>
2742///
2743/// @code
2744/// bb0:
2745/// %tmp0 = tail call i32 @f0()
2746/// ret i32 %tmp0
2747/// bb1:
2748/// %tmp1 = tail call i32 @f1()
2749/// ret i32 %tmp1
2750/// bb2:
2751/// %tmp2 = tail call i32 @f2()
2752/// ret i32 %tmp2
2753/// @endcode
2754bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2755 ModifyDT &ModifiedDT) {
2756 if (!BB->getTerminator())
2757 return false;
2758
2759 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2760 if (!RetI)
2761 return false;
2762
2763 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2764
2765 PHINode *PN = nullptr;
2766 ExtractValueInst *EVI = nullptr;
2767 BitCastInst *BCI = nullptr;
2768 Value *V = RetI->getReturnValue();
2769 if (V) {
2770 BCI = dyn_cast<BitCastInst>(V);
2771 if (BCI)
2772 V = BCI->getOperand(0);
2773
2774 EVI = dyn_cast<ExtractValueInst>(V);
2775 if (EVI) {
2776 V = EVI->getOperand(0);
2777 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2778 return false;
2779 }
2780
2781 PN = dyn_cast<PHINode>(V);
2782 }
2783
2784 if (PN && PN->getParent() != BB)
2785 return false;
2786
2787 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2788 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2789 if (BC && BC->hasOneUse())
2790 Inst = BC->user_back();
2791
2792 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2793 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2794 return false;
2795 };
2796
2797 // Make sure there are no instructions between the first instruction
2798 // and return.
2799 const Instruction *BI = BB->getFirstNonPHI();
2800 // Skip over debug and the bitcast.
2801 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2802 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
2803 BI = BI->getNextNode();
2804 if (BI != RetI)
2805 return false;
2806
2807 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2808 /// call.
2809 const Function *F = BB->getParent();
2810 SmallVector<BasicBlock *, 4> TailCallBBs;
2811 if (PN) {
2812 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2813 // Look through bitcasts.
2814 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2815 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2816 BasicBlock *PredBB = PN->getIncomingBlock(I);
2817 // Make sure the phi value is indeed produced by the tail call.
2818 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2819 TLI->mayBeEmittedAsTailCall(CI) &&
2820 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2821 TailCallBBs.push_back(PredBB);
2822 } else {
2823 // Consider the cases in which the phi value is indirectly produced by
2824 // the tail call, for example when encountering memset(), memmove(),
2825 // strcpy(), whose return value may have been optimized out. In such
2826 // cases, the value needs to be the first function argument.
2827 //
2828 // bb0:
2829 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2830 // br label %return
2831 // return:
2832 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2833 if (PredBB && PredBB->getSingleSuccessor() == BB)
2834 CI = dyn_cast_or_null<CallInst>(
2835 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2836
2837 if (CI && CI->use_empty() &&
2838 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2839 IncomingVal == CI->getArgOperand(0) &&
2840 TLI->mayBeEmittedAsTailCall(CI) &&
2841 attributesPermitTailCall(F, CI, RetI, *TLI))
2842 TailCallBBs.push_back(PredBB);
2843 }
2844 }
2845 } else {
2846 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
2847 for (BasicBlock *Pred : predecessors(BB)) {
2848 if (!VisitedBBs.insert(Pred).second)
2849 continue;
2850 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2851 CallInst *CI = dyn_cast<CallInst>(I);
2852 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2853 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2854 // Either we return void or the return value must be the first
2855 // argument of a known intrinsic or library function.
2856 if (!V || isa<UndefValue>(V) ||
2857 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2858 V == CI->getArgOperand(0))) {
2859 TailCallBBs.push_back(Pred);
2860 }
2861 }
2862 }
2863 }
2864 }
2865
2866 bool Changed = false;
2867 for (auto const &TailCallBB : TailCallBBs) {
2868 // Make sure the call instruction is followed by an unconditional branch to
2869 // the return block.
2870 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2871 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2872 continue;
2873
2874 // Duplicate the return into TailCallBB.
2875 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2876 assert(!VerifyBFIUpdates ||
2877 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
2878 BFI->setBlockFreq(BB,
2879 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
2880 ModifiedDT = ModifyDT::ModifyBBDT;
2881 Changed = true;
2882 ++NumRetsDup;
2883 }
2884
2885 // If we eliminated all predecessors of the block, delete the block now.
2886 if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
2887 BB->eraseFromParent();
2888
2889 return Changed;
2890}
2891
2892//===----------------------------------------------------------------------===//
2893// Memory Optimization
2894//===----------------------------------------------------------------------===//
2895
2896namespace {
2897
2898/// This is an extended version of TargetLowering::AddrMode
2899/// which holds actual Value*'s for register values.
2900struct ExtAddrMode : public TargetLowering::AddrMode {
2901 Value *BaseReg = nullptr;
2902 Value *ScaledReg = nullptr;
2903 Value *OriginalValue = nullptr;
2904 bool InBounds = true;
2905
2906 enum FieldName {
2907 NoField = 0x00,
2908 BaseRegField = 0x01,
2909 BaseGVField = 0x02,
2910 BaseOffsField = 0x04,
2911 ScaledRegField = 0x08,
2912 ScaleField = 0x10,
2913 MultipleFields = 0xff
2914 };
2915
2916 ExtAddrMode() = default;
2917
2918 void print(raw_ostream &OS) const;
2919 void dump() const;
2920
2921 FieldName compare(const ExtAddrMode &other) {
2922 // First check that the types are the same on each field, as differing types
2923 // are something we can't cope with later on.
2924 if (BaseReg && other.BaseReg &&
2925 BaseReg->getType() != other.BaseReg->getType())
2926 return MultipleFields;
2927 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
2928 return MultipleFields;
2929 if (ScaledReg && other.ScaledReg &&
2930 ScaledReg->getType() != other.ScaledReg->getType())
2931 return MultipleFields;
2932
2933 // Conservatively reject 'inbounds' mismatches.
2934 if (InBounds != other.InBounds)
2935 return MultipleFields;
2936
2937 // Check each field to see if it differs.
2938 unsigned Result = NoField;
2939 if (BaseReg != other.BaseReg)
2940 Result |= BaseRegField;
2941 if (BaseGV != other.BaseGV)
2942 Result |= BaseGVField;
2943 if (BaseOffs != other.BaseOffs)
2944 Result |= BaseOffsField;
2945 if (ScaledReg != other.ScaledReg)
2946 Result |= ScaledRegField;
2947 // Don't count 0 as being a different scale, because that actually means
2948 // unscaled (which will already be counted by having no ScaledReg).
2949 if (Scale && other.Scale && Scale != other.Scale)
2950 Result |= ScaleField;
2951
2952 if (llvm::popcount(Result) > 1)
2953 return MultipleFields;
2954 else
2955 return static_cast<FieldName>(Result);
2956 }
2957
2958 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2959 // with no offset.
2960 bool isTrivial() {
2961 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2962 // trivial if at most one of these terms is nonzero, except that BaseGV and
2963 // BaseReg both being zero actually means a null pointer value, which we
2964 // consider to be 'non-zero' here.
2965 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2966 }
2967
2968 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2969 switch (Field) {
2970 default:
2971 return nullptr;
2972 case BaseRegField:
2973 return BaseReg;
2974 case BaseGVField:
2975 return BaseGV;
2976 case ScaledRegField:
2977 return ScaledReg;
2978 case BaseOffsField:
2979 return ConstantInt::get(IntPtrTy, BaseOffs);
2980 }
2981 }
2982
2983 void SetCombinedField(FieldName Field, Value *V,
2984 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2985 switch (Field) {
2986 default:
2987 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2988 break;
2989 case ExtAddrMode::BaseRegField:
2990 BaseReg = V;
2991 break;
2992 case ExtAddrMode::BaseGVField:
2993 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2994 // in the BaseReg field.
2995 assert(BaseReg == nullptr);
2996 BaseReg = V;
2997 BaseGV = nullptr;
2998 break;
2999 case ExtAddrMode::ScaledRegField:
3000 ScaledReg = V;
3001 // If we have a mix of scaled and unscaled addrmodes then we want scale
3002 // to be the scale and not zero.
3003 if (!Scale)
3004 for (const ExtAddrMode &AM : AddrModes)
3005 if (AM.Scale) {
3006 Scale = AM.Scale;
3007 break;
3008 }
3009 break;
3010 case ExtAddrMode::BaseOffsField:
3011 // The offset is no longer a constant, so it goes in ScaledReg with a
3012 // scale of 1.
3013 assert(ScaledReg == nullptr);
3014 ScaledReg = V;
3015 Scale = 1;
3016 BaseOffs = 0;
3017 break;
3018 }
3019 }
3020};
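// Illustrative example (not part of the original source; value names are made
// up) of how an address decomposes into the fields above: for an access at
// '%p + 4*%i + 16', the ExtAddrMode would print as
//   [inbounds 16 + Base:%p + 4*%i]
// i.e. BaseOffs = 16, BaseReg = %p, Scale = 4, ScaledReg = %i, BaseGV = null.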
3021
3022#ifndef NDEBUG
3023static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3024 AM.print(OS);
3025 return OS;
3026}
3027#endif
3028
3029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3030void ExtAddrMode::print(raw_ostream &OS) const {
3031 bool NeedPlus = false;
3032 OS << "[";
3033 if (InBounds)
3034 OS << "inbounds ";
3035 if (BaseGV) {
3036 OS << "GV:";
3037 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3038 NeedPlus = true;
3039 }
3040
3041 if (BaseOffs) {
3042 OS << (NeedPlus ? " + " : "") << BaseOffs;
3043 NeedPlus = true;
3044 }
3045
3046 if (BaseReg) {
3047 OS << (NeedPlus ? " + " : "") << "Base:";
3048 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3049 NeedPlus = true;
3050 }
3051 if (Scale) {
3052 OS << (NeedPlus ? " + " : "") << Scale << "*";
3053 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3054 }
3055
3056 OS << ']';
3057}
3058
3059LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3060 print(dbgs());
3061 dbgs() << '\n';
3062}
3063#endif
3064
3065} // end anonymous namespace
3066
3067namespace {
3068
3069/// This class provides transaction based operation on the IR.
3070/// Every change made through this class is recorded in the internal state and
3071/// can be undone (rollback) until commit is called.
3072/// CGP does not check if instructions could be speculatively executed when
3073/// moved. Preserving the original location would pessimize the debugging
3074/// experience, as well as negatively impact the quality of sample PGO.
3075class TypePromotionTransaction {
3076 /// This represents the common interface of the individual transaction.
3077 /// Each class implements the logic for doing one specific modification on
3078 /// the IR via the TypePromotionTransaction.
3079 class TypePromotionAction {
3080 protected:
3081 /// The Instruction modified.
3082 Instruction *Inst;
3083
3084 public:
3085 /// Constructor of the action.
3086 /// The constructor performs the related action on the IR.
3087 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3088
3089 virtual ~TypePromotionAction() = default;
3090
3091 /// Undo the modification done by this action.
3092 /// When this method is called, the IR must be in the same state as it was
3093 /// before this action was applied.
3094 /// \pre Undoing the action works if and only if the IR is in the exact same
3095 /// state as it was directly after this action was applied.
3096 virtual void undo() = 0;
3097
3098 /// Commit every change made by this action.
3099 /// When the results on the IR of the action are to be kept, it is important
3100 /// to call this function, otherwise hidden information may be kept forever.
3101 virtual void commit() {
3102 // Nothing to be done, this action is not doing anything.
3103 }
3104 };
3105
3106 /// Utility to remember the position of an instruction.
3107 class InsertionHandler {
3108 /// Position of an instruction.
3109 /// Either an instruction:
3110 /// - Is the first in a basic block: BB is used.
3111 /// - Has a previous instruction: PrevInst is used.
3112 union {
3113 Instruction *PrevInst;
3114 BasicBlock *BB;
3115 } Point;
3116 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3117
3118 /// Remember whether or not the instruction had a previous instruction.
3119 bool HasPrevInstruction;
3120
3121 public:
3122 /// Record the position of \p Inst.
3123 InsertionHandler(Instruction *Inst) {
3124 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3125 BasicBlock *BB = Inst->getParent();
3126
3127 // Record where we would have to re-insert the instruction in the sequence
3128 // of DbgRecords, if we ended up reinserting.
3129 if (BB->IsNewDbgInfoFormat)
3130 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3131
3132 if (HasPrevInstruction) {
3133 Point.PrevInst = &*std::prev(Inst->getIterator());
3134 } else {
3135 Point.BB = BB;
3136 }
3137 }
3138
3139 /// Insert \p Inst at the recorded position.
3140 void insert(Instruction *Inst) {
3141 if (HasPrevInstruction) {
3142 if (Inst->getParent())
3143 Inst->removeFromParent();
3144 Inst->insertAfter(&*Point.PrevInst);
3145 } else {
3146 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3147 if (Inst->getParent())
3148 Inst->moveBefore(*Point.BB, Position);
3149 else
3150 Inst->insertBefore(*Point.BB, Position);
3151 }
3152
3153 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3154 }
3155 };
3156
3157 /// Move an instruction before another.
3158 class InstructionMoveBefore : public TypePromotionAction {
3159 /// Original position of the instruction.
3160 InsertionHandler Position;
3161
3162 public:
3163 /// Move \p Inst before \p Before.
3164 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
3165 : TypePromotionAction(Inst), Position(Inst) {
3166 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3167 << "\n");
3168 Inst->moveBefore(Before);
3169 }
3170
3171 /// Move the instruction back to its original position.
3172 void undo() override {
3173 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3174 Position.insert(Inst);
3175 }
3176 };
3177
3178 /// Set the operand of an instruction with a new value.
3179 class OperandSetter : public TypePromotionAction {
3180 /// Original operand of the instruction.
3181 Value *Origin;
3182
3183 /// Index of the modified instruction.
3184 unsigned Idx;
3185
3186 public:
3187 /// Set \p Idx operand of \p Inst with \p NewVal.
3188 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3189 : TypePromotionAction(Inst), Idx(Idx) {
3190 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3191 << "for:" << *Inst << "\n"
3192 << "with:" << *NewVal << "\n");
3193 Origin = Inst->getOperand(Idx);
3194 Inst->setOperand(Idx, NewVal);
3195 }
3196
3197 /// Restore the original value of the instruction.
3198 void undo() override {
3199 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3200 << "for: " << *Inst << "\n"
3201 << "with: " << *Origin << "\n");
3202 Inst->setOperand(Idx, Origin);
3203 }
3204 };
3205
3206 /// Hide the operands of an instruction.
3207 /// Behave as if this instruction were not using any of its operands.
3208 class OperandsHider : public TypePromotionAction {
3209 /// The list of original operands.
3210 SmallVector<Value *, 4> OriginalValues;
3211
3212 public:
3213 /// Remove \p Inst from the uses of the operands of \p Inst.
3214 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3215 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3216 unsigned NumOpnds = Inst->getNumOperands();
3217 OriginalValues.reserve(NumOpnds);
3218 for (unsigned It = 0; It < NumOpnds; ++It) {
3219 // Save the current operand.
3220 Value *Val = Inst->getOperand(It);
3221 OriginalValues.push_back(Val);
3222 // Set a dummy one.
3223 // We could use OperandSetter here, but that would imply an overhead
3224 // that we are not willing to pay.
3225 Inst->setOperand(It, UndefValue::get(Val->getType()));
3226 }
3227 }
3228
3229 /// Restore the original list of uses.
3230 void undo() override {
3231 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3232 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3233 Inst->setOperand(It, OriginalValues[It]);
3234 }
3235 };
3236
3237 /// Build a truncate instruction.
3238 class TruncBuilder : public TypePromotionAction {
3239 Value *Val;
3240
3241 public:
3242 /// Build a truncate instruction of \p Opnd producing a \p Ty
3243 /// result.
3244 /// trunc Opnd to Ty.
3245 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3246 IRBuilder<> Builder(Opnd);
3247 Builder.SetCurrentDebugLocation(DebugLoc());
3248 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3249 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3250 }
3251
3252 /// Get the built value.
3253 Value *getBuiltValue() { return Val; }
3254
3255 /// Remove the built instruction.
3256 void undo() override {
3257 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3258 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3259 IVal->eraseFromParent();
3260 }
3261 };
3262
3263 /// Build a sign extension instruction.
3264 class SExtBuilder : public TypePromotionAction {
3265 Value *Val;
3266
3267 public:
3268 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3269 /// result.
3270 /// sext Opnd to Ty.
3271 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3272 : TypePromotionAction(InsertPt) {
3273 IRBuilder<> Builder(InsertPt);
3274 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3275 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3276 }
3277
3278 /// Get the built value.
3279 Value *getBuiltValue() { return Val; }
3280
3281 /// Remove the built instruction.
3282 void undo() override {
3283 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3284 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3285 IVal->eraseFromParent();
3286 }
3287 };
3288
3289 /// Build a zero extension instruction.
3290 class ZExtBuilder : public TypePromotionAction {
3291 Value *Val;
3292
3293 public:
3294 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3295 /// result.
3296 /// zext Opnd to Ty.
3297 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3298 : TypePromotionAction(InsertPt) {
3299 IRBuilder<> Builder(InsertPt);
3300 Builder.SetCurrentDebugLocation(DebugLoc());
3301 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3302 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3303 }
3304
3305 /// Get the built value.
3306 Value *getBuiltValue() { return Val; }
3307
3308 /// Remove the built instruction.
3309 void undo() override {
3310 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3311 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3312 IVal->eraseFromParent();
3313 }
3314 };
3315
3316 /// Mutate an instruction to another type.
3317 class TypeMutator : public TypePromotionAction {
3318 /// Record the original type.
3319 Type *OrigTy;
3320
3321 public:
3322 /// Mutate the type of \p Inst into \p NewTy.
3323 TypeMutator(Instruction *Inst, Type *NewTy)
3324 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3325 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3326 << "\n");
3327 Inst->mutateType(NewTy);
3328 }
3329
3330 /// Mutate the instruction back to its original type.
3331 void undo() override {
3332 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3333 << "\n");
3334 Inst->mutateType(OrigTy);
3335 }
3336 };
3337
3338 /// Replace the uses of an instruction by another instruction.
3339 class UsesReplacer : public TypePromotionAction {
3340 /// Helper structure to keep track of the replaced uses.
3341 struct InstructionAndIdx {
3342 /// The instruction using the instruction.
3343 Instruction *Inst;
3344
3345 /// The index where this instruction is used for Inst.
3346 unsigned Idx;
3347
3348 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3349 : Inst(Inst), Idx(Idx) {}
3350 };
3351
3352 /// Keep track of the original uses (pair Instruction, Index).
3353 SmallVector<InstructionAndIdx, 4> OriginalUses;
3354 /// Keep track of the debug users.
3355 SmallVector<DbgValueInst *, 1> DbgValues;
3356 /// And non-instruction debug-users too.
3357 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3358
3359 /// Keep track of the new value so that we can undo it by replacing
3360 /// instances of the new value with the original value.
3361 Value *New;
3362
3364
3365 public:
3366 /// Replace all the use of \p Inst by \p New.
3367 UsesReplacer(Instruction *Inst, Value *New)
3368 : TypePromotionAction(Inst), New(New) {
3369 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3370 << "\n");
3371 // Record the original uses.
3372 for (Use &U : Inst->uses()) {
3373 Instruction *UserI = cast<Instruction>(U.getUser());
3374 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3375 }
3376 // Record the debug uses separately. They are not in the instruction's
3377 // use list, but they are replaced by RAUW.
3378 findDbgValues(DbgValues, Inst, &DbgVariableRecords);
3379
3380 // Now, we can replace the uses.
3381 Inst->replaceAllUsesWith(New);
3382 }
3383
3384 /// Reassign the original uses of Inst to Inst.
3385 void undo() override {
3386 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3387 for (InstructionAndIdx &Use : OriginalUses)
3388 Use.Inst->setOperand(Use.Idx, Inst);
3389 // RAUW has replaced all original uses with references to the new value,
3390 // including the debug uses. Since we are undoing the replacements,
3391 // the original debug uses must also be reinstated to maintain the
3392 // correctness and utility of debug value instructions.
3393 for (auto *DVI : DbgValues)
3394 DVI->replaceVariableLocationOp(New, Inst);
3395 // Similar story with DbgVariableRecords, the non-instruction
3396 // representation of dbg.values.
3397 for (DbgVariableRecord *DVR : DbgVariableRecords)
3398 DVR->replaceVariableLocationOp(New, Inst);
3399 }
3400 };
3401
3402 /// Remove an instruction from the IR.
3403 class InstructionRemover : public TypePromotionAction {
3404 /// Original position of the instruction.
3405 InsertionHandler Inserter;
3406
3407 /// Helper structure to hide all the links to the instruction. In other
3408 /// words, this helps to act as if the instruction were removed.
3409 OperandsHider Hider;
3410
3411 /// Keep track of the uses replaced, if any.
3412 UsesReplacer *Replacer = nullptr;
3413
3414 /// Keep track of instructions removed.
3415 SetOfInstrs &RemovedInsts;
3416
3417 public:
3418 /// Remove all references to \p Inst and optionally replace all its
3419 /// uses with \p New.
3420 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3421 /// \pre If !Inst->use_empty(), then New != nullptr
3422 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3423 Value *New = nullptr)
3424 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3425 RemovedInsts(RemovedInsts) {
3426 if (New)
3427 Replacer = new UsesReplacer(Inst, New);
3428 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3429 RemovedInsts.insert(Inst);
3430 /// The instructions removed here will be freed after completing
3431 /// optimizeBlock() for all blocks as we need to keep track of the
3432 /// removed instructions during promotion.
3433 Inst->removeFromParent();
3434 }
3435
3436 ~InstructionRemover() override { delete Replacer; }
3437
3438 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3439 InstructionRemover(const InstructionRemover &other) = delete;
3440
3441 /// Resurrect the instruction and reassign it to the proper uses if
3442 /// a new value was provided when building this action.
3443 void undo() override {
3444 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3445 Inserter.insert(Inst);
3446 if (Replacer)
3447 Replacer->undo();
3448 Hider.undo();
3449 RemovedInsts.erase(Inst);
3450 }
3451 };
3452
3453public:
3454 /// Restoration point.
3455 /// The restoration point is a pointer to an action instead of an iterator
3456 /// because the iterator may be invalidated but not the pointer.
3457 using ConstRestorationPt = const TypePromotionAction *;
3458
3459 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3460 : RemovedInsts(RemovedInsts) {}
3461
3462 /// Commit every change made in this transaction. Return true if any change
3463 /// happened.
3464 bool commit();
3465
3466 /// Undo all the changes made after the given point.
3467 void rollback(ConstRestorationPt Point);
3468
3469 /// Get the current restoration point.
3470 ConstRestorationPt getRestorationPoint() const;
3471
3472 /// \name API for IR modification with state keeping to support rollback.
3473 /// @{
3474 /// Same as Instruction::setOperand.
3475 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3476
3477 /// Same as Instruction::eraseFromParent.
3478 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3479
3480 /// Same as Value::replaceAllUsesWith.
3481 void replaceAllUsesWith(Instruction *Inst, Value *New);
3482
3483 /// Same as Value::mutateType.
3484 void mutateType(Instruction *Inst, Type *NewTy);
3485
3486 /// Same as IRBuilder::createTrunc.
3487 Value *createTrunc(Instruction *Opnd, Type *Ty);
3488
3489 /// Same as IRBuilder::createSExt.
3490 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3491
3492 /// Same as IRBuilder::createZExt.
3493 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3494
3495private:
3496 /// The ordered list of actions made so far.
3497 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3498
3499 using CommitPt =
3500 SmallVector<std::unique_ptr<TypePromotionAction>, 16>::iterator;
3501
3502 SetOfInstrs &RemovedInsts;
3503};
3504
3505} // end anonymous namespace
3506
3507void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3508 Value *NewVal) {
3509 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3510 Inst, Idx, NewVal));
3511}
3512
3513void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3514 Value *NewVal) {
3515 Actions.push_back(
3516 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3517 Inst, RemovedInsts, NewVal));
3518}
3519
3520void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3521 Value *New) {
3522 Actions.push_back(
3523 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3524}
3525
3526void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3527 Actions.push_back(
3528 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3529}
3530
3531Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3532 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3533 Value *Val = Ptr->getBuiltValue();
3534 Actions.push_back(std::move(Ptr));
3535 return Val;
3536}
3537
3538Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3539 Type *Ty) {
3540 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3541 Value *Val = Ptr->getBuiltValue();
3542 Actions.push_back(std::move(Ptr));
3543 return Val;
3544}
3545
3546Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3547 Type *Ty) {
3548 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3549 Value *Val = Ptr->getBuiltValue();
3550 Actions.push_back(std::move(Ptr));
3551 return Val;
3552}
3553
3554TypePromotionTransaction::ConstRestorationPt
3555TypePromotionTransaction::getRestorationPoint() const {
3556 return !Actions.empty() ? Actions.back().get() : nullptr;
3557}
3558
3559bool TypePromotionTransaction::commit() {
3560 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3561 Action->commit();
3562 bool Modified = !Actions.empty();
3563 Actions.clear();
3564 return Modified;
3565}
3566
3567void TypePromotionTransaction::rollback(
3568 TypePromotionTransaction::ConstRestorationPt Point) {
3569 while (!Actions.empty() && Point != Actions.back().get()) {
3570 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3571 Curr->undo();
3572 }
3573}
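// A minimal usage sketch of the transaction API above (illustrative only, not
// part of the original file; 'Ext', 'WiderTy' and 'ProfitableToKeep' stand for
// whatever the caller has at hand): speculative IR changes are recorded as
// actions and then either committed or rolled back.
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt RestorePt =
//       TPT.getRestorationPoint();
//   Value *Promoted = TPT.createSExt(Ext, Ext->getOperand(0), WiderTy);
//   TPT.replaceAllUsesWith(Ext, Promoted);
//   if (ProfitableToKeep)
//     TPT.commit();            // make all recorded actions permanent
//   else
//     TPT.rollback(RestorePt); // undo everything recorded after RestorePt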
3574
3575namespace {
3576
3577/// A helper class for matching addressing modes.
3578///
3579/// This encapsulates the logic for matching the target-legal addressing modes.
3580class AddressingModeMatcher {
3581 SmallVectorImpl<Instruction *> &AddrModeInsts;
3582 const TargetLowering &TLI;
3583 const TargetRegisterInfo &TRI;
3584 const DataLayout &DL;
3585 const LoopInfo &LI;
3586 const std::function<const DominatorTree &()> getDTFn;
3587
3588 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3589 /// the memory instruction that we're computing this address for.
3590 Type *AccessTy;
3591 unsigned AddrSpace;
3592 Instruction *MemoryInst;
3593
3594 /// This is the addressing mode that we're building up. This is
3595 /// part of the return value of this addressing mode matching stuff.
3596 ExtAddrMode &AddrMode;
3597
3598 /// The instructions inserted by other CodeGenPrepare optimizations.
3599 const SetOfInstrs &InsertedInsts;
3600
3601 /// A map from the instructions to their type before promotion.
3602 InstrToOrigTy &PromotedInsts;
3603
3604 /// The ongoing transaction where every action should be registered.
3605 TypePromotionTransaction &TPT;
3606
3607 // A GEP which has too large offset to be folded into the addressing mode.
3608 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3609
3610 /// This is set to true when we should not do profitability checks.
3611 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3612 bool IgnoreProfitability;
3613
3614 /// True if we are optimizing for size.
3615 bool OptSize = false;
3616
3617 ProfileSummaryInfo *PSI;
3618 BlockFrequencyInfo *BFI;
3619
3620 AddressingModeMatcher(
3621 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3622 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3623 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3624 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3625 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3626 TypePromotionTransaction &TPT,
3627 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3628 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3629 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3630 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3631 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3632 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3633 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3634 IgnoreProfitability = false;
3635 }
3636
3637public:
3638 /// Find the maximal addressing mode that a load/store of V can fold,
3639 /// given an access type of AccessTy. This returns a list of involved
3640 /// instructions in AddrModeInsts.
3641 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3642 /// optimizations.
3643 /// \p PromotedInsts maps the instructions to their type before promotion.
3644 /// \p TPT The ongoing transaction where every action should be registered.
3645 static ExtAddrMode
3646 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3647 SmallVectorImpl<Instruction *> &AddrModeInsts,
3648 const TargetLowering &TLI, const LoopInfo &LI,
3649 const std::function<const DominatorTree &()> getDTFn,
3650 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3651 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3652 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3653 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3654 ExtAddrMode Result;
3655
3656 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3657 AccessTy, AS, MemoryInst, Result,
3658 InsertedInsts, PromotedInsts, TPT,
3659 LargeOffsetGEP, OptSize, PSI, BFI)
3660 .matchAddr(V, 0);
3661 (void)Success;
3662 assert(Success && "Couldn't select *anything*?");
3663 return Result;
3664 }
3665
3666private:
3667 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3668 bool matchAddr(Value *Addr, unsigned Depth);
3669 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3670 bool *MovedAway = nullptr);
3671 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3672 ExtAddrMode &AMBefore,
3673 ExtAddrMode &AMAfter);
3674 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3675 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3676 Value *PromotedOperand) const;
3677};
3678
3679class PhiNodeSet;
3680
3681/// An iterator for PhiNodeSet.
3682class PhiNodeSetIterator {
3683 PhiNodeSet *const Set;
3684 size_t CurrentIndex = 0;
3685
3686public:
3687 /// The constructor. Start should point to either a valid element, or be equal
3688 /// to the size of the underlying SmallVector of the PhiNodeSet.
3689 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3690 PHINode *operator*() const;
3691 PhiNodeSetIterator &operator++();
3692 bool operator==(const PhiNodeSetIterator &RHS) const;
3693 bool operator!=(const PhiNodeSetIterator &RHS) const;
3694};
3695
3696/// Keeps a set of PHINodes.
3697///
3698/// This is a minimal set implementation for a specific use case:
3699/// It is very fast when there are very few elements, but also provides good
3700/// performance when there are many. It is similar to SmallPtrSet, but also
3701/// provides iteration by insertion order, which is deterministic and stable
3702 /// across runs. It is also similar to SmallSetVector, but supports removing
3703 /// elements in O(1) time. This is achieved by not actually removing the element
3704 /// from the underlying vector, so it comes at the cost of using more memory,
3705 /// which is acceptable since PhiNodeSets are used as short-lived objects.
3706class PhiNodeSet {
3707 friend class PhiNodeSetIterator;
3708
3709 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3710 using iterator = PhiNodeSetIterator;
3711
3712 /// Keeps the elements in the order of their insertion in the underlying
3713 /// vector. To achieve constant time removal, it never deletes any element.
3714 SmallVector<PHINode *, 32> NodeList;
3715
3716 /// Keeps the elements in the underlying set implementation. This (and not the
3717 /// NodeList defined above) is the source of truth on whether an element
3718 /// is actually in the collection.
3719 MapType NodeMap;
3720
3721 /// Points to the first valid (not deleted) element when the set is not empty
3722 /// and the value is not zero. Equals the size of the underlying vector
3723 /// when the set is empty. When the value is 0, as in the beginning, the
3724 /// first element may or may not be valid.
3725 size_t FirstValidElement = 0;
3726
3727public:
3728 /// Inserts a new element to the collection.
3729 /// \returns true if the element is actually added, i.e. was not in the
3730 /// collection before the operation.
3731 bool insert(PHINode *Ptr) {
3732 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3733 NodeList.push_back(Ptr);
3734 return true;
3735 }
3736 return false;
3737 }
3738
3739 /// Removes the element from the collection.
3740 /// \returns whether the element is actually removed, i.e. was in the
3741 /// collection before the operation.
3742 bool erase(PHINode *Ptr) {
3743 if (NodeMap.erase(Ptr)) {
3744 SkipRemovedElements(FirstValidElement);
3745 return true;
3746 }
3747 return false;
3748 }
3749
3750 /// Removes all elements and clears the collection.
3751 void clear() {
3752 NodeMap.clear();
3753 NodeList.clear();
3754 FirstValidElement = 0;
3755 }
3756
3757 /// \returns an iterator that will iterate the elements in the order of
3758 /// insertion.
3759 iterator begin() {
3760 if (FirstValidElement == 0)
3761 SkipRemovedElements(FirstValidElement);
3762 return PhiNodeSetIterator(this, FirstValidElement);
3763 }
3764
3765 /// \returns an iterator that points to the end of the collection.
3766 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3767
3768 /// Returns the number of elements in the collection.
3769 size_t size() const { return NodeMap.size(); }
3770
3771 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3772 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3773
3774private:
3775 /// Updates the CurrentIndex so that it will point to a valid element.
3776 ///
3777 /// If the element of NodeList at CurrentIndex is valid, it does not
3778 /// change it. If there are no more valid elements, it updates CurrentIndex
3779 /// to point to the end of the NodeList.
3780 void SkipRemovedElements(size_t &CurrentIndex) {
3781 while (CurrentIndex < NodeList.size()) {
3782 auto it = NodeMap.find(NodeList[CurrentIndex]);
3783 // If the element has been deleted and added again later, NodeMap will
3784 // point to a different index, so CurrentIndex will still be invalid.
3785 if (it != NodeMap.end() && it->second == CurrentIndex)
3786 break;
3787 ++CurrentIndex;
3788 }
3789 }
3790};
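// Illustrative behaviour of PhiNodeSet (a sketch, not part of the pass): since
// erase() only drops the entry from NodeMap, NodeList keeps growing, but
// iteration skips the stale slots and preserves insertion order. 'P1'/'P2' are
// hypothetical PHINode pointers.
//
//   PhiNodeSet Set;
//   Set.insert(P1);          // returns true
//   Set.insert(P2);          // returns true
//   Set.insert(P1);          // returns false, already present
//   Set.erase(P1);           // O(1): removed from NodeMap only
//   for (PHINode *P : Set)   // visits P2 only, in insertion order
//     ;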
3791
3792PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3793 : Set(Set), CurrentIndex(Start) {}
3794
3795PHINode *PhiNodeSetIterator::operator*() const {
3796 assert(CurrentIndex < Set->NodeList.size() &&
3797 "PhiNodeSet access out of range");
3798 return Set->NodeList[CurrentIndex];
3799}
3800
3801PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3802 assert(CurrentIndex < Set->NodeList.size() &&
3803 "PhiNodeSet access out of range");
3804 ++CurrentIndex;
3805 Set->SkipRemovedElements(CurrentIndex);
3806 return *this;
3807}
3808
3809bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3810 return CurrentIndex == RHS.CurrentIndex;
3811}
3812
3813bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3814 return !((*this) == RHS);
3815}
3816
3817/// Keep track of simplification of Phi nodes.
3818 /// Accepts the set of all phi nodes and erases a phi node from this set
3819/// if it is simplified.
3820class SimplificationTracker {
3821 DenseMap<Value *, Value *> Storage;
3822 const SimplifyQuery &SQ;
3823 // Tracks newly created Phi nodes. The elements are iterated by insertion
3824 // order.
3825 PhiNodeSet AllPhiNodes;
3826 // Tracks newly created Select nodes.
3827 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3828
3829public:
3830 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
3831
3832 Value *Get(Value *V) {
3833 do {
3834 auto SV = Storage.find(V);
3835 if (SV == Storage.end())
3836 return V;
3837 V = SV->second;
3838 } while (true);
3839 }
3840
3841 Value *Simplify(Value *Val) {
3842 SmallVector<Value *, 32> WorkList;
3843 SmallPtrSet<Value *, 32> Visited;
3844 WorkList.push_back(Val);
3845 while (!WorkList.empty()) {
3846 auto *P = WorkList.pop_back_val();
3847 if (!Visited.insert(P).second)
3848 continue;
3849 if (auto *PI = dyn_cast<Instruction>(P))
3850 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
3851 for (auto *U : PI->users())
3852 WorkList.push_back(cast<Value>(U));
3853 Put(PI, V);
3854 PI->replaceAllUsesWith(V);
3855 if (auto *PHI = dyn_cast<PHINode>(PI))
3856 AllPhiNodes.erase(PHI);
3857 if (auto *Select = dyn_cast<SelectInst>(PI))
3858 AllSelectNodes.erase(Select);
3859 PI->eraseFromParent();
3860 }
3861 }
3862 return Get(Val);
3863 }
3864
3865 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
3866
3867 void ReplacePhi(PHINode *From, PHINode *To) {
3868 Value *OldReplacement = Get(From);
3869 while (OldReplacement != From) {
3870 From = To;
3871 To = dyn_cast<PHINode>(OldReplacement);
3872 OldReplacement = Get(From);
3873 }
3874 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
3875 Put(From, To);
3876 From->replaceAllUsesWith(To);
3877 AllPhiNodes.erase(From);
3878 From->eraseFromParent();
3879 }
3880
3881 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
3882
3883 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3884
3885 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3886
3887 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3888
3889 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3890
3891 void destroyNewNodes(Type *CommonType) {
3892 // For safe erasing, replace the uses with dummy value first.
3893 auto *Dummy = PoisonValue::get(CommonType);
3894 for (auto *I : AllPhiNodes) {
3895 I->replaceAllUsesWith(Dummy);
3896 I->eraseFromParent();
3897 }
3898 AllPhiNodes.clear();
3899 for (auto *I : AllSelectNodes) {
3900 I->replaceAllUsesWith(Dummy);
3901 I->eraseFromParent();
3902 }
3903 AllSelectNodes.clear();
3904 }
3905};
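// Sketch of how the tracker's Storage map behaves (illustrative only): Put()
// records "From was replaced by To", and Get() follows the chain of recorded
// replacements until it reaches a value that was never replaced. 'A', 'B', 'C'
// and 'D' are hypothetical values.
//
//   ST.Put(A, B);   // A simplified to B
//   ST.Put(B, C);   // later, B itself simplified to C
//   ST.Get(A);      // follows A -> B -> C and returns C
//   ST.Get(D);      // D was never replaced, returns D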
3906
3907/// A helper class for combining addressing modes.
3908class AddressingModeCombiner {
3909 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3910 typedef std::pair<PHINode *, PHINode *> PHIPair;
3911
3912private:
3913 /// The addressing modes we've collected.
3914 SmallVector<ExtAddrMode, 16> AddrModes;
3915
3916 /// The field in which the AddrModes differ, when we have more than one.
3917 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3918
3919 /// Are the AddrModes that we have all just equal to their original values?
3920 bool AllAddrModesTrivial = true;
3921
3922 /// Common Type for all different fields in addressing modes.
3923 Type *CommonType = nullptr;
3924
3925 /// SimplifyQuery for simplifyInstruction utility.
3926 const SimplifyQuery &SQ;
3927
3928 /// Original Address.
3929 Value *Original;
3930
3931 /// Common value among addresses
3932 Value *CommonValue = nullptr;
3933
3934public:
3935 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3936 : SQ(_SQ), Original(OriginalValue) {}
3937
3938 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
3939
3940 /// Get the combined AddrMode
3941 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
3942
3943 /// Add a new AddrMode if it's compatible with the AddrModes we already
3944 /// have.
3945 /// \return True iff we succeeded in doing so.
3946 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3947 // Take note of if we have any non-trivial AddrModes, as we need to detect
3948 // when all AddrModes are trivial as then we would introduce a phi or select
3949 // which just duplicates what's already there.
3950 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3951
3952 // If this is the first addrmode then everything is fine.
3953 if (AddrModes.empty()) {
3954 AddrModes.emplace_back(NewAddrMode);
3955 return true;
3956 }
3957
3958 // Figure out how different this is from the other address modes, which we
3959 // can do just by comparing against the first one given that we only care
3960 // about the cumulative difference.
3961 ExtAddrMode::FieldName ThisDifferentField =
3962 AddrModes[0].compare(NewAddrMode);
3963 if (DifferentField == ExtAddrMode::NoField)
3964 DifferentField = ThisDifferentField;
3965 else if (DifferentField != ThisDifferentField)
3966 DifferentField = ExtAddrMode::MultipleFields;
3967
3968 // If NewAddrMode differs in more than one dimension we cannot handle it.
3969 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3970
3971 // If Scale Field is different then we reject.
3972 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3973
3974 // We also must reject the case when the base offset is different and
3975 // the scale reg is not null; we cannot handle this case because the
3976 // merge of the different offsets would be used as the ScaleReg.
3977 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3978 !NewAddrMode.ScaledReg);
3979
3980 // We also must reject the case when the GV is different and a BaseReg is
3981 // installed, because we want to use the base reg as a merge of the GV values.
3982 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3983 !NewAddrMode.HasBaseReg);
3984
3985 // Even if NewAddrMode is the same, we still need to collect it because the
3986 // original value is different, and later we will need all original values
3987 // as anchors when finding the common Phi node.
3988 if (CanHandle)
3989 AddrModes.emplace_back(NewAddrMode);
3990 else
3991 AddrModes.clear();
3992
3993 return CanHandle;
3994 }
3995
3996 /// Combine the addressing modes we've collected into a single
3997 /// addressing mode.
3998 /// \return True iff we successfully combined them or we only had one so
3999 /// didn't need to combine them anyway.
4000 bool combineAddrModes() {
4001 // If we have no AddrModes then they can't be combined.
4002 if (AddrModes.size() == 0)
4003 return false;
4004
4005 // A single AddrMode can trivially be combined.
4006 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4007 return true;
4008
4009 // If the AddrModes we collected are all just equal to the value they are
4010 // derived from then combining them wouldn't do anything useful.
4011 if (AllAddrModesTrivial)
4012 return false;
4013
4014 if (!addrModeCombiningAllowed())
4015 return false;
4016
4017 // Build a map between <original value, basic block where we saw it> to
4018 // value of base register.
4019 // Bail out if there is no common type.
4020 FoldAddrToValueMapping Map;
4021 if (!initializeMap(Map))
4022 return false;
4023
4024 CommonValue = findCommon(Map);
4025 if (CommonValue)
4026 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4027 return CommonValue != nullptr;
4028 }
4029
4030private:
4031 /// `CommonValue` may be a placeholder inserted by us.
4032 /// If the placeholder is not used, we should remove this dead instruction.
4033 void eraseCommonValueIfDead() {
4034 if (CommonValue && CommonValue->getNumUses() == 0)
4035 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4036 CommonInst->eraseFromParent();
4037 }
4038
4039 /// Initialize Map with anchor values. For each address seen,
4040 /// we set the value of the differing field seen in that address.
4041 /// At the same time we find a common type for the differing field, which we
4042 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
4043 /// Return false if there is no common type found.
4044 bool initializeMap(FoldAddrToValueMapping &Map) {
4045 // Keep track of keys where the value is null. We will need to replace it
4046 // with constant null when we know the common type.
4047 SmallVector<Value *, 2> NullValue;
4048 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4049 for (auto &AM : AddrModes) {
4050 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4051 if (DV) {
4052 auto *Type = DV->getType();
4053 if (CommonType && CommonType != Type)
4054 return false;
4055 CommonType = Type;
4056 Map[AM.OriginalValue] = DV;
4057 } else {
4058 NullValue.push_back(AM.OriginalValue);
4059 }
4060 }
4061 assert(CommonType && "At least one non-null value must be!");
4062 for (auto *V : NullValue)
4063 Map[V] = Constant::getNullValue(CommonType);
4064 return true;
4065 }
4066
4067 /// We have a mapping between value A and another value B, where B was a field
4068 /// in the addressing mode represented by A. We also have an original value C
4069 /// representing the address we start with. Traversing from C through phis and
4070 /// selects, we ended up with the A's in the map. This utility function tries to
4071 /// find a value V which is a field in addressing mode C such that, traversing
4072 /// through phi nodes and selects, we end up at the corresponding values B in
4073 /// the map. The utility will create new Phis/Selects if needed.
4074 // The simple example looks as follows:
4075 // BB1:
4076 // p1 = b1 + 40
4077 // br cond BB2, BB3
4078 // BB2:
4079 // p2 = b2 + 40
4080 // br BB3
4081 // BB3:
4082 // p = phi [p1, BB1], [p2, BB2]
4083 // v = load p
4084 // Map is
4085 // p1 -> b1
4086 // p2 -> b2
4087 // Request is
4088 // p -> ?
4089 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4090 Value *findCommon(FoldAddrToValueMapping &Map) {
4091 // Tracks the simplification of newly created phi nodes. The reason we use
4092 // this mapping is because we will add new created Phi nodes in AddrToBase.
4093 // Simplification of Phi nodes is recursive, so some Phi node may
4094 // be simplified after we added it to AddrToBase. In reality this
4095 // simplification is possible only if original phi/selects were not
4096 // simplified yet.
4097 // Using this mapping we can find the current value in AddrToBase.
4098 SimplificationTracker ST(SQ);
4099
4100 // First step, DFS to create PHI nodes for all intermediate blocks.
4101 // Also fill traverse order for the second step.
4102 SmallVector<Value *, 32> TraverseOrder;
4103 InsertPlaceholders(Map, TraverseOrder, ST);
4104
4105 // Second Step, fill new nodes by merged values and simplify if possible.
4106 FillPlaceholders(Map, TraverseOrder, ST);
4107
4108 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4109 ST.destroyNewNodes(CommonType);
4110 return nullptr;
4111 }
4112
4113 // Now we'd like to match the new Phi nodes to existing ones.
4114 unsigned PhiNotMatchedCount = 0;
4115 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4116 ST.destroyNewNodes(CommonType);
4117 return nullptr;
4118 }
4119
4120 auto *Result = ST.Get(Map.find(Original)->second);
4121 if (Result) {
4122 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4123 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4124 }
4125 return Result;
4126 }
4127
4128 /// Try to match PHI node to Candidate.
4129 /// Matcher tracks the matched Phi nodes.
4130 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4131 SmallSetVector<PHIPair, 8> &Matcher,
4132 PhiNodeSet &PhiNodesToMatch) {
4133 SmallVector<PHIPair, 8> WorkList;
4134 Matcher.insert({PHI, Candidate});
4135 SmallSet<PHINode *, 8> MatchedPHIs;
4136 MatchedPHIs.insert(PHI);
4137 WorkList.push_back({PHI, Candidate});
4138 SmallSet<PHIPair, 8> Visited;
4139 while (!WorkList.empty()) {
4140 auto Item = WorkList.pop_back_val();
4141 if (!Visited.insert(Item).second)
4142 continue;
4143 // We iterate over all incoming values of the Phi to compare them.
4144 // If the values are different, both of them are Phis, the first one is a
4145 // Phi we added (subject to match), and both are in the same basic
4146 // block, then we can match our pair if those values match. So we state that
4147 // these values match and add them to the work list to verify that.
4148 for (auto *B : Item.first->blocks()) {
4149 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4150 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4151 if (FirstValue == SecondValue)
4152 continue;
4153
4154 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4155 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4156
4157 // If one of them is not a Phi, or
4158 // the first one is not a Phi node from the set we'd like to match, or
4159 // the Phi nodes are from different basic blocks, then
4160 // we will not be able to match.
4161 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4162 FirstPhi->getParent() != SecondPhi->getParent())
4163 return false;
4164
4165 // If we already matched them then continue.
4166 if (Matcher.count({FirstPhi, SecondPhi}))
4167 continue;
4168 // So the values are different and do not match. So we need them to
4169 // match. (But we register no more than one match per PHI node, so that
4170 // we won't later try to replace them twice.)
4171 if (MatchedPHIs.insert(FirstPhi).second)
4172 Matcher.insert({FirstPhi, SecondPhi});
4173 // But we must check it.
4174 WorkList.push_back({FirstPhi, SecondPhi});
4175 }
4176 }
4177 return true;
4178 }
4179
4180 /// For the given set of PHI nodes (in the SimplificationTracker) try
4181 /// to find their equivalents.
4182 /// Returns false if this matching fails and creation of new Phi is disabled.
4183 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4184 unsigned &PhiNotMatchedCount) {
4185 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4186 // order, so the replacements (ReplacePhi) are also done in a deterministic
4187 // order.
4188 SmallSetVector<PHIPair, 8> Matched;
4189 SmallPtrSet<PHINode *, 8> WillNotMatch;
4190 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4191 while (PhiNodesToMatch.size()) {
4192 PHINode *PHI = *PhiNodesToMatch.begin();
4193
4194 // Add ourselves; if no Phi node in the basic block matches, we do not match.
4195 WillNotMatch.clear();
4196 WillNotMatch.insert(PHI);
4197
4198 // Traverse all Phis until we find an equivalent or fail to do so.
4199 bool IsMatched = false;
4200 for (auto &P : PHI->getParent()->phis()) {
4201 // Skip new Phi nodes.
4202 if (PhiNodesToMatch.count(&P))
4203 continue;
4204 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4205 break;
4206 // If it does not match, collect all Phi nodes from the matcher.
4207 // If we end up with no match, then all these Phi nodes will not match
4208 // later.
4209 for (auto M : Matched)
4210 WillNotMatch.insert(M.first);
4211 Matched.clear();
4212 }
4213 if (IsMatched) {
4214 // Replace all matched values and erase them.
4215 for (auto MV : Matched)
4216 ST.ReplacePhi(MV.first, MV.second);
4217 Matched.clear();
4218 continue;
4219 }
4220 // If we are not allowed to create new nodes then bail out.
4221 if (!AllowNewPhiNodes)
4222 return false;
4223 // Just remove all seen values in matcher. They will not match anything.
4224 PhiNotMatchedCount += WillNotMatch.size();
4225 for (auto *P : WillNotMatch)
4226 PhiNodesToMatch.erase(P);
4227 }
4228 return true;
4229 }
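// Illustrative example for the matching above (a sketch with hypothetical
// value names): if the block already contains
//   %old      = phi i64 [ %b1, %BB1 ], [ %b2, %BB2 ]
// and the combiner created
//   %sunk_phi = phi i64 [ %b1, %BB1 ], [ %b2, %BB2 ]
// then MatchPhiSet detects that %sunk_phi is structurally equivalent to %old,
// and ReplacePhi() folds the newly created node into the existing one instead
// of keeping a duplicate.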
4230 /// Fill the placeholders with values from predecessors and simplify them.
4231 void FillPlaceholders(FoldAddrToValueMapping &Map,
4232 SmallVectorImpl<Value *> &TraverseOrder,
4233 SimplificationTracker &ST) {
4234 while (!TraverseOrder.empty()) {
4235 Value *Current = TraverseOrder.pop_back_val();
4236 assert(Map.contains(Current) && "No node to fill!!!");
4237 Value *V = Map[Current];
4238
4239 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4240 // CurrentValue also must be Select.
4241 auto *CurrentSelect = cast<SelectInst>(Current);
4242 auto *TrueValue = CurrentSelect->getTrueValue();
4243 assert(Map.contains(TrueValue) && "No True Value!");
4244 Select->setTrueValue(ST.Get(Map[TrueValue]));
4245 auto *FalseValue = CurrentSelect->getFalseValue();
4246 assert(Map.contains(FalseValue) && "No False Value!");
4247 Select->setFalseValue(ST.Get(Map[FalseValue]));
4248 } else {
4249 // Must be a Phi node then.
4250 auto *PHI = cast<PHINode>(V);
4251 // Fill the Phi node with values from predecessors.
4252 for (auto *B : predecessors(PHI->getParent())) {
4253 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4254 assert(Map.contains(PV) && "No predecessor Value!");
4255 PHI->addIncoming(ST.Get(Map[PV]), B);
4256 }
4257 }
4258 Map[Current] = ST.Simplify(V);
4259 }
4260 }
4261
4262 /// Starting from the original value, recursively iterates over the def-use
4263 /// chain up to known ending values represented in a map. For each traversed
4264 /// phi/select, inserts a placeholder Phi or Select.
4265 /// Reports all newly created Phi/Select nodes by adding them to a set.
4266 /// Also reports the order in which the values have been traversed.
4267 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4268 SmallVectorImpl<Value *> &TraverseOrder,
4269 SimplificationTracker &ST) {
4270 SmallVector<Value *, 32> Worklist;
4271 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4272 "Address must be a Phi or Select node");
4273 auto *Dummy = PoisonValue::get(CommonType);
4274 Worklist.push_back(Original);
4275 while (!Worklist.empty()) {
4276 Value *Current = Worklist.pop_back_val();
4277 // If it is already visited or it is an ending value, then skip it.
4278 if (Map.contains(Current))
4279 continue;
4280 TraverseOrder.push_back(Current);
4281
4282 // CurrentValue must be a Phi node or select. All others must be covered
4283 // by anchors.
4284 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4285 // Is it OK to get metadata from OrigSelect?!
4286 // Create a Select placeholder with dummy value.
4287 SelectInst *Select =
4288 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4289 CurrentSelect->getName(),
4290 CurrentSelect->getIterator(), CurrentSelect);
4291 Map[Current] = Select;
4292 ST.insertNewSelect(Select);
4293 // We are interested in True and False values.
4294 Worklist.push_back(CurrentSelect->getTrueValue());
4295 Worklist.push_back(CurrentSelect->getFalseValue());
4296 } else {
4297 // It must be a Phi node then.
4298 PHINode *CurrentPhi = cast<PHINode>(Current);
4299 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4300 PHINode *PHI =
4301 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4302 Map[Current] = PHI;
4303 ST.insertNewPhi(PHI);
4304 append_range(Worklist, CurrentPhi->incoming_values());
4305 }
4306 }
4307 }
4308
4309 bool addrModeCombiningAllowed() {
4310 if (DisableComplexAddrModes)
4311 return false;
4312 switch (DifferentField) {
4313 default:
4314 return false;
4315 case ExtAddrMode::BaseRegField:
4316 return AddrSinkCombineBaseReg;
4317 case ExtAddrMode::BaseGVField:
4318 return AddrSinkCombineBaseGV;
4319 case ExtAddrMode::BaseOffsField:
4320 return AddrSinkCombineBaseOffs;
4321 case ExtAddrMode::ScaledRegField:
4322 return AddrSinkCombineScaledReg;
4323 }
4324 }
4325};
4326} // end anonymous namespace
4327
4328/// Try adding ScaleReg*Scale to the current addressing mode.
4329/// Return true and update AddrMode if this addr mode is legal for the target,
4330/// false if not.
4331bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4332 unsigned Depth) {
4333 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4334 // mode. Just process that directly.
4335 if (Scale == 1)
4336 return matchAddr(ScaleReg, Depth);
4337
4338 // If the scale is 0, it takes nothing to add this.
4339 if (Scale == 0)
4340 return true;
4341
4342 // If we already have a scale of this value, we can add to it, otherwise, we
4343 // need an available scale field.
4344 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4345 return false;
4346
4347 ExtAddrMode TestAddrMode = AddrMode;
4348
4349 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4350 // [A+B + A*7] -> [B+A*8].
4351 TestAddrMode.Scale += Scale;
4352 TestAddrMode.ScaledReg = ScaleReg;
4353
4354 // If the new address isn't legal, bail out.
4355 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4356 return false;
4357
4358 // It was legal, so commit it.
4359 AddrMode = TestAddrMode;
4360
4361 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4362 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4363 // X*Scale + C*Scale to addr mode. If we found an available IV increment, do
4364 // not go any further: we can reuse it and cannot eliminate it.
4365 ConstantInt *CI = nullptr;
4366 Value *AddLHS = nullptr;
4367 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4368 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4369 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4370 TestAddrMode.InBounds = false;
4371 TestAddrMode.ScaledReg = AddLHS;
4372 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4373
4374 // If this addressing mode is legal, commit it and remember that we folded
4375 // this instruction.
4376 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4377 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4378 AddrMode = TestAddrMode;
4379 return true;
4380 }
4381 // Restore status quo.
4382 TestAddrMode = AddrMode;
4383 }
4384
4385 // If this is an add recurrence with a constant step, return the increment
4386 // instruction and the canonicalized step.
4387 auto GetConstantStep =
4388 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4389 auto *PN = dyn_cast<PHINode>(V);
4390 if (!PN)
4391 return std::nullopt;
4392 auto IVInc = getIVIncrement(PN, &LI);
4393 if (!IVInc)
4394 return std::nullopt;
4395 // TODO: The result of the intrinsics above is two's complement. However, when
4396 // the IV inc is expressed as add or sub, iv.next is potentially a poison value.
4397 // If it has nuw or nsw flags, we need to make sure that these flags are
4398 // inferrable at the point of the memory instruction. Otherwise we are replacing
4399 // a well-defined two's complement computation with poison. Currently, to avoid
4400 // the potentially complex analysis needed to prove this, we reject such cases.
4401 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4402 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4403 return std::nullopt;
4404 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4405 return std::make_pair(IVInc->first, ConstantStep->getValue());
4406 return std::nullopt;
4407 };
4408
4409 // Try to account for the following special case:
4410 // 1. ScaleReg is an inductive variable;
4411 // 2. We use it with non-zero offset;
4412 // 3. IV's increment is available at the point of memory instruction.
4413 //
4414 // In this case, we may reuse the IV increment instead of the IV Phi to
4415 // achieve the following advantages:
4416 // 1. If IV step matches the offset, we will have no need in the offset;
4417 // 2. Even if they don't match, we will reduce the overlap of living IV
4418 // and IV increment, that will potentially lead to better register
4419 // assignment.
4420 if (AddrMode.BaseOffs) {
4421 if (auto IVStep = GetConstantStep(ScaleReg)) {
4422 Instruction *IVInc = IVStep->first;
4423 // The following assert is important to ensure a lack of infinite loops.
4424 // This transform is (intentionally) the inverse of the one just above.
4425 // If they don't agree on the definition of an increment, we'd alternate
4426 // back and forth indefinitely.
4427 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4428 APInt Step = IVStep->second;
4429 APInt Offset = Step * AddrMode.Scale;
4430 if (Offset.isSignedIntN(64)) {
4431 TestAddrMode.InBounds = false;
4432 TestAddrMode.ScaledReg = IVInc;
4433 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4434 // If this addressing mode is legal, commit it..
4435 // (Note that we defer the (expensive) domtree base legality check
4436 // to the very last possible point.)
4437 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4438 getDTFn().dominates(IVInc, MemoryInst)) {
4439 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4440 AddrMode = TestAddrMode;
4441 return true;
4442 }
4443 // Restore status quo.
4444 TestAddrMode = AddrMode;
4445 }
4446 }
4447 }
4448
4449 // Otherwise, just return what we have.
4450 return true;
4451}
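// Worked example for the ScaleReg = X + C fold above (illustrative; %x and
// %base are hypothetical values): with Scale == 4 and ScaleReg == (%x + 7),
// the tested mode becomes
//   ScaledReg = %x, Scale = 4, BaseOffs += 7 * 4 = 28
// i.e. [%base + (%x + 7)*4] is re-expressed as [%base + %x*4 + 28], and the
// new form is only committed if the target reports it as a legal addressing
// mode.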
4452
4453/// This is a little filter, which returns true if an addressing computation
4454/// involving I might be folded into a load/store accessing it.
4455/// This doesn't need to be perfect, but needs to accept at least
4456/// the set of instructions that MatchOperationAddr can.
4457 static bool MightBeFoldableInst(Instruction *I) {
4458 switch (I->getOpcode()) {
4459 case Instruction::BitCast:
4460 case Instruction::AddrSpaceCast:
4461 // Don't touch identity bitcasts.
4462 if (I->getType() == I->getOperand(0)->getType())
4463 return false;
4464 return I->getType()->isIntOrPtrTy();
4465 case Instruction::PtrToInt:
4466 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4467 return true;
4468 case Instruction::IntToPtr:
4469 // We know the input is intptr_t, so this is foldable.
4470 return true;
4471 case Instruction::Add:
4472 return true;
4473 case Instruction::Mul:
4474 case Instruction::Shl:
4475 // Can only handle X*C and X << C.
4476 return isa<ConstantInt>(I->getOperand(1));
4477 case Instruction::GetElementPtr:
4478 return true;
4479 default:
4480 return false;
4481 }
4482}
4483
4484/// Check whether or not \p Val is a legal instruction for \p TLI.
4485/// \note \p Val is assumed to be the product of some type promotion.
4486/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4487/// to be legal, as the non-promoted value would have had the same state.
4488 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4489 const DataLayout &DL, Value *Val) {
4490 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4491 if (!PromotedInst)
4492 return false;
4493 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4494 // If the ISDOpcode is undefined, it was undefined before the promotion.
4495 if (!ISDOpcode)
4496 return true;
4497 // Otherwise, check if the promoted instruction is legal or not.
4498 return TLI.isOperationLegalOrCustom(
4499 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4500}
4501
4502namespace {
4503
4504 /// Helper class to perform type promotion.
4505class TypePromotionHelper {
4506 /// Utility function to add a promoted instruction \p ExtOpnd to
4507 /// \p PromotedInsts and record the type of extension we have seen.
4508 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4509 Instruction *ExtOpnd, bool IsSExt) {
4510 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4511 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4512 if (It != PromotedInsts.end()) {
4513 // If the new extension is the same as the original, the information in
4514 // PromotedInsts[ExtOpnd] is still correct.
4515 if (It->second.getInt() == ExtTy)
4516 return;
4517
4518 // Now that the new extension is different from the old extension, we make
4519 // the type information invalid by setting the extension type to
4520 // BothExtension.
4521 ExtTy = BothExtension;
4522 }
4523 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4524 }
4525
4526 /// Utility function to query the original type of instruction \p Opnd
4527 /// with a matched extension type. If the extension doesn't match, we
4528 /// cannot use the information we had on the original type.
4529 /// BothExtension doesn't match any extension type.
4530 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4531 Instruction *Opnd, bool IsSExt) {
4532 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4533 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4534 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4535 return It->second.getPointer();
4536 return nullptr;
4537 }
4538
4539 /// Utility function to check whether or not a sign or zero extension
4540 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4541 /// either using the operands of \p Inst or promoting \p Inst.
4542 /// The type of the extension is defined by \p IsSExt.
4543 /// In other words, check if:
4544 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4545 /// #1 Promotion applies:
4546 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4547 /// #2 Operand reuses:
4548 /// ext opnd1 to ConsideredExtType.
4549 /// \p PromotedInsts maps the instructions to their type before promotion.
4550 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4551 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4552
4553 /// Utility function to determine if \p OpIdx should be promoted when
4554 /// promoting \p Inst.
4555 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4556 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4557 }
4558
4559 /// Utility function to promote the operand of \p Ext when this
4560 /// operand is a promotable trunc or sext or zext.
4561 /// \p PromotedInsts maps the instructions to their type before promotion.
4562 /// \p CreatedInstsCost[out] contains the cost of all instructions
4563 /// created to promote the operand of Ext.
4564 /// Newly added extensions are inserted in \p Exts.
4565 /// Newly added truncates are inserted in \p Truncs.
4566 /// Should never be called directly.
4567 /// \return The promoted value which is used instead of Ext.
4568 static Value *promoteOperandForTruncAndAnyExt(
4569 Instruction *Ext, TypePromotionTransaction &TPT,
4570 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4571 SmallVectorImpl<Instruction *> *Exts,
4572 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4573
4574 /// Utility function to promote the operand of \p Ext when this
4575 /// operand is promotable and is not a supported trunc or sext.
4576 /// \p PromotedInsts maps the instructions to their type before promotion.
4577 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4578 /// created to promote the operand of Ext.
4579 /// Newly added extensions are inserted in \p Exts.
4580 /// Newly added truncates are inserted in \p Truncs.
4581 /// Should never be called directly.
4582 /// \return The promoted value which is used instead of Ext.
4583 static Value *promoteOperandForOther(Instruction *Ext,
4584 TypePromotionTransaction &TPT,
4585 InstrToOrigTy &PromotedInsts,
4586 unsigned &CreatedInstsCost,
4587 SmallVectorImpl<Instruction *> *Exts,
4588 SmallVectorImpl<Instruction *> *Truncs,
4589 const TargetLowering &TLI, bool IsSExt);
4590
4591 /// \see promoteOperandForOther.
4592 static Value *signExtendOperandForOther(
4593 Instruction *Ext, TypePromotionTransaction &TPT,
4594 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4595 SmallVectorImpl<Instruction *> *Exts,
4596 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4597 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4598 Exts, Truncs, TLI, true);
4599 }
4600
4601 /// \see promoteOperandForOther.
4602 static Value *zeroExtendOperandForOther(
4603 Instruction *Ext, TypePromotionTransaction &TPT,
4604 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4605 SmallVectorImpl<Instruction *> *Exts,
4606 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4607 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4608 Exts, Truncs, TLI, false);
4609 }
4610
4611public:
4612 /// Type for the utility function that promotes the operand of Ext.
4613 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4614 InstrToOrigTy &PromotedInsts,
4615 unsigned &CreatedInstsCost,
4616 SmallVectorImpl<Instruction *> *Exts,
4617 SmallVectorImpl<Instruction *> *Truncs,
4618 const TargetLowering &TLI);
4619
4620 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4621 /// action to promote the operand of \p Ext instead of using Ext.
4622 /// \return NULL if no promotable action is possible with the current
4623 /// sign extension.
4624 /// \p InsertedInsts keeps track of all the instructions inserted by the
4625 /// other CodeGenPrepare optimizations. This information is important
4626 /// because we do not want to promote these instructions as CodeGenPrepare
4627 /// will reinsert them later, thus creating an infinite loop: create/remove.
4628 /// \p PromotedInsts maps the instructions to their type before promotion.
4629 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4630 const TargetLowering &TLI,
4631 const InstrToOrigTy &PromotedInsts);
4632};
4633
4634} // end anonymous namespace
4635
4636bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4637 Type *ConsideredExtType,
4638 const InstrToOrigTy &PromotedInsts,
4639 bool IsSExt) {
4640 // The promotion helper does not know how to deal with vector types yet.
4641 // To be able to fix that, we would need to fix the places where we
4642 // statically extend, e.g., constants and such.
4643 if (Inst->getType()->isVectorTy())
4644 return false;
4645
4646 // We can always get through zext.
4647 if (isa<ZExtInst>(Inst))
4648 return true;
4649
4650 // sext(sext) is ok too.
4651 if (IsSExt && isa<SExtInst>(Inst))
4652 return true;
4653
4654 // We can get through binary operator, if it is legal. In other words, the
4655 // binary operator must have a nuw or nsw flag.
4656 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4657 if (isa<OverflowingBinaryOperator>(BinOp) &&
4658 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4659 (IsSExt && BinOp->hasNoSignedWrap())))
4660 return true;
4661
4662 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4663 if ((Inst->getOpcode() == Instruction::And ||
4664 Inst->getOpcode() == Instruction::Or))
4665 return true;
4666
4667 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4668 if (Inst->getOpcode() == Instruction::Xor) {
4669 // Make sure it is not a NOT.
4670 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4671 if (!Cst->getValue().isAllOnes())
4672 return true;
4673 }
4674
4675 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4676 // It may change a poisoned value into a regular value, like
4677 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4678 // (poisoned value on the left, regular value on the right)
4679 // It should be OK since undef covers valid value.
4680 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4681 return true;
4682
4683 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4684 // It may change a poisoned value into a regular value, like
4685 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4686 // (poisoned value on the left, regular value on the right)
4687 // It should be OK since undef covers valid value.
4688 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4689 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4690 if (ExtInst->hasOneUse()) {
4691 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4692 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4693 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4694 if (Cst &&
4695 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4696 return true;
4697 }
4698 }
4699 }
4700
4701 // Check if we can do the following simplification.
4702 // ext(trunc(opnd)) --> ext(opnd)
4703 if (!isa<TruncInst>(Inst))
4704 return false;
4705
4706 Value *OpndVal = Inst->getOperand(0);
4707 // Check if we can use this operand in the extension.
4708 // If the type is larger than the result type of the extension, we cannot.
4709 if (!OpndVal->getType()->isIntegerTy() ||
4710 OpndVal->getType()->getIntegerBitWidth() >
4711 ConsideredExtType->getIntegerBitWidth())
4712 return false;
4713
4714 // If the operand of the truncate is not an instruction, we will not have
4715 // any information on the dropped bits.
4716 // (Actually we could for constants, but it is not worth the extra logic.)
4717 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4718 if (!Opnd)
4719 return false;
4720
4721 // Check if the source of the truncate is narrow enough.
4722 // I.e., check that the trunc just drops extended bits of the same kind as
4723 // the extension.
4724 // #1 get the type of the operand and check the kind of the extended bits.
4725 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4726 if (!OpndType) {
4727 if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4728 OpndType = Opnd->getOperand(0)->getType();
4729 else
4730 return false;
4731 }
4732
4733 // #2 check that the truncate just drops extended bits.
4734 return Inst->getType()->getIntegerBitWidth() >=
4735 OpndType->getIntegerBitWidth();
4736}
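// Editor's illustrative sketch (not from the LLVM sources) of a pattern that
// satisfies the trunc checks above, assuming IsSExt and an i64 extension:
//   %w = sext i8 %b to i64
//   %t = trunc i64 %w to i32
//   %e = sext i32 %t to i64   ; the Ext being considered
// Here the trunc only drops bits that were produced by a sign extension of the
// same kind, so canGetThrough returns true for %t.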
4737
4738TypePromotionHelper::Action TypePromotionHelper::getAction(
4739 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4740 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4741 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4742 "Unexpected instruction type");
4743 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4744 Type *ExtTy = Ext->getType();
4745 bool IsSExt = isa<SExtInst>(Ext);
4746 // If the operand of the extension is not an instruction, we cannot
4747 // get through.
4748 // If it is, check whether we can get through it.
4749 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4750 return nullptr;
4751
4752 // Do not promote if the operand has been added by codegenprepare.
4753 // Otherwise, it means we are undoing an optimization that is likely to be
4754 // redone, thus causing a potential infinite loop.
4755 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4756 return nullptr;
4757
4758 // SExt, ZExt, or Trunc instructions.
4759 // Return the related handler.
4760 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4761 isa<ZExtInst>(ExtOpnd))
4762 return promoteOperandForTruncAndAnyExt;
4763
4764 // Regular instruction.
4765 // Abort early if we will have to insert non-free instructions.
4766 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4767 return nullptr;
4768 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4769}
4770
4771Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4772 Instruction *SExt, TypePromotionTransaction &TPT,
4773 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4774 SmallVectorImpl<Instruction *> *Exts,
4775 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4776 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4777 // get through it and this method should not be called.
4778 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4779 Value *ExtVal = SExt;
4780 bool HasMergedNonFreeExt = false;
4781 if (isa<ZExtInst>(SExtOpnd)) {
4782 // Replace s|zext(zext(opnd))
4783 // => zext(opnd).
4784 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4785 Value *ZExt =
4786 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4787 TPT.replaceAllUsesWith(SExt, ZExt);
4788 TPT.eraseInstruction(SExt);
4789 ExtVal = ZExt;
4790 } else {
4791 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4792 // => z|sext(opnd).
4793 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4794 }
4795 CreatedInstsCost = 0;
4796
4797 // Remove dead code.
4798 if (SExtOpnd->use_empty())
4799 TPT.eraseInstruction(SExtOpnd);
4800
4801 // Check if the extension is still needed.
4802 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4803 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4804 if (ExtInst) {
4805 if (Exts)
4806 Exts->push_back(ExtInst);
4807 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4808 }
4809 return ExtVal;
4810 }
4811
4812 // At this point we have: ext ty opnd to ty.
4813 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4814 Value *NextVal = ExtInst->getOperand(0);
4815 TPT.eraseInstruction(ExtInst, NextVal);
4816 return NextVal;
4817}
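// Editor's illustrative sketch (not from the LLVM sources): given
//   %z = zext i8 %v to i16
//   %s = sext i16 %z to i32
// the zext branch above rewrites %s into a single "zext i8 %v to i32"; if the
// rebuilt extension ends up extending a value to its own type, the final block
// removes it and returns its operand instead.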
4818
4819Value *TypePromotionHelper::promoteOperandForOther(
4820 Instruction *Ext, TypePromotionTransaction &TPT,
4821 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4822 SmallVectorImpl<Instruction *> *Exts,
4823 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4824 bool IsSExt) {
4825 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4826 // get through it and this method should not be called.
4827 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4828 CreatedInstsCost = 0;
4829 if (!ExtOpnd->hasOneUse()) {
4830 // ExtOpnd will be promoted.
4831 // All its uses, but Ext, will need to use a truncated value of the
4832 // promoted version.
4833 // Create the truncate now.
4834 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4835 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4836 // Insert it just after the definition.
4837 ITrunc->moveAfter(ExtOpnd);
4838 if (Truncs)
4839 Truncs->push_back(ITrunc);
4840 }
4841
4842 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4843 // Restore the operand of Ext (which has been replaced by the previous call
4844 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4845 TPT.setOperand(Ext, 0, ExtOpnd);
4846 }
4847
4848 // Get through the Instruction:
4849 // 1. Update its type.
4850 // 2. Replace the uses of Ext by Inst.
4851 // 3. Extend each operand that needs to be extended.
4852
4853 // Remember the original type of the instruction before promotion.
4854 // This is useful for knowing whether the high bits are sign- or zero-extended.
4855 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4856 // Step #1.
4857 TPT.mutateType(ExtOpnd, Ext->getType());
4858 // Step #2.
4859 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4860 // Step #3.
4861 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4862 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4863 ++OpIdx) {
4864 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4865 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4866 !shouldExtOperand(ExtOpnd, OpIdx)) {
4867 LLVM_DEBUG(dbgs() << "No need to propagate\n");
4868 continue;
4869 }
4870 // Check if we can statically extend the operand.
4871 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4872 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4873 LLVM_DEBUG(dbgs() << "Statically extend\n");
4874 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4875 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4876 : Cst->getValue().zext(BitWidth);
4877 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4878 continue;
4879 }
4880 // UndefValues are typed, so we have to statically extend them.
4881 if (isa<UndefValue>(Opnd)) {
4882 LLVM_DEBUG(dbgs() << "Statically extend\n");
4883 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4884 continue;
4885 }
4886
4887 // Otherwise we have to explicitly extend the operand with a sext or zext.
4888 Value *ValForExtOpnd = IsSExt
4889 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
4890 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
4891 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4892 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
4893 if (!InstForExtOpnd)
4894 continue;
4895
4896 if (Exts)
4897 Exts->push_back(InstForExtOpnd);
4898
4899 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
4900 }
4901 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4902 TPT.eraseInstruction(Ext);
4903 return ExtOpnd;
4904}
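// Editor's illustrative sketch (not from the LLVM sources): for
//   %a = add nsw i32 %x, 42
//   %s = sext i32 %a to i64
// the steps above mutate %a to i64, erase %s, sign-extend %x with a newly
// created sext, fold the constant 42 to i64 statically, and, if %a had other
// users, feed them through a trunc inserted right after the promoted %a.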
4905
4906/// Check whether or not promoting an instruction to a wider type is profitable.
4907/// \p NewCost gives the cost of extension instructions created by the
4908/// promotion.
4909/// \p OldCost gives the cost of extension instructions before the promotion
4910/// plus the number of instructions that have been
4911 /// matched in the addressing mode thanks to the promotion.
4912/// \p PromotedOperand is the value that has been promoted.
4913/// \return True if the promotion is profitable, false otherwise.
4914bool AddressingModeMatcher::isPromotionProfitable(
4915 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4916 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4917 << '\n');
4918 // If the cost of the new extensions is greater than the cost of the
4919 // old extension plus what we folded, the promotion is not
4920 // profitable.
4921 if (NewCost > OldCost)
4922 return false;
4923 if (NewCost < OldCost)
4924 return true;
4925 // The promotion is neutral but it may help folding the sign extension in
4926 // loads for instance.
4927 // Check that we did not create an illegal instruction.
4928 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4929}
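// Editor's illustrative sketch (not from the LLVM sources): if the promotion
// created one non-free extension (NewCost == 1) but allowed an extension plus
// one more instruction to be folded into the addressing mode (OldCost == 2),
// it is accepted; when NewCost == OldCost it is only kept if the promoted
// instruction is still legal for the target.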
4930
4931/// Given an instruction or constant expr, see if we can fold the operation
4932/// into the addressing mode. If so, update the addressing mode and return
4933/// true, otherwise return false without modifying AddrMode.
4934 /// If \p MovedAway is not NULL, it reports whether \p AddrInst was folded
4935 /// into the addressing mode or moved away on success.
4936 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
4937 /// mode because it has been moved away.
4938 /// Thus AddrInst must not be added to the matched instructions.
4939/// This state can happen when AddrInst is a sext, since it may be moved away.
4940/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4941/// not be referenced anymore.
4942bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4943 unsigned Depth,
4944 bool *MovedAway) {
4945 // Avoid exponential behavior on extremely deep expression trees.
4946 if (Depth >= 5)
4947 return false;
4948
4949 // By default, all matched instructions stay in place.
4950 if (MovedAway)
4951 *MovedAway = false;
4952
4953 switch (Opcode) {
4954 case Instruction::PtrToInt:
4955 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4956 return matchAddr(AddrInst->getOperand(0), Depth);
4957 case Instruction::IntToPtr: {
4958 auto AS = AddrInst->getType()->getPointerAddressSpace();
4959 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4960 // This inttoptr is a no-op if the integer type is pointer sized.
4961 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4962 return matchAddr(AddrInst->getOperand(0), Depth);
4963 return false;
4964 }
4965 case Instruction::BitCast:
4966 // BitCast is always a noop, and we can handle it as long as it is
4967 // int->int or pointer->pointer (we don't want int<->fp or something).
4968 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4969 // Don't touch identity bitcasts. These were probably put here by LSR,
4970 // and we don't want to mess around with them. Assume it knows what it
4971 // is doing.
4972 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4973 return matchAddr(AddrInst->getOperand(0), Depth);
4974 return false;
4975 case Instruction::AddrSpaceCast: {
4976 unsigned SrcAS =
4977 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4978 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4979 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
4980 return matchAddr(AddrInst->getOperand(0), Depth);
4981 return false;
4982 }
4983 case Instruction::Add: {
4984 // Check to see if we can merge in one operand, then the other. If so, we
4985 // win.
4986 ExtAddrMode BackupAddrMode = AddrMode;
4987 unsigned OldSize = AddrModeInsts.size();
4988 // Start a transaction at this point.
4989 // The LHS may match but not the RHS.
4990 // Therefore, we need a higher level restoration point to undo a partially
4991 // matched operation.
4992 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4993 TPT.getRestorationPoint();
4994
4995 // Try to match an integer constant second to increase its chance of ending
4996 // up in `BaseOffs` and decrease its chance of ending up in `BaseReg`.
4997 int First = 0, Second = 1;
4998 if (isa<ConstantInt>(AddrInst->getOperand(First))
4999 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5000 std::swap(First, Second);
5001 AddrMode.InBounds = false;
5002 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5003 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5004 return true;
5005
5006 // Restore the old addr mode info.
5007 AddrMode = BackupAddrMode;
5008 AddrModeInsts.resize(OldSize);
5009 TPT.rollback(LastKnownGood);
5010
5011 // Otherwise this was over-aggressive. Try merging operands in the opposite
5012 // order.
5013 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5014 matchAddr(AddrInst->getOperand(First), Depth + 1))
5015 return true;
5016
5017 // Otherwise we definitely can't merge the ADD in.
5018 AddrMode = BackupAddrMode;
5019 AddrModeInsts.resize(OldSize);
5020 TPT.rollback(LastKnownGood);
5021 break;
5022 }
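// Editor's illustrative sketch (not from the LLVM sources): for an address
// such as "%p = add i64 %base, 24", the constant is tried second so that 24
// lands in AddrMode.BaseOffs while %base becomes the base register; if either
// operand fails to match, the rollback above restores the pre-Add state before
// the opposite order is attempted.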
5023 // case Instruction::Or:
5024 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5025 // break;
5026 case Instruction::Mul:
5027 case Instruction::Shl: {
5028 // Can only handle X*C and X << C.
5029 AddrMode.InBounds = false;
5030 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5031 if (!RHS || RHS->getBitWidth() > 64)
5032 return false;
5033 int64_t Scale = Opcode == Instruction::Shl
5034 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5035 : RHS->getSExtValue();
5036
5037 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5038 }
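// Editor's illustrative sketch (not from the LLVM sources):
// "%p = shl i64 %idx, 3" is handled like "%p = mul i64 %idx, 8", i.e. %idx is
// matched as a scaled index with Scale == 8.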
5039 case Instruction::GetElementPtr: {
5040 // Scan the GEP. We check whether it contains constant offsets and at most
5041 // one variable offset.
5042 int VariableOperand = -1;
5043 unsigned VariableScale = 0;
5044
5045 int64_t ConstantOffset = 0;
5046 gep_type_iterator GTI = gep_type_begin(AddrInst);
5047 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5048 if (StructType *STy = GTI.getStructTypeOrNull()) {
5049 const StructLayout *SL = DL.getStructLayout(STy);
5050 unsigned Idx =
5051 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5052 ConstantOffset += SL->getElementOffset(Idx);
5053 } else {
5054 TypeSize TS = GTI.getSequentialElementStride(DL);
5055 if (TS.isNonZero()) {
5056 // The optimisations below currently only work for fixed offsets.
5057 if (TS.isScalable())
5058 return false;
5059 int64_t TypeSize = TS.getFixedValue();
5060 if (ConstantInt *CI =
5061 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5062 const APInt &CVal = CI->getValue();
5063 if (CVal.getSignificantBits() <= 64) {
5064 ConstantOffset += CVal.getSExtValue() * TypeSize;
5065 continue;
5066 }
5067 }
5068 // We only allow one variable index at the moment.
5069 if (VariableOperand != -1)
5070 return false;
5071
5072 // Remember the variable index.
5073 VariableOperand = i;
5074 VariableScale = TypeSize;
5075 }
5076 }
5077 }
5078
5079 // A common case is for the GEP to only do a constant offset. In this case,
5080 // just add it to the disp field and check validity.
5081 if (VariableOperand == -1) {
5082 AddrMode.BaseOffs += ConstantOffset;
5083 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5084 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5085 AddrMode.InBounds = false;
5086 return true;
5087 }
5088 AddrMode.BaseOffs -= ConstantOffset;
5089
5090 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5091 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5092 ConstantOffset > 0) {
5093 // Record GEPs with non-zero offsets as candidates for splitting in
5094 // the event that the offset cannot fit into the r+i addressing mode.
5095 // Simple and common case that only one GEP is used in calculating the
5096 // address for the memory access.
5097 Value *Base = AddrInst->getOperand(0);
5098 auto *BaseI = dyn_cast<Instruction>(Base);
5099 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5100 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5101 (BaseI && !isa<CastInst>(BaseI) &&
5102 !isa<GetElementPtrInst>(BaseI))) {
5103 // Make sure the parent block allows inserting non-PHI instructions
5104 // before the terminator.
5105 BasicBlock *Parent = BaseI ? BaseI->getParent()
5106 : &GEP->getFunction()->getEntryBlock();
5107 if (!Parent->getTerminator()->isEHPad())
5108 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5109 }
5110 }
5111
5112 return false;
5113 }
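// Editor's illustrative sketch (not from the LLVM sources): for
//   %q = getelementptr inbounds { i32, i32, i64 }, ptr %p, i64 0, i32 2
// the struct layout gives ConstantOffset == 8 and no variable index, so the
// branch above simply folds 8 into AddrMode.BaseOffs and matches %p.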
5114
5115 // Save the valid addressing mode in case we can't match.
5116 ExtAddrMode BackupAddrMode = AddrMode;
5117 unsigned OldSize = AddrModeInsts.size();
5118
5119 // See if the scale and offset amount is valid for this target.
5120 AddrMode.BaseOffs += ConstantOffset;
5121 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5122 AddrMode.InBounds = false;
5123
5124 // Match the base operand of the GEP.
5125 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5126 // If it couldn't be matched, just stuff the value in a register.
5127 if (AddrMode.HasBaseReg) {
5128 AddrMode = BackupAddrMode;
5129 AddrModeInsts.resize(OldSize);
5130 return false;
5131 }
5132 AddrMode.HasBaseReg = true;
5133 AddrMode.BaseReg = AddrInst->getOperand(0);
5134 }
5135
5136 // Match the remaining variable portion of the GEP.
5137 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5138 Depth)) {
5139 // If it couldn't be matched, try stuffing the base into a register
5140 // instead of matching it, and retrying the match of the scale.
5141 AddrMode = BackupAddrMode;
5142 AddrModeInsts.resize(OldSize);
5143 if (AddrMode.HasBaseReg)
5144 return false;
5145 AddrMode.HasBaseReg = true;