LLVM 23.0.0git
CodeGenPrepare.cpp
Go to the documentation of this file.
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
46#include "llvm/Config/llvm-config.h"
47#include "llvm/IR/Argument.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/BasicBlock.h"
50#include "llvm/IR/CFG.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/Constants.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
56#include "llvm/IR/Dominators.h"
57#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalValue.h"
61#include "llvm/IR/IRBuilder.h"
62#include "llvm/IR/InlineAsm.h"
63#include "llvm/IR/InstrTypes.h"
64#include "llvm/IR/Instruction.h"
67#include "llvm/IR/Intrinsics.h"
68#include "llvm/IR/IntrinsicsAArch64.h"
69#include "llvm/IR/LLVMContext.h"
70#include "llvm/IR/MDBuilder.h"
71#include "llvm/IR/Module.h"
72#include "llvm/IR/Operator.h"
75#include "llvm/IR/Statepoint.h"
76#include "llvm/IR/Type.h"
77#include "llvm/IR/Use.h"
78#include "llvm/IR/User.h"
79#include "llvm/IR/Value.h"
80#include "llvm/IR/ValueHandle.h"
81#include "llvm/IR/ValueMap.h"
83#include "llvm/Pass.h"
89#include "llvm/Support/Debug.h"
99#include <algorithm>
100#include <cassert>
101#include <cstdint>
102#include <iterator>
103#include <limits>
104#include <memory>
105#include <optional>
106#include <utility>
107#include <vector>
108
109using namespace llvm;
110using namespace llvm::PatternMatch;
111
112#define DEBUG_TYPE "codegenprepare"
113
114STATISTIC(NumBlocksElim, "Number of blocks eliminated");
115STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
116STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
117STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
118 "sunken Cmps");
119STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
120 "of sunken Casts");
121STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
122 "computations were sunk");
123STATISTIC(NumMemoryInstsPhiCreated,
124 "Number of phis created when address "
125 "computations were sunk to memory instructions");
126STATISTIC(NumMemoryInstsSelectCreated,
127 "Number of select created when address "
128 "computations were sunk to memory instructions");
129STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
130STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
131STATISTIC(NumAndsAdded,
132 "Number of and mask instructions added to form ext loads");
133STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
134STATISTIC(NumRetsDup, "Number of return instructions duplicated");
135STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
136STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
137STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
138
140 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
141 cl::desc("Disable branch optimizations in CodeGenPrepare"));
142
143static cl::opt<bool>
144 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
145 cl::desc("Disable GC optimizations in CodeGenPrepare"));
146
147static cl::opt<bool>
148 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
149 cl::init(false),
150 cl::desc("Disable select to branch conversion."));
151
152static cl::opt<bool>
153 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
154 cl::desc("Address sinking in CGP using GEPs."));
155
156static cl::opt<bool>
157 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
158 cl::desc("Enable sinking and/cmp into branches."));
159
161 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
162 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
163
165 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
166 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
167
169 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
170 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
171 "CodeGenPrepare"));
172
174 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
175 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
176 "optimization in CodeGenPrepare"));
177
179 "disable-preheader-prot", cl::Hidden, cl::init(false),
180 cl::desc("Disable protection against removing loop preheaders"));
181
183 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
184 cl::desc("Use profile info to add section prefix for hot/cold functions"));
185
187 "profile-unknown-in-special-section", cl::Hidden,
188 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
189 "profile, we cannot tell the function is cold for sure because "
190 "it may be a function newly added without ever being sampled. "
191 "With the flag enabled, compiler can put such profile unknown "
192 "functions into a special section, so runtime system can choose "
193 "to handle it in a different way than .text section, to save "
194 "RAM for example. "));
195
197 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
198 cl::desc("Use the basic-block-sections profile to determine the text "
199 "section prefix for hot functions. Functions with "
200 "basic-block-sections profile will be placed in `.text.hot` "
201 "regardless of their FDO profile info. Other functions won't be "
202 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
203 "profiles."));
204
206 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
207 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
208 "(frequency of destination block) is greater than this ratio"));
209
211 "force-split-store", cl::Hidden, cl::init(false),
212 cl::desc("Force store splitting no matter what the target query says."));
213
215 "cgp-type-promotion-merge", cl::Hidden,
216 cl::desc("Enable merging of redundant sexts when one is dominating"
217 " the other."),
218 cl::init(true));
219
221 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
222 cl::desc("Disables combining addressing modes with different parts "
223 "in optimizeMemoryInst."));
224
225static cl::opt<bool>
226 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
227 cl::desc("Allow creation of Phis in Address sinking."));
228
230 "addr-sink-new-select", cl::Hidden, cl::init(true),
231 cl::desc("Allow creation of selects in Address sinking."));
232
234 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
235 cl::desc("Allow combining of BaseReg field in Address sinking."));
236
238 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
239 cl::desc("Allow combining of BaseGV field in Address sinking."));
240
242 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
243 cl::desc("Allow combining of BaseOffs field in Address sinking."));
244
246 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
247 cl::desc("Allow combining of ScaledReg field in Address sinking."));
248
249static cl::opt<bool>
250 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
251 cl::init(true),
252 cl::desc("Enable splitting large offset of GEP."));
253
255 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
256 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
257
258static cl::opt<bool>
259 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
260 cl::desc("Enable BFI update verification for "
261 "CodeGenPrepare."));
262
263static cl::opt<bool>
264 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
265 cl::desc("Enable converting phi types in CodeGenPrepare"));
266
268 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
269 cl::desc("Least BB number of huge function."));
270
272 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
274 cl::desc("Max number of address users to look at"));
275
276static cl::opt<bool>
277 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
278 cl::desc("Disable elimination of dead PHI nodes."));
279
280namespace {
281
/// Kind of extension that has been observed for a promoted instruction.
enum ExtType {
  /// Zero extension has been seen.
  ZeroExtension = 0,
  /// Sign extension has been seen.
  SignExtension = 1,
  /// Used if we saw sext after ZeroExtension had been set, or if we saw zext
  /// after SignExtension had been set. It makes the type information of a
  /// promoted instruction invalid.
  BothExtension = 2
};
290
/// Describes what, if anything, a transformation changed with respect to
/// dominance information.
enum ModifyDT {
  /// Not Modify any DT.
  NotModifyDT = 0,
  /// Modify the Basic Block Dominator Tree.
  ModifyBBDT = 1,
  /// Modify the Instruction Dominator in a Basic Block. This usually means we
  /// move/delete/insert instruction in a Basic Block, so we should re-iterate
  /// instructions in such Basic Block.
  ModifyInstDT = 2
};
299
300using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
301using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
302using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
304using ValueToSExts = MapVector<Value *, SExts>;
305
306class TypePromotionTransaction;
307
// Holds the state and the individual transformations of CodeGenPrepare.
// CodeGenPrepareLegacyPass is a friend so that it can fill in the private
// target/analysis pointers directly; both its runOnFunction() and the
// FunctionAnalysisManager-based run() below finish by calling _run().
308class CodeGenPrepare {
309 friend class CodeGenPrepareLegacyPass;
 // Target and analysis handles. They are populated per function by the
 // caller of _run() (see runOnFunction() and run()).
310 const TargetMachine *TM = nullptr;
311 const TargetSubtargetInfo *SubtargetInfo = nullptr;
312 const TargetLowering *TLI = nullptr;
313 const TargetRegisterInfo *TRI = nullptr;
314 const TargetTransformInfo *TTI = nullptr;
315 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
316 const TargetLibraryInfo *TLInfo = nullptr;
317 DomTreeUpdater *DTU = nullptr;
318 LoopInfo *LI = nullptr;
 // NOTE(review): BFI and BPI have no in-class initializer, unlike the
 // pointers around them. CodeGenPrepareLegacyPass::runOnFunction() assigns
 // both before calling _run(); confirm every other entry point does too.
319 BlockFrequencyInfo *BFI;
320 BranchProbabilityInfo *BPI;
321 ProfileSummaryInfo *PSI = nullptr;
322
323 /// As we scan instructions optimizing them, this is the next instruction
324 /// to optimize. Transforms that can invalidate this should update it.
325 BasicBlock::iterator CurInstIterator;
326
327 /// Keeps track of non-local addresses that have been sunk into a block.
328 /// This allows us to avoid inserting duplicate code for blocks with
329 /// multiple load/stores of the same address. The usage of WeakTrackingVH
330 /// enables SunkAddrs to be treated as a cache whose entries can be
331 /// invalidated if a sunken address computation has been erased.
332 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
333
334 /// Keeps track of all instructions inserted for the current function.
335 SetOfInstrs InsertedInsts;
336
337 /// Keeps track of the type of the related instruction before their
338 /// promotion for the current function.
339 InstrToOrigTy PromotedInsts;
340
341 /// Keep track of instructions removed during promotion.
342 SetOfInstrs RemovedInsts;
343
344 /// Keep track of sext chains based on their initial value.
345 DenseMap<Value *, Instruction *> SeenChainsForSExt;
346
347 /// Keep track of GEPs accessing the same data structures such as structs or
348 /// arrays that are candidates to be split later because of their large
349 /// size.
350 MapVector<AssertingVH<Value>,
352 LargeOffsetGEPMap;
353
354 /// Keep track of new GEP base after splitting the GEPs having large offset.
355 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
356
357 /// Map serial numbers to Large offset GEPs.
358 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
359
360 /// Keep track of SExt promoted.
361 ValueToSExts ValToSExtendedUses;
362
363 /// True if the function has the OptSize attribute.
 /// Not initialized here; _run() assigns it from F.hasOptSize() first thing.
364 bool OptSize;
365
366 /// DataLayout for the Function being processed.
367 const DataLayout *DL = nullptr;
368
369public:
370 CodeGenPrepare() = default;
 /// Remembers \p TM; every other handle keeps its default until a run.
371 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
372 /// If encounter huge function, we need to limit the build time.
 /// Recomputed by _run() as F.size() > HugeFuncThresholdInCGPP.
373 bool IsHugeFunc = false;
374
375 /// FreshBBs is like worklist, it collected the updated BBs which need
376 /// to be optimized again.
377 /// Note: Consider building time in this pass, when a BB updated, we need
378 /// to insert such BB into FreshBBs for huge function.
379 SmallPtrSet<BasicBlock *, 32> FreshBBs;
380
 /// Drops InsertedInsts, PromotedInsts and FreshBBs. The remaining
 /// per-function containers are not touched here; _run() clears them itself.
381 void releaseMemory() {
382 // Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 }
387
 /// Entry point taking a FunctionAnalysisManager: fills in the handles above
 /// and forwards to _run(). Returns what _run() returns.
388 bool run(Function &F, FunctionAnalysisManager &AM);
389
390private:
 /// Invokes \p f and afterwards checks whether the instruction that
 /// CurInstIterator pointed at is still the value tracked by the handle. If
 /// it is not, scanning restarts at the beginning of \p BB and the SunkAddrs
 /// cache is dropped.
 /// CurInstIterator is dereferenced on entry, so it must not be BB->end().
391 template <typename F>
392 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
393 // Substituting can cause recursive simplifications, which can invalidate
394 // our iterator. Use a WeakTrackingVH to hold onto it in case this
395 // happens.
396 Value *CurValue = &*CurInstIterator;
397 WeakTrackingVH IterHandle(CurValue);
398
399 f();
400
401 // If the iterator instruction was recursively deleted, start over at the
402 // start of the block.
403 if (IterHandle != CurValue) {
404 CurInstIterator = BB->begin();
405 SunkAddrs.clear();
406 }
407 }
408
409 // Get the DominatorTree, updating it if necessary.
410 DominatorTree &getDT() { return DTU->getDomTree(); }
411
 // Bookkeeping and CFG clean-up helpers.
412 void removeAllAssertingVHReferences(Value *V);
413 bool eliminateAssumptions(Function &F);
414 bool eliminateFallThrough(Function &F);
415 bool eliminateMostlyEmptyBlocks(Function &F, bool &ResetLI);
416 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
417 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
418 bool eliminateMostlyEmptyBlock(BasicBlock *BB);
419 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
420 bool isPreheader);
 // Per-block / per-instruction optimizations. Their definitions are not part
 // of this excerpt.
421 bool makeBitReverse(Instruction &I);
422 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
423 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
424 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
425 unsigned AddrSpace);
426 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
427 bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
428 ModifyDT &ModifiedDT);
429 bool optimizeInlineAsmInst(CallInst *CS);
430 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
431 bool optimizeExt(Instruction *&I);
432 bool optimizeExtUses(Instruction *I);
433 bool optimizeLoadExt(LoadInst *Load);
434 bool optimizeShiftInst(BinaryOperator *BO);
435 bool optimizeFunnelShift(IntrinsicInst *Fsh);
436 bool optimizeSelectInst(SelectInst *SI);
437 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
438 bool optimizeSwitchType(SwitchInst *SI);
439 bool optimizeSwitchPhiConstants(SwitchInst *SI);
440 bool optimizeSwitchInst(SwitchInst *SI);
441 bool optimizeExtractElementInst(Instruction *Inst);
442 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
443 bool fixupDbgVariableRecord(DbgVariableRecord &I);
444 bool fixupDbgVariableRecordsOnInst(Instruction &I);
445 bool placeDbgValues(Function &F);
446 bool placePseudoProbes(Function &F);
447 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
448 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
449 bool tryToPromoteExts(TypePromotionTransaction &TPT,
450 const SmallVectorImpl<Instruction *> &Exts,
451 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
452 unsigned CreatedInstsCost = 0);
453 bool mergeSExts(Function &F);
454 bool splitLargeGEPOffsets();
455 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
456 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
457 bool optimizePhiTypes(Function &F);
458 bool performAddressTypePromotion(
459 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
460 bool HasPromoted, TypePromotionTransaction &TPT,
461 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
462 bool splitBranchCondition(Function &F);
463 bool simplifyOffsetableRelocate(GCStatepointInst &I);
464
465 bool tryToSinkFreeOperands(Instruction *I);
466 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
467 CmpInst *Cmp, Intrinsic::ID IID);
468 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
469 bool optimizeURem(Instruction *Rem);
470 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
471 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
472 bool unfoldPowerOf2Test(CmpInst *Cmp);
 // Recomputes BFI from scratch and compares it with the maintained one.
473 void verifyBFIUpdates(Function &F);
 // Shared driver reached from both run() and the legacy pass.
474 bool _run(Function &F);
475};
476
477class CodeGenPrepareLegacyPass : public FunctionPass {
478public:
479 static char ID; // Pass identification, replacement for typeid
480
481 CodeGenPrepareLegacyPass() : FunctionPass(ID) {}
482
483 bool runOnFunction(Function &F) override;
484
485 StringRef getPassName() const override { return "CodeGen Prepare"; }
486
487 void getAnalysisUsage(AnalysisUsage &AU) const override {
488 // FIXME: When we can selectively preserve passes, preserve the domtree.
489 AU.addRequired<ProfileSummaryInfoWrapperPass>();
490 AU.addRequired<TargetLibraryInfoWrapperPass>();
491 AU.addRequired<TargetPassConfig>();
492 AU.addRequired<TargetTransformInfoWrapperPass>();
493 AU.addRequired<DominatorTreeWrapperPass>();
494 AU.addRequired<LoopInfoWrapperPass>();
495 AU.addRequired<BranchProbabilityInfoWrapperPass>();
496 AU.addRequired<BlockFrequencyInfoWrapperPass>();
497 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
498 }
499};
500
501} // end anonymous namespace
502
503char CodeGenPrepareLegacyPass::ID = 0;
504
505bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
506 if (skipFunction(F))
507 return false;
508 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
509 CodeGenPrepare CGP(TM);
510 CGP.DL = &F.getDataLayout();
511 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
512 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
513 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
514 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
515 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
516 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
517 CGP.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
518 CGP.BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
519 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
520 auto BBSPRWP =
521 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
522 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
523 DomTreeUpdater DTUpdater(
524 &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
525 DomTreeUpdater::UpdateStrategy::Lazy);
526 CGP.DTU = &DTUpdater;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
540INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
541 "Optimize for code generation", false, false)
542
544 return new CodeGenPrepareLegacyPass();
545}
546
549 CodeGenPrepare CGP(TM);
550
551 bool Changed = CGP.run(F, AM);
552 if (!Changed)
553 return PreservedAnalyses::all();
554
558 return PA;
559}
560
// Entry point used with a FunctionAnalysisManager. Fills in the target hooks
// and analysis handles for F and then forwards to _run(F), whose result is
// returned.
// The profile summary is looked up as a *cached* module analysis only; if it
// is missing, the pass aborts through reportFatalUsageError().
// NOTE(review): several statements of this function are absent from this
// listing (gaps in the embedded line numbers), so the assignments visible here
// are not the complete set.
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
 // getCachedResult() does not compute the analysis, hence the null check.
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 if (!PSI)
574 reportFatalUsageError("this pass requires the profile-summary module "
575 "analysis to be available");
576 BBSectionsProfileReader =
579 DomTreeUpdater::UpdateStrategy::Lazy);
 // DTU points at a local object; it is only valid for the _run() call below.
580 DTU = &DTUpdater;
581 return _run(F);
582}
583
// Shared driver behind both entry points. The caller must have populated the
// target and analysis handles (TLI, PSI, BFI, LI, DTU, ...) beforehand.
//
// Visible phases, in order:
//   1. choose a section prefix ("hot" / "unlikely" / "unknown") for F,
//   2. bypass slow divisions when the target asks for it,
//   3. remove assumptions and mostly-empty blocks, split branch conditions and
//      indirect-branch critical edges,
//   4. iterate optimizeBlock() plus the sext / large-GEP / phi-type clean-ups
//      until a round makes no change,
//   5. fold constant terminators and delete the blocks that become dead,
//   6. simplify statepoint relocates, then place debug values and pseudo
//      probes.
//
// Returns true if any phase changed F.
// NOTE(review): a few statements are absent from this listing (gaps in the
// embedded line numbers), so some guards and declarations are not visible.
584bool CodeGenPrepare::_run(Function &F) {
585 bool EverMadeChange = false;
586
587 OptSize = F.hasOptSize();
588 // Use the basic-block-sections profile to promote hot functions to .text.hot
589 // if requested.
590 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
591 BBSectionsProfileReader->isFunctionHot(F.getName())) {
592 (void)F.setSectionPrefix("hot");
593 } else if (ProfileGuidedSectionPrefix) {
594 // The hot attribute overwrites profile count based hotness while profile
595 // counts based hotness overwrite the cold attribute.
596 // This is a conservative behavior.
597 if (F.hasFnAttribute(Attribute::Hot) ||
598 PSI->isFunctionHotInCallGraph(&F, *BFI))
599 (void)F.setSectionPrefix("hot");
600 // If PSI shows this function is not hot, we will place the function
601 // into unlikely section if (1) PSI shows this is a cold function, or
602 // (2) the function has the cold attribute.
603 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
604 F.hasFnAttribute(Attribute::Cold))
605 (void)F.setSectionPrefix("unlikely");
606 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
607 PSI->isFunctionHotnessUnknown(F))
608 (void)F.setSectionPrefix("unknown");
609 }
610
611 /// This optimization identifies DIV instructions that can be
612 /// profitably bypassed and carried out with a shorter, faster divide.
613 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
614 const DenseMap<unsigned int, unsigned int> &BypassWidths =
616 BasicBlock *BB = &*F.begin();
617 while (BB != nullptr) {
618 // bypassSlowDivision may create new BBs, but we don't want to reapply the
619 // optimization to those blocks.
620 BasicBlock *Next = BB->getNextNode();
621 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI))
622 EverMadeChange |= bypassSlowDivision(BB, BypassWidths, DTU, LI);
623 BB = Next;
624 }
625 }
626
627 // Get rid of @llvm.assume builtins before attempting to eliminate empty
628 // blocks, since there might be blocks that only contain @llvm.assume calls
629 // (plus arguments that we can get rid of).
630 EverMadeChange |= eliminateAssumptions(F);
631
 // Rebuilds LoopInfo from the current dominator tree after CFG changes.
632 auto resetLoopInfo = [this]() {
633 LI->releaseMemory();
634 LI->analyze(DTU->getDomTree());
635 };
636
637 // Eliminate blocks that contain only PHI nodes and an
638 // unconditional branch.
639 bool ResetLI = false;
640 EverMadeChange |= eliminateMostlyEmptyBlocks(F, ResetLI);
641 if (ResetLI)
642 resetLoopInfo();
643
645 EverMadeChange |= splitBranchCondition(F);
646
647 // Split some critical edges where one of the sources is an indirect branch,
648 // to help generate sane code for PHIs involving such edges.
649 bool Split = SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true,
650 BPI, BFI, DTU);
651 EverMadeChange |= Split;
652 if (Split)
653 resetLoopInfo();
654
655#ifndef NDEBUG
656 if (VerifyDomInfo)
657 assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
658 "Incorrect DominatorTree updates in CGP");
659
660 if (VerifyLoopInfo)
661 LI->verify(getDT());
662#endif
663
664 // If we are optimizing a huge function, we need to consider the build time,
665 // because the basic algorithm's complexity is near O(N!).
666 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
667
 // Fixed-point loop: repeat until a whole round reports no change.
668 bool MadeChange = true;
669 bool FuncIterated = false;
670 while (MadeChange) {
671 MadeChange = false;
672
673 // This is required because optimizeBlock() calls getDT() inside the loop
674 // below, which flushes pending updates and may delete dead blocks, leading
675 // to iterator invalidation.
676 DTU->flush();
677
678 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
 // After the first full pass over a huge function, only blocks recorded in
 // FreshBBs are revisited.
679 if (FuncIterated && !FreshBBs.contains(&BB))
680 continue;
681
682 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
683 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
684
685 MadeChange |= Changed;
686 if (IsHugeFunc) {
687 // If the BB is updated, it may still have a chance to be optimized.
688 // This usually happens with the sink optimization.
689 // For example:
690 //
691 // bb0:
692 // %and = and i32 %a, 4
693 // %cmp = icmp eq i32 %and, 0
694 //
695 // If the %cmp sinks to another BB, the %and gets a chance to sink too.
696 if (Changed)
697 FreshBBs.insert(&BB);
698 else if (FuncIterated)
699 FreshBBs.erase(&BB);
700 } else {
701 // For small/normal functions, we restart BB iteration if the dominator
702 // tree of the Function was changed.
703 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
704 break;
705 }
706 }
707 // We have iterated over all the BBs in the function (this flag only takes
 // effect for huge functions).
708 FuncIterated = IsHugeFunc;
709
710 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
711 MadeChange |= mergeSExts(F);
712 if (!LargeOffsetGEPMap.empty())
713 MadeChange |= splitLargeGEPOffsets();
714 MadeChange |= optimizePhiTypes(F);
715
 // The return value is deliberately ignored here; MadeChange is already true.
716 if (MadeChange)
717 eliminateFallThrough(F);
718
719#ifndef NDEBUG
720 if (VerifyDomInfo)
721 assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
722 "Incorrect DominatorTree updates in CGP");
723
724 if (VerifyLoopInfo)
725 LI->verify(getDT());
726#endif
727
728 // Really free removed instructions during promotion.
729 for (Instruction *I : RemovedInsts)
730 I->deleteValue();
731
 // Per-round bookkeeping is reset before the next iteration.
732 EverMadeChange |= MadeChange;
733 SeenChainsForSExt.clear();
734 ValToSExtendedUses.clear();
735 RemovedInsts.clear();
736 LargeOffsetGEPMap.clear();
737 LargeOffsetGEPID.clear();
738 }
739
740 NewGEPBases.clear();
741 SunkAddrs.clear();
742
743 // LoopInfo is not needed anymore and ConstantFoldTerminator can break it.
744 LI = nullptr;
745
746 if (!DisableBranchOpts) {
747 MadeChange = false;
748 // Use a set vector to get deterministic iteration order. The order the
749 // blocks are removed may affect whether or not PHI nodes in successors
750 // are removed.
751 SmallSetVector<BasicBlock *, 8> WorkList;
752 for (BasicBlock &BB : F) {
754 MadeChange |= ConstantFoldTerminator(&BB, true, nullptr, DTU);
 // NOTE(review): MadeChange accumulates across blocks, so once any
 // terminator has been folded this test no longer skips later blocks.
755 if (!MadeChange)
756 continue;
757
758 for (BasicBlock *Succ : Successors)
759 if (pred_empty(Succ))
760 WorkList.insert(Succ);
761 }
762
763 // Delete the dead blocks and any of their dead successors.
764 MadeChange |= !WorkList.empty();
765 while (!WorkList.empty()) {
766 BasicBlock *BB = WorkList.pop_back_val();
768
769 DeleteDeadBlock(BB, DTU);
770
771 for (BasicBlock *Succ : Successors)
772 if (pred_empty(Succ))
773 WorkList.insert(Succ);
774 }
775
776 // Flush pending DT updates in order to finalise deletion of dead blocks.
777 DTU->flush();
778
779 // Merge pairs of basic blocks with unconditional branches, connected by
780 // a single edge.
781 if (EverMadeChange || MadeChange)
782 MadeChange |= eliminateFallThrough(F);
783
784 EverMadeChange |= MadeChange;
785 }
786
787 if (!DisableGCOpts) {
 // Statepoints are collected first and simplified in a second loop, rather
 // than while walking the instruction lists.
789 for (BasicBlock &BB : F)
790 for (Instruction &I : BB)
791 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
792 Statepoints.push_back(SP);
793 for (auto &I : Statepoints)
794 EverMadeChange |= simplifyOffsetableRelocate(*I);
795 }
796
797 // Do this last to clean up use-before-def scenarios introduced by other
798 // preparatory transforms.
799 EverMadeChange |= placeDbgValues(F);
800 EverMadeChange |= placePseudoProbes(F);
801
802#ifndef NDEBUG
804 verifyBFIUpdates(F);
805#endif
806
807 return EverMadeChange;
808}
809
810bool CodeGenPrepare::eliminateAssumptions(Function &F) {
811 bool MadeChange = false;
812 for (BasicBlock &BB : F) {
813 CurInstIterator = BB.begin();
814 while (CurInstIterator != BB.end()) {
815 Instruction *I = &*(CurInstIterator++);
816 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
817 MadeChange = true;
818 Value *Operand = Assume->getOperand(0);
819 Assume->eraseFromParent();
820
821 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
822 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
823 });
824 }
825 }
826 }
827 return MadeChange;
828}
829
830/// An instruction is about to be deleted, so remove all references to it in our
831/// GEP-tracking data structures.
/// \p V is erased as a key of LargeOffsetGEPMap and from NewGEPBases. When the
/// GEP lookup below succeeds, the GEP is additionally removed from
/// LargeOffsetGEPID and from the vector stored under its pointer operand; that
/// map entry is erased as well once its vector becomes empty.
832void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
833 LargeOffsetGEPMap.erase(V);
834 NewGEPBases.erase(V);
835
 // NOTE(review): the statement that defines GEP is absent from this listing;
 // presumably it is derived from V. Confirm against the full source.
837 if (!GEP)
838 return;
839
840 LargeOffsetGEPID.erase(GEP);
841
842 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
843 if (VecI == LargeOffsetGEPMap.end())
844 return;
845
846 auto &GEPVector = VecI->second;
 // Each element's .first is compared against the GEP being removed.
847 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
848
849 if (GEPVector.empty())
850 LargeOffsetGEPMap.erase(VecI);
851}
852
853// Verify BFI has been updated correctly by recomputing BFI and comparing them.
854[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
855 DominatorTree NewDT(F);
856 LoopInfo NewLI(NewDT);
857 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
858 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
859 NewBFI.verifyMatch(*BFI);
860}
861
862/// Merge basic blocks which are connected by a single edge, where one of the
863/// basic blocks has a single successor pointing to the other basic block,
864/// which has a single predecessor.
/// The entry block, blocks pending deletion in the DomTreeUpdater, self loops
/// and blocks whose address is taken are never merged away.
/// \returns true if at least one block was merged into its predecessor.
865bool CodeGenPrepare::eliminateFallThrough(Function &F) {
866 bool Changed = false;
 // Predecessors that absorbed a block; revisited after the scan.
867 SmallPtrSet<BasicBlock *, 8> Preds;
868 // Scan all of the blocks in the function, except for the entry block.
869 for (auto &Block : llvm::drop_begin(F)) {
870 auto *BB = &Block;
871 if (DTU->isBBPendingDeletion(BB))
872 continue;
873 // If the destination block has a single pred, then this is a trivial
874 // edge, just collapse it.
875 BasicBlock *SinglePred = BB->getSinglePredecessor();
876
877 // Don't merge if BB's address is taken.
878 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
879 continue;
880
 // Only merge when the predecessor ends in an unconditional branch.
881 if (isa<UncondBrInst>(SinglePred->getTerminator())) {
882 Changed = true;
883 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
884
885 // Merge BB into SinglePred and delete it.
886 MergeBlockIntoPredecessor(BB, DTU, LI);
887 Preds.insert(SinglePred);
888
889 if (IsHugeFunc) {
890 // Update FreshBBs to optimize the merged BB.
891 FreshBBs.insert(SinglePred);
892 FreshBBs.erase(BB);
893 }
894 }
895 }
896
897 // (Repeatedly) merging blocks into their predecessors can create redundant
898 // debug intrinsics.
 // NOTE(review): the statement guarded by the `if` below is absent from this
 // listing; per the comment above it presumably removes that redundancy.
899 for (auto *Pred : Preds)
900 if (!DTU->isBBPendingDeletion(Pred))
902
903 return Changed;
904}
905
906/// Find a destination block from BB if BB is mergeable empty block.
907BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
908 // If this block doesn't end with an uncond branch, ignore it.
909 UncondBrInst *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
910 if (!BI)
911 return nullptr;
912
913 // If the instruction before the branch (skipping debug info) isn't a phi
914 // node, then other stuff is happening here.
915 BasicBlock::iterator BBI = BI->getIterator();
916 if (BBI != BB->begin()) {
917 --BBI;
918 if (!isa<PHINode>(BBI))
919 return nullptr;
920 }
921
922 // Do not break infinite loops.
923 BasicBlock *DestBB = BI->getSuccessor();
924 if (DestBB == BB)
925 return nullptr;
926
927 if (!canMergeBlocks(BB, DestBB))
928 DestBB = nullptr;
929
930 return DestBB;
931}
932
933/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
934/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
935/// edges in ways that are non-optimal for isel. Start by eliminating these
936/// blocks so we can split them the way we want them.
/// \param ResetLI reset to false on entry, then OR-ed with the result of every
///        eliminateMostlyEmptyBlock() call; the caller in _run() rebuilds
///        LoopInfo when it comes back true.
/// \returns true if a dead PHI was deleted or a block was eliminated.
937bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F, bool &ResetLI) {
 // Collect the preheader of every loop. LoopList starts with the loops that
 // LI iterates over and is extended with whatever iterating each Loop yields.
938 SmallPtrSet<BasicBlock *, 16> Preheaders;
939 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
940 while (!LoopList.empty()) {
941 Loop *L = LoopList.pop_back_val();
942 llvm::append_range(LoopList, *L);
943 if (BasicBlock *Preheader = L->getLoopPreheader())
944 Preheaders.insert(Preheader);
945 }
946
947 ResetLI = false;
948 bool MadeChange = false;
949 // Note that this intentionally skips the entry block.
950 for (auto &Block : llvm::drop_begin(F)) {
951 // Delete phi nodes that could block deleting other empty blocks.
953 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
954 }
955
 // Second pass, again skipping the entry block: eliminate each block that is
 // both mergeable and judged profitable to merge.
956 for (auto &Block : llvm::drop_begin(F)) {
957 auto *BB = &Block;
958 if (DTU->isBBPendingDeletion(BB))
959 continue;
960 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
961 if (!DestBB ||
962 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
963 continue;
964
965 ResetLI |= eliminateMostlyEmptyBlock(BB);
966 MadeChange = true;
967 }
968 return MadeChange;
969}
970
/// Decide whether folding the empty block \p BB into \p DestBB is worthwhile.
///
/// \param BB the mostly-empty candidate block.
/// \param DestBB the block BB would be merged into.
/// \param isPreheader true if the caller found BB among the loop preheaders.
/// \returns true if the merge should go ahead, false if BB should be kept.
971bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
972 BasicBlock *DestBB,
973 bool isPreheader) {
974 // Do not delete loop preheaders if doing so would create a critical edge.
975 // Loop preheaders can be good locations to spill registers. If the
976 // preheader is deleted and we create a critical edge, registers may be
977 // spilled in the loop body instead.
978 if (!DisablePreheaderProtect && isPreheader &&
979 !(BB->getSinglePredecessor() &&
// NOTE(review): the embedded listing numbers jump from 979 to 981; the rest of
// this condition (closing the two open parentheses) is missing — restore it
// from upstream.
981 return false;
982
983 // Skip merging if the block's successor is also a successor to any callbr
984 // that leads to this block.
985 // FIXME: Is this really needed? Is this a correctness issue?
986 for (BasicBlock *Pred : predecessors(BB)) {
987 if (isa<CallBrInst>(Pred->getTerminator()) &&
988 llvm::is_contained(successors(Pred), DestBB))
989 return false;
990 }
991
992 // Try to skip merging if the unique predecessor of BB is terminated by a
993 // switch or indirect branch instruction, and BB is used as an incoming block
994 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
995 // add COPY instructions in the predecessor of BB instead of BB (if it is not
996 // merged). Note that the critical edge created by merging such blocks won't be
997 // split in MachineSink because the jump table is not analyzable. By keeping
998 // such empty block (BB), ISel will place COPY instructions in BB, not in the
999 // predecessor of BB.
1000 BasicBlock *Pred = BB->getUniquePredecessor();
1001 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
// NOTE(review): the embedded listing numbers jump from 1001 to 1003; the second
// operand of this `||` is missing (presumably the indirect-branch test that the
// comment above describes) — restore it from upstream.
1003 return true;
1004
// If BB's first non-PHI, non-debug instruction is not its terminator, merge.
1005 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
1006 return true;
1007
1008 // We use a simple cost heuristic which determines skipping merging is
1009 // profitable if the cost of skipping merging is less than the cost of
1010 // merging : Cost(skipping merging) < Cost(merging BB), where the
1011 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1012 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1013 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1014 // Freq(Pred) / Freq(BB) > 2.
1015 // Note that if there are multiple empty blocks sharing the same incoming
1016 // value for the PHIs in the DestBB, we consider them together. In such
1017 // case, Cost(merging BB) will be the sum of their frequencies.
1018
1019 if (!isa<PHINode>(DestBB->begin()))
1020 return true;
1021
1022 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1023
1024 // Find all other incoming blocks from which incoming values of all PHIs in
1025 // DestBB are the same as the ones from BB.
1026 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1027 if (DestBBPred == BB)
1028 continue;
1029
1030 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1031 return DestPN.getIncomingValueForBlock(BB) ==
1032 DestPN.getIncomingValueForBlock(DestBBPred);
1033 }))
1034 SameIncomingValueBBs.insert(DestBBPred);
1035 }
1036
1037 // See if all BB's incoming values are same as the value from Pred. In this
1038 // case, no reason to skip merging because COPYs are expected to be placed in
1039 // Pred already.
1040 if (SameIncomingValueBBs.count(Pred))
1041 return true;
1042
1043 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1044 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1045
// Add the frequency of every sibling block that shares BB's incoming values,
// hangs off the same unique predecessor, and is itself a mergeable empty block
// targeting DestBB.
1046 for (auto *SameValueBB : SameIncomingValueBBs)
1047 if (SameValueBB->getUniquePredecessor() == Pred &&
1048 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1049 BBFreq += BFI->getBlockFreq(SameValueBB);
1050
// Merge when the scaled frequency has no value (mul() returned an empty
// optional) or when Pred is not hotter than that limit.
1051 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1052 return !Limit || PredFreq <= *Limit;
1053}
1054
1055/// Return true if we can merge BB into DestBB if there is a single
1056/// unconditional branch between them, and BB contains no other non-phi
1057/// instructions.
1058bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1059 const BasicBlock *DestBB) const {
1060 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1061 // the successor. If there are more complex condition (e.g. preheaders),
1062 // don't mess around with them.
1063 for (const PHINode &PN : BB->phis()) {
1064 for (const User *U : PN.users()) {
1065 const Instruction *UI = cast<Instruction>(U);
1066 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1067 return false;
1068 // If User is inside DestBB block and it is a PHINode then check
1069 // incoming value. If incoming value is not from BB then this is
1070 // a complex condition (e.g. preheaders) we want to avoid here.
1071 if (UI->getParent() == DestBB) {
1072 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1073 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1074 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1075 if (Insn && Insn->getParent() == BB &&
1076 Insn->getParent() != UPN->getIncomingBlock(I))
1077 return false;
1078 }
1079 }
1080 }
1081 }
1082
1083 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1084 // and DestBB may have conflicting incoming values for the block. If so, we
1085 // can't merge the block.
1086 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1087 if (!DestBBPN)
1088 return true; // no conflict.
1089
1090 // Collect the preds of BB.
1091 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1092 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1093 // It is faster to get preds from a PHI than with pred_iterator.
1094 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1095 BBPreds.insert(BBPN->getIncomingBlock(i));
1096 } else {
1097 BBPreds.insert_range(predecessors(BB));
1098 }
1099
1100 // Walk the preds of DestBB.
1101 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1102 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1103 if (BBPreds.count(Pred)) { // Common predecessor?
1104 for (const PHINode &PN : DestBB->phis()) {
1105 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1106 const Value *V2 = PN.getIncomingValueForBlock(BB);
1107
1108 // If V2 is a phi node in BB, look up what the mapped value will be.
1109 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1110 if (V2PN->getParent() == BB)
1111 V2 = V2PN->getIncomingValueForBlock(Pred);
1112
1113 // If there is a conflict, bail out.
1114 if (V1 != V2)
1115 return false;
1116 }
1117 }
1118 }
1119
1120 return true;
1121}
1122
1123/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
///
/// The parent blocks of Old's users are recorded only when Old is an
/// Instruction and \p IsHuge is set; the use replacement itself always happens.
1124static void replaceAllUsesWith(Value *Old, Value *New,
// NOTE(review): the embedded listing numbers jump from 1124 to 1126; the
// parameter line that declares FreshBBs (used below) is missing — restore it
// from upstream.
1126 bool IsHuge) {
1127 auto *OldI = dyn_cast<Instruction>(Old);
1128 if (OldI) {
1129 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1130 UI != E; ++UI) {
// NOTE(review): the embedded listing numbers jump from 1130 to 1132; the line
// that defines `User` from the iterator is missing — restore it from upstream.
1132 if (IsHuge)
1133 FreshBBs.insert(User->getParent());
1134 }
1135 }
1136 Old->replaceAllUsesWith(New);
1137}
1138
1139/// Eliminate a basic block that has only phi's and an unconditional branch in
1140/// it.
1141/// Indicate that the LoopInfo was modified only if it wasn't updated.
///
/// \returns false on the path where DestBB is merged into BB through
///          MergeBlockIntoPredecessor (which is handed LI), and true on the
///          path where BB itself is removed through DTU.
1142bool CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1143 UncondBrInst *BI = cast<UncondBrInst>(BB->getTerminator());
1144 BasicBlock *DestBB = BI->getSuccessor();
1145
1146 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1147 << *BB << *DestBB);
1148
1149 // If the destination block has a single pred, then this is a trivial edge,
1150 // just collapse it.
1151 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1152 if (SinglePred != DestBB) {
1153 assert(SinglePred == BB &&
1154 "Single predecessor not the same as predecessor");
1155 // Merge DestBB into SinglePred/BB and delete it.
1156 MergeBlockIntoPredecessor(DestBB, DTU, LI);
1157 // Note: BB(=SinglePred) will not be deleted on this path.
1158 // DestBB(=its single successor) is the one that was deleted.
1159 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1160
1161 if (IsHugeFunc) {
1162 // Update FreshBBs to optimize the merged BB.
1163 FreshBBs.insert(SinglePred);
1164 FreshBBs.erase(DestBB);
1165 }
1166 return false;
1167 }
1168 }
1169
1170 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1171 // to handle the new incoming edges it is about to have.
1172 for (PHINode &PN : DestBB->phis()) {
1173 // Remove the incoming value for BB, and remember it.
1174 Value *InVal = PN.removeIncomingValue(BB, false);
1175
1176 // Two options: either the InVal is a phi node defined in BB or it is some
1177 // value that dominates BB.
1178 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1179 if (InValPhi && InValPhi->getParent() == BB) {
1180 // Add all of the input values of the input PHI as inputs of this phi.
1181 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1182 PN.addIncoming(InValPhi->getIncomingValue(i),
1183 InValPhi->getIncomingBlock(i));
1184 } else {
1185 // Otherwise, add one instance of the dominating value for each edge that
1186 // we will be adding.
1187 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1188 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1189 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1190 } else {
1191 for (BasicBlock *Pred : predecessors(BB))
1192 PN.addIncoming(InVal, Pred);
1193 }
1194 }
1195 }
1196
1197 // Preserve loop Metadata.
1198 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1199 for (auto *Pred : predecessors(BB))
1200 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1201 }
1202
1203 // The PHIs are now updated, change everything that refers to BB to use
1204 // DestBB and remove BB.
// NOTE(review): the embedded listing numbers jump from 1204 to 1206; the
// declaration of DTUpdates (filled below and passed to DTU->applyUpdates) is
// missing — restore it from upstream.
1206 SmallPtrSet<BasicBlock *, 8> SeenPreds;
1207 SmallPtrSet<BasicBlock *, 8> PredOfDestBB(llvm::from_range,
1208 predecessors(DestBB));
// Queue one Insert edge Pred->DestBB per distinct predecessor of BB that is not
// already a predecessor of DestBB.
1209 for (auto *Pred : predecessors(BB)) {
1210 if (!PredOfDestBB.contains(Pred)) {
1211 if (SeenPreds.insert(Pred).second)
1212 DTUpdates.push_back({DominatorTree::Insert, Pred, DestBB});
1213 }
1214 }
1215 SeenPreds.clear();
// Queue one Delete edge Pred->BB per distinct predecessor of BB.
1216 for (auto *Pred : predecessors(BB)) {
1217 if (SeenPreds.insert(Pred).second)
1218 DTUpdates.push_back({DominatorTree::Delete, Pred, BB});
1219 }
1220 DTUpdates.push_back({DominatorTree::Delete, BB, DestBB});
1221 BB->replaceAllUsesWith(DestBB);
1222 DTU->applyUpdates(DTUpdates);
1223 DTU->deleteBB(BB);
1224 ++NumBlocksElim;
1225
1226 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1227 return true;
1228}
1229
1230// Computes a map of base pointer relocation instructions to corresponding
1231// derived pointer relocation instructions given a vector of all relocate calls
// NOTE(review): the embedded listing numbers skip 1232 and 1234; the line
// carrying the function name and return type, and the line carrying the type of
// the RelocateInstMap parameter, are missing — restore them from upstream.
1233 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1235 &RelocateInstMap) {
1236 // Collect information in two maps: one primarily for locating the base object
1237 // while filling the second map; the second map is the final structure holding
1238 // a mapping between Base and corresponding Derived relocate calls
// NOTE(review): the embedded listing numbers jump from 1238 to 1240; the
// declaration of RelocateIdxMap (used below) is missing — restore it from
// upstream.
// Index every relocate by its (base index, derived index) pair.
1240 for (auto *ThisRelocate : AllRelocateCalls) {
1241 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1242 ThisRelocate->getDerivedPtrIndex());
1243 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1244 }
// Attach each derived relocate to the relocate of its base, if one was indexed
// under the key (base index, base index).
1245 for (auto &Item : RelocateIdxMap) {
1246 std::pair<unsigned, unsigned> Key = Item.first;
1247 if (Key.first == Key.second)
1248 // Base relocation: nothing to insert
1249 continue;
1250
1251 GCRelocateInst *I = Item.second;
1252 auto BaseKey = std::make_pair(Key.first, Key.first);
1253
1254 // We're iterating over RelocateIdxMap so we cannot modify it.
1255 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1256 if (MaybeBase == RelocateIdxMap.end())
1257 // TODO: We might want to insert a new base object relocate and gep off
1258 // that, if there are enough derived object relocates.
1259 continue;
1260
1261 RelocateInstMap[MaybeBase->second].push_back(I);
1262 }
1263}
1264
1265// Accepts a GEP and extracts the operands into a vector provided they're all
1266// small integer constants
//
// Returns false, leaving OffsetV untouched, as soon as an index operand is not
// a ConstantInt or has a zero-extended value above 20. Otherwise appends every
// operand from index 1 onwards to OffsetV and returns true.
// NOTE(review): the embedded listing numbers jump from 1266 to 1268; the line
// carrying the return type, function name and the GEP parameter is missing —
// restore it from upstream.
1268 SmallVectorImpl<Value *> &OffsetV) {
1269 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1270 // Only accept small constant integer operands
1271 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
// NOTE(review): getZExtValue() presumably requires the constant to fit in 64
// bits — confirm that wider index constants cannot reach this point.
1272 if (!Op || Op->getZExtValue() > 20)
1273 return false;
1274 }
1275
1276 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1277 OffsetV.push_back(GEP->getOperand(i));
1278 return true;
1279}
1280
1281// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1282// replace, computes a replacement, and affects it.
//
// Returns true if RelocatedBase was moved or at least one target was replaced
// by a GEP off the relocated base.
1283static bool
// NOTE(review): the embedded listing numbers jump from 1283 to 1285; the line
// carrying the function name and the RelocatedBase parameter is missing —
// restore it from upstream.
1285 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1286 bool MadeChange = false;
1287 // We must ensure the relocation of derived pointer is defined after
1288 // relocation of base pointer. If we find a relocation corresponding to base
1289 // defined earlier than relocation of base then we move relocation of base
1290 // right before found relocation. We consider only relocation in the same
1291 // basic block as relocation of base. Relocations from other basic block will
1292 // be skipped by optimization and we do not care about them.
1293 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1294 &*R != RelocatedBase; ++R)
1295 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1296 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1297 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1298 RelocatedBase->moveBefore(RI->getIterator());
1299 MadeChange = true;
1300 break;
1301 }
1302
1303 for (GCRelocateInst *ToReplace : Targets) {
1304 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1305 "Not relocating a derived object of the original base object");
1306 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1307 // A duplicate relocate call. TODO: coalesce duplicates.
1308 continue;
1309 }
1310
1311 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1312 // Base and derived relocates are in different basic blocks.
1313 // In this case transform is only valid when base dominates derived
1314 // relocate. However it would be too expensive to check dominance
1315 // for each such relocate, so we skip the whole transformation.
1316 continue;
1317 }
1318
// Only handle a derived pointer that is a GEP taken directly off the base.
1319 Value *Base = ToReplace->getBasePtr();
1320 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1321 if (!Derived || Derived->getPointerOperand() != Base)
1322 continue;
1323
// NOTE(review): the embedded listing numbers jump from 1323 to 1325; the
// declaration of OffsetV (used below) is missing — restore it from upstream.
1325 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1326 continue;
1327
1328 // Create a Builder and replace the target callsite with a gep
1329 assert(RelocatedBase->getNextNode() &&
1330 "Should always have one since it's not a terminator");
1331
1332 // Insert after RelocatedBase
1333 IRBuilder<> Builder(RelocatedBase->getNextNode());
1334 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1335
1336 // If gc_relocate does not match the actual type, cast it to the right type.
1337 // In theory, there must be a bitcast after gc_relocate if the type does not
1338 // match, and we should reuse it to get the derived pointer. But it could be
1339 // cases like this:
1340 // bb1:
1341 // ...
1342 // %g1 = call coldcc i8 addrspace(1)*
1343 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1344 //
1345 // bb2:
1346 // ...
1347 // %g2 = call coldcc i8 addrspace(1)*
1348 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1349 //
1350 // merge:
1351 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1352 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1353 //
1354 // In this case, we can not find the bitcast any more. So we insert a new
1355 // bitcast no matter there is already one or not. In this way, we can handle
1356 // all cases, and the extra bitcast should be optimized away in later
1357 // passes.
1358 Value *ActualRelocatedBase = RelocatedBase;
1359 if (RelocatedBase->getType() != Base->getType()) {
1360 ActualRelocatedBase =
1361 Builder.CreateBitCast(RelocatedBase, Base->getType());
1362 }
1363 Value *Replacement =
1364 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1365 ArrayRef(OffsetV));
1366 Replacement->takeName(ToReplace);
1367 // If the newly generated derived pointer's type does not match the original
1368 // derived pointer's type, cast the new derived pointer to match it. Same
1369 // reasoning as above.
1370 Value *ActualReplacement = Replacement;
1371 if (Replacement->getType() != ToReplace->getType()) {
1372 ActualReplacement =
1373 Builder.CreateBitCast(Replacement, ToReplace->getType());
1374 }
1375 ToReplace->replaceAllUsesWith(ActualReplacement);
1376 ToReplace->eraseFromParent();
1377
1378 MadeChange = true;
1379 }
1380 return MadeChange;
1381}
1382
1383// Turns this:
1384//
1385// %base = ...
1386// %ptr = gep %base + 15
1387// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1388// %base' = relocate(%tok, i32 4, i32 4)
1389// %ptr' = relocate(%tok, i32 4, i32 5)
1390// %val = load %ptr'
1391//
1392// into this:
1393//
1394// %base = ...
1395// %ptr = gep %base + 15
1396// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1397// %base' = gc.relocate(%tok, i32 4, i32 4)
1398// %ptr' = gep %base' + 15
1399// %val = load %ptr'
1400bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1401 bool MadeChange = false;
1402 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1403 for (auto *U : I.users())
1404 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1405 // Collect all the relocate calls associated with a statepoint
1406 AllRelocateCalls.push_back(Relocate);
1407
1408 // We need at least one base pointer relocation + one derived pointer
1409 // relocation to mangle
1410 if (AllRelocateCalls.size() < 2)
1411 return false;
1412
1413 // RelocateInstMap is a mapping from the base relocate instruction to the
1414 // corresponding derived relocate instructions
1415 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1416 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1417 if (RelocateInstMap.empty())
1418 return false;
1419
1420 for (auto &Item : RelocateInstMap)
1421 // Item.first is the RelocatedBase to offset against
1422 // Item.second is the vector of Targets to replace
1423 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1424 return MadeChange;
1425}
1426
1427/// Sink the specified cast instruction into its user blocks.
///
/// Each user block other than the defining block receives at most one clone of
/// \p CI, placed at that block's first insertion point, and the use is
/// redirected to the clone. \p CI is erased once it has no uses left.
/// \returns true if any use was redirected or \p CI was erased.
1428static bool SinkCast(CastInst *CI) {
1429 BasicBlock *DefBB = CI->getParent();
1430
1431 /// InsertedCasts - Only insert a cast in each block once.
// NOTE(review): the embedded listing numbers jump from 1431 to 1433; the
// declaration of InsertedCasts (indexed by UserBB below) is missing — restore
// it from upstream.
1433
1434 bool MadeChange = false;
1435 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1436 UI != E;) {
1437 Use &TheUse = UI.getUse();
// NOTE(review): the embedded listing numbers jump from 1437 to 1439; the line
// that defines `User` from the iterator is missing — restore it from upstream.
1439
1440 // Figure out which BB this cast is used in. For PHI's this is the
1441 // appropriate predecessor block.
1442 BasicBlock *UserBB = User->getParent();
1443 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1444 UserBB = PN->getIncomingBlock(TheUse);
1445 }
1446
1447 // Preincrement use iterator so we don't invalidate it.
1448 ++UI;
1449
1450 // The first insertion point of a block containing an EH pad is after the
1451 // pad. If the pad is the user, we cannot sink the cast past the pad.
1452 if (User->isEHPad())
1453 continue;
1454
1455 // If the block selected to receive the cast is an EH pad that does not
1456 // allow non-PHI instructions before the terminator, we can't sink the
1457 // cast.
1458 if (UserBB->getTerminator()->isEHPad())
1459 continue;
1460
1461 // If this user is in the same block as the cast, don't change the cast.
1462 if (UserBB == DefBB)
1463 continue;
1464
1465 // If we have already inserted a cast into this block, use it.
1466 CastInst *&InsertedCast = InsertedCasts[UserBB];
1467
1468 if (!InsertedCast) {
1469 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1470 assert(InsertPt != UserBB->end());
1471 InsertedCast = cast<CastInst>(CI->clone());
1472 InsertedCast->insertBefore(*UserBB, InsertPt);
1473 }
1474
1475 // Replace a use of the cast with a use of the new cast.
1476 TheUse = InsertedCast;
1477 MadeChange = true;
1478 ++NumCastUses;
1479 }
1480
1481 // If we removed all uses, nuke the cast.
1482 if (CI->use_empty()) {
1483 salvageDebugInfo(*CI);
1484 CI->eraseFromParent();
1485 MadeChange = true;
1486 }
1487
1488 return MadeChange;
1489}
1490
1491/// If the specified cast instruction is a noop copy (e.g. it's casting from
1492/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1493/// reduce the number of virtual registers that must be created and coalesced.
1494///
1495/// Return true if any changes are made.
// NOTE(review): the embedded listing numbers jump from 1495 to 1497; the line
// carrying the return type, the function name and the parameters that declare
// CI and TLI (both used below) is missing — restore it from upstream.
1497 const DataLayout &DL) {
1498 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1499 // than sinking only nop casts, but is helpful on some platforms.
1500 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1501 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1502 ASC->getDestAddressSpace()))
1503 return false;
1504 }
1505
1506 // If this is a noop copy,
1507 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1508 EVT DstVT = TLI.getValueType(DL, CI->getType());
1509
1510 // This is an fp<->int conversion?
1511 if (SrcVT.isInteger() != DstVT.isInteger())
1512 return false;
1513
1514 // If this is an extension, it will be a zero or sign extension, which
1515 // isn't a noop.
1516 if (SrcVT.bitsLT(DstVT))
1517 return false;
1518
1519 // If these values will be promoted, find out what they will be promoted
1520 // to. This helps us consider truncates on PPC as noop copies when they
1521 // are.
// NOTE(review): the embedded listing numbers skip 1523 and 1526; the right-hand
// side of each `==` comparison below is missing (presumably the "promote"
// type-legalization action, per the comment above) — restore both lines from
// upstream.
1522 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1524 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1525 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1527 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1528
1529 // If, after promotion, these are the same types, this is a noop copy.
1530 if (SrcVT != DstVT)
1531 return false;
1532
1533 return SinkCast(CI);
1534}
1535
1536// Match a simple increment by constant operation. Note that if a sub is
1537// matched, the step is negated (as if the step had been canonicalized to
1538// an add, even though we leave the instruction alone.)
//
// On success LHS and Step are bound by the matchers and true is returned;
// otherwise false is returned.
1539static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1540 Constant *&Step) {
// NOTE(review): the embedded listing numbers skip 1542 and 1546; in each of the
// two conditions below the opening of the second `||` alternative is missing,
// leaving only its trailing arguments — restore both lines from upstream.
1541 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1543 m_Instruction(LHS), m_Constant(Step)))))
1544 return true;
1545 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1547 m_Instruction(LHS), m_Constant(Step))))) {
1548 Step = ConstantExpr::getNeg(Step);
1549 return true;
1550 }
1551 return false;
1552}
1553
1554/// If given \p PN is an inductive variable with value IVInc coming from the
1555/// backedge, and on each iteration it gets increased by Step, return pair
1556/// <IVInc, Step>. Otherwise, return std::nullopt.
1557static std::optional<std::pair<Instruction *, Constant *>>
1558getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1559 const Loop *L = LI->getLoopFor(PN->getParent());
1560 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1561 return std::nullopt;
1562 auto *IVInc =
1563 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1564 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1565 return std::nullopt;
1566 Instruction *LHS = nullptr;
1567 Constant *Step = nullptr;
1568 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1569 return std::make_pair(IVInc, Step);
1570 return std::nullopt;
1571}
1572
1573static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1574 auto *I = dyn_cast<Instruction>(V);
1575 if (!I)
1576 return false;
1577 Instruction *LHS = nullptr;
1578 Constant *Step = nullptr;
1579 if (!matchIncrement(I, LHS, Step))
1580 return false;
1581 if (auto *PN = dyn_cast<PHINode>(LHS))
1582 if (auto IVInc = getIVIncrement(PN, LI))
1583 return IVInc->first == I;
1584 return false;
1585}
1586
/// Replace the math operation \p BO and the overflow check \p Cmp with a single
/// call to the binary intrinsic \p IID on \p Arg0 and \p Arg1.
///
/// Uses of \p Cmp are redirected to element 1 of the intrinsic result, and
/// (unless \p BO is an xor) uses of \p BO to element 0; both original
/// instructions are then erased.
/// \returns false, leaving the IR untouched, when \p BO lives in a different
///          block than \p Cmp and is not a replaceable IV increment; true
///          otherwise.
1587bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1588 Value *Arg0, Value *Arg1,
1589 CmpInst *Cmp,
1590 Intrinsic::ID IID) {
1591 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1592 if (!isIVIncrement(BO, LI))
1593 return false;
1594 const Loop *L = LI->getLoopFor(BO->getParent());
1595 assert(L && "L should not be null after isIVIncrement()");
1596 // Do not risk on moving increment into a child loop.
1597 if (LI->getLoopFor(Cmp->getParent()) != L)
1598 return false;
1599
1600 // Finally, we need to ensure that the insert point will dominate all
1601 // existing uses of the increment.
1602
1603 auto &DT = getDT();
1604 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1605 // If we're moving up the dom tree, all uses are trivially dominated.
1606 // (This is the common case for code produced by LSR.)
1607 return true;
1608
1609 // Otherwise, special case the single use in the phi recurrence.
1610 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1611 };
1612 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1613 // We used to use a dominator tree here to allow multi-block optimization.
1614 // But that was problematic because:
1615 // 1. It could cause a perf regression by hoisting the math op into the
1616 // critical path.
1617 // 2. It could cause a perf regression by creating a value that was live
1618 // across multiple blocks and increasing register pressure.
1619 // 3. Use of a dominator tree could cause large compile-time regression.
1620 // This is because we recompute the DT on every change in the main CGP
1621 // run-loop. The recomputing is probably unnecessary in many cases, so if
1622 // that was fixed, using a DT here would be ok.
1623 //
1624 // There is one important particular case we still want to handle: if BO is
1625 // the IV increment. Important properties that make it profitable:
1626 // - We can speculate IV increment anywhere in the loop (as long as the
1627 // indvar Phi is its only user);
1628 // - Upon computing Cmp, we effectively compute something equivalent to the
1629 // IV increment (despite it loops differently in the IR). So moving it up
1630 // to the cmp point does not really increase register pressure.
1631 return false;
1632 }
1633
1634 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1635 if (BO->getOpcode() == Instruction::Add &&
1636 IID == Intrinsic::usub_with_overflow) {
1637 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
// NOTE(review): the embedded listing numbers jump from 1637 to 1639; one
// statement is missing here (presumably the negation of the constant Arg1 that
// the comment above describes) — restore it from upstream.
1639 }
1640
1641 // Insert at the first instruction of the pair.
1642 Instruction *InsertPt = nullptr;
1643 for (Instruction &Iter : *Cmp->getParent()) {
1644 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1645 // the overflow intrinsic are defined.
1646 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1647 InsertPt = &Iter;
1648 break;
1649 }
1650 }
1651 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1652
1653 IRBuilder<> Builder(InsertPt);
1654 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1655 if (BO->getOpcode() != Instruction::Xor) {
1656 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1657 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1658 } else
1659 assert(BO->hasOneUse() &&
1660 "Patterns with XOr should use the BO only in the compare");
1661 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1662 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1663 Cmp->eraseFromParent();
1664 BO->eraseFromParent();
1665 return true;
1666}
1667
1668/// Match special-case patterns that check for unsigned add overflow.
///
/// Returns true when a user of the compare's variable operand is an add of that
/// operand with the adjusted constant; false otherwise.
// NOTE(review): the embedded listing numbers jump from 1668 to 1670; the line
// carrying the return type, the function name and the Cmp parameter (used
// below) is missing — restore it from upstream.
1670 BinaryOperator *&Add) {
1671 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1672 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1673 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1674
1675 // We are not expecting non-canonical/degenerate code. Just bail out.
1676 if (isa<Constant>(A))
1677 return false;
1678
// Rewrite B into the constant the matching add must use: 1 for the
// `eq all-ones` form, all-ones (-1) for the `ne 0` form.
1679 ICmpInst::Predicate Pred = Cmp->getPredicate();
1680 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1681 B = ConstantInt::get(B->getType(), 1);
1682 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1683 B = Constant::getAllOnesValue(B->getType());
1684 else
1685 return false;
1686
1687 // Check the users of the variable operand of the compare looking for an add
1688 // with the adjusted constant.
1689 for (User *U : A->users()) {
1690 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
// NOTE(review): the embedded listing numbers jump from 1690 to 1692; the
// out-parameter Add is never written in the visible lines, so the missing line
// presumably stores the matched user into it — restore it from upstream.
1692 return true;
1693 }
1694 }
1695 return false;
1696}
1697
1698/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1699/// intrinsic. Return true if any changes were made.
///
/// On success \p ModifiedDT is set to ModifyDT::ModifyInstDT.
1700bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1701 ModifyDT &ModifiedDT) {
1702 bool EdgeCase = false;
1703 Value *A, *B;
1704 BinaryOperator *Add;
1705 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
// NOTE(review): the embedded listing numbers jump from 1705 to 1707; the
// condition guarding the `return false` below is missing (presumably the call
// to matchUAddWithOverflowConstantEdgeCases named in the next comment) —
// restore it from upstream.
1707 return false;
1708 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1709 A = Add->getOperand(0);
1710 B = Add->getOperand(1);
1711 EdgeCase = true;
1712 }
1713
// NOTE(review): the embedded listing numbers jump from 1713 to 1715; the
// opening of the `if (` whose arguments continue on the next two lines is
// missing — restore it from upstream.
1715 TLI->getValueType(*DL, Add->getType()),
1716 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1717 return false;
1718
1719 // We don't want to move around uses of condition values this late, so we
1720 // check if it is legal to create the call to the intrinsic in the basic
1721 // block containing the icmp.
1722 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1723 return false;
1724
1725 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1726 Intrinsic::uadd_with_overflow))
1727 return false;
1728
1729 // Reset callers - do not crash by iterating over a dead instruction.
1730 ModifiedDT = ModifyDT::ModifyInstDT;
1731 return true;
1732}
1733
/// Try to combine the compare \p Cmp and a matching subtract (or add of the
/// negated constant) into a call to the llvm.usub.with.overflow intrinsic.
///
/// \returns true if the replacement was made, in which case \p ModifiedDT is
///          set to ModifyDT::ModifyInstDT; false otherwise.
1734bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1735 ModifyDT &ModifiedDT) {
1736 // We are not expecting non-canonical/degenerate code. Just bail out.
1737 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1738 if (isa<Constant>(A) && isa<Constant>(B))
1739 return false;
1740
1741 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1742 ICmpInst::Predicate Pred = Cmp->getPredicate();
1743 if (Pred == ICmpInst::ICMP_UGT) {
1744 std::swap(A, B);
1745 Pred = ICmpInst::ICMP_ULT;
1746 }
1747 // Convert special-case: (A == 0) is the same as (A u< 1).
1748 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1749 B = ConstantInt::get(B->getType(), 1);
1750 Pred = ICmpInst::ICMP_ULT;
1751 }
1752 // Convert special-case: (A != 0) is the same as (0 u< A).
1753 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1754 std::swap(A, B);
1755 Pred = ICmpInst::ICMP_ULT;
1756 }
1757 if (Pred != ICmpInst::ICMP_ULT)
1758 return false;
1759
1760 // Walk the users of a variable operand of a compare looking for a subtract or
1761 // add with that same operand. Also match the 2nd operand of the compare to
1762 // the add/sub, but that may be a negated constant operand of an add.
1763 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1764 BinaryOperator *Sub = nullptr;
1765 for (User *U : CmpVariableOperand->users()) {
1766 // A - B, A u< B --> usubo(A, B)
1767 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
// NOTE(review): the embedded listing numbers jump from 1767 to 1769; Sub is
// never assigned in the visible lines, so the missing line presumably stores
// the matched user into it — restore it from upstream.
1769 break;
1770 }
1771
1772 // A + (-C), A u< C (canonicalized form of (sub A, C))
1773 const APInt *CmpC, *AddC;
1774 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1775 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
// NOTE(review): the embedded listing numbers jump from 1775 to 1777; as above,
// the line that presumably assigns Sub is missing — restore it from upstream.
1777 break;
1778 }
1779 }
1780 if (!Sub)
1781 return false;
1782
// NOTE(review): the embedded listing numbers jump from 1782 to 1784; the
// opening of the `if (` whose arguments continue on the next two lines is
// missing — restore it from upstream.
1784 TLI->getValueType(*DL, Sub->getType()),
1785 Sub->hasNUsesOrMore(1)))
1786 return false;
1787
1788 // We don't want to move around uses of condition values this late, so we
1789 // check if it is legal to create the call to the intrinsic in the basic
1790 // block containing the icmp.
1791 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1792 return false;
1793
1794 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1795 Cmp, Intrinsic::usub_with_overflow))
1796 return false;
1797
1798 // Reset callers - do not crash by iterating over a dead instruction.
1799 ModifiedDT = ModifyDT::ModifyInstDT;
1800 return true;
1801}
1802
1803// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1804// The same transformation exists in DAG combiner, but we repeat it here because
1805// DAG builder can break the pattern by moving icmp into a successor block.
//
// When the target reports ctpop as fast, the ctpop call is kept and only the
// comparison is rewritten (and only if ctpop(x) is known non-zero). Otherwise
// the compare is replaced by bit arithmetic on x. Returns true if Cmp was
// changed or replaced.
1806bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1807 CmpPredicate Pred;
1808 Value *X;
1809 const APInt *C;
1810
1811 // (icmp (ctpop x), c)
1812 if (!match(Cmp, m_ICmp(Pred, m_Ctpop(m_Value(X)), m_APIntAllowPoison(C))))
1813 return false;
1814
1815 // We're only interested in "is power of 2 [or zero]" patterns.
1816 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1817 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1818 (Pred == CmpInst::ICMP_UGT && *C == 1);
1819 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1820 return false;
1821
1822 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1823 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1824 // and otherwise expand ctpop into a few simple instructions.
1825 Type *OpTy = X->getType();
1826 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1827 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
// Operand 0 of Cmp is the ctpop call itself, so the query is about ctpop(x).
1828 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1829 return false;
1830
1831 // ctpop(x) == 1 -> ctpop(x) u< 2
1832 // ctpop(x) != 1 -> ctpop(x) u> 1
// Cmp is updated in place; the != case keeps its existing constant operand.
1833 if (Pred == ICmpInst::ICMP_EQ) {
1834 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1835 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1836 } else {
1837 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1838 }
1839 return true;
1840 }
1841
// ctpop is not fast: build a replacement compare immediately before Cmp.
1842 Value *NewCmp;
1843 if (IsPowerOf2OrZeroTest ||
1844 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1845 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1846 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1847 IRBuilder<> Builder(Cmp);
// x - 1 is emitted as x + (-1), i.e. an add of the all-ones constant.
1848 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1849 Value *And = Builder.CreateAnd(X, Sub);
1850 CmpInst::Predicate NewPred =
1851 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
// NOTE(review): listing line 1852 (the true arm of this conditional) is
// missing from this extract; per the comment above it is presumably the
// equality predicate - restore from the upstream file.
1853 : CmpInst::ICMP_NE;
1854 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1855 } else {
1856 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1857 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1858 IRBuilder<> Builder(Cmp);
1859 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1860 Value *Xor = Builder.CreateXor(X, Sub);
1861 CmpInst::Predicate NewPred =
// NOTE(review): listing line 1862 (the initializer of NewPred) is missing from
// this extract; per the comment above it presumably selects u> for == and u<=
// for != - restore from the upstream file.
1863 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1864 }
1865
1866 Cmp->replaceAllUsesWith(NewCmp);
// NOTE(review): listing line 1867 is missing from this extract; presumably it
// disposes of the now-unused Cmp - confirm against the upstream file.
1868 return true;
1869}
1870
1871/// Sink the given CmpInst into user blocks to reduce the number of virtual
1872/// registers that must be created and coalesced. This is a clear win except on
1873/// targets with multiple condition code registers (PowerPC), where it might
1874/// lose; some adjustment may be wanted there.
1875///
/// Each non-PHI user located in a different block than Cmp is redirected to a
/// copy of Cmp created at the first insertion point of the user's block. The
/// original Cmp is erased once it has no uses left.
///
1876/// Return true if any changes are made.
1877static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1878 const DataLayout &DL) {
1879 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1880 return false;
1881
1882 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1883 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1884 return false;
1885
// True when at least one user would keep the original Cmp alive, because the
// loop below skips PHI users and users in Cmp's own block.
1886 bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1887 return isa<PHINode>(U) ||
1888 cast<Instruction>(U)->getParent() == Cmp->getParent();
1889 });
1890
1891 // Avoid sinking larger than legal integer comparisons unless its ONLY used in
1892 // another BB.
1893 if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1894 Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1895 DL.getLargestLegalIntTypeSizeInBits())
1896 return false;
1897
1898 // Only insert a cmp in each block once.
// NOTE(review): listing line 1899 is missing from this extract; presumably it
// declares InsertedCmps, the per-block map indexed below - restore from the
// upstream file.
1900
1901 bool MadeChange = false;
1902 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1903 UI != E;) {
1904 Use &TheUse = UI.getUse();
// NOTE(review): listing line 1905 is missing from this extract; presumably it
// declares User as the instruction at *UI - restore from the upstream file.
1906
1907 // Preincrement use iterator so we don't invalidate it.
1908 ++UI;
1909
1910 // Don't bother for PHI nodes.
1911 if (isa<PHINode>(User))
1912 continue;
1913
1914 // Figure out which BB this cmp is used in.
1915 BasicBlock *UserBB = User->getParent();
1916 BasicBlock *DefBB = Cmp->getParent();
1917
1918 // If this user is in the same block as the cmp, don't change the cmp.
1919 if (UserBB == DefBB)
1920 continue;
1921
1922 // If we have already inserted a cmp into this block, use it.
1923 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1924
1925 if (!InsertedCmp) {
1926 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1927 assert(InsertPt != UserBB->end());
1928 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1929 Cmp->getOperand(0), Cmp->getOperand(1), "");
1930 InsertedCmp->insertBefore(*UserBB, InsertPt);
1931 // Propagate the debug info.
1932 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1933 }
1934
1935 // Replace a use of the cmp with a use of the new cmp.
1936 TheUse = InsertedCmp;
1937 MadeChange = true;
1938 ++NumCmpUses;
1939 }
1940
1941 // If we removed all uses, nuke the cmp.
// This also fires (and reports a change) when Cmp had no uses to begin with.
1942 if (Cmp->use_empty()) {
1943 Cmp->eraseFromParent();
1944 MadeChange = true;
1945 }
1946
1947 return MadeChange;
1948}
1949
1950/// For pattern like:
1951///
1952/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1953/// ...
1954/// DomBB:
1955/// ...
1956/// br DomCond, TrueBB, CmpBB
1957/// CmpBB: (with DomBB being the single predecessor)
1958/// ...
1959/// Cmp = icmp eq CmpOp0, CmpOp1
1960/// ...
1961///
1962/// It would use two comparison on targets that lowering of icmp sgt/slt is
1963/// different from lowering of icmp eq (PowerPC). This function try to convert
1964/// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1965/// After that, DomCond and Cmp can use the same comparison so reduce one
1966/// comparison.
1967///
1968/// Return true if any changes are made.
// NOTE(review): listing line 1969 (the first line of the signature) is missing
// from this extract. The call site in optimizeCmp passes (Cmp, *TLI) to
// foldICmpWithDominatingICmp - restore from the upstream file.
1970 const TargetLowering &TLI) {
// NOTE(review): listing line 1971 (the condition guarding this early return)
// is missing from this extract; presumably a target query on TLI - restore
// from the upstream file.
1972 return false;
1973
// Only equality compares are rewritten.
1974 ICmpInst::Predicate Pred = Cmp->getPredicate();
1975 if (Pred != ICmpInst::ICMP_EQ)
1976 return false;
1977
1978 // If icmp eq has users other than CondBrInst and SelectInst, converting it to
1979 // icmp slt/sgt would introduce more redundant LLVM IR.
1980 for (User *U : Cmp->users()) {
1981 if (isa<CondBrInst>(U))
1982 continue;
// A select only qualifies when Cmp is its condition, not one of its values.
1983 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1984 continue;
1985 return false;
1986 }
1987
1988 // This is a cheap/incomplete check for dominance - just match a single
1989 // predecessor with a conditional branch.
1990 BasicBlock *CmpBB = Cmp->getParent();
1991 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1992 if (!DomBB)
1993 return false;
1994
1995 // We want to ensure that the only way control gets to the comparison of
1996 // interest is that a less/greater than comparison on the same operands is
1997 // false.
1998 Value *DomCond;
1999 BasicBlock *TrueBB, *FalseBB;
2000 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
2001 return false;
2002 if (CmpBB != FalseBB)
2003 return false;
2004
// The dominating condition must be a signed </> compare on the same operands
// in the same order.
2005 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
2006 CmpPredicate DomPred;
2007 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
2008 return false;
2009 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
2010 return false;
2011
2012 // Convert the equality comparison to the opposite of the dominating
2013 // comparison and swap the direction for all branch/select users.
2014 // We have conceptually converted:
2015 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
2016 // to
2017 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
2018 // And similarly for branches.
2019 for (User *U : Cmp->users()) {
2020 if (auto *BI = dyn_cast<CondBrInst>(U)) {
2021 BI->swapSuccessors();
2022 continue;
2023 }
2024 if (auto *SI = dyn_cast<SelectInst>(U)) {
2025 // Swap operands
2026 SI->swapValues();
2027 SI->swapProfMetadata();
2028 continue;
2029 }
// The user scan above already rejected every other kind of user.
2030 llvm_unreachable("Must be a branch or a select");
2031 }
2032 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
2033 return true;
2034}
2035
2036/// Many architectures use the same instruction for both subtract and cmp. Try
2037/// to swap cmp operands to match subtract operations to allow for CSE.
///
/// Returns true if the operands of Cmp were swapped, false otherwise.
// NOTE(review): listing line 2038 (the signature) is missing from this
// extract; the body only shows that it takes a compare named Cmp - restore
// from the upstream file.
2039 Value *Op0 = Cmp->getOperand(0);
2040 Value *Op1 = Cmp->getOperand(1);
// Skip non-integer compares, compares with a constant operand, and compares
// of a value with itself.
2041 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
2042 isa<Constant>(Op1) || Op0 == Op1)
2043 return false;
2044
2045 // If a subtract already has the same operands as a compare, swapping would be
2046 // bad. If a subtract has the same operands as a compare but in reverse order,
2047 // then swapping is good.
// GoodToSwap is a net vote: +1 per reversed-order subtract, -1 per same-order
// subtract. Only users of Op0 are scanned, which is enough because any such
// subtract uses both operands.
2048 int GoodToSwap = 0;
2049 unsigned NumInspected = 0;
2050 for (const User *U : Op0->users()) {
2051 // Avoid walking many users.
2052 if (++NumInspected > 128)
2053 return false;
2054 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2055 GoodToSwap++;
2056 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2057 GoodToSwap--;
2058 }
2059
// Swap only when reversed-order subtracts strictly outnumber same-order ones.
2060 if (GoodToSwap > 0) {
2061 Cmp->swapOperands();
2062 return true;
2063 }
2064 return false;
2065}
2066
// Replaces a floating-point compare with an is-fpclass test when the compare
// is equivalent to a class test for infinity, or infinity-or-NaN, or the
// complement of one of those. Returns true if Cmp's uses were replaced.
2067static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2068 const DataLayout &DL) {
2069 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2070 if (!FCmp)
2071 return false;
2072
2073 // Don't fold if the target offers free fabs and the predicate is legal.
2074 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2075 if (TLI.isFAbsFree(VT) &&
// NOTE(review): listing line 2076 (the second half of this condition, i.e.
// the predicate-legality query that takes VT.getSimpleVT()) is missing from
// this extract - restore from the upstream file.
2077 VT.getSimpleVT()))
2078 return false;
2079
2080 // Reverse the canonicalization if it is a FP class test
2081 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2082 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2083 };
// fcmpToClassTest yields the tested value and the class mask; a null value
// means the compare is not expressible as a class test.
2084 auto [ClassVal, ClassTest] =
2085 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2086 FCmp->getOperand(0), FCmp->getOperand(1));
2087 if (!ClassVal)
2088 return false;
2089
// Accept the mask itself or its complement.
2090 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2091 return false;
2092
2093 IRBuilder<> Builder(Cmp);
2094 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2095 Cmp->replaceAllUsesWith(IsFPClass);
// NOTE(review): listing line 2096 is missing from this extract; presumably it
// disposes of the now-unused Cmp - confirm against the upstream file.
2097 return true;
2098}
2099
// Recognizes `urem (PN [nuw+ AddOffset]), RemAmt` where PN is a two-input loop
// induction PHI stepping by one with a no-unsigned-wrap increment, and RemAmt
// and AddOffset are loop invariant.
//
// On success returns true and fills the four out-parameters. AddInstOut and
// AddOffsetOut are null when the urem operand is the PHI itself. On failure
// returns false and leaves the out-parameters untouched.
//
// NOTE(review): listing line 2100 (the start of the signature) is missing from
// this extract. The call site in foldURemOfLoopIncrement names this function
// isRemOfLoopIncrementWithLoopInvariant - restore from the upstream file.
2101 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2102 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2103 Value *Incr, *RemAmt;
2104 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2105 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2106 return false;
2107
2108 Value *AddInst, *AddOffset;
2109 // Find out loop increment PHI.
2110 auto *PN = dyn_cast<PHINode>(Incr);
2111 if (PN != nullptr) {
2112 AddInst = nullptr;
2113 AddOffset = nullptr;
2114 } else {
2115 // Search through a NUW add on top of the loop increment.
2116 Value *V0, *V1;
2117 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2118 return false;
2119
2120 AddInst = Incr;
// Try either operand of the add as the PHI; the other one is the offset.
// PN can still be null after this; that is checked just below.
2121 PN = dyn_cast<PHINode>(V0);
2122 if (PN != nullptr) {
2123 AddOffset = V1;
2124 } else {
2125 PN = dyn_cast<PHINode>(V1);
2126 AddOffset = V0;
2127 }
2128 }
2129
2130 if (!PN)
2131 return false;
2132
2133 // This isn't strictly necessary, what we really need is one increment and any
2134 // amount of initial values all being the same.
2135 if (PN->getNumIncomingValues() != 2)
2136 return false;
2137
2138 // Only trivially analyzable loops.
2139 Loop *L = LI->getLoopFor(PN->getParent());
2140 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2141 return false;
2142
2143 // Req that the remainder is in the loop
2144 if (!L->contains(Rem))
2145 return false;
2146
2147 // Only works if the remainder amount is a loop invariant
2148 if (!L->isLoopInvariant(RemAmt))
2149 return false;
2150
2151 // Only works if the AddOffset is a loop invariant
2152 if (AddOffset && !L->isLoopInvariant(AddOffset))
2153 return false;
2154
2155 // Is the PHI a loop increment?
// The result is used below as a pair: first is the increment instruction,
// second is the step value.
2156 auto LoopIncrInfo = getIVIncrement(PN, LI);
2157 if (!LoopIncrInfo)
2158 return false;
2159
2160 // We need remainder_amount % increment_amount to be zero. Increment of one
2161 // satisfies that without any special logic and is overwhelmingly the common
2162 // case.
2163 if (!match(LoopIncrInfo->second, m_One()))
2164 return false;
2165
2166 // Need the increment to not overflow.
2167 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2168 return false;
2169
2170 // Set output variables.
2171 RemAmtOut = RemAmt;
2172 LoopIncrPNOut = PN;
2173 AddInstOut = AddInst;
2174 AddOffsetOut = AddOffset;
2175
2176 return true;
2177}
2178
2179// Try to transform:
2180//
2181// for(i = Start; i < End; ++i)
2182// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2183//
2184// ->
2185//
2186// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2187// for(i = Start; i < End; ++i, ++rem)
2188// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
//
// Returns true if Rem was replaced by a new PHI and erased. The blocks touched
// by the rewrite are added to FreshBBs.
//
// NOTE(review): listing lines 2189 and 2191 (parts of the signature) are
// missing from this extract. The call site in optimizeURem passes
// (Rem, DL, LI, FreshBBs, IsHugeFunc) - restore from the upstream file.
2190 const LoopInfo *LI,
2192 bool IsHuge) {
2193 Value *AddOffset, *RemAmt, *AddInst;
2194 PHINode *LoopIncrPN;
2195 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2196 AddOffset, LoopIncrPN))
2197 return false;
2198
2199 // Only non-constant remainder as the extra IV is probably not profitable
2200 // in that case.
2201 //
2202 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2203 // we can rule out register pressure and ensure this `urem` is executed each
2204 // iteration, it's probably profitable to handle the const case as well.
2205 //
2206 // Potential TODO(2): Should we have a check for how "nested" this remainder
2207 // operation is? The new code runs every iteration so if the remainder is
2208 // guarded behind unlikely conditions this might not be worth it.
2209 if (match(RemAmt, m_ImmConstant()))
2210 return false;
2211
// Start is the induction PHI's value coming from the preheader.
2212 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2213 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2214 // If we have add create initial value for remainder.
2215 // The logic here is:
2216 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant
2217 //
2218 // Only proceed if the expression simplifies (otherwise we can't fully
2219 // optimize out the urem).
2220 if (AddInst) {
2221 assert(AddOffset && "We found an add but missing values");
2222 // Without dom-condition/assumption cache we aren't likely to get much out
2223 // of a context instruction.
2224 Start = simplifyAddInst(Start, AddOffset,
2225 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2226 /*IsNUW=*/true, *DL);
2227 if (!Start)
2228 return false;
2229 }
2230
2231 // If we can't fully optimize out the `rem`, skip this transform.
2232 Start = simplifyURemInst(Start, RemAmt, *DL);
2233 if (!Start)
2234 return false;
2235
2236 // Create new remainder with induction variable.
2237 Type *Ty = Rem->getType();
2238 IRBuilder<> Builder(Rem->getContext());
2239
// The new PHI is created next to the existing induction PHI.
2240 Builder.SetInsertPoint(LoopIncrPN);
2241 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2242
// The step/wrap sequence is emitted at the instruction that feeds the
// induction PHI from the latch.
2243 Builder.SetInsertPoint(cast<Instruction>(
2244 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2245 // `(add (urem x, y), 1)` is always nuw.
2246 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2247 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2248 Value *RemSel =
2249 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2250
2251 NewRem->addIncoming(Start, L->getLoopPreheader());
2252 NewRem->addIncoming(RemSel, L->getLoopLatch());
2253
2254 // Insert all touched BBs.
2255 FreshBBs.insert(LoopIncrPN->getParent());
2256 FreshBBs.insert(L->getLoopLatch());
2257 FreshBBs.insert(Rem->getParent());
2258 if (AddInst)
2259 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2260 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2261 Rem->eraseFromParent();
// The add that fed the urem is erased only if the urem was its last user.
2262 if (AddInst && AddInst->use_empty())
2263 cast<Instruction>(AddInst)->eraseFromParent();
2264 return true;
2265}
2266
2267bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2268 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2269 return true;
2270 return false;
2271}
2272
// Runs the compare-specific transforms on Cmp in a fixed order and stops at
// the first one that reports a change. Returns true if any transform fired.
// ModifiedDT is forwarded to the two overflow-intrinsic combines.
2273bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2274 if (sinkCmpExpression(Cmp, *TLI, *DL))
2275 return true;
2276
2277 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2278 return true;
2279
2280 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2281 return true;
2282
2283 if (unfoldPowerOf2Test(Cmp))
2284 return true;
2285
2286 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2287 return true;
2288
// NOTE(review): listing line 2289 (the condition guarding this return, i.e.
// one more transform call) is missing from this extract - restore from the
// upstream file.
2290 return true;
2291
2292 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2293 return true;
2294
2295 return false;
2296}
2297
2298/// Duplicate and sink the given 'and' instruction into user blocks where it is
2299/// used in a compare to allow isel to generate better code for targets where
2300/// this operation can be combined.
2301///
2302/// Return true if any changes are made.
// NOTE(review): listing line 2303 (the first line of the signature) is missing
// from this extract; the body uses an 'and' instruction named AndI and a
// target-lowering reference named TLI - restore from the upstream file.
2304 SetOfInstrs &InsertedInsts) {
2305 // Double-check that we're not trying to optimize an instruction that was
2306 // already optimized by some other part of this pass.
2307 assert(!InsertedInsts.count(AndI) &&
2308 "Attempting to optimize already optimized and instruction");
// InsertedInsts is only read by the assert above; the cast silences the
// unused-parameter warning in builds without assertions.
2309 (void)InsertedInsts;
2310
2311 // Nothing to do for single use in same basic block.
2312 if (AndI->hasOneUse() &&
2313 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2314 return false;
2315
2316 // Try to avoid cases where sinking/duplicating is likely to increase register
2317 // pressure.
2318 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2319 !isa<ConstantInt>(AndI->getOperand(1)) &&
2320 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2321 return false;
2322
// First pass: every user must be an icmp whose second operand is constant
// zero; otherwise nothing is changed.
2323 for (auto *U : AndI->users()) {
// NOTE(review): listing line 2324 is missing from this extract; presumably it
// declares User as the instruction for U - restore from the upstream file.
2325
2326 // Only sink 'and' feeding icmp with 0.
2327 if (!isa<ICmpInst>(User))
2328 return false;
2329
2330 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2331 if (!CmpC || !CmpC->isZero())
2332 return false;
2333 }
2334
2335 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2336 return false;
2337
2338 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2339 LLVM_DEBUG(AndI->getParent()->dump());
2340
2341 // Push the 'and' into the same block as the icmp 0. There should only be
2342 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2343 // others, so we don't need to keep track of which BBs we insert into.
2344 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2345 UI != E;) {
2346 Use &TheUse = UI.getUse();
// NOTE(review): listing line 2347 is missing from this extract; presumably it
// declares User as the instruction at *UI - restore from the upstream file.
2348
2349 // Preincrement use iterator so we don't invalidate it.
2350 ++UI;
2351
2352 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2353
2354 // Keep the 'and' in the same place if the use is already in the same block.
2355 Instruction *InsertPt =
2356 User->getParent() == AndI->getParent() ? AndI : User;
2357 Instruction *InsertedAnd = BinaryOperator::Create(
2358 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2359 InsertPt->getIterator());
2360 // Propagate the debug info.
2361 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2362
2363 // Replace a use of the 'and' with a use of the new 'and'.
2364 TheUse = InsertedAnd;
2365 ++NumAndUses;
2366 LLVM_DEBUG(User->getParent()->dump());
2367 }
2368
2369 // We removed all uses, nuke the and.
// Every use was redirected to its own copy above, so the erase is
// unconditional.
2370 AndI->eraseFromParent();
2371 return true;
2372}
2373
2374/// Check if the candidates could be combined with a shift instruction, which
2375/// includes:
2376/// 1. Truncate instruction
2377/// 2. And instruction and the imm is a mask of the low bits:
2378/// imm & (imm+1) == 0
///
/// Returns true when the user qualifies under one of the two cases above.
// NOTE(review): listing line 2379 (the signature) is missing from this
// extract; the body only shows that it takes an instruction named User -
// restore from the upstream file.
2380 if (!isa<TruncInst>(User)) {
2381 if (User->getOpcode() != Instruction::And ||
// NOTE(review): listing line 2382 (the second half of this condition) is
// missing from this extract. The cast<ConstantInt> below requires operand 1
// to be a ConstantInt, so presumably that is what is checked here - restore
// from the upstream file.
2383 return false;
2384
2385 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2386
// Non-zero means the immediate is not a contiguous mask of low bits.
2387 if ((Cimm & (Cimm + 1)).getBoolValue())
2388 return false;
2389 }
2390 return true;
2391}
2392
2393/// Sink both shift and truncate instruction to the use of truncate's BB.
///
/// For each user of the truncate that lives in another block and is not
/// reported legal by the (partly missing) target query below, a copy of the
/// shift and of the truncate is created in that user's block. Returns true if
/// at least one such pair was created.
2394static bool
// NOTE(review): listing lines 2395 and 2396 (the function name and its leading
// parameters) are missing from this extract. The call site in the shift-
// sinking routine passes (ShiftI, User, CI, InsertedShifts, TLI, DL) to
// SinkShiftAndTruncate - restore from the upstream file.
2397 const TargetLowering &TLI, const DataLayout &DL) {
2398 BasicBlock *UserBB = User->getParent();
// NOTE(review): listing line 2399 is missing from this extract; presumably it
// declares InsertedTruncs, the per-block map indexed below - restore from the
// upstream file.
2400 auto *TruncI = cast<TruncInst>(User);
2401 bool MadeChange = false;
2402
2403 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2404 TruncE = TruncI->user_end();
2405 TruncUI != TruncE;) {
2406
2407 Use &TruncTheUse = TruncUI.getUse();
2408 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2409 // Preincrement use iterator so we don't invalidate it.
2410
2411 ++TruncUI;
2412
// Users whose opcode has no ISD equivalent are skipped.
2413 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2414 if (!ISDOpcode)
2415 continue;
2416
2417 // If the use is actually a legal node, there will not be an
2418 // implicit truncate.
2419 // FIXME: always querying the result type is just an
2420 // approximation; some nodes' legality is determined by the
2421 // operand or other means. There's no good way to find out though.
// NOTE(review): listing line 2422 (the opening of this condition, including
// the target query it calls) is missing from this extract - restore from the
// upstream file.
2423 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2424 continue;
2425
2426 // Don't bother for PHI nodes.
2427 if (isa<PHINode>(TruncUser))
2428 continue;
2429
2430 BasicBlock *TruncUserBB = TruncUser->getParent();
2431
2432 if (UserBB == TruncUserBB)
2433 continue;
2434
2435 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2436 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2437
// A new pair is created, and the use rewritten, only when this block has
// neither a recorded shift nor a recorded trunc yet. If either already
// exists, the use is left pointing at the original truncate.
2438 if (!InsertedShift && !InsertedTrunc) {
2439 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2440 assert(InsertPt != TruncUserBB->end());
2441 // Sink the shift
2442 if (ShiftI->getOpcode() == Instruction::AShr)
2443 InsertedShift =
2444 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2445 else
2446 InsertedShift =
2447 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2448 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2449 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2450
2451 // Sink the trunc
// The first insertion point is queried again and advanced by one;
// presumably it now refers to the shift just inserted, so the trunc lands
// directly after it - TODO confirm.
2452 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2453 TruncInsertPt++;
2454 // It will go ahead of any debug-info.
2455 TruncInsertPt.setHeadBit(true);
2456 assert(TruncInsertPt != TruncUserBB->end());
2457
2458 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2459 TruncI->getType(), "");
2460 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2461 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2462
2463 MadeChange = true;
2464
2465 TruncTheUse = InsertedTrunc;
2466 }
2467 }
2468 return MadeChange;
2469}
2470
2471/// Sink the shift *right* instruction into user blocks if the uses could
2472/// potentially be combined with this shift instruction and generate BitExtract
2473/// instruction. It will only be applied if the architecture supports BitExtract
2474/// instruction. Here is an example:
2475/// BB1:
2476/// %x.extract.shift = lshr i64 %arg1, 32
2477/// BB2:
2478/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2479/// ==>
2480///
2481/// BB2:
2482/// %x.extract.shift.1 = lshr i64 %arg1, 32
2483/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2484///
2485/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2486/// instruction.
2487/// Return true if any changes are made.
2489 const TargetLowering &TLI,
2490 const DataLayout &DL) {
2491 BasicBlock *DefBB = ShiftI->getParent();
2492
2493 /// Only insert instructions in each block once.
2495
2496 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2497
2498 bool MadeChange = false;
2499 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2500 UI != E;) {
2501 Use &TheUse = UI.getUse();
2503 // Preincrement use iterator so we don't invalidate it.
2504 ++UI;
2505
2506 // Don't bother for PHI nodes.
2507 if (isa<PHINode>(User))
2508 continue;
2509
2511 continue;
2512
2513 BasicBlock *UserBB = User->getParent();
2514
2515 if (UserBB == DefBB) {
2516 // If the shift and truncate instruction are in the same BB. The use of
2517 // the truncate(TruncUse) may still introduce another truncate if not
2518 // legal. In this case, we would like to sink both shift and truncate
2519 // instruction to the BB of TruncUse.
2520 // for example:
2521 // BB1:
2522 // i64 shift.result = lshr i64 opnd, imm
2523 // trunc.result = trunc shift.result to i16
2524 //
2525 // BB2:
2526 // ----> We will have an implicit truncate here if the architecture does
2527 // not have i16 compare.
2528 // cmp i16 trunc.result, opnd2
2529 //
2530 if (isa<TruncInst>(User) &&
2531 shiftIsLegal
2532 // If the type of the truncate is legal, no truncate will be
2533 // introduced in other basic blocks.
2534 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2535 MadeChange =
2536 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2537
2538 continue;
2539 }
2540 // If we have already inserted a shift into this block, use it.
2541 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2542
2543 if (!InsertedShift) {
2544 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2545 assert(InsertPt != UserBB->end());
2546
2547 if (ShiftI->getOpcode() == Instruction::AShr)
2548 InsertedShift =
2549 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2550 else
2551 InsertedShift =
2552 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2553 InsertedShift->insertBefore(*UserBB, InsertPt);
2554 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2555
2556 MadeChange = true;
2557 }
2558
2559 // Replace a use of the shift with a use of the new shift.
2560 TheUse = InsertedShift;
2561 }
2562
2563 // If we removed all uses, or there are none, nuke the shift.
2564 if (ShiftI->use_empty()) {
2565 salvageDebugInfo(*ShiftI);
2566 ShiftI->eraseFromParent();
2567 MadeChange = true;
2568 }
2569
2570 return MadeChange;
2571}
2572
2573/// If counting leading or trailing zeros is an expensive operation and a zero
2574/// input is defined, add a check for zero to avoid calling the intrinsic.
2575///
2576/// We want to transform:
2577/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2578///
2579/// into:
2580/// entry:
2581/// %cmpz = icmp eq i64 %A, 0
2582/// br i1 %cmpz, label %cond.end, label %cond.false
2583/// cond.false:
2584/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2585/// br label %cond.end
2586/// cond.end:
2587/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2588///
2589/// If the transform is performed, return true and set ModifiedDT to
/// ModifyDT::ModifyBBDT.
2590static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2591 DomTreeUpdater *DTU, LoopInfo *LI,
2592 const TargetLowering *TLI,
2593 const DataLayout *DL, ModifyDT &ModifiedDT,
// NOTE(review): listing line 2594 (one parameter line) is missing from this
// extract; the body uses a block set named FreshBBs, so presumably that is
// declared here - restore from the upstream file.
2595 bool IsHugeFunc) {
2596 // If a zero input is undefined, it doesn't make sense to despeculate that.
2597 if (match(CountZeros->getOperand(1), m_One()))
2598 return false;
2599
2600 // If it's cheap to speculate, there's nothing to do.
2601 Type *Ty = CountZeros->getType();
2602 auto IntrinsicID = CountZeros->getIntrinsicID();
2603 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2604 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2605 return false;
2606
2607 // Only handle scalar cases. Anything else requires too much work.
2608 unsigned SizeInBits = Ty->getScalarSizeInBits();
2609 if (Ty->isVectorTy())
2610 return false;
2611
2612 // Bail if the value is never zero.
2613 Use &Op = CountZeros->getOperandUse(0);
2614 if (isKnownNonZero(Op, *DL))
2615 return false;
2616
2617 // The intrinsic will be sunk behind a compare against zero and branch.
2618 BasicBlock *StartBlock = CountZeros->getParent();
2619 BasicBlock *CallBlock = SplitBlock(StartBlock, CountZeros, DTU, LI,
2620 /* MSSAU */ nullptr, "cond.false");
2621 if (IsHugeFunc)
2622 FreshBBs.insert(CallBlock);
2623
2624 // Create another block after the count zero intrinsic. A PHI will be added
2625 // in this block to select the result of the intrinsic or the bit-width
2626 // constant if the input to the intrinsic is zero.
2627 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2628 // Any debug-info after CountZeros should not be included.
2629 SplitPt.setHeadBit(true);
2630 BasicBlock *EndBlock = SplitBlock(CallBlock, &*SplitPt, DTU, LI,
2631 /* MSSAU */ nullptr, "cond.end");
2632 if (IsHugeFunc)
2633 FreshBBs.insert(EndBlock);
2634
2635 // Set up a builder to create a compare, conditional branch, and PHI.
2636 IRBuilder<> Builder(CountZeros->getContext());
2637 Builder.SetInsertPoint(StartBlock->getTerminator());
2638 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2639
2640 // Replace the unconditional branch that was created by the first split with
2641 // a compare against zero and a conditional branch.
2642 Value *Zero = Constant::getNullValue(Ty);
2643 // Avoid introducing branch on poison. This also replaces the ctz operand.
// NOTE(review): listing line 2644 is missing from this extract; presumably it
// is a condition making the freeze below conditional - restore from the
// upstream file.
// Op is a Use reference into CountZeros, so assigning to it rewrites the
// intrinsic's operand as well.
2645 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2646 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2647 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2648 StartBlock->getTerminator()->eraseFromParent();
// Only the new StartBlock -> EndBlock edge is reported here; the two
// SplitBlock calls above were handed DTU themselves.
2649 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}});
2650
2651 // Create a PHI in the end block to select either the output of the intrinsic
2652 // or the bit width of the operand.
2653 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2654 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
// Uses are redirected before the PHI gets its incoming values, so the PHI's
// own reference to CountZeros (added below) is not rewritten.
2655 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2656 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2657 PN->addIncoming(BitWidth, StartBlock);
2658 PN->addIncoming(CountZeros, CallBlock);
2659
2660 // We are explicitly handling the zero case, so we can set the intrinsic's
2661 // undefined zero argument to 'true'. This will also prevent reprocessing the
2662 // intrinsic; we only despeculate when a zero input is defined.
2663 CountZeros->setArgOperand(1, Builder.getTrue());
2664 ModifiedDT = ModifyDT::ModifyBBDT;
2665 return true;
2666}
2667
2668bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2669 BasicBlock *BB = CI->getParent();
2670
2671 // Sink address computing for memory operands into the block.
2672 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2673 return true;
2674
2675 // Align the pointer arguments to this call if the target thinks it's a good
2676 // idea
2677 unsigned MinSize;
2678 Align PrefAlign;
2679 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2680 for (auto &Arg : CI->args()) {
2681 // We want to align both objects whose address is used directly and
2682 // objects whose address is used in casts and GEPs, though it only makes
2683 // sense for GEPs if the offset is a multiple of the desired alignment and
2684 // if size - offset meets the size threshold.
2685 if (!Arg->getType()->isPointerTy())
2686 continue;
2687 APInt Offset(DL->getIndexSizeInBits(
2688 cast<PointerType>(Arg->getType())->getAddressSpace()),
2689 0);
2690 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2691 uint64_t Offset2 = Offset.getLimitedValue();
2692 if (!isAligned(PrefAlign, Offset2))
2693 continue;
2694 AllocaInst *AI;
2695 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign) {
2696 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(*DL);
2697 if (AllocaSize && AllocaSize->getKnownMinValue() >= MinSize + Offset2)
2698 AI->setAlignment(PrefAlign);
2699 }
2700 // Global variables can only be aligned if they are defined in this
2701 // object (i.e. they are uniquely initialized in this object), and
2702 // over-aligning global variables that have an explicit section is
2703 // forbidden.
2704 GlobalVariable *GV;
2705 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2706 GV->getPointerAlignment(*DL) < PrefAlign &&
2707 GV->getGlobalSize(*DL) >= MinSize + Offset2)
2708 GV->setAlignment(PrefAlign);
2709 }
2710 }
2711 // If this is a memcpy (or similar) then we may be able to improve the
2712 // alignment.
2713 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2714 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2715 MaybeAlign MIDestAlign = MI->getDestAlign();
2716 if (!MIDestAlign || DestAlign > *MIDestAlign)
2717 MI->setDestAlignment(DestAlign);
2718 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2719 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2720 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2721 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2722 MTI->setSourceAlignment(SrcAlign);
2723 }
2724 }
2725
2726 // If we have a cold call site, try to sink addressing computation into the
2727 // cold block. This interacts with our handling for loads and stores to
2728 // ensure that we can fold all uses of a potential addressing computation
2729 // into their uses. TODO: generalize this to work over profiling data
2730 if (CI->hasFnAttr(Attribute::Cold) &&
2731 !llvm::shouldOptimizeForSize(BB, PSI, BFI))
2732 for (auto &Arg : CI->args()) {
2733 if (!Arg->getType()->isPointerTy())
2734 continue;
2735 unsigned AS = Arg->getType()->getPointerAddressSpace();
2736 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2737 return true;
2738 }
2739
2740 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2741 if (II) {
2742 switch (II->getIntrinsicID()) {
2743 default:
2744 break;
2745 case Intrinsic::assume:
2746 llvm_unreachable("llvm.assume should have been removed already");
2747 case Intrinsic::allow_runtime_check:
2748 case Intrinsic::allow_ubsan_check:
2749 case Intrinsic::experimental_widenable_condition: {
2750 // Give up on future widening opportunities so that we can fold away dead
2751 // paths and merge blocks before going into block-local instruction
2752 // selection.
2753 if (II->use_empty()) {
2754 II->eraseFromParent();
2755 return true;
2756 }
2757 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2758 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2759 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2760 });
2761 return true;
2762 }
2763 case Intrinsic::objectsize:
2764 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2765 case Intrinsic::is_constant:
2766 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2767 case Intrinsic::aarch64_stlxr:
2768 case Intrinsic::aarch64_stxr: {
2769 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2770 if (!ExtVal || !ExtVal->hasOneUse() ||
2771 ExtVal->getParent() == CI->getParent())
2772 return false;
2773 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2774 ExtVal->moveBefore(CI->getIterator());
2775 // Mark this instruction as "inserted by CGP", so that other
2776 // optimizations don't touch it.
2777 InsertedInsts.insert(ExtVal);
2778 return true;
2779 }
2780
2781 case Intrinsic::launder_invariant_group:
2782 case Intrinsic::strip_invariant_group: {
2783 Value *ArgVal = II->getArgOperand(0);
2784 auto it = LargeOffsetGEPMap.find(II);
2785 if (it != LargeOffsetGEPMap.end()) {
2786 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2787 // Make sure not to have to deal with iterator invalidation
2788 // after possibly adding ArgVal to LargeOffsetGEPMap.
2789 auto GEPs = std::move(it->second);
2790 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2791 LargeOffsetGEPMap.erase(II);
2792 }
2793
2794 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2795 II->eraseFromParent();
2796 return true;
2797 }
2798 case Intrinsic::cttz:
2799 case Intrinsic::ctlz:
2800 // If counting zeros is expensive, try to avoid it.
2801 return despeculateCountZeros(II, DTU, LI, TLI, DL, ModifiedDT, FreshBBs,
2802 IsHugeFunc);
2803 case Intrinsic::fshl:
2804 case Intrinsic::fshr:
2805 return optimizeFunnelShift(II);
2806 case Intrinsic::masked_gather:
2807 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2808 case Intrinsic::masked_scatter:
2809 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2810 case Intrinsic::masked_load:
2811 // Treat v1X masked load as load X type.
2812 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2813 if (VT->getNumElements() == 1) {
2814 Value *PtrVal = II->getArgOperand(0);
2815 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2816 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2817 return true;
2818 }
2819 }
2820 return false;
2821 case Intrinsic::masked_store:
2822 // Treat v1X masked store as store X type.
2823 if (auto *VT =
2824 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2825 if (VT->getNumElements() == 1) {
2826 Value *PtrVal = II->getArgOperand(1);
2827 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2828 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2829 return true;
2830 }
2831 }
2832 return false;
2833 case Intrinsic::umul_with_overflow:
2834 return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
2835 case Intrinsic::smul_with_overflow:
2836 return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
2837 }
2838
2839 SmallVector<Value *, 2> PtrOps;
2840 Type *AccessTy;
2841 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2842 while (!PtrOps.empty()) {
2843 Value *PtrVal = PtrOps.pop_back_val();
2844 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2845 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2846 return true;
2847 }
2848 }
2849
2850 // From here on out we're working with named functions.
2851 auto *Callee = CI->getCalledFunction();
2852 if (!Callee)
2853 return false;
2854
2855 // Lower all default uses of _chk calls. This is very similar
2856 // to what InstCombineCalls does, but here we are only lowering calls
2857 // to fortified library functions (e.g. __memcpy_chk) that have the default
2858 // "don't know" as the objectsize. Anything else should be left alone.
2859 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2860 IRBuilder<> Builder(CI);
2861 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2862 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2863 CI->eraseFromParent();
2864 return true;
2865 }
2866
2867 // SCCP may have propagated, among other things, C++ static variables across
2868 // calls. If this happens to be the case, we may want to undo it in order to
2869 // avoid redundant pointer computation of the constant, as the function method
2870 // returning the constant needs to be executed anyways.
2871 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2872 if (!F->getReturnType()->isPointerTy())
2873 return nullptr;
2874
2875 GlobalVariable *UniformValue = nullptr;
2876 for (auto &BB : *F) {
2877 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2878 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2879 if (!UniformValue)
2880 UniformValue = V;
2881 else if (V != UniformValue)
2882 return nullptr;
2883 } else {
2884 return nullptr;
2885 }
2886 }
2887 }
2888
2889 return UniformValue;
2890 };
2891
2892 if (Callee->hasExactDefinition()) {
2893 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2894 bool MadeChange = false;
2895 for (Use &U : make_early_inc_range(RV->uses())) {
2896 auto *I = dyn_cast<Instruction>(U.getUser());
2897 if (!I || I->getParent() != CI->getParent()) {
2898 // Limit to the same basic block to avoid extending the call-site live
2899 // range, which otherwise could increase register pressure.
2900 continue;
2901 }
2902 if (CI->comesBefore(I)) {
2903 U.set(CI);
2904 MadeChange = true;
2905 }
2906 }
2907
2908 return MadeChange;
2909 }
2910 }
2911
2912 return false;
2913}
2914
2916 const CallInst *CI) {
2917 assert(CI && CI->use_empty());
2918
2919 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2920 switch (II->getIntrinsicID()) {
2921 case Intrinsic::memset:
2922 case Intrinsic::memcpy:
2923 case Intrinsic::memmove:
2924 return true;
2925 default:
2926 return false;
2927 }
2928
2929 LibFunc LF;
2930 Function *Callee = CI->getCalledFunction();
2931 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2932 switch (LF) {
2933 case LibFunc_strcpy:
2934 case LibFunc_strncpy:
2935 case LibFunc_strcat:
2936 case LibFunc_strncat:
2937 return true;
2938 default:
2939 return false;
2940 }
2941
2942 return false;
2943}
2944
2945/// Look for opportunities to duplicate return instructions to the predecessor
2946/// to enable tail call optimizations. The case it is currently looking for is
2947/// the following one. Known intrinsics or library function that may be tail
2948/// called are taken into account as well.
2949/// @code
2950/// bb0:
2951/// %tmp0 = tail call i32 @f0()
2952/// br label %return
2953/// bb1:
2954/// %tmp1 = tail call i32 @f1()
2955/// br label %return
2956/// bb2:
2957/// %tmp2 = tail call i32 @f2()
2958/// br label %return
2959/// return:
2960/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2961/// ret i32 %retval
2962/// @endcode
2963///
2964/// =>
2965///
2966/// @code
2967/// bb0:
2968/// %tmp0 = tail call i32 @f0()
2969/// ret i32 %tmp0
2970/// bb1:
2971/// %tmp1 = tail call i32 @f1()
2972/// ret i32 %tmp1
2973/// bb2:
2974/// %tmp2 = tail call i32 @f2()
2975/// ret i32 %tmp2
2976/// @endcode
2977bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2978 ModifyDT &ModifiedDT) {
2979 if (!BB->getTerminator())
2980 return false;
2981
2982 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2983 if (!RetI)
2984 return false;
2985
2986 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2987
2988 PHINode *PN = nullptr;
2989 ExtractValueInst *EVI = nullptr;
2990 BitCastInst *BCI = nullptr;
2991 Value *V = RetI->getReturnValue();
2992 if (V) {
2993 BCI = dyn_cast<BitCastInst>(V);
2994 if (BCI)
2995 V = BCI->getOperand(0);
2996
2998 if (EVI) {
2999 V = EVI->getOperand(0);
3000 if (!llvm::all_of(EVI->indices(), equal_to(0)))
3001 return false;
3002 }
3003
3004 PN = dyn_cast<PHINode>(V);
3005 }
3006
3007 if (PN && PN->getParent() != BB)
3008 return false;
3009
3010 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
3011 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
3012 if (BC && BC->hasOneUse())
3013 Inst = BC->user_back();
3014
3015 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
3016 return II->getIntrinsicID() == Intrinsic::lifetime_end;
3017 return false;
3018 };
3019
3021
3022 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
3023 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
3024 II && II->getIntrinsicID() == Intrinsic::fake_use) {
3025 // Record the instruction so it can be preserved when the exit block is
3026 // removed. Do not preserve the fake use that uses the result of the
3027 // PHI instruction.
3028 // Do not copy fake uses that use the result of a PHI node.
3029 // FIXME: If we do want to copy the fake use into the return blocks, we
3030 // have to figure out which of the PHI node operands to use for each
3031 // copy.
3032 if (!isa<PHINode>(II->getOperand(0))) {
3033 FakeUses.push_back(II);
3034 }
3035 return true;
3036 }
3037
3038 return false;
3039 };
3040
3041 // Make sure there are no instructions between the first instruction
3042 // and return.
3044 // Skip over pseudo-probes and the bitcast.
3045 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3046 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3047 BI = std::next(BI);
3048 if (&*BI != RetI)
3049 return false;
3050
3051 // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3052 // call.
3053 auto MayBePermittedAsTailCall = [&](const auto *CI) {
3054 return TLI->mayBeEmittedAsTailCall(CI) &&
3055 attributesPermitTailCall(BB->getParent(), CI, RetI, *TLI);
3056 };
3057
3058 SmallVector<BasicBlock *, 4> TailCallBBs;
3059 // Record the call instructions so we can insert any fake uses
3060 // that need to be preserved before them.
3062 if (PN) {
3063 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3064 // Look through bitcasts.
3065 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3066 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3067 BasicBlock *PredBB = PN->getIncomingBlock(I);
3068 // Make sure the phi value is indeed produced by the tail call.
3069 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3070 MayBePermittedAsTailCall(CI)) {
3071 TailCallBBs.push_back(PredBB);
3072 CallInsts.push_back(CI);
3073 } else {
3074 // Consider the cases in which the phi value is indirectly produced by
3075 // the tail call, for example when encountering memset(), memmove(),
3076 // strcpy(), whose return value may have been optimized out. In such
3077 // cases, the value needs to be the first function argument.
3078 //
3079 // bb0:
3080 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3081 // br label %return
3082 // return:
3083 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3084 if (PredBB && PredBB->getSingleSuccessor() == BB)
3086 PredBB->getTerminator()->getPrevNode());
3087
3088 if (CI && CI->use_empty() &&
3089 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3090 IncomingVal == CI->getArgOperand(0) &&
3091 MayBePermittedAsTailCall(CI)) {
3092 TailCallBBs.push_back(PredBB);
3093 CallInsts.push_back(CI);
3094 }
3095 }
3096 }
3097 } else {
3098 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3099 for (BasicBlock *Pred : predecessors(BB)) {
3100 if (!VisitedBBs.insert(Pred).second)
3101 continue;
3102 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3103 CallInst *CI = dyn_cast<CallInst>(I);
3104 if (CI && CI->use_empty() && MayBePermittedAsTailCall(CI)) {
3105 // Either we return void or the return value must be the first
3106 // argument of a known intrinsic or library function.
3107 if (!V || isa<UndefValue>(V) ||
3108 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3109 V == CI->getArgOperand(0))) {
3110 TailCallBBs.push_back(Pred);
3111 CallInsts.push_back(CI);
3112 }
3113 }
3114 }
3115 }
3116 }
3117
3118 bool Changed = false;
3119 for (auto const &TailCallBB : TailCallBBs) {
3120 // Make sure the call instruction is followed by an unconditional branch to
3121 // the return block.
3122 UncondBrInst *BI = dyn_cast<UncondBrInst>(TailCallBB->getTerminator());
3123 if (!BI || BI->getSuccessor() != BB)
3124 continue;
3125
3126 // Duplicate the return into TailCallBB.
3127 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB, DTU);
3129 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3130 BFI->setBlockFreq(BB,
3131 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3132 ModifiedDT = ModifyDT::ModifyBBDT;
3133 Changed = true;
3134 ++NumRetsDup;
3135 }
3136
3137 // If we eliminated all predecessors of the block, delete the block now.
3138 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3139 // Copy the fake uses found in the original return block to all blocks
3140 // that contain tail calls.
3141 for (auto *CI : CallInsts) {
3142 for (auto const *FakeUse : FakeUses) {
3143 auto *ClonedInst = FakeUse->clone();
3144 ClonedInst->insertBefore(CI->getIterator());
3145 }
3146 }
3147 DTU->deleteBB(BB);
3148 }
3149
3150 return Changed;
3151}
3152
3153//===----------------------------------------------------------------------===//
3154// Memory Optimization
3155//===----------------------------------------------------------------------===//
3156
3157namespace {
3158
3159/// This is an extended version of TargetLowering::AddrMode
3160/// which holds actual Value*'s for register values.
3161struct ExtAddrMode : public TargetLowering::AddrMode {
3162 Value *BaseReg = nullptr;
3163 Value *ScaledReg = nullptr;
3164 Value *OriginalValue = nullptr;
3165 bool InBounds = true;
3166
3167 enum FieldName {
3168 NoField = 0x00,
3169 BaseRegField = 0x01,
3170 BaseGVField = 0x02,
3171 BaseOffsField = 0x04,
3172 ScaledRegField = 0x08,
3173 ScaleField = 0x10,
3174 MultipleFields = 0xff
3175 };
3176
3177 ExtAddrMode() = default;
3178
3179 void print(raw_ostream &OS) const;
3180 void dump() const;
3181
3182 // Replace From in ExtAddrMode with To.
3183 // E.g., SExt insts may be promoted and deleted. We should replace them with
3184 // the promoted values.
3185 void replaceWith(Value *From, Value *To) {
3186 if (ScaledReg == From)
3187 ScaledReg = To;
3188 }
3189
3190 FieldName compare(const ExtAddrMode &other) {
3191 // First check that the types are the same on each field, as differing types
3192 // is something we can't cope with later on.
3193 if (BaseReg && other.BaseReg &&
3194 BaseReg->getType() != other.BaseReg->getType())
3195 return MultipleFields;
3196 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3197 return MultipleFields;
3198 if (ScaledReg && other.ScaledReg &&
3199 ScaledReg->getType() != other.ScaledReg->getType())
3200 return MultipleFields;
3201
3202 // Conservatively reject 'inbounds' mismatches.
3203 if (InBounds != other.InBounds)
3204 return MultipleFields;
3205
3206 // Check each field to see if it differs.
3207 unsigned Result = NoField;
3208 if (BaseReg != other.BaseReg)
3209 Result |= BaseRegField;
3210 if (BaseGV != other.BaseGV)
3211 Result |= BaseGVField;
3212 if (BaseOffs != other.BaseOffs)
3213 Result |= BaseOffsField;
3214 if (ScaledReg != other.ScaledReg)
3215 Result |= ScaledRegField;
3216 // Don't count 0 as being a different scale, because that actually means
3217 // unscaled (which will already be counted by having no ScaledReg).
3218 if (Scale && other.Scale && Scale != other.Scale)
3219 Result |= ScaleField;
3220
3221 if (llvm::popcount(Result) > 1)
3222 return MultipleFields;
3223 else
3224 return static_cast<FieldName>(Result);
3225 }
3226
3227 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3228 // with no offset.
3229 bool isTrivial() {
3230 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3231 // trivial if at most one of these terms is nonzero, except that BaseGV and
3232 // BaseReg both being zero actually means a null pointer value, which we
3233 // consider to be 'non-zero' here.
3234 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3235 }
3236
3237 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3238 switch (Field) {
3239 default:
3240 return nullptr;
3241 case BaseRegField:
3242 return BaseReg;
3243 case BaseGVField:
3244 return BaseGV;
3245 case ScaledRegField:
3246 return ScaledReg;
3247 case BaseOffsField:
3248 return ConstantInt::getSigned(IntPtrTy, BaseOffs);
3249 }
3250 }
3251
3252 void SetCombinedField(FieldName Field, Value *V,
3253 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3254 switch (Field) {
3255 default:
3256 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3257 break;
3258 case ExtAddrMode::BaseRegField:
3259 BaseReg = V;
3260 break;
3261 case ExtAddrMode::BaseGVField:
3262 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3263 // in the BaseReg field.
3264 assert(BaseReg == nullptr);
3265 BaseReg = V;
3266 BaseGV = nullptr;
3267 break;
3268 case ExtAddrMode::ScaledRegField:
3269 ScaledReg = V;
3270 // If we have a mix of scaled and unscaled addrmodes then we want scale
3271 // to be the scale and not zero.
3272 if (!Scale)
3273 for (const ExtAddrMode &AM : AddrModes)
3274 if (AM.Scale) {
3275 Scale = AM.Scale;
3276 break;
3277 }
3278 break;
3279 case ExtAddrMode::BaseOffsField:
3280 // The offset is no longer a constant, so it goes in ScaledReg with a
3281 // scale of 1.
3282 assert(ScaledReg == nullptr);
3283 ScaledReg = V;
3284 Scale = 1;
3285 BaseOffs = 0;
3286 break;
3287 }
3288 }
3289};
3290
3291#ifndef NDEBUG
3292static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3293 AM.print(OS);
3294 return OS;
3295}
3296#endif
3297
3298#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3299void ExtAddrMode::print(raw_ostream &OS) const {
3300 bool NeedPlus = false;
3301 OS << "[";
3302 if (InBounds)
3303 OS << "inbounds ";
3304 if (BaseGV) {
3305 OS << "GV:";
3306 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3307 NeedPlus = true;
3308 }
3309
3310 if (BaseOffs) {
3311 OS << (NeedPlus ? " + " : "") << BaseOffs;
3312 NeedPlus = true;
3313 }
3314
3315 if (BaseReg) {
3316 OS << (NeedPlus ? " + " : "") << "Base:";
3317 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3318 NeedPlus = true;
3319 }
3320 if (Scale) {
3321 OS << (NeedPlus ? " + " : "") << Scale << "*";
3322 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3323 }
3324
3325 OS << ']';
3326}
3327
3328LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3329 print(dbgs());
3330 dbgs() << '\n';
3331}
3332#endif
3333
3334} // end anonymous namespace
3335
3336namespace {
3337
3338/// This class provides transaction based operation on the IR.
3339/// Every change made through this class is recorded in the internal state and
3340/// can be undone (rollback) until commit is called.
3341/// CGP does not check if instructions could be speculatively executed when
3342/// moved. Preserving the original location would pessimize the debugging
3343/// experience, as well as negatively impact the quality of sample PGO.
3344class TypePromotionTransaction {
3345 /// This represents the common interface of the individual transaction.
3346 /// Each class implements the logic for doing one specific modification on
3347 /// the IR via the TypePromotionTransaction.
3348 class TypePromotionAction {
3349 protected:
3350 /// The Instruction modified.
3351 Instruction *Inst;
3352
3353 public:
3354 /// Constructor of the action.
3355 /// The constructor performs the related action on the IR.
3356 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3357
3358 virtual ~TypePromotionAction() = default;
3359
3360 /// Undo the modification done by this action.
3361 /// When this method is called, the IR must be in the same state as it was
3362 /// before this action was applied.
3363 /// \pre Undoing the action works if and only if the IR is in the exact same
3364 /// state as it was directly after this action was applied.
3365 virtual void undo() = 0;
3366
3367 /// Advocate every change made by this action.
3368 /// When the results on the IR of the action are to be kept, it is important
3369 /// to call this function, otherwise hidden information may be kept forever.
3370 virtual void commit() {
3371 // Nothing to be done, this action is not doing anything.
3372 }
3373 };
3374
3375 /// Utility to remember the position of an instruction.
3376 class InsertionHandler {
3377 /// Position of an instruction.
3378 /// Either an instruction:
3379 /// - Is the first in a basic block: BB is used.
3380 /// - Has a previous instruction: PrevInst is used.
3381 struct {
3382 BasicBlock::iterator PrevInst;
3383 BasicBlock *BB;
3384 } Point;
3385 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3386
3387 /// Remember whether or not the instruction had a previous instruction.
3388 bool HasPrevInstruction;
3389
3390 public:
3391 /// Record the position of \p Inst.
3392 InsertionHandler(Instruction *Inst) {
3393 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3394 BasicBlock *BB = Inst->getParent();
3395
3396 // Record where we would have to re-insert the instruction in the sequence
3397 // of DbgRecords, if we ended up reinserting.
3398 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3399
3400 if (HasPrevInstruction) {
3401 Point.PrevInst = std::prev(Inst->getIterator());
3402 } else {
3403 Point.BB = BB;
3404 }
3405 }
3406
3407 /// Insert \p Inst at the recorded position.
3408 void insert(Instruction *Inst) {
3409 if (HasPrevInstruction) {
3410 if (Inst->getParent())
3411 Inst->removeFromParent();
3412 Inst->insertAfter(Point.PrevInst);
3413 } else {
3414 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3415 if (Inst->getParent())
3416 Inst->moveBefore(*Point.BB, Position);
3417 else
3418 Inst->insertBefore(*Point.BB, Position);
3419 }
3420
3421 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3422 }
3423 };
3424
3425 /// Move an instruction before another.
3426 class InstructionMoveBefore : public TypePromotionAction {
3427 /// Original position of the instruction.
3428 InsertionHandler Position;
3429
3430 public:
3431 /// Move \p Inst before \p Before.
3432 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3433 : TypePromotionAction(Inst), Position(Inst) {
3434 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3435 << "\n");
3436 Inst->moveBefore(Before);
3437 }
3438
3439 /// Move the instruction back to its original position.
3440 void undo() override {
3441 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3442 Position.insert(Inst);
3443 }
3444 };
3445
3446 /// Set the operand of an instruction with a new value.
3447 class OperandSetter : public TypePromotionAction {
3448 /// Original operand of the instruction.
3449 Value *Origin;
3450
3451 /// Index of the modified instruction.
3452 unsigned Idx;
3453
3454 public:
3455 /// Set \p Idx operand of \p Inst with \p NewVal.
3456 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3457 : TypePromotionAction(Inst), Idx(Idx) {
3458 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3459 << "for:" << *Inst << "\n"
3460 << "with:" << *NewVal << "\n");
3461 Origin = Inst->getOperand(Idx);
3462 Inst->setOperand(Idx, NewVal);
3463 }
3464
3465 /// Restore the original value of the instruction.
3466 void undo() override {
3467 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3468 << "for: " << *Inst << "\n"
3469 << "with: " << *Origin << "\n");
3470 Inst->setOperand(Idx, Origin);
3471 }
3472 };
3473
3474 /// Hide the operands of an instruction.
3475 /// Do as if this instruction was not using any of its operands.
3476 class OperandsHider : public TypePromotionAction {
3477 /// The list of original operands.
3478 SmallVector<Value *, 4> OriginalValues;
3479
3480 public:
3481 /// Remove \p Inst from the uses of the operands of \p Inst.
3482 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3483 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3484 unsigned NumOpnds = Inst->getNumOperands();
3485 OriginalValues.reserve(NumOpnds);
3486 for (unsigned It = 0; It < NumOpnds; ++It) {
3487 // Save the current operand.
3488 Value *Val = Inst->getOperand(It);
3489 OriginalValues.push_back(Val);
3490 // Set a dummy one.
3491 // We could use OperandSetter here, but that would imply an overhead
3492 // that we are not willing to pay.
3493 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3494 }
3495 }
3496
3497 /// Restore the original list of uses.
3498 void undo() override {
3499 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3500 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3501 Inst->setOperand(It, OriginalValues[It]);
3502 }
3503 };
3504
3505 /// Build a truncate instruction.
3506 class TruncBuilder : public TypePromotionAction {
3507 Value *Val;
3508
3509 public:
3510 /// Build a truncate instruction of \p Opnd producing a \p Ty
3511 /// result.
3512 /// trunc Opnd to Ty.
3513 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3514 IRBuilder<> Builder(Opnd);
3515 Builder.SetCurrentDebugLocation(DebugLoc());
3516 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3517 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3518 }
3519
3520 /// Get the built value.
3521 Value *getBuiltValue() { return Val; }
3522
3523 /// Remove the built instruction.
3524 void undo() override {
3525 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3526 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3527 IVal->eraseFromParent();
3528 }
3529 };
3530
3531 /// Build a sign extension instruction.
3532 class SExtBuilder : public TypePromotionAction {
3533 Value *Val;
3534
3535 public:
3536 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3537 /// result.
3538 /// sext Opnd to Ty.
3539 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3540 : TypePromotionAction(InsertPt) {
3541 IRBuilder<> Builder(InsertPt);
3542 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3543 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3544 }
3545
3546 /// Get the built value.
3547 Value *getBuiltValue() { return Val; }
3548
3549 /// Remove the built instruction.
3550 void undo() override {
3551 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3552 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3553 IVal->eraseFromParent();
3554 }
3555 };
3556
3557 /// Build a zero extension instruction.
3558 class ZExtBuilder : public TypePromotionAction {
3559 Value *Val;
3560
3561 public:
3562 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3563 /// result.
3564 /// zext Opnd to Ty.
3565 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3566 : TypePromotionAction(InsertPt) {
3567 IRBuilder<> Builder(InsertPt);
3568 Builder.SetCurrentDebugLocation(DebugLoc());
3569 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3570 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3571 }
3572
3573 /// Get the built value.
3574 Value *getBuiltValue() { return Val; }
3575
3576 /// Remove the built instruction.
3577 void undo() override {
3578 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3579 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3580 IVal->eraseFromParent();
3581 }
3582 };
3583
3584 /// Mutate an instruction to another type.
3585 class TypeMutator : public TypePromotionAction {
3586 /// Record the original type.
3587 Type *OrigTy;
3588
3589 public:
3590 /// Mutate the type of \p Inst into \p NewTy.
3591 TypeMutator(Instruction *Inst, Type *NewTy)
3592 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3593 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3594 << "\n");
3595 Inst->mutateType(NewTy);
3596 }
3597
3598 /// Mutate the instruction back to its original type.
3599 void undo() override {
3600 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3601 << "\n");
3602 Inst->mutateType(OrigTy);
3603 }
3604 };
3605
3606 /// Replace the uses of an instruction by another instruction.
3607 class UsesReplacer : public TypePromotionAction {
3608 /// Helper structure to keep track of the replaced uses.
3609 struct InstructionAndIdx {
3610 /// The instruction using the instruction.
3611 Instruction *Inst;
3612
3613 /// The index where this instruction is used for Inst.
3614 unsigned Idx;
3615
3616 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3617 : Inst(Inst), Idx(Idx) {}
3618 };
3619
3620 /// Keep track of the original uses (pair Instruction, Index).
3622 /// Keep track of the debug users.
3623 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3624
3625 /// Keep track of the new value so that we can undo it by replacing
3626 /// instances of the new value with the original value.
3627 Value *New;
3628
3630
3631 public:
3632 /// Replace all the use of \p Inst by \p New.
3633 UsesReplacer(Instruction *Inst, Value *New)
3634 : TypePromotionAction(Inst), New(New) {
3635 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3636 << "\n");
3637 // Record the original uses.
3638 for (Use &U : Inst->uses()) {
3639 Instruction *UserI = cast<Instruction>(U.getUser());
3640 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3641 }
3642 // Record the debug uses separately. They are not in the instruction's
3643 // use list, but they are replaced by RAUW.
3644 findDbgValues(Inst, DbgVariableRecords);
3645
3646 // Now, we can replace the uses.
3647 Inst->replaceAllUsesWith(New);
3648 }
3649
3650 /// Reassign the original uses of Inst to Inst.
3651 void undo() override {
3652 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3653 for (InstructionAndIdx &Use : OriginalUses)
3654 Use.Inst->setOperand(Use.Idx, Inst);
3655 // RAUW has replaced all original uses with references to the new value,
3656 // including the debug uses. Since we are undoing the replacements,
3657 // the original debug uses must also be reinstated to maintain the
3658 // correctness and utility of debug value records.
3659 for (DbgVariableRecord *DVR : DbgVariableRecords)
3660 DVR->replaceVariableLocationOp(New, Inst);
3661 }
3662 };
3663
3664 /// Remove an instruction from the IR.
3665 class InstructionRemover : public TypePromotionAction {
3666 /// Original position of the instruction.
3667 InsertionHandler Inserter;
3668
3669 /// Helper structure to hide all the link to the instruction. In other
3670 /// words, this helps to do as if the instruction was removed.
3671 OperandsHider Hider;
3672
3673 /// Keep track of the uses replaced, if any.
3674 UsesReplacer *Replacer = nullptr;
3675
3676 /// Keep track of instructions removed.
3677 SetOfInstrs &RemovedInsts;
3678
3679 public:
3680 /// Remove all reference of \p Inst and optionally replace all its
3681 /// uses with New.
3682 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3683 /// \pre If !Inst->use_empty(), then New != nullptr
3684 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3685 Value *New = nullptr)
3686 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3687 RemovedInsts(RemovedInsts) {
3688 if (New)
3689 Replacer = new UsesReplacer(Inst, New);
3690 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3691 RemovedInsts.insert(Inst);
3692 /// The instructions removed here will be freed after completing
3693 /// optimizeBlock() for all blocks as we need to keep track of the
3694 /// removed instructions during promotion.
3695 Inst->removeFromParent();
3696 }
3697
3698 ~InstructionRemover() override { delete Replacer; }
3699
3700 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3701 InstructionRemover(const InstructionRemover &other) = delete;
3702
3703 /// Resurrect the instruction and reassign it to the proper uses if
3704 /// new value was provided when build this action.
3705 void undo() override {
3706 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3707 Inserter.insert(Inst);
3708 if (Replacer)
3709 Replacer->undo();
3710 Hider.undo();
3711 RemovedInsts.erase(Inst);
3712 }
3713 };
3714
3715public:
3716 /// Restoration point.
3717 /// The restoration point is a pointer to an action instead of an iterator
3718 /// because the iterator may be invalidated but not the pointer.
3719 using ConstRestorationPt = const TypePromotionAction *;
3720
3721 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3722 : RemovedInsts(RemovedInsts) {}
3723
3724 /// Advocate every changes made in that transaction. Return true if any change
3725 /// happen.
3726 bool commit();
3727
3728 /// Undo all the changes made after the given point.
3729 void rollback(ConstRestorationPt Point);
3730
3731 /// Get the current restoration point.
3732 ConstRestorationPt getRestorationPoint() const;
3733
3734 /// \name API for IR modification with state keeping to support rollback.
3735 /// @{
3736 /// Same as Instruction::setOperand.
3737 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3738
3739 /// Same as Instruction::eraseFromParent.
3740 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3741
3742 /// Same as Value::replaceAllUsesWith.
3743 void replaceAllUsesWith(Instruction *Inst, Value *New);
3744
3745 /// Same as Value::mutateType.
3746 void mutateType(Instruction *Inst, Type *NewTy);
3747
3748 /// Same as IRBuilder::createTrunc.
3749 Value *createTrunc(Instruction *Opnd, Type *Ty);
3750
3751 /// Same as IRBuilder::createSExt.
3752 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3753
3754 /// Same as IRBuilder::createZExt.
3755 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3756
3757private:
3758 /// The ordered list of actions made so far.
3760
3761 using CommitPt =
3762 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3763
3764 SetOfInstrs &RemovedInsts;
3765};
3766
3767} // end anonymous namespace
3768
3769void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3770 Value *NewVal) {
3771 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3772 Inst, Idx, NewVal));
3773}
3774
3775void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3776 Value *NewVal) {
3777 Actions.push_back(
3778 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3779 Inst, RemovedInsts, NewVal));
3780}
3781
3782void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3783 Value *New) {
3784 Actions.push_back(
3785 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3786}
3787
3788void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3789 Actions.push_back(
3790 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3791}
3792
3793Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3794 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3795 Value *Val = Ptr->getBuiltValue();
3796 Actions.push_back(std::move(Ptr));
3797 return Val;
3798}
3799
3800Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3801 Type *Ty) {
3802 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3803 Value *Val = Ptr->getBuiltValue();
3804 Actions.push_back(std::move(Ptr));
3805 return Val;
3806}
3807
3808Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3809 Type *Ty) {
3810 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3811 Value *Val = Ptr->getBuiltValue();
3812 Actions.push_back(std::move(Ptr));
3813 return Val;
3814}
3815
3816TypePromotionTransaction::ConstRestorationPt
3817TypePromotionTransaction::getRestorationPoint() const {
3818 return !Actions.empty() ? Actions.back().get() : nullptr;
3819}
3820
3821bool TypePromotionTransaction::commit() {
3822 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3823 Action->commit();
3824 bool Modified = !Actions.empty();
3825 Actions.clear();
3826 return Modified;
3827}
3828
3829void TypePromotionTransaction::rollback(
3830 TypePromotionTransaction::ConstRestorationPt Point) {
3831 while (!Actions.empty() && Point != Actions.back().get()) {
3832 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3833 Curr->undo();
3834 }
3835}
3836
3837namespace {
3838
3839/// A helper class for matching addressing modes.
3840///
3841/// This encapsulates the logic for matching the target-legal addressing modes.
3842class AddressingModeMatcher {
3843 SmallVectorImpl<Instruction *> &AddrModeInsts;
3844 const TargetLowering &TLI;
3845 const TargetRegisterInfo &TRI;
3846 const DataLayout &DL;
3847 const LoopInfo &LI;
3848 const std::function<const DominatorTree &()> getDTFn;
3849
3850 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3851 /// the memory instruction that we're computing this address for.
3852 Type *AccessTy;
3853 unsigned AddrSpace;
3854 Instruction *MemoryInst;
3855
3856 /// This is the addressing mode that we're building up. This is
3857 /// part of the return value of this addressing mode matching stuff.
3858 ExtAddrMode &AddrMode;
3859
3860 /// The instructions inserted by other CodeGenPrepare optimizations.
3861 const SetOfInstrs &InsertedInsts;
3862
3863 /// A map from the instructions to their type before promotion.
3864 InstrToOrigTy &PromotedInsts;
3865
3866 /// The ongoing transaction where every action should be registered.
3867 TypePromotionTransaction &TPT;
3868
3869 // A GEP which has too large offset to be folded into the addressing mode.
3870 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3871
3872 /// This is set to true when we should not do profitability checks.
3873 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3874 bool IgnoreProfitability;
3875
3876 /// True if we are optimizing for size.
3877 bool OptSize = false;
3878
3879 ProfileSummaryInfo *PSI;
3880 BlockFrequencyInfo *BFI;
3881
3882 AddressingModeMatcher(
3883 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3884 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3885 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3886 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3887 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3888 TypePromotionTransaction &TPT,
3889 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3890 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3891 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3892 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3893 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3894 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3895 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3896 IgnoreProfitability = false;
3897 }
3898
3899public:
3900 /// Find the maximal addressing mode that a load/store of V can fold,
3901 /// give an access type of AccessTy. This returns a list of involved
3902 /// instructions in AddrModeInsts.
3903 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3904 /// optimizations.
3905 /// \p PromotedInsts maps the instructions to their type before promotion.
3906 /// \p The ongoing transaction where every action should be registered.
3907 static ExtAddrMode
3908 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3909 SmallVectorImpl<Instruction *> &AddrModeInsts,
3910 const TargetLowering &TLI, const LoopInfo &LI,
3911 const std::function<const DominatorTree &()> getDTFn,
3912 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3913 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3914 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3915 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3916 ExtAddrMode Result;
3917
3918 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3919 AccessTy, AS, MemoryInst, Result,
3920 InsertedInsts, PromotedInsts, TPT,
3921 LargeOffsetGEP, OptSize, PSI, BFI)
3922 .matchAddr(V, 0);
3923 (void)Success;
3924 assert(Success && "Couldn't select *anything*?");
3925 return Result;
3926 }
3927
3928private:
3929 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3930 bool matchAddr(Value *Addr, unsigned Depth);
3931 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3932 bool *MovedAway = nullptr);
3933 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3934 ExtAddrMode &AMBefore,
3935 ExtAddrMode &AMAfter);
3936 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3937 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3938 Value *PromotedOperand) const;
3939};
3940
3941class PhiNodeSet;
3942
3943/// An iterator for PhiNodeSet.
3944class PhiNodeSetIterator {
3945 PhiNodeSet *const Set;
3946 size_t CurrentIndex = 0;
3947
3948public:
3949 /// The constructor. Start should point to either a valid element, or be equal
3950 /// to the size of the underlying SmallVector of the PhiNodeSet.
3951 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3952 PHINode *operator*() const;
3953 PhiNodeSetIterator &operator++();
3954 bool operator==(const PhiNodeSetIterator &RHS) const;
3955 bool operator!=(const PhiNodeSetIterator &RHS) const;
3956};
3957
3958/// Keeps a set of PHINodes.
3959///
3960/// This is a minimal set implementation for a specific use case:
3961/// It is very fast when there are very few elements, but also provides good
3962/// performance when there are many. It is similar to SmallPtrSet, but also
3963/// provides iteration by insertion order, which is deterministic and stable
3964/// across runs. It is also similar to SmallSetVector, but provides removing
3965/// elements in O(1) time. This is achieved by not actually removing the element
3966/// from the underlying vector, so comes at the cost of using more memory, but
3967/// that is fine, since PhiNodeSets are used as short lived objects.
3968class PhiNodeSet {
3969 friend class PhiNodeSetIterator;
3970
3971 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3972 using iterator = PhiNodeSetIterator;
3973
3974 /// Keeps the elements in the order of their insertion in the underlying
3975 /// vector. To achieve constant time removal, it never deletes any element.
3977
3978 /// Keeps the elements in the underlying set implementation. This (and not the
3979 /// NodeList defined above) is the source of truth on whether an element
3980 /// is actually in the collection.
3981 MapType NodeMap;
3982
3983 /// Points to the first valid (not deleted) element when the set is not empty
3984 /// and the value is not zero. Equals to the size of the underlying vector
3985 /// when the set is empty. When the value is 0, as in the beginning, the
3986 /// first element may or may not be valid.
3987 size_t FirstValidElement = 0;
3988
3989public:
3990 /// Inserts a new element to the collection.
3991 /// \returns true if the element is actually added, i.e. was not in the
3992 /// collection before the operation.
3993 bool insert(PHINode *Ptr) {
3994 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3995 NodeList.push_back(Ptr);
3996 return true;
3997 }
3998 return false;
3999 }
4000
4001 /// Removes the element from the collection.
4002 /// \returns whether the element is actually removed, i.e. was in the
4003 /// collection before the operation.
4004 bool erase(PHINode *Ptr) {
4005 if (NodeMap.erase(Ptr)) {
4006 SkipRemovedElements(FirstValidElement);
4007 return true;
4008 }
4009 return false;
4010 }
4011
4012 /// Removes all elements and clears the collection.
4013 void clear() {
4014 NodeMap.clear();
4015 NodeList.clear();
4016 FirstValidElement = 0;
4017 }
4018
4019 /// \returns an iterator that will iterate the elements in the order of
4020 /// insertion.
4021 iterator begin() {
4022 if (FirstValidElement == 0)
4023 SkipRemovedElements(FirstValidElement);
4024 return PhiNodeSetIterator(this, FirstValidElement);
4025 }
4026
4027 /// \returns an iterator that points to the end of the collection.
4028 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
4029
4030 /// Returns the number of elements in the collection.
4031 size_t size() const { return NodeMap.size(); }
4032
4033 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
4034 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
4035
4036private:
4037 /// Updates the CurrentIndex so that it will point to a valid element.
4038 ///
4039 /// If the element of NodeList at CurrentIndex is valid, it does not
4040 /// change it. If there are no more valid elements, it updates CurrentIndex
4041 /// to point to the end of the NodeList.
4042 void SkipRemovedElements(size_t &CurrentIndex) {
4043 while (CurrentIndex < NodeList.size()) {
4044 auto it = NodeMap.find(NodeList[CurrentIndex]);
4045 // If the element has been deleted and added again later, NodeMap will
4046 // point to a different index, so CurrentIndex will still be invalid.
4047 if (it != NodeMap.end() && it->second == CurrentIndex)
4048 break;
4049 ++CurrentIndex;
4050 }
4051 }
4052};
4053
/// Create an iterator over \p Set positioned at index \p Start of the
/// underlying vector of the set.
PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
    : Set(Set), CurrentIndex(Start) {}
4056
/// \returns the element stored at the current index of the underlying vector.
/// The iterator must not be at the end of the set.
PHINode *PhiNodeSetIterator::operator*() const {
  assert(CurrentIndex < Set->NodeList.size() &&
         "PhiNodeSet access out of range");
  return Set->NodeList[CurrentIndex];
}
4062
4063PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4064 assert(CurrentIndex < Set->NodeList.size() &&
4065 "PhiNodeSet access out of range");
4066 ++CurrentIndex;
4067 Set->SkipRemovedElements(CurrentIndex);
4068 return *this;
4069}
4070
/// Two iterators compare equal when they are at the same index. The set they
/// refer to is not part of the comparison.
bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
  return CurrentIndex == RHS.CurrentIndex;
}
4074
4075bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4076 return !((*this) == RHS);
4077}
4078
4079/// Keep track of simplification of Phi nodes.
4080/// Accept the set of all phi nodes and erase phi node from this set
4081/// if it is simplified.
4082class SimplificationTracker {
4083 DenseMap<Value *, Value *> Storage;
4084 // Tracks newly created Phi nodes. The elements are iterated by insertion
4085 // order.
4086 PhiNodeSet AllPhiNodes;
4087 // Tracks newly created Select nodes.
4088 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4089
4090public:
4091 Value *Get(Value *V) {
4092 do {
4093 auto SV = Storage.find(V);
4094 if (SV == Storage.end())
4095 return V;
4096 V = SV->second;
4097 } while (true);
4098 }
4099
4100 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4101
4102 void ReplacePhi(PHINode *From, PHINode *To) {
4103 Value *OldReplacement = Get(From);
4104 while (OldReplacement != From) {
4105 From = To;
4106 To = dyn_cast<PHINode>(OldReplacement);
4107 OldReplacement = Get(From);
4108 }
4109 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4110 Put(From, To);
4111 From->replaceAllUsesWith(To);
4112 AllPhiNodes.erase(From);
4113 From->eraseFromParent();
4114 }
4115
4116 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4117
4118 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4119
4120 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4121
4122 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4123
4124 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4125
4126 void destroyNewNodes(Type *CommonType) {
4127 // For safe erasing, replace the uses with dummy value first.
4128 auto *Dummy = PoisonValue::get(CommonType);
4129 for (auto *I : AllPhiNodes) {
4130 I->replaceAllUsesWith(Dummy);
4131 I->eraseFromParent();
4132 }
4133 AllPhiNodes.clear();
4134 for (auto *I : AllSelectNodes) {
4135 I->replaceAllUsesWith(Dummy);
4136 I->eraseFromParent();
4137 }
4138 AllSelectNodes.clear();
4139 }
4140};
4141
4142/// A helper class for combining addressing modes.
4143class AddressingModeCombiner {
4144 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4145 typedef std::pair<PHINode *, PHINode *> PHIPair;
4146
4147private:
4148 /// The addressing modes we've collected.
4150
4151 /// The field in which the AddrModes differ, when we have more than one.
4152 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4153
4154 /// Are the AddrModes that we have all just equal to their original values?
4155 bool AllAddrModesTrivial = true;
4156
4157 /// Common Type for all different fields in addressing modes.
4158 Type *CommonType = nullptr;
4159
4160 const DataLayout &DL;
4161
4162 /// Original Address.
4163 Value *Original;
4164
4165 /// Common value among addresses
4166 Value *CommonValue = nullptr;
4167
4168public:
/// Create a combiner for the address \p OriginalValue. \p DL is the data
/// layout used when combining.
AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
    : DL(DL), Original(OriginalValue) {}
4171
/// Erase the common value on destruction if it is an instruction that ended
/// up without uses.
~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4173
/// Get the combined AddrMode, which is kept in the first collected
/// addressing mode. At least one addressing mode must have been collected.
const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4176
4177 /// Add a new AddrMode if it's compatible with the AddrModes we already
4178 /// have.
4179 /// \return True iff we succeeded in doing so.
4180 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4181 // Take note of if we have any non-trivial AddrModes, as we need to detect
4182 // when all AddrModes are trivial as then we would introduce a phi or select
4183 // which just duplicates what's already there.
4184 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4185
4186 // If this is the first addrmode then everything is fine.
4187 if (AddrModes.empty()) {
4188 AddrModes.emplace_back(NewAddrMode);
4189 return true;
4190 }
4191
4192 // Figure out how different this is from the other address modes, which we
4193 // can do just by comparing against the first one given that we only care
4194 // about the cumulative difference.
4195 ExtAddrMode::FieldName ThisDifferentField =
4196 AddrModes[0].compare(NewAddrMode);
4197 if (DifferentField == ExtAddrMode::NoField)
4198 DifferentField = ThisDifferentField;
4199 else if (DifferentField != ThisDifferentField)
4200 DifferentField = ExtAddrMode::MultipleFields;
4201
4202 // If NewAddrMode differs in more than one dimension we cannot handle it.
4203 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4204
4205 // If Scale Field is different then we reject.
4206 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4207
4208 // We also must reject the case when base offset is different and
4209 // scale reg is not null, we cannot handle this case due to merge of
4210 // different offsets will be used as ScaleReg.
4211 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4212 !NewAddrMode.ScaledReg);
4213
4214 // We also must reject the case when GV is different and BaseReg installed
4215 // due to we want to use base reg as a merge of GV values.
4216 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4217 !NewAddrMode.HasBaseReg);
4218
4219 // Even if NewAddMode is the same we still need to collect it due to
4220 // original value is different. And later we will need all original values
4221 // as anchors during finding the common Phi node.
4222 if (CanHandle)
4223 AddrModes.emplace_back(NewAddrMode);
4224 else
4225 AddrModes.clear();
4226
4227 return CanHandle;
4228 }
4229
4230 /// Combine the addressing modes we've collected into a single
4231 /// addressing mode.
4232 /// \return True iff we successfully combined them or we only had one so
4233 /// didn't need to combine them anyway.
4234 bool combineAddrModes() {
4235 // If we have no AddrModes then they can't be combined.
4236 if (AddrModes.size() == 0)
4237 return false;
4238
4239 // A single AddrMode can trivially be combined.
4240 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4241 return true;
4242
4243 // If the AddrModes we collected are all just equal to the value they are
4244 // derived from then combining them wouldn't do anything useful.
4245 if (AllAddrModesTrivial)
4246 return false;
4247
4248 if (!addrModeCombiningAllowed())
4249 return false;
4250
4251 // Build a map between <original value, basic block where we saw it> to
4252 // value of base register.
4253 // Bail out if there is no common type.
4254 FoldAddrToValueMapping Map;
4255 if (!initializeMap(Map))
4256 return false;
4257
4258 CommonValue = findCommon(Map);
4259 if (CommonValue)
4260 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4261 return CommonValue != nullptr;
4262 }
4263
4264private:
4265 /// `CommonValue` may be a placeholder inserted by us.
4266 /// If the placeholder is not used, we should remove this dead instruction.
4267 void eraseCommonValueIfDead() {
4268 if (CommonValue && CommonValue->use_empty())
4269 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4270 CommonInst->eraseFromParent();
4271 }
4272
4273 /// Initialize Map with anchor values. For address seen
4274 /// we set the value of different field saw in this address.
4275 /// At the same time we find a common type for different field we will
4276 /// use to create new Phi/Select nodes. Keep it in CommonType field.
4277 /// Return false if there is no common type found.
4278 bool initializeMap(FoldAddrToValueMapping &Map) {
4279 // Keep track of keys where the value is null. We will need to replace it
4280 // with constant null when we know the common type.
4281 SmallVector<Value *, 2> NullValue;
4282 Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4283 for (auto &AM : AddrModes) {
4284 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4285 if (DV) {
4286 auto *Type = DV->getType();
4287 if (CommonType && CommonType != Type)
4288 return false;
4289 CommonType = Type;
4290 Map[AM.OriginalValue] = DV;
4291 } else {
4292 NullValue.push_back(AM.OriginalValue);
4293 }
4294 }
4295 assert(CommonType && "At least one non-null value must be!");
4296 for (auto *V : NullValue)
4297 Map[V] = Constant::getNullValue(CommonType);
4298 return true;
4299 }
4300
4301 /// We have mapping between value A and other value B where B was a field in
4302 /// addressing mode represented by A. Also we have an original value C
4303 /// representing an address we start with. Traversing from C through phi and
4304 /// selects we ended up with A's in a map. This utility function tries to find
4305 /// a value V which is a field in addressing mode C and traversing through phi
4306 /// nodes and selects we will end up in corresponded values B in a map.
4307 /// The utility will create a new Phi/Selects if needed.
4308 // The simple example looks as follows:
4309 // BB1:
4310 // p1 = b1 + 40
4311 // br cond BB2, BB3
4312 // BB2:
4313 // p2 = b2 + 40
4314 // br BB3
4315 // BB3:
4316 // p = phi [p1, BB1], [p2, BB2]
4317 // v = load p
4318 // Map is
4319 // p1 -> b1
4320 // p2 -> b2
4321 // Request is
4322 // p -> ?
4323 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4324 Value *findCommon(FoldAddrToValueMapping &Map) {
4325 // Tracks the simplification of newly created phi nodes. The reason we use
4326 // this mapping is because we will add new created Phi nodes in AddrToBase.
4327 // Simplification of Phi nodes is recursive, so some Phi node may
4328 // be simplified after we added it to AddrToBase. In reality this
4329 // simplification is possible only if original phi/selects were not
4330 // simplified yet.
4331 // Using this mapping we can find the current value in AddrToBase.
4332 SimplificationTracker ST;
4333
4334 // First step, DFS to create PHI nodes for all intermediate blocks.
4335 // Also fill traverse order for the second step.
4336 SmallVector<Value *, 32> TraverseOrder;
4337 InsertPlaceholders(Map, TraverseOrder, ST);
4338
4339 // Second Step, fill new nodes by merged values and simplify if possible.
4340 FillPlaceholders(Map, TraverseOrder, ST);
4341
4342 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4343 ST.destroyNewNodes(CommonType);
4344 return nullptr;
4345 }
4346
4347 // Now we'd like to match New Phi nodes to existed ones.
4348 unsigned PhiNotMatchedCount = 0;
4349 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4350 ST.destroyNewNodes(CommonType);
4351 return nullptr;
4352 }
4353
4354 auto *Result = ST.Get(Map.find(Original)->second);
4355 if (Result) {
4356 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4357 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4358 }
4359 return Result;
4360 }
4361
/// Try to match PHI node to Candidate.
/// Matcher tracks the matched Phi nodes: the pairs assumed to match while
/// checking \p PHI against \p Candidate are added to it, also when the match
/// eventually fails.
/// \p PhiNodesToMatch is the set of Phi nodes that are subject to matching.
/// \returns true if \p PHI matches \p Candidate.
bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
                  SmallSetVector<PHIPair, 8> &Matcher,
                  PhiNodeSet &PhiNodesToMatch) {
  SmallVector<PHIPair, 8> WorkList;
  Matcher.insert({PHI, Candidate});
  // Phi nodes for which a match is already registered in Matcher.
  SmallPtrSet<PHINode *, 8> MatchedPHIs;
  MatchedPHIs.insert(PHI);
  WorkList.push_back({PHI, Candidate});
  // Pairs already checked, so that each pair is processed only once.
  SmallSet<PHIPair, 8> Visited;
  while (!WorkList.empty()) {
    auto Item = WorkList.pop_back_val();
    if (!Visited.insert(Item).second)
      continue;
    // We iterate over all incoming values to Phi to compare them.
    // If values are different and both of them Phi and the first one is a
    // Phi we added (subject to match) and both of them is in the same basic
    // block then we can match our pair if values match. So we state that
    // these values match and add it to work list to verify that.
    for (auto *B : Item.first->blocks()) {
      Value *FirstValue = Item.first->getIncomingValueForBlock(B);
      Value *SecondValue = Item.second->getIncomingValueForBlock(B);
      if (FirstValue == SecondValue)
        continue;

      PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
      PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);

      // One of them is not Phi or
      // The first one is not Phi node from the set we'd like to match or
      // Phi nodes from different basic blocks then
      // we will not be able to match.
      if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
          FirstPhi->getParent() != SecondPhi->getParent())
        return false;

      // If we already matched them then continue.
      if (Matcher.count({FirstPhi, SecondPhi}))
        continue;
      // So the values are different and does not match. So we need them to
      // match. (But we register no more than one match per PHI node, so that
      // we won't later try to replace them twice.)
      if (MatchedPHIs.insert(FirstPhi).second)
        Matcher.insert({FirstPhi, SecondPhi});
      // But we must check it.
      WorkList.push_back({FirstPhi, SecondPhi});
    }
  }
  return true;
}
4413
/// For the given set of PHI nodes (in the SimplificationTracker) try
/// to find their equivalents.
/// Matched Phi nodes are replaced by their equivalent. Phi nodes that cannot
/// be matched are removed from the set and counted in \p PhiNotMatchedCount.
/// \returns false if this matching fails and creation of new Phi is disabled
/// (\p AllowNewPhiNodes is false).
bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
                 unsigned &PhiNotMatchedCount) {
  // Matched and PhiNodesToMatch iterate their elements in a deterministic
  // order, so the replacements (ReplacePhi) are also done in a deterministic
  // order.
  SmallSetVector<PHIPair, 8> Matched;
  SmallPtrSet<PHINode *, 8> WillNotMatch;
  PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
  // Every iteration removes at least one Phi node from the set, either by
  // replacing it or by declaring it unmatched, or returns.
  while (PhiNodesToMatch.size()) {
    PHINode *PHI = *PhiNodesToMatch.begin();

    // Add us, if no Phi nodes in the basic block we do not match.
    WillNotMatch.clear();
    WillNotMatch.insert(PHI);

    // Traverse all Phis until we found equivalent or fail to do that.
    bool IsMatched = false;
    for (auto &P : PHI->getParent()->phis()) {
      // Skip new Phi nodes.
      if (PhiNodesToMatch.count(&P))
        continue;
      if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
        break;
      // If it does not match, collect all Phi nodes from matcher.
      // If we end up with no match, then all these Phi nodes will not match
      // later.
      WillNotMatch.insert_range(llvm::make_first_range(Matched));
      Matched.clear();
    }
    if (IsMatched) {
      // Replace all matched values and erase them.
      for (auto MV : Matched)
        ST.ReplacePhi(MV.first, MV.second);
      Matched.clear();
      continue;
    }
    // If we are not allowed to create new nodes then bail out.
    if (!AllowNewPhiNodes)
      return false;
    // Just remove all seen values in matcher. They will not match anything.
    PhiNotMatchedCount += WillNotMatch.size();
    for (auto *P : WillNotMatch)
      PhiNodesToMatch.erase(P);
  }
  return true;
}
4463 /// Fill the placeholders with values from predecessors and simplify them.
4464 void FillPlaceholders(FoldAddrToValueMapping &Map,
4465 SmallVectorImpl<Value *> &TraverseOrder,
4466 SimplificationTracker &ST) {
4467 while (!TraverseOrder.empty()) {
4468 Value *Current = TraverseOrder.pop_back_val();
4469 assert(Map.contains(Current) && "No node to fill!!!");
4470 Value *V = Map[Current];
4471
4472 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4473 // CurrentValue also must be Select.
4474 auto *CurrentSelect = cast<SelectInst>(Current);
4475 auto *TrueValue = CurrentSelect->getTrueValue();
4476 assert(Map.contains(TrueValue) && "No True Value!");
4477 Select->setTrueValue(ST.Get(Map[TrueValue]));
4478 auto *FalseValue = CurrentSelect->getFalseValue();
4479 assert(Map.contains(FalseValue) && "No False Value!");
4480 Select->setFalseValue(ST.Get(Map[FalseValue]));
4481 } else {
4482 // Must be a Phi node then.
4483 auto *PHI = cast<PHINode>(V);
4484 // Fill the Phi node with values from predecessors.
4485 for (auto *B : predecessors(PHI->getParent())) {
4486 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4487 assert(Map.contains(PV) && "No predecessor Value!");
4488 PHI->addIncoming(ST.Get(Map[PV]), B);
4489 }
4490 }
4491 }
4492 }
4493
4494 /// Starting from original value recursively iterates over def-use chain up to
4495 /// known ending values represented in a map. For each traversed phi/select
4496 /// inserts a placeholder Phi or Select.
4497 /// Reports all new created Phi/Select nodes by adding them to set.
4498 /// Also reports and order in what values have been traversed.
4499 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4500 SmallVectorImpl<Value *> &TraverseOrder,
4501 SimplificationTracker &ST) {
4502 SmallVector<Value *, 32> Worklist;
4503 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4504 "Address must be a Phi or Select node");
4505 auto *Dummy = PoisonValue::get(CommonType);
4506 Worklist.push_back(Original);
4507 while (!Worklist.empty()) {
4508 Value *Current = Worklist.pop_back_val();
4509 // if it is already visited or it is an ending value then skip it.
4510 if (Map.contains(Current))
4511 continue;
4512 TraverseOrder.push_back(Current);
4513
4514 // CurrentValue must be a Phi node or select. All others must be covered
4515 // by anchors.
4516 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4517 // Is it OK to get metadata from OrigSelect?!
4518 // Create a Select placeholder with dummy value.
4519 SelectInst *Select =
4520 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4521 CurrentSelect->getName(),
4522 CurrentSelect->getIterator(), CurrentSelect);
4523 Map[Current] = Select;
4524 ST.insertNewSelect(Select);
4525 // We are interested in True and False values.
4526 Worklist.push_back(CurrentSelect->getTrueValue());
4527 Worklist.push_back(CurrentSelect->getFalseValue());
4528 } else {
4529 // It must be a Phi node then.
4530 PHINode *CurrentPhi = cast<PHINode>(Current);
4531 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4532 PHINode *PHI =
4533 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4534 Map[Current] = PHI;
4535 ST.insertNewPhi(PHI);
4536 append_range(Worklist, CurrentPhi->incoming_values());
4537 }
4538 }
4539 }
4540
4541 bool addrModeCombiningAllowed() {
4543 return false;
4544 switch (DifferentField) {
4545 default:
4546 return false;
4547 case ExtAddrMode::BaseRegField:
4549 case ExtAddrMode::BaseGVField:
4550 return AddrSinkCombineBaseGV;
4551 case ExtAddrMode::BaseOffsField:
4553 case ExtAddrMode::ScaledRegField:
4555 }
4556 }
4557};
4558} // end anonymous namespace
4559
4560/// Try adding ScaleReg*Scale to the current addressing mode.
4561/// Return true and update AddrMode if this addr mode is legal for the target,
4562/// false if not.
4563bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4564 unsigned Depth) {
4565 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4566 // mode. Just process that directly.
4567 if (Scale == 1)
4568 return matchAddr(ScaleReg, Depth);
4569
4570 // If the scale is 0, it takes nothing to add this.
4571 if (Scale == 0)
4572 return true;
4573
4574 // If we already have a scale of this value, we can add to it, otherwise, we
4575 // need an available scale field.
4576 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4577 return false;
4578
4579 ExtAddrMode TestAddrMode = AddrMode;
4580
4581 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4582 // [A+B + A*7] -> [B+A*8].
4583 TestAddrMode.Scale += Scale;
4584 TestAddrMode.ScaledReg = ScaleReg;
4585
4586 // If the new address isn't legal, bail out.
4587 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4588 return false;
4589
4590 // It was legal, so commit it.
4591 AddrMode = TestAddrMode;
4592
4593 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4594 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4595 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4596 // go any further: we can reuse it and cannot eliminate it.
4597 ConstantInt *CI = nullptr;
4598 Value *AddLHS = nullptr;
4599 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4600 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4601 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4602 TestAddrMode.InBounds = false;
4603 TestAddrMode.ScaledReg = AddLHS;
4604 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4605
4606 // If this addressing mode is legal, commit it and remember that we folded
4607 // this instruction.
4608 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4609 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4610 AddrMode = TestAddrMode;
4611 return true;
4612 }
4613 // Restore status quo.
4614 TestAddrMode = AddrMode;
4615 }
4616
4617 // If this is an add recurrence with a constant step, return the increment
4618 // instruction and the canonicalized step.
4619 auto GetConstantStep =
4620 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4621 auto *PN = dyn_cast<PHINode>(V);
4622 if (!PN)
4623 return std::nullopt;
4624 auto IVInc = getIVIncrement(PN, &LI);
4625 if (!IVInc)
4626 return std::nullopt;
4627 // TODO: The result of the intrinsics above is two-complement. However when
4628 // IV inc is expressed as add or sub, iv.next is potentially a poison value.
4629 // If it has nuw or nsw flags, we need to make sure that these flags are
4630 // inferrable at the point of memory instruction. Otherwise we are replacing
4631 // well-defined two-complement computation with poison. Currently, to avoid
4632 // potentially complex analysis needed to prove this, we reject such cases.
4633 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4634 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4635 return std::nullopt;
4636 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4637 return std::make_pair(IVInc->first, ConstantStep->getValue());
4638 return std::nullopt;
4639 };
4640
4641 // Try to account for the following special case:
4642 // 1. ScaleReg is an inductive variable;
4643 // 2. We use it with non-zero offset;
4644 // 3. IV's increment is available at the point of memory instruction.
4645 //
4646 // In this case, we may reuse the IV increment instead of the IV Phi to
4647 // achieve the following advantages:
4648 // 1. If IV step matches the offset, we will have no need in the offset;
4649 // 2. Even if they don't match, we will reduce the overlap of living IV
4650 // and IV increment, that will potentially lead to better register
4651 // assignment.
4652 if (AddrMode.BaseOffs) {
4653 if (auto IVStep = GetConstantStep(ScaleReg)) {
4654 Instruction *IVInc = IVStep->first;
4655 // The following assert is important to ensure a lack of infinite loops.
4656 // This transforms is (intentionally) the inverse of the one just above.
4657 // If they don't agree on the definition of an increment, we'd alternate
4658 // back and forth indefinitely.
4659 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4660 APInt Step = IVStep->second;
4661 APInt Offset = Step * AddrMode.Scale;
4662 if (Offset.isSignedIntN(64)) {
4663 TestAddrMode.InBounds = false;
4664 TestAddrMode.ScaledReg = IVInc;
4665 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4666 // If this addressing mode is legal, commit it..
4667 // (Note that we defer the (expensive) domtree base legality check
4668 // to the very last possible point.)
4669 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4670 getDTFn().dominates(IVInc, MemoryInst)) {
4671 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4672 AddrMode = TestAddrMode;
4673 return true;
4674 }
4675 // Restore status quo.
4676 TestAddrMode = AddrMode;
4677 }
4678 }
4679 }
4680
4681 // Otherwise, just return what we have.
4682 return true;
4683}
4684
4685/// This is a little filter, which returns true if an addressing computation
4686/// involving I might be folded into a load/store accessing it.
4687/// This doesn't need to be perfect, but needs to accept at least
4688/// the set of instructions that MatchOperationAddr can.
4690 switch (I->getOpcode()) {
4691 case Instruction::BitCast:
4692 case Instruction::AddrSpaceCast:
4693 // Don't touch identity bitcasts.
4694 if (I->getType() == I->getOperand(0)->getType())
4695 return false;
4696 return I->getType()->isIntOrPtrTy();
4697 case Instruction::PtrToInt:
4698 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4699 return true;
4700 case Instruction::IntToPtr:
4701 // We know the input is intptr_t, so this is foldable.
4702 return true;
4703 case Instruction::Add:
4704 return true;
4705 case Instruction::Mul:
4706 case Instruction::Shl:
4707 // Can only handle X*C and X << C.
4708 return isa<ConstantInt>(I->getOperand(1));
4709 case Instruction::GetElementPtr:
4710 return true;
4711 default:
4712 return false;
4713 }
4714}
4715
4716/// Check whether or not \p Val is a legal instruction for \p TLI.
4717/// \note \p Val is assumed to be the product of some type promotion.
4718/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4719/// to be legal, as the non-promoted value would have had the same state.
4721 const DataLayout &DL, Value *Val) {
4722 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4723 if (!PromotedInst)
4724 return false;
4725 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4726 // If the ISDOpcode is undefined, it was undefined before the promotion.
4727 if (!ISDOpcode)
4728 return true;
4729 // Otherwise, check if the promoted instruction is legal or not.
4730 return TLI.isOperationLegalOrCustom(
4731 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4732}
4733
4734namespace {
4735
4736/// Hepler class to perform type promotion.
4737class TypePromotionHelper {
4738 /// Utility function to add a promoted instruction \p ExtOpnd to
4739 /// \p PromotedInsts and record the type of extension we have seen.
4740 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4741 Instruction *ExtOpnd, bool IsSExt) {
4742 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4743 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4744 if (!Inserted) {
4745 // If the new extension is same as original, the information in
4746 // PromotedInsts[ExtOpnd] is still correct.
4747 if (It->second.getInt() == ExtTy)
4748 return;
4749
4750 // Now the new extension is different from old extension, we make
4751 // the type information invalid by setting extension type to
4752 // BothExtension.
4753 ExtTy = BothExtension;
4754 }
4755 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4756 }
4757
4758 /// Utility function to query the original type of instruction \p Opnd
4759 /// with a matched extension type. If the extension doesn't match, we
4760 /// cannot use the information we had on the original type.
4761 /// BothExtension doesn't match any extension type.
4762 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4763 Instruction *Opnd, bool IsSExt) {
4764 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4765 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4766 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4767 return It->second.getPointer();
4768 return nullptr;
4769 }
4770
4771 /// Utility function to check whether or not a sign or zero extension
4772 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4773 /// either using the operands of \p Inst or promoting \p Inst.
4774 /// The type of the extension is defined by \p IsSExt.
4775 /// In other words, check if:
4776 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4777 /// #1 Promotion applies:
4778 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4779 /// #2 Operand reuses:
4780 /// ext opnd1 to ConsideredExtType.
4781 /// \p PromotedInsts maps the instructions to their type before promotion.
4782 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4783 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4784
4785 /// Utility function to determine if \p OpIdx should be promoted when
4786 /// promoting \p Inst.
4787 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4788 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4789 }
4790
4791 /// Utility function to promote the operand of \p Ext when this
4792 /// operand is a promotable trunc or sext or zext.
4793 /// \p PromotedInsts maps the instructions to their type before promotion.
4794 /// \p CreatedInstsCost[out] contains the cost of all instructions
4795 /// created to promote the operand of Ext.
4796 /// Newly added extensions are inserted in \p Exts.
4797 /// Newly added truncates are inserted in \p Truncs.
4798 /// Should never be called directly.
4799 /// \return The promoted value which is used instead of Ext.
4800 static Value *promoteOperandForTruncAndAnyExt(
4801 Instruction *Ext, TypePromotionTransaction &TPT,
4802 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4803 SmallVectorImpl<Instruction *> *Exts,
4804 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4805
4806 /// Utility function to promote the operand of \p Ext when this
4807 /// operand is promotable and is not a supported trunc or sext.
4808 /// \p PromotedInsts maps the instructions to their type before promotion.
4809 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4810 /// created to promote the operand of Ext.
4811 /// Newly added extensions are inserted in \p Exts.
4812 /// Newly added truncates are inserted in \p Truncs.
4813 /// Should never be called directly.
4814 /// \return The promoted value which is used instead of Ext.
4815 static Value *promoteOperandForOther(Instruction *Ext,
4816 TypePromotionTransaction &TPT,
4817 InstrToOrigTy &PromotedInsts,
4818 unsigned &CreatedInstsCost,
4819 SmallVectorImpl<Instruction *> *Exts,
4820 SmallVectorImpl<Instruction *> *Truncs,
4821 const TargetLowering &TLI, bool IsSExt);
4822
4823 /// \see promoteOperandForOther.
4824 static Value *signExtendOperandForOther(
4825 Instruction *Ext, TypePromotionTransaction &TPT,
4826 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4827 SmallVectorImpl<Instruction *> *Exts,
4828 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4829 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4830 Exts, Truncs, TLI, true);
4831 }
4832
4833 /// \see promoteOperandForOther.
4834 static Value *zeroExtendOperandForOther(
4835 Instruction *Ext, TypePromotionTransaction &TPT,
4836 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4837 SmallVectorImpl<Instruction *> *Exts,
4838 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4839 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4840 Exts, Truncs, TLI, false);
4841 }
4842
4843public:
4844 /// Type for the utility function that promotes the operand of Ext.
4845 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4846 InstrToOrigTy &PromotedInsts,
4847 unsigned &CreatedInstsCost,
4848 SmallVectorImpl<Instruction *> *Exts,
4849 SmallVectorImpl<Instruction *> *Truncs,
4850 const TargetLowering &TLI);
4851
4852 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4853 /// action to promote the operand of \p Ext instead of using Ext.
4854 /// \return NULL if no promotable action is possible with the current
4855 /// sign extension.
4856 /// \p InsertedInsts keeps track of all the instructions inserted by the
4857 /// other CodeGenPrepare optimizations. This information is important
4858 /// because we do not want to promote these instructions as CodeGenPrepare
4859 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4860 /// \p PromotedInsts maps the instructions to their type before promotion.
4861 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4862 const TargetLowering &TLI,
4863 const InstrToOrigTy &PromotedInsts);
4864};
4865
4866} // end anonymous namespace
4867
4868bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4869 Type *ConsideredExtType,
4870 const InstrToOrigTy &PromotedInsts,
4871 bool IsSExt) {
4872 // The promotion helper does not know how to deal with vector types yet.
4873 // To be able to fix that, we would need to fix the places where we
4874 // statically extend, e.g., constants and such.
4875 if (Inst->getType()->isVectorTy())
4876 return false;
4877
4878 // We can always get through zext.
4879 if (isa<ZExtInst>(Inst))
4880 return true;
4881
4882 // sext(sext) is ok too.
4883 if (IsSExt && isa<SExtInst>(Inst))
4884 return true;
4885
4886 // We can get through binary operator, if it is legal. In other words, the
4887 // binary operator must have a nuw or nsw flag.
4888 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4889 if (isa<OverflowingBinaryOperator>(BinOp) &&
4890 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4891 (IsSExt && BinOp->hasNoSignedWrap())))
4892 return true;
4893
4894 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4895 if ((Inst->getOpcode() == Instruction::And ||
4896 Inst->getOpcode() == Instruction::Or))
4897 return true;
4898
4899 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4900 if (Inst->getOpcode() == Instruction::Xor) {
4901 // Make sure it is not a NOT.
4902 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4903 if (!Cst->getValue().isAllOnes())
4904 return true;
4905 }
4906
4907 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4908 // It may change a poisoned value into a regular value, like
4909 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4910 // poisoned value regular value
4911 // It should be OK since undef covers valid value.
4912 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4913 return true;
4914
4915 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4916 // It may change a poisoned value into a regular value, like
4917 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4918 // poisoned value regular value
4919 // It should be OK since undef covers valid value.
4920 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4921 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4922 if (ExtInst->hasOneUse()) {
4923 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4924 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4925 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4926 if (Cst &&
4927 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4928 return true;
4929 }
4930 }
4931 }
4932
4933 // Check if we can do the following simplification.
4934 // ext(trunc(opnd)) --> ext(opnd)
4935 if (!isa<TruncInst>(Inst))
4936 return false;
4937
4938 Value *OpndVal = Inst->getOperand(0);
4939 // Check if we can use this operand in the extension.
4940 // If the type is larger than the result type of the extension, we cannot.
4941 if (!OpndVal->getType()->isIntegerTy() ||
4942 OpndVal->getType()->getIntegerBitWidth() >
4943 ConsideredExtType->getIntegerBitWidth())
4944 return false;
4945
4946 // If the operand of the truncate is not an instruction, we will not have
4947 // any information on the dropped bits.
4948 // (Actually we could for constant but it is not worth the extra logic).
4949 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4950 if (!Opnd)
4951 return false;
4952
4953 // Check if the source of the type is narrow enough.
4954 // I.e., check that trunc just drops extended bits of the same kind of
4955 // the extension.
4956 // #1 get the type of the operand and check the kind of the extended bits.
4957 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4958 if (OpndType)
4959 ;
4960 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4961 OpndType = Opnd->getOperand(0)->getType();
4962 else
4963 return false;
4964
4965 // #2 check that the truncate just drops extended bits.
4966 return Inst->getType()->getIntegerBitWidth() >=
4967 OpndType->getIntegerBitWidth();
4968}
4969
4970TypePromotionHelper::Action TypePromotionHelper::getAction(
4971 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4972 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4973 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4974 "Unexpected instruction type");
4975 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4976 Type *ExtTy = Ext->getType();
4977 bool IsSExt = isa<SExtInst>(Ext);
4978 // If the operand of the extension is not an instruction, we cannot
4979 // get through.
4980 // If it, check we can get through.
4981 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4982 return nullptr;
4983
4984 // Do not promote if the operand has been added by codegenprepare.
4985 // Otherwise, it means we are undoing an optimization that is likely to be
4986 // redone, thus causing potential infinite loop.
4987 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4988 return nullptr;
4989
4990 // SExt or Trunc instructions.
4991 // Return the related handler.
4992 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4993 isa<ZExtInst>(ExtOpnd))
4994 return promoteOperandForTruncAndAnyExt;
4995
4996 // Regular instruction.
4997 // Abort early if we will have to insert non-free instructions.
4998 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4999 return nullptr;
5000 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
5001}
5002
5003Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
5004 Instruction *SExt, TypePromotionTransaction &TPT,
5005 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5006 SmallVectorImpl<Instruction *> *Exts,
5007 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
5008 // By construction, the operand of SExt is an instruction. Otherwise we cannot
5009 // get through it and this method should not be called.
5010 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
5011 Value *ExtVal = SExt;
5012 bool HasMergedNonFreeExt = false;
5013 if (isa<ZExtInst>(SExtOpnd)) {
5014 // Replace s|zext(zext(opnd))
5015 // => zext(opnd).
5016 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
5017 Value *ZExt =
5018 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
5019 TPT.replaceAllUsesWith(SExt, ZExt);
5020 TPT.eraseInstruction(SExt);
5021 ExtVal = ZExt;
5022 } else {
5023 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
5024 // => z|sext(opnd).
5025 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
5026 }
5027 CreatedInstsCost = 0;
5028
5029 // Remove dead code.
5030 if (SExtOpnd->use_empty())
5031 TPT.eraseInstruction(SExtOpnd);
5032
5033 // Check if the extension is still needed.
5034 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5035 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5036 if (ExtInst) {
5037 if (Exts)
5038 Exts->push_back(ExtInst);
5039 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5040 }
5041 return ExtVal;
5042 }
5043
5044 // At this point we have: ext ty opnd to ty.
5045 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5046 Value *NextVal = ExtInst->getOperand(0);
5047 TPT.eraseInstruction(ExtInst, NextVal);
5048 return NextVal;
5049}
5050
5051Value *TypePromotionHelper::promoteOperandForOther(
5052 Instruction *Ext, TypePromotionTransaction &TPT,
5053 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5054 SmallVectorImpl<Instruction *> *Exts,
5055 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5056 bool IsSExt) {
5057 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5058 // get through it and this method should not be called.
5059 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5060 CreatedInstsCost = 0;
5061 if (!ExtOpnd->hasOneUse()) {
5062 // ExtOpnd will be promoted.
5063 // All its uses, but Ext, will need to use a truncated value of the
5064 // promoted version.
5065 // Create the truncate now.
5066 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5067 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5068 // Insert it just after the definition.
5069 ITrunc->moveAfter(ExtOpnd);
5070 if (Truncs)
5071 Truncs->push_back(ITrunc);
5072 }
5073
5074 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5075 // Restore the operand of Ext (which has been replaced by the previous call
5076 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5077 TPT.setOperand(Ext, 0, ExtOpnd);
5078 }
5079
5080 // Get through the Instruction:
5081 // 1. Update its type.
5082 // 2. Replace the uses of Ext by Inst.
5083 // 3. Extend each operand that needs to be extended.
5084
5085 // Remember the original type of the instruction before promotion.
5086 // This is useful to know that the high bits are sign extended bits.
5087 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5088 // Step #1.
5089 TPT.mutateType(ExtOpnd, Ext->getType());
5090 // Step #2.
5091 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5092 // Step #3.
5093 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5094 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5095 ++OpIdx) {
5096 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5097 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5098 !shouldExtOperand(ExtOpnd, OpIdx)) {
5099 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5100 continue;
5101 }
5102 // Check if we can statically extend the operand.
5103 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5104 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5105 LLVM_DEBUG(dbgs() << "Statically extend\n");
5106 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5107 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5108 : Cst->getValue().zext(BitWidth);
5109 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5110 continue;
5111 }
5112 // UndefValue are typed, so we have to statically sign extend them.
5113 if (isa<UndefValue>(Opnd)) {
5114 LLVM_DEBUG(dbgs() << "Statically extend\n");
5115 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5116 continue;
5117 }
5118
5119 // Otherwise we have to explicitly sign extend the operand.
5120 Value *ValForExtOpnd = IsSExt
5121 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5122 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5123 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5124 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5125 if (!InstForExtOpnd)
5126 continue;
5127
5128 if (Exts)
5129 Exts->push_back(InstForExtOpnd);
5130
5131 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5132 }
5133 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5134 TPT.eraseInstruction(Ext);
5135 return ExtOpnd;
5136}
5137
5138/// Check whether or not promoting an instruction to a wider type is profitable.
5139/// \p NewCost gives the cost of extension instructions created by the
5140/// promotion.
5141/// \p OldCost gives the cost of extension instructions before the promotion
5142/// plus the number of instructions that have been
5143/// matched in the addressing mode the promotion.
5144/// \p PromotedOperand is the value that has been promoted.
5145/// \return True if the promotion is profitable, false otherwise.
5146bool AddressingModeMatcher::isPromotionProfitable(
5147 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5148 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5149 << '\n');
5150 // The cost of the new extensions is greater than the cost of the
5151 // old extension plus what we folded.
5152 // This is not profitable.
5153 if (NewCost > OldCost)
5154 return false;
5155 if (NewCost < OldCost)
5156 return true;
5157 // The promotion is neutral but it may help folding the sign extension in
5158 // loads for instance.
5159 // Check that we did not create an illegal instruction.
5160 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5161}
5162
5163/// Given an instruction or constant expr, see if we can fold the operation
5164/// into the addressing mode. If so, update the addressing mode and return
5165/// true, otherwise return false without modifying AddrMode.
5166/// If \p MovedAway is not NULL, it contains the information of whether or
5167/// not AddrInst has to be folded into the addressing mode on success.
5168/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5169/// because it has been moved away.
5170/// Thus AddrInst must not be added in the matched instructions.
5171/// This state can happen when AddrInst is a sext, since it may be moved away.
5172/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5173/// not be referenced anymore.
///
/// \param AddrInst  the instruction or constant expression being examined.
/// \param Opcode    opcode of \p AddrInst; selects which case below runs.
/// \param Depth     current recursion depth; matching gives up at depth 5.
/// \param MovedAway optional out-parameter. It is cleared on entry and set to
///                  true only on the SExt/ZExt path, once the extension has
///                  been handed to the type promotion helper.
/// \returns true if the operation was matched into AddrMode.
5174bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5175 unsigned Depth,
5176 bool *MovedAway) {
5177 // Avoid exponential behavior on extremely deep expression trees.
5178 if (Depth >= 5)
5179 return false;
5180
5181 // By default, all matched instructions stay in place.
5182 if (MovedAway)
5183 *MovedAway = false;
5184
5185 switch (Opcode) {
5186 case Instruction::PtrToInt:
5187 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5188 return matchAddr(AddrInst->getOperand(0), Depth);
5189 case Instruction::IntToPtr: {
5190 auto AS = AddrInst->getType()->getPointerAddressSpace();
5191 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5192 // This inttoptr is a no-op if the integer type is pointer sized.
5193 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5194 return matchAddr(AddrInst->getOperand(0), Depth);
5195 return false;
5196 }
5197 case Instruction::BitCast:
5198 // BitCast is always a noop, and we can handle it as long as it is
5199 // int->int or pointer->pointer (we don't want int<->fp or something).
5200 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5201 // Don't touch identity bitcasts. These were probably put here by LSR,
5202 // and we don't want to mess around with them. Assume it knows what it
5203 // is doing.
5204 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5205 return matchAddr(AddrInst->getOperand(0), Depth);
5206 return false;
5207 case Instruction::AddrSpaceCast: {
 // Only look through the cast when the target machine reports the
 // source -> destination address space conversion as a no-op.
5208 unsigned SrcAS =
5209 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5210 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5211 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5212 return matchAddr(AddrInst->getOperand(0), Depth);
5213 return false;
5214 }
5215 case Instruction::Add: {
5216 // Check to see if we can merge in one operand, then the other. If so, we
5217 // win.
5218 ExtAddrMode BackupAddrMode = AddrMode;
5219 unsigned OldSize = AddrModeInsts.size();
5220 // Start a transaction at this point.
5221 // The LHS may match but not the RHS.
5222 // Therefore, we need a higher level restoration point to undo partially
5223 // matched operation.
5224 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5225 TPT.getRestorationPoint();
5226
5227 // Try to match an integer constant second to increase its chance of ending
5228 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5229 int First = 0, Second = 1;
5230 if (isa<ConstantInt>(AddrInst->getOperand(First))
5231 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5232 std::swap(First, Second);
5233 AddrMode.InBounds = false;
5234 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5235 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5236 return true;
5237
5238 // Restore the old addr mode info.
5239 AddrMode = BackupAddrMode;
5240 AddrModeInsts.resize(OldSize);
5241 TPT.rollback(LastKnownGood);
5242
5243 // Otherwise this was over-aggressive. Try merging operands in the opposite
5244 // order.
5245 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5246 matchAddr(AddrInst->getOperand(First), Depth + 1))
5247 return true;
5248
5249 // Otherwise we definitely can't merge the ADD in.
5250 AddrMode = BackupAddrMode;
5251 AddrModeInsts.resize(OldSize);
5252 TPT.rollback(LastKnownGood);
5253 break;
5254 }
5255 // case Instruction::Or:
5256 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5257 // break;
5258 case Instruction::Mul:
5259 case Instruction::Shl: {
5260 // Can only handle X*C and X << C.
5261 AddrMode.InBounds = false;
5262 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
 // Constants wider than 64 bits are rejected; Scale below is an int64_t.
5263 if (!RHS || RHS->getBitWidth() > 64)
5264 return false;
 // For a shift the scale is 1 << C, with the shift amount clamped to
 // BitWidth - 1 by getLimitedValue(); for a multiply it is C itself.
5265 int64_t Scale = Opcode == Instruction::Shl
5266 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5267 : RHS->getSExtValue();
5268
5269 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5270 }
5271 case Instruction::GetElementPtr: {
5272 // Scan the GEP. We check it if it contains constant offsets and at most
5273 // one variable offset.
5274 int VariableOperand = -1;
5275 unsigned VariableScale = 0;
5276
5277 int64_t ConstantOffset = 0;
5278 gep_type_iterator GTI = gep_type_begin(AddrInst);
5279 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
 // Struct indices are read with cast<>, i.e. they are expected to be
 // constant; the field's byte offset comes from the struct layout.
5280 if (StructType *STy = GTI.getStructTypeOrNull()) {
5281 const StructLayout *SL = DL.getStructLayout(STy);
5282 unsigned Idx =
5283 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5284 ConstantOffset += SL->getElementOffset(Idx);
5285 } else {
5286 TypeSize TS = GTI.getSequentialElementStride(DL);
 // Indices whose element stride is zero are skipped entirely.
5287 if (TS.isNonZero()) {
5288 // The optimisations below currently only work for fixed offsets.
5289 if (TS.isScalable())
5290 return false;
5291 int64_t TypeSize = TS.getFixedValue();
5292 if (ConstantInt *CI =
5293 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5294 const APInt &CVal = CI->getValue();
5295 if (CVal.getSignificantBits() <= 64) {
5296 ConstantOffset += CVal.getSExtValue() * TypeSize;
5297 continue;
5298 }
5299 }
5300 // We only allow one variable index at the moment.
5301 if (VariableOperand != -1)
5302 return false;
5303
5304 // Remember the variable index.
5305 VariableOperand = i;
 // NOTE(review): TypeSize is int64_t while VariableScale is unsigned, so
 // this assignment narrows -- confirm strides always fit.
5306 VariableScale = TypeSize;
5307 }
5308 }
5309 }
5310
5311 // A common case is for the GEP to only do a constant offset. In this case,
5312 // just add it to the disp field and check validity.
5313 if (VariableOperand == -1) {
5314 AddrMode.BaseOffs += ConstantOffset;
5315 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5316 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5317 AddrMode.InBounds = false;
5318 return true;
5319 }
 // The base did not match: undo the displacement added above.
5320 AddrMode.BaseOffs -= ConstantOffset;
5321
 // NOTE(review): the opening of this `if` condition, and of the nested `if`
 // a few lines further down, is not present in this listing. The code is
 // left exactly as found; recover the missing lines from the original file.
5323 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5324 ConstantOffset > 0) {
5325 // Record GEPs with non-zero offsets as candidates for splitting in
5326 // the event that the offset cannot fit into the r+i addressing mode.
5327 // Simple and common case that only one GEP is used in calculating the
5328 // address for the memory access.
5329 Value *Base = AddrInst->getOperand(0);
5330 auto *BaseI = dyn_cast<Instruction>(Base);
5331 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5333 (BaseI && !isa<CastInst>(BaseI) &&
5334 !isa<GetElementPtrInst>(BaseI))) {
5335 // Make sure the parent block allows inserting non-PHI instructions
5336 // before the terminator.
5337 BasicBlock *Parent = BaseI ? BaseI->getParent()
5338 : &GEP->getFunction()->getEntryBlock();
5339 if (!Parent->getTerminator()->isEHPad())
5340 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5341 }
5342 }
5343
5344 return false;
5345 }
5346
5347 // Save the valid addressing mode in case we can't match.
5348 ExtAddrMode BackupAddrMode = AddrMode;
5349 unsigned OldSize = AddrModeInsts.size();
5350
5351 // See if the scale and offset amount is valid for this target.
5352 AddrMode.BaseOffs += ConstantOffset;
5353 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5354 AddrMode.InBounds = false;
5355
5356 // Match the base operand of the GEP.
5357 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5358 // If it couldn't be matched, just stuff the value in a register.
5359 if (AddrMode.HasBaseReg) {
5360 AddrMode = BackupAddrMode;
5361 AddrModeInsts.resize(OldSize);
5362 return false;
5363 }
5364 AddrMode.HasBaseReg = true;
5365 AddrMode.BaseReg = AddrInst->getOperand(0);
5366 }
5367
5368 // Match the remaining variable portion of the GEP.
5369 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5370 Depth)) {
5371 // If it couldn't be matched, try stuffing the base into a register
5372 // instead of matching it, and retrying the match of the scale.
5373 AddrMode = BackupAddrMode;
5374 AddrModeInsts.resize(OldSize);
5375 if (AddrMode.HasBaseReg)
5376 return false;
5377 AddrMode.HasBaseReg = true;
5378 AddrMode.BaseReg = AddrInst->getOperand(0);
5379 AddrMode.BaseOffs += ConstantOffset;
5380 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5381 VariableScale, Depth)) {
5382 // If even that didn't work, bail.
5383 AddrMode = BackupAddrMode;
5384 AddrModeInsts.resize(OldSize);
5385 return false;
5386 }
5387 }
5388
5389 return true;
5390 }
5391 case Instruction::SExt:
5392 case Instruction::ZExt: {
 // Constant expressions reach this function too (AddrInst is a User), so
 // only a real instruction is eligible for promotion.
5393 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5394 if (!Ext)
5395 return false;
5396
5397 // Try to move this ext out of the way of the addressing mode.
5398 // Ask for a method for doing so.
5399 TypePromotionHelper::Action TPH =
5400 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5401 if (!TPH)
5402 return false;
5403
5404 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5405 TPT.getRestorationPoint();
5406 unsigned CreatedInstsCost = 0;
 // ExtCost is 0 when the target reports the extension as free, 1 otherwise.
5407 unsigned ExtCost = !TLI.isExtFree(Ext);
5408 Value *PromotedOperand =
5409 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5410 // SExt has been moved away.
5411 // Thus either it will be rematched later in the recursive calls or it is
5412 // gone. Anyway, we must not fold it into the addressing mode at this point.
5413 // E.g.,
5414 // op = add opnd, 1
5415 // idx = ext op
5416 // addr = gep base, idx
5417 // is now:
5418 // promotedOpnd = ext opnd <- no match here
5419 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5420 // addr = gep base, op <- match
5421 if (MovedAway)
5422 *MovedAway = true;
5423
5424 assert(PromotedOperand &&
5425 "TypePromotionHelper should have filtered out those cases");
5426
5427 ExtAddrMode BackupAddrMode = AddrMode;
5428 unsigned OldSize = AddrModeInsts.size();
5429
5430 if (!matchAddr(PromotedOperand, Depth) ||
5431 // The total of the new cost is equal to the cost of the created
5432 // instructions.
5433 // The total of the old cost is equal to the cost of the extension plus
5434 // what we have saved in the addressing mode.
5435 !isPromotionProfitable(CreatedInstsCost,
5436 ExtCost + (AddrModeInsts.size() - OldSize),
5437 PromotedOperand)) {
5438 AddrMode = BackupAddrMode;
5439 AddrModeInsts.resize(OldSize);
 // This debug message is emitted on this path for both SExt and ZExt.
5440 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5441 TPT.rollback(LastKnownGood);
5442 return false;
5443 }
5444
5445 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5446 AddrMode.replaceWith(Ext, PromotedOperand);
5447 return true;
5448 }
5449 case Instruction::Call:
 // The only call handled is the threadlocal_address intrinsic: look through
 // it to its operand when TLI.addressingModeSupportsTLS() accepts the global.
5450 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5451 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5452 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5453 if (TLI.addressingModeSupportsTLS(GV))
5454 return matchAddr(AddrInst->getOperand(0), Depth);
5455 }
5456 }
5457 break;
5458 }
 // Reached via `break` from cases that could not fold the operation, and for
 // opcodes that have no case above.
5459 return false;
5460}
5461
5462/// If we can, try to add the value of 'Addr' into the current addressing mode.
5463/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5464/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5465/// for the target.
5466///
5467bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5468 // Start a transaction at this point that we will rollback if the matching
5469 // fails.
5470 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5471 TPT.getRestorationPoint();
5472 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5473 if (CI->getValue().isSignedIntN(64)) {
5474 // Check if the addition would result in a signed overflow.
5475 int64_t Result;
5476 bool Overflow =
5477 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5478 if (!Overflow) {
5479 // Fold in immediates if legal for the target.
5480 AddrMode.BaseOffs = Result;
5481 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5482 return true;
5483 AddrMode.BaseOffs -= CI->getSExtValue();
5484 }
5485 }
5486 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5487 // If this is a global variable, try to fold it into the addressing mode.
5488 if (!AddrMode.BaseGV) {
5489 AddrMode.BaseGV = GV;
5490 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5491 return true;
5492 AddrMode.BaseGV = nullptr;
5493 }
5494 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5495 ExtAddrMode BackupAddrMode = AddrMode;
5496 unsigned OldSize = AddrModeInsts.size();
5497
5498 // Check to see if it is possible to fold this operation.
5499 bool MovedAway = false;
5500 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5501 // This instruction may have been moved away. If so, there is nothing
5502 // to check here.
5503 if (MovedAway)
5504 return true;
5505 // Okay, it's possible to fold this. Check to see if it is actually
5506 // *profitable* to do so. We use a simple cost model to avoid increasing
5507 // register pressure too much.
5508 if (I->hasOneUse() ||
5509 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5510 AddrModeInsts.push_back(I);
5511 return true;
5512 }
5513
5514 // It isn't profitable to do this, roll back.
5515 AddrMode = BackupAddrMode;
5516 AddrModeInsts.resize(OldSize);
5517 TPT.rollback(LastKnownGood);
5518 }
5519 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5520 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5521 return true;
5522 TPT.rollback(LastKnownGood);
5523 } else if (isa<ConstantPointerNull>(Addr)) {
5524 // Null pointer gets folded without affecting the addressing mode.
5525 return true;
5526 }
5527
5528 // Worse case, the target should support [reg] addressing modes. :)
5529 if (!AddrMode.HasBaseReg) {
5530 AddrMode.HasBaseReg = true;
5531 AddrMode.BaseReg = Addr;
5532 // Still check for legality in case the target supports [imm] but not [i+r].
5533 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5534 return true;
5535 AddrMode.HasBaseReg = false;
5536 AddrMode.BaseReg = nullptr;
5537 }
5538
5539 // If the base register is already taken, see if we can do [r+r].
5540 if (AddrMode.Scale == 0) {
5541 AddrMode.Scale = 1;
5542 AddrMode.ScaledReg = Addr;
5543 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5544 return true;
5545 AddrMode.Scale = 0;
5546 AddrMode.ScaledReg = nullptr;
5547 }
5548 // Couldn't match.
5549 TPT.rollback(LastKnownGood);
5550 return false;
5551}
5552
5553/// Check to see if all uses of OpVal by the specified inline asm call are due
5554/// to memory operands. If so, return true, otherwise return false.
5556 const TargetLowering &TLI,
5557 const TargetRegisterInfo &TRI) {
5558 const Function *F = CI->getFunction();
5559 TargetLowering::AsmOperandInfoVector TargetConstraints =
5560 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5561
5562 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5563 // Compute the constraint code and ConstraintType to use.
5564 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5565
5566 // If this asm operand is our Value*, and if it isn't an indirect memory
5567 // operand, we can't fold it! TODO: Also handle C_Address?
5568 if (OpInfo.CallOperandVal == OpVal &&
5569 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5570 !OpInfo.isIndirect))
5571 return false;
5572 }
5573
5574 return true;
5575}
5576
5577/// Recursively walk all the uses of I until we find a memory use.
5578/// If we find an obviously non-foldable instruction, return true.
5579/// Add accessed addresses and types to MemoryUses.
5581 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5582 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5583 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5584 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5585 // If we already considered this instruction, we're done.
5586 if (!ConsideredInsts.insert(I).second)
5587 return false;
5588
5589 // If this is an obviously unfoldable instruction, bail out.
5590 if (!MightBeFoldableInst(I))
5591 return true;
5592
5593 // Loop over all the uses, recursively processing them.
5594 for (Use &U : I->uses()) {
5595 // Conservatively return true if we're seeing a large number or a deep chain
5596 // of users. This avoids excessive compilation times in pathological cases.
5597 if (SeenInsts++ >= MaxAddressUsersToScan)
5598 return true;
5599
5600 Instruction *UserI = cast<Instruction>(U.getUser());
5601 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5602 MemoryUses.push_back({&U, LI->getType()});
5603 continue;
5604 }
5605
5606 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5607 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5608 return true; // Storing addr, not into addr.
5609 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5610 continue;
5611 }
5612
5613 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5614 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5615 return true; // Storing addr, not into addr.
5616 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5617 continue;
5618 }
5619
5621 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5622 return true; // Storing addr, not into addr.
5623 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5624 continue;
5625 }
5626
5629 Type *AccessTy;
5630 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5631 return true;
5632
5633 if (!find(PtrOps, U.get()))
5634 return true;
5635
5636 MemoryUses.push_back({&U, AccessTy});
5637 continue;
5638 }
5639
5640 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5641 if (CI->hasFnAttr(Attribute::Cold)) {
5642 // If this is a cold call, we can sink the addressing calculation into
5643 // the cold path. See optimizeCallInst
5644 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5645 continue;
5646 }
5647
5648 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5649 if (!IA)
5650 return true;
5651
5652 // If this is a memory operand, we're cool, otherwise bail out.
5653 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5654 return true;
5655 continue;
5656 }
5657
5658 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5659 PSI, BFI, SeenInsts))
5660 return true;
5661 }
5662
5663 return false;
5664}
5665
5667 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5668 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5670 unsigned SeenInsts = 0;
5671 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5672 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5673 PSI, BFI, SeenInsts);
5674}
5675
5676
5677/// Return true if Val is already known to be live at the use site that we're
5678/// folding it into. If so, there is no cost to include it in the addressing
5679/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5680/// instruction already.
5681bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5682 Value *KnownLive1,
5683 Value *KnownLive2) {
5684 // If Val is either of the known-live values, we know it is live!
5685 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5686 return true;
5687
5688 // All values other than instructions and arguments (e.g. constants) are live.
5689 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5690 return true;
5691
5692 // If Val is a constant sized alloca in the entry block, it is live, this is
5693 // true because it is just a reference to the stack/frame pointer, which is
5694 // live for the whole function.
5695 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5696 if (AI->isStaticAlloca())
5697 return true;
5698
5699 // Check to see if this value is already used in the memory instruction's
5700 // block. If so, it's already live into the block at the very least, so we
5701 // can reasonably fold it.
5702 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5703}
5704
5705/// It is possible for the addressing mode of the machine to fold the specified
5706/// instruction into a load or store that ultimately uses it.
5707/// However, the specified instruction has multiple uses.
5708/// Given this, it may actually increase register pressure to fold it
5709/// into the load. For example, consider this code:
5710///
5711/// X = ...
5712/// Y = X+1
5713/// use(Y) -> nonload/store
5714/// Z = Y+1
5715/// load Z
5716///
5717/// In this case, Y has multiple uses, and can be folded into the load of Z
5718/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5719/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5720/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5721/// number of computations either.
5722///
5723/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5724/// X was live across 'load Z' for other reasons, we actually *would* want to
5725/// fold the addressing mode in the Z case. This would make Y die earlier.
5726bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5727 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5728 if (IgnoreProfitability)
5729 return true;
5730
5731 // AMBefore is the addressing mode before this instruction was folded into it,
5732 // and AMAfter is the addressing mode after the instruction was folded. Get
5733 // the set of registers referenced by AMAfter and subtract out those
5734 // referenced by AMBefore: this is the set of values which folding in this
5735 // address extends the lifetime of.
5736 //
5737 // Note that there are only two potential values being referenced here,
5738 // BaseReg and ScaleReg (global addresses are always available, as are any
5739 // folded immediates).
5740 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5741
5742 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5743 // lifetime wasn't extended by adding this instruction.
5744 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5745 BaseReg = nullptr;
5746 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5747 ScaledReg = nullptr;
5748
5749 // If folding this instruction (and it's subexprs) didn't extend any live
5750 // ranges, we're ok with it.
5751 if (!BaseReg && !ScaledReg)
5752 return true;
5753
5754 // If all uses of this instruction can have the address mode sunk into them,
5755 // we can remove the addressing mode and effectively trade one live register
5756 // for another (at worst.) In this context, folding an addressing mode into
5757 // the use is just a particularly nice way of sinking it.
5759 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5760 return false; // Has a non-memory, non-foldable use!
5761
5762 // Now that we know that all uses of this instruction are part of a chain of
5763 // computation involving only operations that could theoretically be folded
5764 // into a memory use, loop over each of these memory operation uses and see
5765 // if they could *actually* fold the instruction. The assumption is that
5766 // addressing modes are cheap and that duplicating the computation involved
5767 // many times is worthwhile, even on a fastpath. For sinking candidates
5768 // (i.e. cold call sites), this serves as a way to prevent excessive code
5769 // growth since most architectures have some reasonable small and fast way to
5770 // compute an effective address. (i.e LEA on x86)
5771 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5772 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5773 Value *Address = Pair.first->get();
5774 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5775 Type *AddressAccessTy = Pair.second;
5776 unsigned AS = Address->getType()->getPointerAddressSpace();
5777
5778 // Do a match against the root of this address, ignoring profitability. This
5779 // will tell us if the addressing mode for the memory operation will
5780 // *actually* cover the shared instruction.
5781 ExtAddrMode Result;
5782 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5783 0);
5784 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5785 TPT.getRestorationPoint();
5786 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5787 AddressAccessTy, AS, UserI, Result,
5788 InsertedInsts, PromotedInsts, TPT,
5789 LargeOffsetGEP, OptSize, PSI, BFI);
5790 Matcher.IgnoreProfitability = true;
5791 bool Success = Matcher.matchAddr(Address, 0);
5792 (void)Success;
5793 assert(Success && "Couldn't select *anything*?");
5794
5795 // The match was to check the profitability, the changes made are not
5796 // part of the original matcher. Therefore, they should be dropped
5797 // otherwise the original matcher will not present the right state.
5798 TPT.rollback(LastKnownGood);
5799
5800 // If the match didn't cover I, then it won't be shared by it.
5801 if (!is_contained(MatchedAddrModeInsts, I))
5802 return false;
5803
5804 MatchedAddrModeInsts.clear();
5805 }
5806
5807 return true;
5808}
5809
5810/// Return true if the specified values are defined in a
5811/// different basic block than BB.
5812static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5814 return I->getParent() != BB;
5815 return false;
5816}
5817
5818// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5819// is the first instruction that will use Addr. So we need to find the first
5820// user of Addr in current BB.
5822 Value *SunkAddr) {
5823 if (Addr->hasOneUse())
5824 return MemoryInst->getIterator();
5825
5826 // We already have a SunkAddr in current BB, but we may need to insert cast
5827 // instruction after it.
5828 if (SunkAddr) {
5829 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5830 return std::next(AddrInst->getIterator());
5831 }
5832
5833 // Find the first user of Addr in current BB.
5834 Instruction *Earliest = MemoryInst;
5835 for (User *U : Addr->users()) {
5836 Instruction *UserInst = dyn_cast<Instruction>(U);
5837 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5838 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5839 continue;
5840 if (UserInst->comesBefore(Earliest))
5841 Earliest = UserInst;
5842 }
5843 }
5844 return Earliest->getIterator();
5845}
5846
5847/// Sink addressing mode computation immediately before MemoryInst if doing so
5848/// can be done without increasing register pressure. The need for the
5849/// register pressure constraint means this can end up being an all or nothing
5850/// decision for all uses of the same addressing computation.
5851///
5852/// Load and Store Instructions often have addressing modes that can do
5853/// significant amounts of computation. As such, instruction selection will try
5854/// to get the load or store to do as much computation as possible for the
5855/// program. The problem is that isel can only see within a single block. As
5856/// such, we sink as much legal addressing mode work into the block as possible.
5857///
5858/// This method is used to optimize both load/store and inline asms with memory
5859/// operands. It's also used to sink addressing computations feeding into cold
5860/// call sites into their (cold) basic block.
5861///
5862/// The motivation for handling sinking into cold blocks is that doing so can
5863/// both enable other address mode sinking (by satisfying the register pressure
5864/// constraint above), and reduce register pressure globally (by removing the
5865/// addressing mode computation from the fast path entirely.).
5866bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5867 Type *AccessTy, unsigned AddrSpace) {
5868 Value *Repl = Addr;
5869
5870 // Try to collapse single-value PHI nodes. This is necessary to undo
5871 // unprofitable PRE transformations.
5872 SmallVector<Value *, 8> worklist;
5873 SmallPtrSet<Value *, 16> Visited;
5874 worklist.push_back(Addr);
5875
5876 // Use a worklist to iteratively look through PHI and select nodes, and
5877 // ensure that the addressing mode obtained from the non-PHI/select roots of
5878 // the graph are compatible.
5879 bool PhiOrSelectSeen = false;
5880 SmallVector<Instruction *, 16> AddrModeInsts;
5881 AddressingModeCombiner AddrModes(*DL, Addr);
5882 TypePromotionTransaction TPT(RemovedInsts);
5883 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5884 TPT.getRestorationPoint();
5885 while (!worklist.empty()) {
5886 Value *V = worklist.pop_back_val();
5887
5888 // We allow traversing cyclic Phi nodes.
5889 // In case of success after this loop we ensure that traversing through
5890 // Phi nodes ends up with all cases to compute address of the form
5891 // BaseGV + Base + Scale * Index + Offset
5892 // where Scale and Offset are constans and BaseGV, Base and Index
5893 // are exactly the same Values in all cases.
5894 // It means that BaseGV, Scale and Offset dominate our memory instruction
5895 // and have the same value as they had in address computation represented
5896 // as Phi. So we can safely sink address computation to memory instruction.
5897 if (!Visited.insert(V).second)
5898 continue;
5899
5900 // For a PHI node, push all of its incoming values.
5901 if (PHINode *P = dyn_cast<PHINode>(V)) {
5902 append_range(worklist, P->incoming_values());
5903 PhiOrSelectSeen = true;
5904 continue;
5905 }
5906 // Similar for select.
5907 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5908 worklist.push_back(SI->getFalseValue());
5909 worklist.push_back(SI->getTrueValue());
5910 PhiOrSelectSeen = true;
5911 continue;
5912 }
5913
5914 // For non-PHIs, determine the addressing mode being computed. Note that
5915 // the result may differ depending on what other uses our candidate
5916 // addressing instructions might have.
5917 AddrModeInsts.clear();
5918 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5919 0);
5920 // Defer the query (and possible computation of) the dom tree to point of
5921 // actual use. It's expected that most address matches don't actually need
5922 // the domtree.
5923 auto getDTFn = [this]() -> const DominatorTree & { return getDT(); };
5924 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5925 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5926 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5927 BFI);
5928
5929 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5930 if (GEP && !NewGEPBases.count(GEP)) {
5931 // If splitting the underlying data structure can reduce the offset of a
5932 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5933 // previously split data structures.
5934 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5935 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5936 }
5937
5938 NewAddrMode.OriginalValue = V;
5939 if (!AddrModes.addNewAddrMode(NewAddrMode))
5940 break;
5941 }
5942
5943 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5944 // or we have multiple but either couldn't combine them or combining them
5945 // wouldn't do anything useful, bail out now.
5946 if (!AddrModes.combineAddrModes()) {
5947 TPT.rollback(LastKnownGood);
5948 return false;
5949 }
5950 bool Modified = TPT.commit();
5951
5952 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5953 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5954
5955 // If all the instructions matched are already in this BB, don't do anything.
5956 // If we saw a Phi node then it is not local definitely, and if we saw a
5957 // select then we want to push the address calculation past it even if it's
5958 // already in this BB.
5959 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5960 return IsNonLocalValue(V, MemoryInst->getParent());
5961 })) {
5962 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5963 << "\n");
5964 return Modified;
5965 }
5966
5967 // Now that we determined the addressing expression we want to use and know
5968 // that we have to sink it into this block. Check to see if we have already
5969 // done this for some other load/store instr in this block. If so, reuse
5970 // the computation. Before attempting reuse, check if the address is valid
5971 // as it may have been erased.
5972
5973 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5974
5975 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5976 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5977
5978 // The current BB may be optimized multiple times, we can't guarantee the
5979 // reuse of Addr happens later, call findInsertPos to find an appropriate
5980 // insert position.
5981 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5982
5983 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5984 if (!SunkAddr) {
5985 auto &DT = getDT();
5986 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5987 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5988 return Modified;
5989 }
5990
5991 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5992
5993 if (SunkAddr) {
5994 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5995 << " for " << *MemoryInst << "\n");
5996 if (SunkAddr->getType() != Addr->getType()) {
5997 if (SunkAddr->getType()->getPointerAddressSpace() !=
5998 Addr->getType()->getPointerAddressSpace() &&
5999 !DL->isNonIntegralPointerType(Addr->getType())) {
6000 // There are two reasons the address spaces might not match: a no-op
6001 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6002 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6003 // TODO: allow bitcast between different address space pointers with the
6004 // same size.
6005 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6006 SunkAddr =
6007 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6008 } else
6009 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6010 }
6012 SubtargetInfo->addrSinkUsingGEPs())) {
6013 // By default, we use the GEP-based method when AA is used later. This
6014 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
6015 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6016 << " for " << *MemoryInst << "\n");
6017 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
6018
6019 // First, find the pointer.
6020 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
6021 ResultPtr = AddrMode.BaseReg;
6022 AddrMode.BaseReg = nullptr;
6023 }
6024
6025 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
6026 // We can't add more than one pointer together, nor can we scale a
6027 // pointer (both of which seem meaningless).
6028 if (ResultPtr || AddrMode.Scale != 1)
6029 return Modified;
6030
6031 ResultPtr = AddrMode.ScaledReg;
6032 AddrMode.Scale = 0;
6033 }
6034
6035 // It is only safe to sign extend the BaseReg if we know that the math
6036 // required to create it did not overflow before we extend it. Since
6037 // the original IR value was tossed in favor of a constant back when
6038 // the AddrMode was created we need to bail out gracefully if widths
6039 // do not match instead of extending it.
6040 //
6041 // (See below for code to add the scale.)
6042 if (AddrMode.Scale) {
6043 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6044 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6045 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6046 return Modified;
6047 }
6048
6049 GlobalValue *BaseGV = AddrMode.BaseGV;
6050 if (BaseGV != nullptr) {
6051 if (ResultPtr)
6052 return Modified;
6053
6054 if (BaseGV->isThreadLocal()) {
6055 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6056 } else {
6057 ResultPtr = BaseGV;
6058 }
6059 }
6060
6061 // If the real base value actually came from an inttoptr, then the matcher
6062 // will look through it and provide only the integer value. In that case,
6063 // use it here.
6064 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6065 if (!ResultPtr && AddrMode.BaseReg) {
6066 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6067 "sunkaddr");
6068 AddrMode.BaseReg = nullptr;
6069 } else if (!ResultPtr && AddrMode.Scale == 1) {
6070 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6071 "sunkaddr");
6072 AddrMode.Scale = 0;
6073 }
6074 }
6075
6076 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6077 !AddrMode.BaseOffs) {
6078 SunkAddr = Constant::getNullValue(Addr->getType());
6079 } else if (!ResultPtr) {
6080 return Modified;
6081 } else {
6082 Type *I8PtrTy =
6083 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6084
6085 // Start with the base register. Do this first so that subsequent address
6086 // matching finds it last, which will prevent it from trying to match it
6087 // as the scaled value in case it happens to be a mul. That would be
6088 // problematic if we've sunk a different mul for the scale, because then
6089 // we'd end up sinking both muls.
6090 if (AddrMode.BaseReg) {
6091 Value *V = AddrMode.BaseReg;
6092 if (V->getType() != IntPtrTy)
6093 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6094
6095 ResultIndex = V;
6096 }
6097
6098 // Add the scale value.
6099 if (AddrMode.Scale) {
6100 Value *V = AddrMode.ScaledReg;
6101 if (V->getType() == IntPtrTy) {
6102 // done.
6103 } else {
6104 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6105 cast<IntegerType>(V->getType())->getBitWidth() &&
6106 "We can't transform if ScaledReg is too narrow");
6107 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6108 }
6109
6110 if (AddrMode.Scale != 1)
6111 V = Builder.CreateMul(
6112 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6113 if (ResultIndex)
6114 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6115 else
6116 ResultIndex = V;
6117 }
6118
6119 // Add in the Base Offset if present.
6120 if (AddrMode.BaseOffs) {
6121 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6122 if (ResultIndex) {
6123 // We need to add this separately from the scale above to help with
6124 // SDAG consecutive load/store merging.
6125 if (ResultPtr->getType() != I8PtrTy)
6126 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6127 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6128 AddrMode.InBounds);
6129 }
6130
6131 ResultIndex = V;
6132 }
6133
6134 if (!ResultIndex) {
6135 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6136 // We know that we have a pointer without any offsets. If this pointer
6137 // originates from a different basic block than the current one, we
6138 // must be able to recreate it in the current basic block.
6139 // We do not support the recreation of any instructions yet.
6140 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6141 return Modified;
6142 SunkAddr = ResultPtr;
6143 } else {
6144 if (ResultPtr->getType() != I8PtrTy)
6145 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6146 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6147 AddrMode.InBounds);
6148 }
6149
6150 if (SunkAddr->getType() != Addr->getType()) {
6151 if (SunkAddr->getType()->getPointerAddressSpace() !=
6152 Addr->getType()->getPointerAddressSpace() &&
6153 !DL->isNonIntegralPointerType(Addr->getType())) {
6154 // There are two reasons the address spaces might not match: a no-op
6155 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6156 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6157 // TODO: allow bitcast between different address space pointers with
6158 // the same size.
6159 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6160 SunkAddr =
6161 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6162 } else
6163 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6164 }
6165 }
6166 } else {
6167 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6168 // non-integral pointers, so in that case bail out now.
6169 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6170 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6171 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6172 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6173 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6174 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6175 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6176 (AddrMode.BaseGV &&
6177 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6178 return Modified;
6179
6180 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6181 << " for " << *MemoryInst << "\n");
6182 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6183 Value *Result = nullptr;
6184
6185 // Start with the base register. Do this first so that subsequent address
6186 // matching finds it last, which will prevent it from trying to match it
6187 // as the scaled value in case it happens to be a mul. That would be
6188 // problematic if we've sunk a different mul for the scale, because then
6189 // we'd end up sinking both muls.
6190 if (AddrMode.BaseReg) {
6191 Value *V = AddrMode.BaseReg;
6192 if (V->getType()->isPointerTy())
6193 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6194 if (V->getType() != IntPtrTy)
6195 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6196 Result = V;
6197 }
6198
6199 // Add the scale value.
6200 if (AddrMode.Scale) {
6201 Value *V = AddrMode.ScaledReg;
6202 if (V->getType() == IntPtrTy) {
6203 // done.
6204 } else if (V->getType()->isPointerTy()) {
6205 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6206 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6207 cast<IntegerType>(V->getType())->getBitWidth()) {
6208 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6209 } else {
6210 // It is only safe to sign extend the BaseReg if we know that the math
6211 // required to create it did not overflow before we extend it. Since
6212 // the original IR value was tossed in favor of a constant back when
6213 // the AddrMode was created we need to bail out gracefully if widths
6214 // do not match instead of extending it.
6216 if (I && (Result != AddrMode.BaseReg))
6217 I->eraseFromParent();
6218 return Modified;
6219 }
6220 if (AddrMode.Scale != 1)
6221 V = Builder.CreateMul(
6222 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6223 if (Result)
6224 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6225 else
6226 Result = V;
6227 }
6228
6229 // Add in the BaseGV if present.
6230 GlobalValue *BaseGV = AddrMode.BaseGV;
6231 if (BaseGV != nullptr) {
6232 Value *BaseGVPtr;
6233 if (BaseGV->isThreadLocal()) {
6234 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6235 } else {
6236 BaseGVPtr = BaseGV;
6237 }
6238 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6239 if (Result)
6240 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6241 else
6242 Result = V;
6243 }
6244
6245 // Add in the Base Offset if present.
6246 if (AddrMode.BaseOffs) {
6247 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6248 if (Result)
6249 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6250 else
6251 Result = V;
6252 }
6253
6254 if (!Result)
6255 SunkAddr = Constant::getNullValue(Addr->getType());
6256 else
6257 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6258 }
6259
6260 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6261 // Store the newly computed address into the cache. In the case we reused a
6262 // value, this should be idempotent.
6263 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6264
6265 // If we have no uses, recursively delete the value and all dead instructions
6266 // using it.
6267 if (Repl->use_empty()) {
6268 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6269 RecursivelyDeleteTriviallyDeadInstructions(
6270 Repl, TLInfo, nullptr,
6271 [&](Value *V) { removeAllAssertingVHReferences(V); });
6272 });
6273 }
6274 ++NumMemoryInsts;
6275 return true;
6276}
6277
6278/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6279/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6280/// only handle a 2 operand GEP in the same basic block or a splat constant
6281/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6282/// index.
6283///
6284/// If the existing GEP has a vector base pointer that is splat, we can look
6285/// through the splat to find the scalar pointer. If we can't find a scalar
6286/// pointer there's nothing we can do.
6287///
6288/// If we have a GEP with more than 2 indices where the middle indices are all
6289/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6290///
6291/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6292/// followed by a GEP with an all zeroes vector index. This will enable
6293/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6294/// zero index.
6295bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6296 Value *Ptr) {
6297 Value *NewAddr;
6298
6299 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6300 // Don't optimize GEPs that don't have indices.
6301 if (!GEP->hasIndices())
6302 return false;
6303
6304 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6305 // FIXME: We should support this by sinking the GEP.
6306 if (MemoryInst->getParent() != GEP->getParent())
6307 return false;
6308
6309 SmallVector<Value *, 2> Ops(GEP->operands());
6310
6311 bool RewriteGEP = false;
6312
6313 if (Ops[0]->getType()->isVectorTy()) {
6314 Ops[0] = getSplatValue(Ops[0]);
6315 if (!Ops[0])
6316 return false;
6317 RewriteGEP = true;
6318 }
6319
6320 unsigned FinalIndex = Ops.size() - 1;
6321
6322 // Ensure all but the last index is 0.
6323 // FIXME: This isn't strictly required. All that's required is that they are
6324 // all scalars or splats.
6325 for (unsigned i = 1; i < FinalIndex; ++i) {
6326 auto *C = dyn_cast<Constant>(Ops[i]);
6327 if (!C)
6328 return false;
6329 if (isa<VectorType>(C->getType()))
6330 C = C->getSplatValue();
6331 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6332 if (!CI || !CI->isZero())
6333 return false;
6334 // Scalarize the index if needed.
6335 Ops[i] = CI;
6336 }
6337
6338 // Try to scalarize the final index.
6339 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6340 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6341 auto *C = dyn_cast<ConstantInt>(V);
6342 // Don't scalarize all zeros vector.
6343 if (!C || !C->isZero()) {
6344 Ops[FinalIndex] = V;
6345 RewriteGEP = true;
6346 }
6347 }
6348 }
6349
6350 // If we made any changes or the we have extra operands, we need to generate
6351 // new instructions.
6352 if (!RewriteGEP && Ops.size() == 2)
6353 return false;
6354
6355 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6356
6357 IRBuilder<> Builder(MemoryInst);
6358
6359 Type *SourceTy = GEP->getSourceElementType();
6360 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6361
6362 // If the final index isn't a vector, emit a scalar GEP containing all ops
6363 // and a vector GEP with all zeroes final index.
6364 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6365 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6366 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6367 auto *SecondTy = GetElementPtrInst::getIndexedType(
6368 SourceTy, ArrayRef(Ops).drop_front());
6369 NewAddr =
6370 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6371 } else {
6372 Value *Base = Ops[0];
6373 Value *Index = Ops[FinalIndex];
6374
6375 // Create a scalar GEP if there are more than 2 operands.
6376 if (Ops.size() != 2) {
6377 // Replace the last index with 0.
6378 Ops[FinalIndex] =
6379 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6380 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6382 SourceTy, ArrayRef(Ops).drop_front());
6383 }
6384
6385 // Now create the GEP with scalar pointer and vector index.
6386 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6387 }
6388 } else if (!isa<Constant>(Ptr)) {
6389 // Not a GEP, maybe its a splat and we can create a GEP to enable
6390 // SelectionDAGBuilder to use it as a uniform base.
6391 Value *V = getSplatValue(Ptr);
6392 if (!V)
6393 return false;
6394
6395 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6396
6397 IRBuilder<> Builder(MemoryInst);
6398
6399 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6400 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6401 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6402 Type *ScalarTy;
6403 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6404 Intrinsic::masked_gather) {
6405 ScalarTy = MemoryInst->getType()->getScalarType();
6406 } else {
6407 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6408 Intrinsic::masked_scatter);
6409 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6410 }
6411 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6412 } else {
6413 // Constant, SelectionDAGBuilder knows to check if its a splat.
6414 return false;
6415 }
6416
6417 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6418
6419 // If we have no uses, recursively delete the value and all dead instructions
6420 // using it.
6421 if (Ptr->use_empty())
6423 Ptr, TLInfo, nullptr,
6424 [&](Value *V) { removeAllAssertingVHReferences(V); });
6425
6426 return true;
6427}
6428
6429// This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
6430// Check the pattern we are interested in where there are maximum 2 uses
6431// of the intrinsic which are the extract instructions.
6433 ExtractValueInst *&OverflowExtract) {
6434 // Bail out if it's more than 2 users:
6435 if (I->hasNUsesOrMore(3))
6436 return false;
6437
6438 for (User *U : I->users()) {
6439 auto *Extract = dyn_cast<ExtractValueInst>(U);
6440 if (!Extract || Extract->getNumIndices() != 1)
6441 return false;
6442
6443 unsigned Index = Extract->getIndices()[0];
6444 if (Index == 0)
6445 MulExtract = Extract;
6446 else if (Index == 1)
6447 OverflowExtract = Extract;
6448 else
6449 return false;
6450 }
6451 return true;
6452}
6453
6454// Rewrite the mul_with_overflow intrinsic by checking if both of the
6455// operands' value ranges are within the legal type. If so, we can optimize the
6456// multiplication algorithm. This code is supposed to be written during the step
6457// of type legalization, but given that we need to reconstruct the IR which is
6458// not doable there, we do it here.
6459// The IR after the optimization will look like:
6460// entry:
6461// if signed:
6462// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
6463// overflow_no
6464// else:
6465// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
6466// overflow_no:
6467// overflow:
6468// overflow.res:
6469// \returns true if optimization was applied
6470// TODO: This optimization can be further improved to optimize branching on
6471// overflow where the 'overflow_no' BB can branch directly to the false
6472// successor of overflow, but that would add additional complexity so we leave
6473// it for future work.
6474bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
6475 ModifyDT &ModifiedDT) {
6476 // Check if target supports this optimization.
6478 I->getContext(),
6479 TLI->getValueType(*DL, I->getType()->getContainedType(0))))
6480 return false;
6481
6482 ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
6483 if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
6484 return false;
6485
6486 // Keep track of the instruction to stop reoptimizing it again.
6487 InsertedInsts.insert(I);
6488
6489 Value *LHS = I->getOperand(0);
6490 Value *RHS = I->getOperand(1);
6491 Type *Ty = LHS->getType();
6492 unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
6493 Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
6494
6495 // New BBs:
6496 BasicBlock *OverflowEntryBB =
6497 splitBlockBefore(I->getParent(), I, DTU, LI, nullptr, "");
6498 OverflowEntryBB->takeName(I->getParent());
6499 // Keep the 'br' instruction that is generated as a result of the split to be
6500 // erased/replaced later.
6501 Instruction *OldTerminator = OverflowEntryBB->getTerminator();
6502 BasicBlock *NoOverflowBB =
6503 BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
6504 NoOverflowBB->moveAfter(OverflowEntryBB);
6505 BasicBlock *OverflowBB =
6506 BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
6507 OverflowBB->moveAfter(NoOverflowBB);
6508
6509 // BB overflow.entry:
6510 IRBuilder<> Builder(OverflowEntryBB);
6511 // Extract low and high halves of LHS:
6512 Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
6513 Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
6514 HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
6515
6516 // Extract low and high halves of RHS:
6517 Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
6518 Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
6519 HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
6520
6521 Value *IsAnyBitTrue;
6522 if (IsSigned) {
6523 Value *SignLoLHS =
6524 Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
6525 Value *SignLoRHS =
6526 Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
6527 Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
6528 Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
6529 Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
6530 IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
6531 ConstantInt::getNullValue(Or->getType()));
6532 } else {
6533 Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
6534 ConstantInt::getNullValue(LegalTy));
6535 Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
6536 ConstantInt::getNullValue(LegalTy));
6537 IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
6538 }
6539 Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
6540
6541 // BB overflow.no:
6542 Builder.SetInsertPoint(NoOverflowBB);
6543 Value *ExtLoLHS, *ExtLoRHS;
6544 if (IsSigned) {
6545 ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
6546 ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
6547 } else {
6548 ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
6549 ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
6550 }
6551
6552 Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
6553
6554 // Create the 'overflow.res' BB to merge the results of
6555 // the two paths:
6556 BasicBlock *OverflowResBB = I->getParent();
6557 OverflowResBB->setName("overflow.res");
6558
6559 // BB overflow.no: jump to overflow.res BB
6560 Builder.CreateBr(OverflowResBB);
6561 // No we don't need the old terminator in overflow.entry BB, erase it:
6562 OldTerminator->eraseFromParent();
6563
6564 // BB overflow.res:
6565 Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6566 // Create PHI nodes to merge results from no.overflow BB and overflow BB to
6567 // replace the extract instructions.
6568 PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
6569 *OverflowFlagPHI =
6570 Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6571
6572 // Add the incoming values from no.overflow BB and later from overflow BB.
6573 OverflowResPHI->addIncoming(Mul, NoOverflowBB);
6574 OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
6575 NoOverflowBB);
6576
6577 // Replace all users of MulExtract and OverflowExtract to use the PHI nodes.
6578 if (MulExtract) {
6579 MulExtract->replaceAllUsesWith(OverflowResPHI);
6580 MulExtract->eraseFromParent();
6581 }
6582 if (OverflowExtract) {
6583 OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
6584 OverflowExtract->eraseFromParent();
6585 }
6586
6587 // Remove the intrinsic from parent (overflow.res BB) as it will be part of
6588 // overflow BB
6589 I->removeFromParent();
6590 // BB overflow:
6591 I->insertInto(OverflowBB, OverflowBB->end());
6592 Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6593 Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6594 Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
6595 Builder.CreateBr(OverflowResBB);
6596
6597 // Add The Extracted values to the PHINodes in the overflow.res BB.
6598 OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
6599 OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
6600
6601 DTU->applyUpdates({{DominatorTree::Insert, OverflowEntryBB, OverflowBB},
6602 {DominatorTree::Insert, OverflowEntryBB, NoOverflowBB},
6603 {DominatorTree::Insert, NoOverflowBB, OverflowResBB},
6604 {DominatorTree::Delete, OverflowEntryBB, OverflowResBB},
6605 {DominatorTree::Insert, OverflowBB, OverflowResBB}});
6606
6607 ModifiedDT = ModifyDT::ModifyBBDT;
6608 return true;
6609}
6610
6611/// If there are any memory operands, use OptimizeMemoryInst to sink their
6612/// address computing into the block when possible / profitable.
6613bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6614 bool MadeChange = false;
6615
6616 const TargetRegisterInfo *TRI =
6618 TargetLowering::AsmOperandInfoVector TargetConstraints =
6619 TLI->ParseConstraints(*DL, TRI, *CS);
6620 unsigned ArgNo = 0;
6621 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6622 // Compute the constraint code and ConstraintType to use.
6623 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6624
6625 // TODO: Also handle C_Address?
6626 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6627 OpInfo.isIndirect) {
6628 Value *OpVal = CS->getArgOperand(ArgNo++);
6629 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6630 } else if (OpInfo.Type == InlineAsm::isInput)
6631 ArgNo++;
6632 }
6633
6634 return MadeChange;
6635}
6636
6637/// Check if all the uses of \p Val are equivalent (or free) zero or
6638/// sign extensions.
6639static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6640 assert(!Val->use_empty() && "Input must have at least one use");
6641 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6642 bool IsSExt = isa<SExtInst>(FirstUser);
6643 Type *ExtTy = FirstUser->getType();
6644 for (const User *U : Val->users()) {
6645 const Instruction *UI = cast<Instruction>(U);
6646 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6647 return false;
6648 Type *CurTy = UI->getType();
6649 // Same input and output types: Same instruction after CSE.
6650 if (CurTy == ExtTy)
6651 continue;
6652
6653 // If IsSExt is true, we are in this situation:
6654 // a = Val
6655 // b = sext ty1 a to ty2
6656 // c = sext ty1 a to ty3
6657 // Assuming ty2 is shorter than ty3, this could be turned into:
6658 // a = Val
6659 // b = sext ty1 a to ty2
6660 // c = sext ty2 b to ty3
6661 // However, the last sext is not free.
6662 if (IsSExt)
6663 return false;
6664
6665 // This is a ZExt, maybe this is free to extend from one type to another.
6666 // In that case, we would not account for a different use.
6667 Type *NarrowTy;
6668 Type *LargeTy;
6669 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6670 CurTy->getScalarType()->getIntegerBitWidth()) {
6671 NarrowTy = CurTy;
6672 LargeTy = ExtTy;
6673 } else {
6674 NarrowTy = ExtTy;
6675 LargeTy = CurTy;
6676 }
6677
6678 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6679 return false;
6680 }
6681 // All uses are the same or can be derived from one another for free.
6682 return true;
6683}
6684
6685/// Try to speculatively promote extensions in \p Exts and continue
6686/// promoting through newly promoted operands recursively as far as doing so is
6687/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6688/// When some promotion happened, \p TPT contains the proper state to revert
6689/// them.
6690///
6691/// \return true if some promotion happened, false otherwise.
6692bool CodeGenPrepare::tryToPromoteExts(
6693 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6694 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6695 unsigned CreatedInstsCost) {
6696 bool Promoted = false;
6697
6698 // Iterate over all the extensions to try to promote them.
6699 for (auto *I : Exts) {
6700 // Early check if we directly have ext(load).
6701 if (isa<LoadInst>(I->getOperand(0))) {
6702 ProfitablyMovedExts.push_back(I);
6703 continue;
6704 }
6705
6706 // Check whether or not we want to do any promotion. The reason we have
6707 // this check inside the for loop is to catch the case where an extension
6708 // is directly fed by a load because in such case the extension can be moved
6709 // up without any promotion on its operands.
6711 return false;
6712
6713 // Get the action to perform the promotion.
6714 TypePromotionHelper::Action TPH =
6715 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6716 // Check if we can promote.
6717 if (!TPH) {
6718 // Save the current extension as we cannot move up through its operand.
6719 ProfitablyMovedExts.push_back(I);
6720 continue;
6721 }
6722
6723 // Save the current state.
6724 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6725 TPT.getRestorationPoint();
6726 SmallVector<Instruction *, 4> NewExts;
6727 unsigned NewCreatedInstsCost = 0;
6728 unsigned ExtCost = !TLI->isExtFree(I);
6729 // Promote.
6730 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6731 &NewExts, nullptr, *TLI);
6732 assert(PromotedVal &&
6733 "TypePromotionHelper should have filtered out those cases");
6734
6735 // We would be able to merge only one extension in a load.
6736 // Therefore, if we have more than 1 new extension we heuristically
6737 // cut this search path, because it means we degrade the code quality.
6738 // With exactly 2, the transformation is neutral, because we will merge
6739 // one extension but leave one. However, we optimistically keep going,
6740 // because the new extension may be removed too. Also avoid replacing a
6741 // single free extension with multiple extensions, as this increases the
6742 // number of IR instructions while not providing any savings.
6743 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6744 // FIXME: It would be possible to propagate a negative value instead of
6745 // conservatively ceiling it to 0.
6746 TotalCreatedInstsCost =
6747 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6748 if (!StressExtLdPromotion &&
6749 (TotalCreatedInstsCost > 1 ||
6750 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6751 (ExtCost == 0 && NewExts.size() > 1))) {
6752 // This promotion is not profitable, rollback to the previous state, and
6753 // save the current extension in ProfitablyMovedExts as the latest
6754 // speculative promotion turned out to be unprofitable.
6755 TPT.rollback(LastKnownGood);
6756 ProfitablyMovedExts.push_back(I);
6757 continue;
6758 }
6759 // Continue promoting NewExts as far as doing so is profitable.
6760 SmallVector<Instruction *, 2> NewlyMovedExts;
6761 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6762 bool NewPromoted = false;
6763 for (auto *ExtInst : NewlyMovedExts) {
6764 Instruction *MovedExt = cast<Instruction>(ExtInst);
6765 Value *ExtOperand = MovedExt->getOperand(0);
6766 // If we have reached to a load, we need this extra profitability check
6767 // as it could potentially be merged into an ext(load).
6768 if (isa<LoadInst>(ExtOperand) &&
6769 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6770 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6771 continue;
6772
6773 ProfitablyMovedExts.push_back(MovedExt);
6774 NewPromoted = true;
6775 }
6776
6777 // If none of speculative promotions for NewExts is profitable, rollback
6778 // and save the current extension (I) as the last profitable extension.
6779 if (!NewPromoted) {
6780 TPT.rollback(LastKnownGood);
6781 ProfitablyMovedExts.push_back(I);
6782 continue;
6783 }
6784 // The promotion is profitable.
6785 Promoted = true;
6786 }
6787 return Promoted;
6788}
6789
6790/// Merging redundant sexts when one is dominating the other.
6791bool CodeGenPrepare::mergeSExts(Function &F) {
6792 bool Changed = false;
6793 for (auto &Entry : ValToSExtendedUses) {
6794 SExts &Insts = Entry.second;
6795 SExts CurPts;
6796 for (Instruction *Inst : Insts) {
6797 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6798 Inst->getOperand(0) != Entry.first)
6799 continue;
6800 bool inserted = false;
6801 for (auto &Pt : CurPts) {
6802 if (getDT().dominates(Inst, Pt)) {
6803 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6804 RemovedInsts.insert(Pt);
6805 Pt->removeFromParent();
6806 Pt = Inst;
6807 inserted = true;
6808 Changed = true;
6809 break;
6810 }
6811 if (!getDT().dominates(Pt, Inst))
6812 // Give up if we need to merge in a common dominator as the
6813 // experiments show it is not profitable.
6814 continue;
6815 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6816 RemovedInsts.insert(Inst);
6817 Inst->removeFromParent();
6818 inserted = true;
6819 Changed = true;
6820 break;
6821 }
6822 if (!inserted)
6823 CurPts.push_back(Inst);
6824 }
6825 }
6826 return Changed;
6827}
6828
6829// Splitting large data structures so that the GEPs accessing them can have
6830// smaller offsets so that they can be sunk to the same blocks as their users.
6831// For example, a large struct starting from %base is split into two parts
6832// where the second part starts from %new_base.
6833//
6834// Before:
6835// BB0:
6836// %base =
6837//
6838// BB1:
6839// %gep0 = gep %base, off0
6840// %gep1 = gep %base, off1
6841// %gep2 = gep %base, off2
6842//
6843// BB2:
6844// %load1 = load %gep0
6845// %load2 = load %gep1
6846// %load3 = load %gep2
6847//
6848// After:
6849// BB0:
6850// %base =
6851// %new_base = gep %base, off0
6852//
6853// BB1:
6854// %new_gep0 = %new_base
6855// %new_gep1 = gep %new_base, off1 - off0
6856// %new_gep2 = gep %new_base, off2 - off0
6857//
6858// BB2:
6859// %load1 = load i32, i32* %new_gep0
6860// %load2 = load i32, i32* %new_gep1
6861// %load3 = load i32, i32* %new_gep2
6862//
6863// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6864// their offsets are small enough to fit into the addressing mode.
// Walks LargeOffsetGEPMap and, for each base value, rewrites the recorded
// large-offset GEPs as small offsets from one or more newly created base
// pointers. Returns true if at least one GEP was replaced.
6865bool CodeGenPrepare::splitLargeGEPOffsets() {
6866 bool Changed = false;
     // Each entry maps a base value to the (GEP, constant offset) pairs
     // collected for it.
6867 for (auto &Entry : LargeOffsetGEPMap) {
6868 Value *OldBase = Entry.first;
6869 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6870 &LargeOffsetGEPs = Entry.second;
     // Strict ordering: primarily by offset; GEPs with equal offsets are
     // ordered by their LargeOffsetGEPID entry. An element never compares
     // less than itself.
6871 auto compareGEPOffset =
6872 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6873 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6874 if (LHS.first == RHS.first)
6875 return false;
6876 if (LHS.second != RHS.second)
6877 return LHS.second < RHS.second;
6878 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6879 };
6880 // Sorting all the GEPs of the same data structures based on the offsets.
6881 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
     // Drop adjacent duplicate (GEP, offset) pairs left after sorting.
6882 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6883 // Skip if all the GEPs have the same offsets.
6884 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6885 continue;
     // Start with the smallest-offset GEP as the base candidate.
6886 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6887 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6888 Value *NewBaseGEP = nullptr;
6889
     // Materializes "OldBase + BaseOffset" as a ptradd named "splitgep",
     // stores it in NewBaseGEP and records it in NewGEPBases.
6890 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6891 GetElementPtrInst *GEP) {
6892 LLVMContext &Ctx = GEP->getContext();
6893 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6894 Type *I8PtrTy =
6895 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6896
6897 BasicBlock::iterator NewBaseInsertPt;
6898 BasicBlock *NewBaseInsertBB;
6899 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6900 // If the base of the struct is an instruction, the new base will be
6901 // inserted close to it.
6902 NewBaseInsertBB = BaseI->getParent();
6903 if (isa<PHINode>(BaseI))
6904 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6905 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
     // An invoke ends its block, so the new base goes into a block created
     // by splitting the edge to the invoke's normal destination.
6906 NewBaseInsertBB =
6907 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), &getDT(), LI);
6908 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6909 } else
6910 NewBaseInsertPt = std::next(BaseI->getIterator());
6911 } else {
6912 // If the current base is an argument or global value, the new base
6913 // will be inserted to the entry block.
6914 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6915 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6916 }
6917 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6918 // Create a new base.
6919 // TODO: Avoid implicit trunc?
6920 // See https://github.com/llvm/llvm-project/issues/112510.
6921 Value *BaseIndex =
6922 ConstantInt::getSigned(PtrIdxTy, BaseOffset, /*ImplicitTrunc=*/true);
6923 NewBaseGEP = OldBase;
6924 if (NewBaseGEP->getType() != I8PtrTy)
6925 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6926 NewBaseGEP =
6927 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6928 NewGEPBases.insert(NewBaseGEP);
6929 return;
6930 };
6931
6932 // Check whether all the offsets can be encoded with preferred common base.
     // A zero return value from the target hook is treated as "no preference".
6933 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6934 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6935 BaseOffset = PreferBase;
6936 // Create a new base if the offset of the BaseGEP can be decoded with one
6937 // instruction.
6938 createNewBase(BaseOffset, OldBase, BaseGEP);
6939 }
6940
     // Rewrite the GEPs in ascending offset order. Each processed entry is
     // erased from the list, so the iterator advances through erase().
6941 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6942 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6943 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6944 int64_t Offset = LargeOffsetGEP->second;
6945 if (Offset != BaseOffset) {
6946 TargetLowering::AddrMode AddrMode;
6947 AddrMode.HasBaseReg = true;
6948 AddrMode.BaseOffs = Offset - BaseOffset;
6949 // The result type of the GEP might not be the type of the memory
6950 // access.
6951 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6952 GEP->getResultElementType(),
6953 GEP->getAddressSpace())) {
6954 // We need to create a new base if the offset to the current base is
6955 // too large to fit into the addressing mode. So, a very large struct
6956 // may be split into several parts.
6957 BaseGEP = GEP;
6958 BaseOffset = Offset;
6959 NewBaseGEP = nullptr;
6960 }
6961 }
6962
6963 // Generate a new GEP to replace the current one.
6964 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6965
6966 if (!NewBaseGEP) {
6967 // Create a new base if we don't have one yet. Find the insertion
6968 // pointer for the new base first.
6969 createNewBase(BaseOffset, OldBase, GEP);
6970 }
6971
6972 IRBuilder<> Builder(GEP);
     // A GEP whose offset equals the base offset is replaced by the new base
     // itself; otherwise by a ptradd of the remaining delta.
6973 Value *NewGEP = NewBaseGEP;
6974 if (Offset != BaseOffset) {
6975 // Calculate the new offset for the new GEP.
6976 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6977 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6978 }
6979 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
     // Remove the GEP from the bookkeeping containers before erasing the
     // instruction itself.
6980 LargeOffsetGEPID.erase(GEP);
6981 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6982 GEP->eraseFromParent();
6983 Changed = true;
6984 }
6985 }
6986 return Changed;
6987}
6988
// Try to convert the web of phi nodes connected to \p I to the type used by
// the bitcasts around it.
//
// \p Visited accumulates every phi examined (including newly created ones) so
// that later calls skip them. \p DeletedInstrs receives the bitcasts and old
// phis that became dead; this function does not erase them.
// Returns true if the web was converted, false if nothing was changed.
6989bool CodeGenPrepare::optimizePhiType(
6990 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6991 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6992 // We are looking for a collection of interconnected phi nodes that together
6993 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6994 // are of the same type. Convert the whole set of nodes to the type of the
6995 // bitcast.
6996 Type *PhiTy = I->getType();
6997 Type *ConvertTy = nullptr;
     // Only integer and floating-point phis that were not seen before are
     // candidates.
6998 if (Visited.count(I) ||
6999 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
7000 return false;
7001
7002 SmallVector<Instruction *, 4> Worklist;
7003 Worklist.push_back(cast<Instruction>(I));
7004 SmallPtrSet<PHINode *, 4> PhiNodes;
7005 SmallPtrSet<ConstantData *, 4> Constants;
7006 PhiNodes.insert(I);
7007 Visited.insert(I);
     // Defs: non-phi instructions feeding the phi web (loads, extractelements,
     // bitcasts). Uses: stores and bitcasts consuming values of the web.
7008 SmallPtrSet<Instruction *, 4> Defs;
7009 SmallPtrSet<Instruction *, 4> Uses;
7010 // This works by adding extra bitcasts between load/stores and removing
7011 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
7012 // we can get in the situation where we remove a bitcast in one iteration
7013 // just to add it again in the next. We need to ensure that at least one
7014 // bitcast we remove is anchored to something that will not change back.
7015 bool AnyAnchored = false;
7016
     // Collection phase: nothing is modified until the whole web has been
     // accepted, so every early "return false" below leaves the IR untouched.
7017 while (!Worklist.empty()) {
7018 Instruction *II = Worklist.pop_back_val();
7019
7020 if (auto *Phi = dyn_cast<PHINode>(II)) {
7021 // Handle Defs, which might also be PHI's
7022 for (Value *V : Phi->incoming_values()) {
7023 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7024 if (!PhiNodes.count(OpPhi)) {
     // A phi already visited by an earlier call cannot join this web.
7025 if (!Visited.insert(OpPhi).second)
7026 return false;
7027 PhiNodes.insert(OpPhi);
7028 Worklist.push_back(OpPhi);
7029 }
7030 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
7031 if (!OpLoad->isSimple())
7032 return false;
7033 if (Defs.insert(OpLoad).second)
7034 Worklist.push_back(OpLoad);
7035 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
7036 if (Defs.insert(OpEx).second)
7037 Worklist.push_back(OpEx);
7038 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
     // The first bitcast seen fixes the conversion type; every other bitcast
     // must agree with it.
7039 if (!ConvertTy)
7040 ConvertTy = OpBC->getOperand(0)->getType();
7041 if (OpBC->getOperand(0)->getType() != ConvertTy)
7042 return false;
7043 if (Defs.insert(OpBC).second) {
7044 Worklist.push_back(OpBC);
7045 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
7046 !isa<ExtractElementInst>(OpBC->getOperand(0));
7047 }
7048 } else if (auto *OpC = dyn_cast<ConstantData>(V))
7049 Constants.insert(OpC);
7050 else
7051 return false;
7052 }
7053 }
7054
7055 // Handle uses which might also be phi's
7056 for (User *V : II->users()) {
7057 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7058 if (!PhiNodes.count(OpPhi)) {
7059 if (Visited.count(OpPhi))
7060 return false;
7061 PhiNodes.insert(OpPhi);
7062 Visited.insert(OpPhi);
7063 Worklist.push_back(OpPhi);
7064 }
7065 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
     // Only simple stores that use II as operand 0 are accepted.
7066 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
7067 return false;
7068 Uses.insert(OpStore);
7069 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7070 if (!ConvertTy)
7071 ConvertTy = OpBC->getType();
7072 if (OpBC->getType() != ConvertTy)
7073 return false;
7074 Uses.insert(OpBC);
     // A bitcast with any non-store user counts as anchored.
7075 AnyAnchored |=
7076 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
7077 } else {
7078 return false;
7079 }
7080 }
7081 }
7082
     // Bail out unless a conversion type was found, at least one bitcast is
     // anchored, the type actually changes, and the target approves.
7083 if (!ConvertTy || !AnyAnchored || PhiTy == ConvertTy ||
7084 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
7085 return false;
7086
7087 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
7088 << *ConvertTy << "\n");
7089
7090 // Create all the new phi nodes of the new type, and bitcast any loads to the
7091 // correct type.
     // ValMap maps each old value of the web to its ConvertTy replacement.
7092 ValueToValueMap ValMap;
7093 for (ConstantData *C : Constants)
7094 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
7095 for (Instruction *D : Defs) {
7096 if (isa<BitCastInst>(D)) {
     // An incoming bitcast is bypassed: its source already has ConvertTy.
7097 ValMap[D] = D->getOperand(0);
7098 DeletedInstrs.insert(D);
7099 } else {
7100 BasicBlock::iterator insertPt = std::next(D->getIterator());
7101 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
7102 }
7103 }
7104 for (PHINode *Phi : PhiNodes)
7105 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
7106 Phi->getName() + ".tc", Phi->getIterator());
7107 // Pipe together all the PhiNodes.
7108 for (PHINode *Phi : PhiNodes) {
7109 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
7110 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
7111 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
7112 Phi->getIncomingBlock(i));
     // Mark the replacement phi as visited too.
7113 Visited.insert(NewPhi);
7114 }
7115 // And finally pipe up the stores and bitcasts
7116 for (Instruction *U : Uses) {
7117 if (isa<BitCastInst>(U)) {
7118 DeletedInstrs.insert(U);
7119 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
7120 } else {
     // Stores keep storing a PhiTy value: cast the converted value back just
     // before the store.
7121 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
7122 U->getIterator()));
7123 }
7124 }
7125
7126 // Save the removed phis to be deleted later.
7127 DeletedInstrs.insert_range(PhiNodes);
7128 return true;
7129}
7130
7131bool CodeGenPrepare::optimizePhiTypes(Function &F) {
7132 if (!OptimizePhiTypes)
7133 return false;
7134
7135 bool Changed = false;
7136 SmallPtrSet<PHINode *, 4> Visited;
7137 SmallPtrSet<Instruction *, 4> DeletedInstrs;
7138
7139 // Attempt to optimize all the phis in the functions to the correct type.
7140 for (auto &BB : F)
7141 for (auto &Phi : BB.phis())
7142 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
7143
7144 // Remove any old phi's that have been converted.
7145 for (auto *I : DeletedInstrs) {
7146 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
7147 I->eraseFromParent();
7148 }
7149
7150 return Changed;
7151}
7152
7153/// Return true, if an ext(load) can be formed from an extension in
7154/// \p MovedExts.
7155bool CodeGenPrepare::canFormExtLd(
7156 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
7157 Instruction *&Inst, bool HasPromoted) {
7158 for (auto *MovedExtInst : MovedExts) {
7159 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
7160 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
7161 Inst = MovedExtInst;
7162 break;
7163 }
7164 }
7165 if (!LI)
7166 return false;
7167
7168 // If they're already in the same block, there's nothing to do.
7169 // Make the cheap checks first if we did not promote.
7170 // If we promoted, we need to check if it is indeed profitable.
7171 if (!HasPromoted && LI->getParent() == Inst->getParent())
7172 return false;
7173
7174 return TLI->isExtLoad(LI, Inst, *DL);
7175}
7176
7177/// Move a zext or sext fed by a load into the same basic block as the load,
7178/// unless conditions are unfavorable. This allows SelectionDAG to fold the
7179/// extend into the load.
7180///
7181/// E.g.,
7182/// \code
7183/// %ld = load i32* %addr
7184/// %add = add nuw i32 %ld, 4
7185/// %zext = zext i32 %add to i64
7186// \endcode
7187/// =>
7188/// \code
7189/// %ld = load i32* %addr
7190/// %zext = zext i32 %ld to i64
7191/// %add = add nuw i64 %zext, 4
7192/// \encode
7193/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
7194/// allow us to match zext(load i32*) to i64.
7195///
7196/// Also, try to promote the computations used to obtain a sign extended
7197/// value used into memory accesses.
7198/// E.g.,
7199/// \code
7200/// a = add nsw i32 b, 3
7201/// d = sext i32 a to i64
7202/// e = getelementptr ..., i64 d
7203/// \endcode
7204/// =>
7205/// \code
7206/// f = sext i32 b to i64
7207/// a = add nsw i64 f, 3
7208/// e = getelementptr ..., i64 a
7209/// \endcode
7210///
7211/// \p Inst[in/out] the extension may be modified during the process if some
7212/// promotions apply.
7213bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7214 bool AllowPromotionWithoutCommonHeader = false;
7215 /// See if it is an interesting sext operations for the address type
7216 /// promotion before trying to promote it, e.g., the ones with the right
7217 /// type and used in memory accesses.
7218 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7219 *Inst, AllowPromotionWithoutCommonHeader);
7220 TypePromotionTransaction TPT(RemovedInsts);
7221 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7222 TPT.getRestorationPoint();
7224 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7225 Exts.push_back(Inst);
7226
7227 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7228
7229 // Look for a load being extended.
7230 LoadInst *LI = nullptr;
7231 Instruction *ExtFedByLoad;
7232
7233 // Try to promote a chain of computation if it allows to form an extended
7234 // load.
7235 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7236 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7237 TPT.commit();
7238 // Move the extend into the same block as the load.
7239 ExtFedByLoad->moveAfter(LI);
7240 ++NumExtsMoved;
7241 Inst = ExtFedByLoad;
7242 return true;
7243 }
7244
7245 // Continue promoting SExts if known as considerable depending on targets.
7246 if (ATPConsiderable &&
7247 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7248 HasPromoted, TPT, SpeculativelyMovedExts))
7249 return true;
7250
7251 TPT.rollback(LastKnownGood);
7252 return false;
7253}
7254
7255// Perform address type promotion if doing so is profitable.
7256// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7257// instructions that sign extended the same initial value. However, if
7258// AllowPromotionWithoutCommonHeader == true, we expect promoting the
7259// extension is just profitable.
7260bool CodeGenPrepare::performAddressTypePromotion(
7261 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7262 bool HasPromoted, TypePromotionTransaction &TPT,
7263 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7264 bool Promoted = false;
7265 SmallPtrSet<Instruction *, 1> UnhandledExts;
7266 bool AllSeenFirst = true;
7267 for (auto *I : SpeculativelyMovedExts) {
7268 Value *HeadOfChain = I->getOperand(0);
7269 auto AlreadySeen = SeenChainsForSExt.find(HeadOfChain);
7270 // If there is an unhandled SExt which has the same header, try to promote
7271 // it as well.
7272 if (AlreadySeen != SeenChainsForSExt.end()) {
7273 if (AlreadySeen->second != nullptr)
7274 UnhandledExts.insert(AlreadySeen->second);
7275 AllSeenFirst = false;
7276 }
7277 }
7278
7279 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7280 SpeculativelyMovedExts.size() == 1)) {
7281 TPT.commit();
7282 if (HasPromoted)
7283 Promoted = true;
7284 for (auto *I : SpeculativelyMovedExts) {
7285 Value *HeadOfChain = I->getOperand(0);
7286 SeenChainsForSExt[HeadOfChain] = nullptr;
7287 ValToSExtendedUses[HeadOfChain].push_back(I);
7288 }
7289 // Update Inst as promotion happen.
7290 Inst = SpeculativelyMovedExts.pop_back_val();
7291 } else {
7292 // This is the first chain visited from the header, keep the current chain
7293 // as unhandled. Defer to promote this until we encounter another SExt
7294 // chain derived from the same header.
7295 for (auto *I : SpeculativelyMovedExts) {
7296 Value *HeadOfChain = I->getOperand(0);
7297 SeenChainsForSExt[HeadOfChain] = Inst;
7298 }
7299 return false;
7300 }
7301
7302 if (!AllSeenFirst && !UnhandledExts.empty())
7303 for (auto *VisitedSExt : UnhandledExts) {
7304 if (RemovedInsts.count(VisitedSExt))
7305 continue;
7306 TypePromotionTransaction TPT(RemovedInsts);
7308 SmallVector<Instruction *, 2> Chains;
7309 Exts.push_back(VisitedSExt);
7310 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7311 TPT.commit();
7312 if (HasPromoted)
7313 Promoted = true;
7314 for (auto *I : Chains) {
7315 Value *HeadOfChain = I->getOperand(0);
7316 // Mark this as handled.
7317 SeenChainsForSExt[HeadOfChain] = nullptr;
7318 ValToSExtendedUses[HeadOfChain].push_back(I);
7319 }
7320 }
7321 return Promoted;
7322}
7323
7324bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7325 BasicBlock *DefBB = I->getParent();
7326
7327 // If the result of a {s|z}ext and its source are both live out, rewrite all
7328 // other uses of the source with result of extension.
7329 Value *Src = I->getOperand(0);
7330 if (Src->hasOneUse())
7331 return false;
7332
7333 // Only do this xform if truncating is free.
7334 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7335 return false;
7336
7337 // Only safe to perform the optimization if the source is also defined in
7338 // this block.
7339 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7340 return false;
7341
7342 bool DefIsLiveOut = false;
7343 for (User *U : I->users()) {
7345
7346 // Figure out which BB this ext is used in.
7347 BasicBlock *UserBB = UI->getParent();
7348 if (UserBB == DefBB)
7349 continue;
7350 DefIsLiveOut = true;
7351 break;
7352 }
7353 if (!DefIsLiveOut)
7354 return false;
7355
7356 // Make sure none of the uses are PHI nodes.
7357 for (User *U : Src->users()) {
7359 BasicBlock *UserBB = UI->getParent();
7360 if (UserBB == DefBB)
7361 continue;
7362 // Be conservative. We don't want this xform to end up introducing
7363 // reloads just before load / store instructions.
7364 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7365 return false;
7366 }
7367
7368 // InsertedTruncs - Only insert one trunc in each block once.
7369 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7370
7371 bool MadeChange = false;
7372 for (Use &U : Src->uses()) {
7373 Instruction *User = cast<Instruction>(U.getUser());
7374
7375 // Figure out which BB this ext is used in.
7376 BasicBlock *UserBB = User->getParent();
7377 if (UserBB == DefBB)
7378 continue;
7379
7380 // Both src and def are live in this block. Rewrite the use.
7381 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7382
7383 if (!InsertedTrunc) {
7384 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7385 assert(InsertPt != UserBB->end());
7386 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7387 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7388 InsertedInsts.insert(InsertedTrunc);
7389 }
7390
7391 // Replace a use of the {s|z}ext source with a use of the result.
7392 U = InsertedTrunc;
7393 ++NumExtUses;
7394 MadeChange = true;
7395 }
7396
7397 return MadeChange;
7398}
7399
7400// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7401// just after the load if the target can fold this into one extload instruction,
7402// with the hope of eliminating some of the other later "and" instructions using
7403// the loaded value. "and"s that are made trivially redundant by the insertion
7404// of the new "and" are removed by this function, while others (e.g. those whose
7405// path from the load goes through a phi) are left for isel to potentially
7406// remove.
7407//
7408// For example:
7409//
7410// b0:
7411// x = load i32
7412// ...
7413// b1:
7414// y = and x, 0xff
7415// z = use y
7416//
7417// becomes:
7418//
7419// b0:
7420// x = load i32
7421// x' = and x, 0xff
7422// ...
7423// b1:
7424// z = use x'
7425//
7426// whereas:
7427//
7428// b0:
7429// x1 = load i32
7430// ...
7431// b1:
7432// x2 = load i32
7433// ...
7434// b2:
7435// x = phi x1, x2
7436// y = and x, 0xff
7437//
7438// becomes (after a call to optimizeLoadExt for each load):
7439//
7440// b0:
7441// x1 = load i32
7442// x1' = and x1, 0xff
7443// ...
7444// b1:
7445// x2 = load i32
7446// x2' = and x2, 0xff
7447// ...
7448// b2:
7449// x = phi x1', x2'
7450// y = and x, 0xff
// Returns true if a new "and" was inserted right after \p Load; every
// "return false" below happens before the IR is modified.
7451bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
     // Only simple loads of integer or pointer type are handled.
7452 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7453 return false;
7454
7455 // Skip loads we've already transformed.
7456 if (Load->hasOneUse() &&
7457 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7458 return false;
7459
7460 // Look at all uses of Load, looking through phis, to determine how many bits
7461 // of the loaded value are needed.
7462 SmallVector<Instruction *, 8> WorkList;
7463 SmallPtrSet<Instruction *, 16> Visited;
     // AndsToMaybeRemove: "and"s applied directly to the load with the widest
     // mask seen at that point. DropFlags: shl/trunc users whose nsw flag is
     // cleared once the new "and" is in place.
7464 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7465 SmallVector<Instruction *, 8> DropFlags;
7466 for (auto *U : Load->users())
7467 WorkList.push_back(cast<Instruction>(U));
7468
7469 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7470 unsigned BitWidth = LoadResultVT.getSizeInBits();
7471 // If the BitWidth is 0, do not try to optimize the type
7472 if (BitWidth == 0)
7473 return false;
7474
     // DemandBits: union of the bits any user may read.
     // WidestAndBits: largest "and" mask seen (unsigned comparison).
7475 APInt DemandBits(BitWidth, 0);
7476 APInt WidestAndBits(BitWidth, 0);
7477
7478 while (!WorkList.empty()) {
7479 Instruction *I = WorkList.pop_back_val();
7480
7481 // Break use-def graph loops.
7482 if (!Visited.insert(I).second)
7483 continue;
7484
7485 // For a PHI node, push all of its users.
7486 if (auto *Phi = dyn_cast<PHINode>(I)) {
7487 for (auto *U : Phi->users())
7488 WorkList.push_back(cast<Instruction>(U));
7489 continue;
7490 }
7491
7492 switch (I->getOpcode()) {
7493 case Instruction::And: {
     // Only "and" with a constant mask in operand 1 is understood.
7494 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7495 if (!AndC)
7496 return false;
7497 APInt AndBits = AndC->getValue();
7498 DemandBits |= AndBits;
7499 // Keep track of the widest and mask we see.
7500 if (AndBits.ugt(WidestAndBits))
7501 WidestAndBits = AndBits;
7502 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7503 AndsToMaybeRemove.push_back(I);
7504 break;
7505 }
7506
7507 case Instruction::Shl: {
7508 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7509 if (!ShlC)
7510 return false;
     // The shift amount is clamped to BitWidth - 1; only the low
     // (BitWidth - ShiftAmt) bits of the input survive the shift.
7511 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7512 DemandBits.setLowBits(BitWidth - ShiftAmt);
7513 DropFlags.push_back(I);
7514 break;
7515 }
7516
7517 case Instruction::Trunc: {
     // A trunc reads only the low bits that fit its result type.
7518 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7519 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7520 DemandBits.setLowBits(TruncBitWidth);
7521 DropFlags.push_back(I);
7522 break;
7523 }
7524
7525 default:
     // Any other kind of user defeats the analysis.
7526 return false;
7527 }
7528 }
7529
7530 uint32_t ActiveBits = DemandBits.getActiveBits();
7531 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7532 // target even if isLoadLegal says an i1 EXTLOAD is valid. For example,
7533 // for the AArch64 target isLoadLegal(i32, i1, ..., ZEXTLOAD, false) returns
7534 // true, but (and (load x) 1) is not matched as a single instruction, rather
7535 // as a LDR followed by an AND.
7536 // TODO: Look into removing this restriction by fixing backends to either
7537 // return false for isLoadLegal for i1 or have them select this pattern to
7538 // a single instruction.
7539 //
7540 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7541 // mask, since these are the only ands that will be removed by isel.
7542 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7543 WidestAndBits != DemandBits)
7544 return false;
7545
7546 LLVMContext &Ctx = Load->getType()->getContext();
7547 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7548 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7549
7550 // Reject cases that won't be matched as extloads.
7551 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7552 !TLI->isLoadLegal(LoadResultVT, TruncVT, Load->getAlign(),
7553 Load->getPointerAddressSpace(), ISD::ZEXTLOAD, false))
7554 return false;
7555
     // Insert "and Load, DemandBits" immediately after the load.
7556 IRBuilder<> Builder(Load->getNextNode());
7557 auto *NewAnd = cast<Instruction>(
7558 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7559 // Mark this instruction as "inserted by CGP", so that other
7560 // optimizations don't touch it.
7561 InsertedInsts.insert(NewAnd);
7562
7563 // Replace all uses of load with new and (except for the use of load in the
7564 // new and itself).
     // The replacement also rewrites NewAnd's own operand, so it is pointed
     // back at the load afterwards.
7565 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7566 NewAnd->setOperand(0, Load);
7567
7568 // Remove any and instructions that are now redundant.
7569 for (auto *And : AndsToMaybeRemove)
7570 // Check that the and mask is the same as the one we decided to put on the
7571 // new and.
7572 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7573 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
     // Step CurInstIterator past the "and" before erasing it so the
     // iterator does not refer to a deleted instruction.
7574 if (&*CurInstIterator == And)
7575 CurInstIterator = std::next(And->getIterator());
7576 And->eraseFromParent();
7577 ++NumAndUses;
7578 }
7579
7580 // NSW flags may no longer hold.
7581 for (auto *Inst : DropFlags)
7582 Inst->setHasNoSignedWrap(false);
7583
7584 ++NumAndsAdded;
7585 return true;
7586}
7587
7588/// Check if V (an operand of a select instruction) is an expensive instruction
7589/// that is only used once.
7591 auto *I = dyn_cast<Instruction>(V);
7592 // If it's safe to speculatively execute, then it should not have side
7593 // effects; therefore, it's safe to sink and possibly *not* execute.
7594 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7595 TTI->isExpensiveToSpeculativelyExecute(I);
7596}
7597
7598/// Returns true if a SelectInst should be turned into an explicit branch.
7600 const TargetLowering *TLI,
7601 SelectInst *SI) {
7602 // If even a predictable select is cheap, then a branch can't be cheaper.
7603 if (!TLI->isPredictableSelectExpensive())
7604 return false;
7605
7606 // FIXME: This should use the same heuristics as IfConversion to determine
7607 // whether a select is better represented as a branch.
7608
7609 // If metadata tells us that the select condition is obviously predictable,
7610 // then we want to replace the select with a branch.
7611 uint64_t TrueWeight, FalseWeight;
7612 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7613 uint64_t Max = std::max(TrueWeight, FalseWeight);
7614 uint64_t Sum = TrueWeight + FalseWeight;
7615 if (Sum != 0) {
7616 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7617 if (Probability > TTI->getPredictableBranchThreshold())
7618 return true;
7619 }
7620 }
7621
7622 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7623
7624 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7625 // comparison condition. If the compare has more than one use, there's
7626 // probably another cmov or setcc around, so it's not worth emitting a branch.
7627 if (!Cmp || !Cmp->hasOneUse())
7628 return false;
7629
7630 // If either operand of the select is expensive and only needed on one side
7631 // of the select, we should form a branch.
7632 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7633 sinkSelectOperand(TTI, SI->getFalseValue()))
7634 return true;
7635
7636 return false;
7637}
7638
7639/// If \p isTrue is true, return the true value of \p SI, otherwise return
7640/// false value of \p SI. If the true/false value of \p SI is defined by any
7641/// select instructions in \p Selects, look through the defining select
7642/// instruction until the true/false value is not defined in \p Selects.
7643static Value *
7645 const SmallPtrSet<const Instruction *, 2> &Selects) {
7646 Value *V = nullptr;
7647
7648 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7649 DefSI = dyn_cast<SelectInst>(V)) {
7650 assert(DefSI->getCondition() == SI->getCondition() &&
7651 "The condition of DefSI does not match with SI");
7652 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7653 }
7654
7655 assert(V && "Failed to get select true/false value");
7656 return V;
7657}
7658
7659bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7660 assert(Shift->isShift() && "Expected a shift");
7661
7662 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7663 // general vector shifts, and (3) the shift amount is a select-of-splatted
7664 // values, hoist the shifts before the select:
7665 // shift Op0, (select Cond, TVal, FVal) -->
7666 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7667 //
7668 // This is inverting a generic IR transform when we know that the cost of a
7669 // general vector shift is more than the cost of 2 shift-by-scalars.
7670 // We can't do this effectively in SDAG because we may not be able to
7671 // determine if the select operands are splats from within a basic block.
7672 Type *Ty = Shift->getType();
7673 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7674 return false;
7675 Value *Cond, *TVal, *FVal;
7676 if (!match(Shift->getOperand(1),
7677 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7678 return false;
7679 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7680 return false;
7681
7682 IRBuilder<> Builder(Shift);
7683 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7684 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7685 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7686 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7687 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7688 Shift->eraseFromParent();
7689 return true;
7690}
7691
7692bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7693 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7694 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7695 "Expected a funnel shift");
7696
7697 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7698 // than general vector shifts, and (3) the shift amount is select-of-splatted
7699 // values, hoist the funnel shifts before the select:
7700 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7701 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7702 //
7703 // This is inverting a generic IR transform when we know that the cost of a
7704 // general vector shift is more than the cost of 2 shift-by-scalars.
7705 // We can't do this effectively in SDAG because we may not be able to
7706 // determine if the select operands are splats from within a basic block.
7707 Type *Ty = Fsh->getType();
7708 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7709 return false;
7710 Value *Cond, *TVal, *FVal;
7711 if (!match(Fsh->getOperand(2),
7712 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7713 return false;
7714 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7715 return false;
7716
7717 IRBuilder<> Builder(Fsh);
7718 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7719 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7720 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7721 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7722 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7723 Fsh->eraseFromParent();
7724 return true;
7725}
7726
7727 /// If we have a SelectInst that will likely profit from branch prediction,
7728 /// turn it into a branch.
/// The consecutive selects that share \p SI's condition are collected in ASI
/// and handled together with it.
/// \returns true if the collected selects were replaced by a conditional
/// branch plus PHI nodes, false if no branch was created.
7729 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// NOTE(review): the condition guarding this early return is not visible in
// this listing.
7731 return false;
7732
7733 // If the SelectOptimize pass is enabled, selects have already been optimized.
// NOTE(review): the condition guarding this early return is not visible in
// this listing.
7735 return false;
7736
7737 // Find all consecutive select instructions that share the same condition.
// NOTE(review): the declaration of ASI and the first line of the loop header
// are not visible in this listing.
7739 ASI.push_back(SI);
7741 It != SI->getParent()->end(); ++It) {
7742 SelectInst *I = dyn_cast<SelectInst>(&*It);
7743 if (I && SI->getCondition() == I->getCondition()) {
7744 ASI.push_back(I);
7745 } else {
7746 break;
7747 }
7748 }
7749
7750 SelectInst *LastSI = ASI.back();
7751 // Increment the current iterator to skip all the rest of select instructions
7752 // because they will be either "not lowered" or "all lowered" to branch.
7753 CurInstIterator = std::next(LastSI->getIterator());
7754 // Examine debug-info attached to the consecutive select instructions. They
7755 // won't be individually optimised by optimizeInst, so we need to perform
7756 // DbgVariableRecord maintenance here instead.
7757 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7758 fixupDbgVariableRecordsOnInst(*SI);
7759
// Any condition whose type is not i1 is treated as a vector condition.
7760 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7761
7762 // Can we convert the 'select' to CF ?
7763 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7764 return false;
7765
// Pick the select kind to query the target about, based on the value type.
7766 TargetLowering::SelectSupportKind SelectKind;
7767 if (SI->getType()->isVectorTy())
7768 SelectKind = TargetLowering::ScalarCondVectorVal;
7769 else
7770 SelectKind = TargetLowering::ScalarValSelect;
7771
// NOTE(review): part of this condition is not visible in this listing.
7772 if (TLI->isSelectSupported(SelectKind) &&
7774 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI)))
7775 return false;
7776
7777 // Transform a sequence like this:
7778 // start:
7779 // %cmp = cmp uge i32 %a, %b
7780 // %sel = select i1 %cmp, i32 %c, i32 %d
7781 //
7782 // Into:
7783 // start:
7784 // %cmp = cmp uge i32 %a, %b
7785 // %cmp.frozen = freeze %cmp
7786 // br i1 %cmp.frozen, label %select.true, label %select.false
7787 // select.true:
7788 // br label %select.end
7789 // select.false:
7790 // br label %select.end
7791 // select.end:
7792 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7793 //
7794 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7795 // In addition, we may sink instructions that produce %c or %d from
7796 // the entry block into the destination(s) of the new branch.
7797 // If the true or false blocks do not contain a sunken instruction, that
7798 // block and its branch may be optimized away. In that case, one side of the
7799 // first branch will point directly to select.end, and the corresponding PHI
7800 // predecessor block will be the start block.
7801 // The CFG is altered here and we update the DominatorTree and the LoopInfo,
7802 // but we don't set a ModifiedDT flag to avoid restarting the function walk in
7803 // runOnFunction for each select optimized.
7804
7805 // Collect values that go on the true side and the values that go on the false
7806 // side.
7807 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7808 for (SelectInst *SI : ASI) {
7809 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7810 TrueInstrs.push_back(cast<Instruction>(V));
7811 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7812 FalseInstrs.push_back(cast<Instruction>(V));
7813 }
7814
7815 // Split the select block, according to how many (if any) values go on each
7816 // side.
7817 BasicBlock *StartBlock = SI->getParent();
7818 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7819 // We should split before any debug-info.
7820 SplitPt.setHeadBit(true);
7821
// The frozen condition is created before the first select and is what the new
// branch tests.
7822 IRBuilder<> IB(SI);
7823 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7824
7825 BasicBlock *TrueBlock = nullptr;
7826 BasicBlock *FalseBlock = nullptr;
7827 BasicBlock *EndBlock = nullptr;
7828 UncondBrInst *TrueBranch = nullptr;
7829 UncondBrInst *FalseBranch = nullptr;
// Nothing to sink on the true side: only a false block is created (it exists
// even when FalseInstrs is empty too).
7830 if (TrueInstrs.size() == 0) {
7831 FalseBranch = cast<UncondBrInst>(
7832 SplitBlockAndInsertIfElse(CondFr, SplitPt, false, nullptr, DTU, LI));
7833 FalseBlock = FalseBranch->getParent();
7834 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
// Nothing to sink on the false side: only a true block is created.
7835 } else if (FalseInstrs.size() == 0) {
7836 TrueBranch = cast<UncondBrInst>(
7837 SplitBlockAndInsertIfThen(CondFr, SplitPt, false, nullptr, DTU, LI));
7838 TrueBlock = TrueBranch->getParent();
7839 EndBlock = TrueBranch->getSuccessor();
// Both sides receive sunk instructions: create both blocks.
7840 } else {
7841 Instruction *ThenTerm = nullptr;
7842 Instruction *ElseTerm = nullptr;
7843 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7844 nullptr, DTU, LI);
7845 TrueBranch = cast<UncondBrInst>(ThenTerm);
7846 FalseBranch = cast<UncondBrInst>(ElseTerm);
7847 TrueBlock = TrueBranch->getParent();
7848 FalseBlock = FalseBranch->getParent();
7849 EndBlock = TrueBranch->getSuccessor();
7850 }
7851
7852 EndBlock->setName("select.end");
7853 if (TrueBlock)
7854 TrueBlock->setName("select.true.sink");
7855 if (FalseBlock)
7856 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7857 : "select.false.sink");
7858
// Record the newly created blocks in FreshBBs when IsHugeFunc is set.
7859 if (IsHugeFunc) {
7860 if (TrueBlock)
7861 FreshBBs.insert(TrueBlock);
7862 if (FalseBlock)
7863 FreshBBs.insert(FalseBlock);
7864 FreshBBs.insert(EndBlock);
7865 }
7866
// Give EndBlock the same block frequency as StartBlock.
7867 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7868
// Copy the listed metadata kinds from the first select onto StartBlock's
// terminator.
7869 static const unsigned MD[] = {
7870 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7871 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7872 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7873
7874 // Sink expensive instructions into the conditional blocks to avoid executing
7875 // them speculatively.
7876 for (Instruction *I : TrueInstrs)
7877 I->moveBefore(TrueBranch->getIterator());
7878 for (Instruction *I : FalseInstrs)
7879 I->moveBefore(FalseBranch->getIterator());
7880
7881 // If we did not create a new block for one of the 'true' or 'false' paths
7882 // of the condition, it means that side of the branch goes to the end block
7883 // directly and the path originates from the start block from the point of
7884 // view of the new PHI.
7885 if (TrueBlock == nullptr)
7886 TrueBlock = StartBlock;
7887 else if (FalseBlock == nullptr)
7888 FalseBlock = StartBlock;
7889
// INS holds the selects that have not been replaced yet; each one is erased
// from the set once its PHI has been created.
7890 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7891 // Use reverse iterator because later select may use the value of the
7892 // earlier select, and we need to propagate value through earlier select
7893 // to get the PHI operand.
7894 for (SelectInst *SI : llvm::reverse(ASI)) {
7895 // The select itself is replaced with a PHI Node.
7896 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7897 PN->insertBefore(EndBlock->begin());
7898 PN->takeName(SI);
7899 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7900 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7901 PN->setDebugLoc(SI->getDebugLoc());
7902
7903 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7904 SI->eraseFromParent();
7905 INS.erase(SI);
7906 ++NumSelectsExpanded;
7907 }
7908
7909 // Instruct OptimizeBlock to skip to the next block.
7910 CurInstIterator = StartBlock->end();
7911 return true;
7912}
7913
7914 /// Some targets only accept certain types for splat inputs. For example a VDUP
7915 /// in MVE takes a GPR (integer) register, and the instructions that incorporate
7916 /// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
/// \returns true if \p SVI was replaced by a splat built on the scalar type
/// returned by shouldConvertSplatType, bitcast back to the original vector
/// type; false otherwise.
7917 bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7918 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
// NOTE(review): the opening of the match expression is not visible in this
// listing; only its trailing arguments remain below.
7920 m_Undef(), m_ZeroMask())))
7921 return false;
// A null result from the target hook means no conversion is performed.
7922 Type *NewType = TLI->shouldConvertSplatType(SVI);
7923 if (!NewType)
7924 return false;
7925
7926 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7927 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7928 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7929 "Expected a type of the same size!");
7930 auto *NewVecType =
7931 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7932
7933 // Create a bitcast (shuffle (insert (bitcast(..))))
7934 IRBuilder<> Builder(SVI->getContext());
7935 Builder.SetInsertPoint(SVI);
// BC1 casts the inserted scalar (operand 1 of the insertelement feeding SVI)
// to the new scalar type.
7936 Value *BC1 = Builder.CreateBitCast(
7937 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7938 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7939 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7940
7941 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
// NOTE(review): the callee of the call below is not visible in this listing;
// only its arguments remain.
7943 SVI, TLInfo, nullptr,
7944 [&](Value *V) { removeAllAssertingVHReferences(V); });
7945
7946 // Also hoist the bitcast up to its operand if they are not in the same
7947 // block.
7948 if (auto *BCI = dyn_cast<Instruction>(BC1))
7949 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7950 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7951 !Op->isTerminator() && !Op->isEHPad())
7952 BCI->moveAfter(Op);
7953
7954 return true;
7955}
7956
7957bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7958 // If the operands of I can be folded into a target instruction together with
7959 // I, duplicate and sink them.
7960 SmallVector<Use *, 4> OpsToSink;
7961 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7962 return false;
7963
7964 // OpsToSink can contain multiple uses in a use chain (e.g.
7965 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7966 // uses must come first, so we process the ops in reverse order so as to not
7967 // create invalid IR.
7968 BasicBlock *TargetBB = I->getParent();
7969 bool Changed = false;
7970 SmallVector<Use *, 4> ToReplace;
7971 Instruction *InsertPoint = I;
7972 for (Use *U : reverse(OpsToSink)) {
7973 auto *UI = cast<Instruction>(U->get());
7974 if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
7975 continue;
7976 if (UI->getParent() == TargetBB) {
7977 if (UI->comesBefore(InsertPoint))
7978 InsertPoint = UI;
7979 continue;
7980 }
7981 ToReplace.push_back(U);
7982 }
7983
7984 SetVector<Instruction *> MaybeDead;
7985 DenseMap<Instruction *, Instruction *> NewInstructions;
7986 for (Use *U : ToReplace) {
7987 auto *UI = cast<Instruction>(U->get());
7988 Instruction *NI = UI->clone();
7989
7990 if (IsHugeFunc) {
7991 // Now we clone an instruction, its operands' defs may sink to this BB
7992 // now. So we put the operands defs' BBs into FreshBBs to do optimization.
7993 for (Value *Op : NI->operands())
7994 if (auto *OpDef = dyn_cast<Instruction>(Op))
7995 FreshBBs.insert(OpDef->getParent());
7996 }
7997
7998 NewInstructions[UI] = NI;
7999 MaybeDead.insert(UI);
8000 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
8001 NI->insertBefore(InsertPoint->getIterator());
8002 InsertPoint = NI;
8003 InsertedInsts.insert(NI);
8004
8005 // Update the use for the new instruction, making sure that we update the
8006 // sunk instruction uses, if it is part of a chain that has already been
8007 // sunk.
8008 Instruction *OldI = cast<Instruction>(U->getUser());
8009 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
8010 It->second->setOperand(U->getOperandNo(), NI);
8011 else
8012 U->set(NI);
8013 Changed = true;
8014 }
8015
8016 // Remove instructions that are dead after sinking.
8017 for (auto *I : MaybeDead) {
8018 if (!I->hasNUsesOrMore(1)) {
8019 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
8020 I->eraseFromParent();
8021 }
8022 }
8023
8024 return Changed;
8025}
8026
/// Widen the condition of \p SI, together with every case constant, to the
/// bit width of RegType when that width is larger than the condition's
/// current integer width.
/// \returns true if the switch was rewritten, false if RegWidth is not greater
/// than the condition's current width.
8027bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
8028 Value *Cond = SI->getCondition();
8029 Type *OldType = Cond->getType();
8030 LLVMContext &Context = Cond->getContext();
8031 EVT OldVT = TLI->getValueType(*DL, OldType);
// NOTE(review): the declaration of RegType is not visible in this listing.
8033 unsigned RegWidth = RegType.getSizeInBits();
8034
8035 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
8036 return false;
8037
8038 // If the register width is greater than the type width, expand the condition
8039 // of the switch instruction and each case constant to the width of the
8040 // register. By widening the type of the switch condition, subsequent
8041 // comparisons (for case comparisons) will not need to be extended to the
8042 // preferred register width, so we will potentially eliminate N-1 extends,
8043 // where N is the number of cases in the switch.
8044 auto *NewType = Type::getIntNTy(Context, RegWidth);
8045
8046 // Extend the switch condition and case constants using the target preferred
8047 // extend unless the switch condition is a function argument with an extend
8048 // attribute. In that case, we can avoid an unnecessary mask/extension by
8049 // matching the argument extension instead.
8050 Instruction::CastOps ExtType = Instruction::ZExt;
8051 // Some targets prefer SExt over ZExt.
8052 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
8053 ExtType = Instruction::SExt;
8054
// The zeroext check runs last, so it wins if an argument carries both
// attributes.
8055 if (auto *Arg = dyn_cast<Argument>(Cond)) {
8056 if (Arg->hasSExtAttr())
8057 ExtType = Instruction::SExt;
8058 if (Arg->hasZExtAttr())
8059 ExtType = Instruction::ZExt;
8060 }
8061
// Insert the extension right before the switch and make the switch test it.
8062 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
8063 ExtInst->insertBefore(SI->getIterator());
8064 ExtInst->setDebugLoc(SI->getDebugLoc());
8065 SI->setCondition(ExtInst);
// Extend every case constant the same way the condition was extended.
8066 for (auto Case : SI->cases()) {
8067 const APInt &NarrowConst = Case.getCaseValue()->getValue();
8068 APInt WideConst = (ExtType == Instruction::ZExt)
8069 ? NarrowConst.zext(RegWidth)
8070 : NarrowConst.sext(RegWidth);
8071 Case.setValue(ConstantInt::get(Context, WideConst));
8072 }
8073
8074 return true;
8075}
8076
8077bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
8078 // The SCCP optimization tends to produce code like this:
8079 // switch(x) { case 42: phi(42, ...) }
8080 // Materializing the constant for the phi-argument needs instructions; So we
8081 // change the code to:
8082 // switch(x) { case 42: phi(x, ...) }
8083
8084 Value *Condition = SI->getCondition();
8085 // Avoid endless loop in degenerate case.
8086 if (isa<ConstantInt>(*Condition))
8087 return false;
8088
8089 bool Changed = false;
8090 BasicBlock *SwitchBB = SI->getParent();
8091 Type *ConditionType = Condition->getType();
8092
8093 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
8094 ConstantInt *CaseValue = Case.getCaseValue();
8095 BasicBlock *CaseBB = Case.getCaseSuccessor();
8096 // Set to true if we previously checked that `CaseBB` is only reached by
8097 // a single case from this switch.
8098 bool CheckedForSinglePred = false;
8099 for (PHINode &PHI : CaseBB->phis()) {
8100 Type *PHIType = PHI.getType();
8101 // If ZExt is free then we can also catch patterns like this:
8102 // switch((i32)x) { case 42: phi((i64)42, ...); }
8103 // and replace `(i64)42` with `zext i32 %x to i64`.
8104 bool TryZExt =
8105 PHIType->isIntegerTy() &&
8106 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
8107 TLI->isZExtFree(ConditionType, PHIType);
8108 if (PHIType == ConditionType || TryZExt) {
8109 // Set to true to skip this case because of multiple preds.
8110 bool SkipCase = false;
8111 Value *Replacement = nullptr;
8112 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
8113 Value *PHIValue = PHI.getIncomingValue(I);
8114 if (PHIValue != CaseValue) {
8115 if (!TryZExt)
8116 continue;
8117 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
8118 if (!PHIValueInt ||
8119 PHIValueInt->getValue() !=
8120 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
8121 continue;
8122 }
8123 if (PHI.getIncomingBlock(I) != SwitchBB)
8124 continue;
8125 // We cannot optimize if there are multiple case labels jumping to
8126 // this block. This check may get expensive when there are many
8127 // case labels so we test for it last.
8128 if (!CheckedForSinglePred) {
8129 CheckedForSinglePred = true;
8130 if (SI->findCaseDest(CaseBB) == nullptr) {
8131 SkipCase = true;
8132 break;
8133 }
8134 }
8135
8136 if (Replacement == nullptr) {
8137 if (PHIValue == CaseValue) {
8138 Replacement = Condition;
8139 } else {
8140 IRBuilder<> Builder(SI);
8141 Replacement = Builder.CreateZExt(Condition, PHIType);
8142 }
8143 }
8144 PHI.setIncomingValue(I, Replacement);
8145 Changed = true;
8146 }
8147 if (SkipCase)
8148 break;
8149 }
8150 }
8151 }
8152 return Changed;
8153}
8154
8155bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
8156 bool Changed = optimizeSwitchType(SI);
8157 Changed |= optimizeSwitchPhiConstants(SI);
8158 return Changed;
8159}
8160
8161namespace {
8162
8163 /// Helper class to promote a scalar operation to a vector one.
8164 /// This class is used to move downward extractelement transition.
8165 /// E.g.,
8166 /// a = vector_op <2 x i32>
8167 /// b = extractelement <2 x i32> a, i32 0
8168 /// c = scalar_op b
8169 /// store c
8170 ///
8171 /// =>
8172 /// a = vector_op <2 x i32>
8173 /// c = vector_op a (equivalent to scalar_op on the related lane)
8174 /// * d = extractelement <2 x i32> c, i32 0
8175 /// * store d
8176 /// Assuming both extractelement and store can be combined, we get rid of the
8177 /// transition.
///
/// Typical use, as seen from the public interface: enqueue candidates with
/// enqueueForPromotion() after canPromote()/shouldPromote() accept them,
/// record the combinable user with recordCombineInstruction(), then call
/// promote().
8178 class VectorPromoteHelper {
8179 /// DataLayout associated with the current module.
8180 const DataLayout &DL;
8181
8182 /// Used to perform some checks on the legality of vector operations.
8183 const TargetLowering &TLI;
8184
8185 /// Used to estimate the cost of the promoted chain.
8186 const TargetTransformInfo &TTI;
8187
8188 /// The transition being moved downwards.
8189 Instruction *Transition;
8190
8191 /// The sequence of instructions to be promoted.
8192 SmallVector<Instruction *, 4> InstsToBePromoted;
8193
8194 /// Cost of combining a store and an extract.
8195 unsigned StoreExtractCombineCost;
8196
8197 /// Instruction that will be combined with the transition.
8198 Instruction *CombineInst = nullptr;
8199
8200 /// The instruction that represents the current end of the transition.
8201 /// Since we are faking the promotion until we reach the end of the chain
8202 /// of computation, we need a way to get the current end of the transition.
8203 Instruction *getEndOfTransition() const {
8204 if (InstsToBePromoted.empty())
8205 return Transition;
8206 return InstsToBePromoted.back();
8207 }
8208
8209 /// Return the index of the original value in the transition.
8210 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8211 /// c, is at index 0.
8212 unsigned getTransitionOriginalValueIdx() const {
8213 assert(isa<ExtractElementInst>(Transition) &&
8214 "Other kind of transitions are not supported yet");
8215 return 0;
8216 }
8217
8218 /// Return the index of the index in the transition.
8219 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8220 /// is at index 1.
8221 unsigned getTransitionIdx() const {
8222 assert(isa<ExtractElementInst>(Transition) &&
8223 "Other kind of transitions are not supported yet");
8224 return 1;
8225 }
8226
8227 /// Get the type of the transition.
8228 /// This is the type of the original value.
8229 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8230 /// transition is <2 x i32>.
8231 Type *getTransitionType() const {
8232 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8233 }
8234
8235 /// Promote \p ToBePromoted by moving \p Def downward through.
8236 /// I.e., we have the following sequence:
8237 /// Def = Transition <ty1> a to <ty2>
8238 /// b = ToBePromoted <ty2> Def, ...
8239 /// =>
8240 /// b = ToBePromoted <ty1> a, ...
8241 /// Def = Transition <ty1> ToBePromoted to <ty2>
8242 void promoteImpl(Instruction *ToBePromoted);
8243
8244 /// Check whether or not it is profitable to promote all the
8245 /// instructions enqueued to be promoted.
/// \returns true if the estimated scalar cost exceeds the estimated vector
/// cost. CombineInst must already be set to a StoreInst.
8246 bool isProfitableToPromote() {
// NOTE(review): ValIdx is read from operand getTransitionOriginalValueIdx()
// (the value being extracted from) rather than getTransitionIdx() (the
// extract index). Index only gets a concrete value when ValIdx is a
// ConstantInt, so confirm whether the index operand was intended here.
8247 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8248 unsigned Index = isa<ConstantInt>(ValIdx)
8249 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8250 : -1;
8251 Type *PromotedType = getTransitionType();
8252
8253 StoreInst *ST = cast<StoreInst>(CombineInst);
8254 unsigned AS = ST->getPointerAddressSpace();
8255 // Check if this store is supported.
// NOTE(review): the call that opens this condition is not visible in this
// listing; only its trailing arguments remain.
8257 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8258 ST->getAlign())) {
8259 // If this is not supported, there is no way we can combine
8260 // the extract with the store.
8261 return false;
8262 }
8263
8264 // The scalar chain of computation has to pay for the transition
8265 // scalar to vector.
8266 // The vector chain has to account for the combining cost.
// NOTE(review): the declaration of CostKind is not visible in this listing.
8269 InstructionCost ScalarCost =
8270 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8271 InstructionCost VectorCost = StoreExtractCombineCost;
8272 for (const auto &Inst : InstsToBePromoted) {
8273 // Compute the cost.
8274 // By construction, all instructions being promoted are arithmetic ones.
8275 // Moreover, one argument is a constant that can be viewed as a splat
8276 // constant.
8277 Value *Arg0 = Inst->getOperand(0);
8278 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8279 isa<ConstantFP>(Arg0);
8280 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
// NOTE(review): the statements controlled by this if/else are not visible in
// this listing.
8281 if (IsArg0Constant)
8283 else
8285
8286 ScalarCost += TTI.getArithmeticInstrCost(
8287 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8288 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8289 CostKind, Arg0Info, Arg1Info);
8290 }
8291 LLVM_DEBUG(
8292 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8293 << ScalarCost << "\nVector: " << VectorCost << '\n');
8294 return ScalarCost > VectorCost;
8295 }
8296
8297 /// Generate a constant vector with \p Val with the same
8298 /// number of elements as the transition.
8299 /// \p UseSplat defines whether or not \p Val should be replicated
8300 /// across the whole vector.
8301 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8302 /// otherwise we generate a vector with as many poison as possible:
8303 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8304 /// used at the index of the extract.
8305 Value *getConstantVector(Constant *Val, bool UseSplat) const {
// Sentinel: no loop index below can equal it unless a constant extract index
// is found.
8306 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8307 if (!UseSplat) {
8308 // If we cannot determine where the constant must be, we have to
8309 // use a splat constant.
8310 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8311 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8312 ExtractIdx = CstVal->getSExtValue();
8313 else
8314 UseSplat = true;
8315 }
8316
8317 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8318 if (UseSplat)
8319 return ConstantVector::getSplat(EC, Val);
8320
// Non-splat vectors are only built for fixed-length vector types.
8321 if (!EC.isScalable()) {
8322 SmallVector<Constant *, 4> ConstVec;
8323 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8324 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8325 if (Idx == ExtractIdx)
8326 ConstVec.push_back(Val);
8327 else
8328 ConstVec.push_back(PoisonVal);
8329 }
8330 return ConstantVector::get(ConstVec);
8331 } else
// NOTE(review): the call that takes the message below is not visible in this
// listing.
8333 "Generate scalable vector for non-splat is unimplemented");
8334 }
8335
8336 /// Check if promoting to a vector type an operand at \p OperandIdx
8337 /// in \p Use can trigger undefined behavior.
8338 static bool canCauseUndefinedBehavior(const Instruction *Use,
8339 unsigned OperandIdx) {
8340 // This is not safe to introduce undef when the operand is on
8341 // the right hand side of a division-like instruction.
8342 if (OperandIdx != 1)
8343 return false;
8344 switch (Use->getOpcode()) {
8345 default:
8346 return false;
8347 case Instruction::SDiv:
8348 case Instruction::UDiv:
8349 case Instruction::SRem:
8350 case Instruction::URem:
8351 return true;
// Floating-point division/remainder is only flagged when the instruction does
// not carry the no-NaNs flag.
8352 case Instruction::FDiv:
8353 case Instruction::FRem:
8354 return !Use->hasNoNaNs();
8355 }
8356 llvm_unreachable(nullptr);
8357 }
8358
8359public:
/// \p Transition must be non-null; \p CombineCost is stored as the cost of
/// combining a store and an extract.
8360 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8361 const TargetTransformInfo &TTI, Instruction *Transition,
8362 unsigned CombineCost)
8363 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8364 StoreExtractCombineCost(CombineCost) {
8365 assert(Transition && "Do not know how to promote null");
8366 }
8367
8368 /// Check if we can promote \p ToBePromoted to \p Type.
8369 bool canPromote(const Instruction *ToBePromoted) const {
8370 // We could support CastInst too.
8371 return isa<BinaryOperator>(ToBePromoted);
8372 }
8373
8374 /// Check if it is profitable to promote \p ToBePromoted
8375 /// by moving downward the transition through.
8376 bool shouldPromote(const Instruction *ToBePromoted) const {
8377 // Promote only if all the operands can be statically expanded.
8378 // Indeed, we do not want to introduce any new kind of transitions.
8379 for (const Use &U : ToBePromoted->operands()) {
8380 const Value *Val = U.get();
8381 if (Val == getEndOfTransition()) {
8382 // If the use is a division and the transition is on the rhs,
8383 // we cannot promote the operation, otherwise we may create a
8384 // division by zero.
8385 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8386 return false;
8387 continue;
8388 }
8389 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8390 !isa<ConstantFP>(Val))
8391 return false;
8392 }
8393 // Check that the resulting operation is legal.
8394 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8395 if (!ISDOpcode)
8396 return false;
// NOTE(review): the callee of the legality query below is not visible in this
// listing; only its arguments remain.
8397 return StressStoreExtract ||
8399 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8400 }
8401
8402 /// Check whether or not \p Use can be combined
8403 /// with the transition.
8404 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8405 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8406
8407 /// Record \p ToBePromoted as part of the chain to be promoted.
8408 void enqueueForPromotion(Instruction *ToBePromoted) {
8409 InstsToBePromoted.push_back(ToBePromoted);
8410 }
8411
8412 /// Set the instruction that will be combined with the transition.
8413 void recordCombineInstruction(Instruction *ToBeCombined) {
8414 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8415 CombineInst = ToBeCombined;
8416 }
8417
8418 /// Promote all the instructions enqueued for promotion if it is
8419 /// profitable.
8420 /// \return True if the promotion happened, false otherwise.
8421 bool promote() {
8422 // Check if there is something to promote.
8423 // Right now, if we do not have anything to combine with,
8424 // we assume the promotion is not profitable.
8425 if (InstsToBePromoted.empty() || !CombineInst)
8426 return false;
8427
8428 // Check cost.
// StressStoreExtract bypasses the profitability check.
8429 if (!StressStoreExtract && !isProfitableToPromote())
8430 return false;
8431
8432 // Promote.
8433 for (auto &ToBePromoted : InstsToBePromoted)
8434 promoteImpl(ToBePromoted);
8435 InstsToBePromoted.clear();
8436 return true;
8437 }
8438};
8439
8440} // end anonymous namespace
8441
8442void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8443 // At this point, we know that all the operands of ToBePromoted but Def
8444 // can be statically promoted.
8445 // For Def, we need to use its parameter in ToBePromoted:
8446 // b = ToBePromoted ty1 a
8447 // Def = Transition ty1 b to ty2
8448 // Move the transition down.
8449 // 1. Replace all uses of the promoted operation by the transition.
8450 // = ... b => = ... Def.
8451 assert(ToBePromoted->getType() == Transition->getType() &&
8452 "The type of the result of the transition does not match "
8453 "the final type");
8454 ToBePromoted->replaceAllUsesWith(Transition);
8455 // 2. Update the type of the uses.
8456 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8457 Type *TransitionTy = getTransitionType();
8458 ToBePromoted->mutateType(TransitionTy);
8459 // 3. Update all the operands of the promoted operation with promoted
8460 // operands.
8461 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8462 for (Use &U : ToBePromoted->operands()) {
8463 Value *Val = U.get();
8464 Value *NewVal = nullptr;
8465 if (Val == Transition)
8466 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8467 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8468 isa<ConstantFP>(Val)) {
8469 // Use a splat constant if it is not safe to use undef.
8470 NewVal = getConstantVector(
8471 cast<Constant>(Val),
8472 isa<UndefValue>(Val) ||
8473 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8474 } else
8475 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8476 "this?");
8477 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8478 }
8479 Transition->moveAfter(ToBePromoted);
8480 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8481}
8482
8483 /// Some targets can do store(extractelement) with one instruction.
8484 /// Try to push the extractelement towards the stores when the target
8485 /// has this feature and this is profitable.
/// \returns true if VectorPromoteHelper::promote() promoted the chain between
/// \p Inst and the store it feeds, false in every other case.
8486 bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
// Starts at the maximum value; passed as the last argument of the query below.
8487 unsigned CombineCost = std::numeric_limits<unsigned>::max();
// NOTE(review): the middle of this condition is not visible in this listing.
8488 if (DisableStoreExtract ||
8491 Inst->getOperand(1), CombineCost)))
8492 return false;
8493
8494 // At this point we know that Inst is a vector to scalar transition.
8495 // Try to move it down the def-use chain, until:
8496 // - We can combine the transition with its single use
8497 // => we got rid of the transition.
8498 // - We escape the current basic block
8499 // => we would need to check that we are moving it at a cheaper place and
8500 // we do not do that for now.
8501 BasicBlock *Parent = Inst->getParent();
8502 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8503 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8504 // If the transition has more than one use, assume this is not going to be
8505 // beneficial.
8506 while (Inst->hasOneUse()) {
8507 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8508 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8509
8510 if (ToBePromoted->getParent() != Parent) {
8511 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8512 << ToBePromoted->getParent()->getName()
8513 << ") than the transition (" << Parent->getName()
8514 << ").\n");
8515 return false;
8516 }
8517
// Reaching a combinable user (a store) ends the walk: promote what has been
// enqueued so far and report the outcome.
8518 if (VPH.canCombine(ToBePromoted)) {
8519 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8520 << "will be combined with: " << *ToBePromoted << '\n');
8521 VPH.recordCombineInstruction(ToBePromoted);
8522 bool Changed = VPH.promote();
8523 NumStoreExtractExposed += Changed;
8524 return Changed;
8525 }
8526
8527 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8528 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8529 return false;
8530
8531 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8532
// Nothing is rewritten yet; the user is only queued and the walk continues
// from it.
8533 VPH.enqueueForPromotion(ToBePromoted);
8534 Inst = ToBePromoted;
8535 }
8536 return false;
8537}
8538
8539/// For the instruction sequence of store below, F and I values
8540/// are bundled together as an i64 value before being stored into memory.
8541/// Sometimes it is more efficient to generate separate stores for F and I,
8542/// which can remove the bitwise instructions or sink them to colder places.
8543///
8544/// (store (or (zext (bitcast F to i32) to i64),
8545/// (shl (zext I to i64), 32)), addr) -->
8546/// (store F, addr) and (store I, addr+4)
8547///
8548/// Similarly, splitting for other merged store can also be beneficial, like:
8549/// For pair of {i32, i32}, i64 store --> two i32 stores.
8550/// For pair of {i32, i16}, i64 store --> two i32 stores.
8551/// For pair of {i16, i16}, i32 store --> two i16 stores.
8552/// For pair of {i16, i8}, i32 store --> two i16 stores.
8553/// For pair of {i8, i8}, i16 store --> two i8 stores.
8554///
8555/// We allow each target to determine specifically which kind of splitting is
8556/// supported.
8557///
8558 /// The store patterns are commonly seen from the simple code snippet below
8559 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8560/// void goo(const std::pair<int, float> &);
8561/// hoo() {
8562/// ...
8563/// goo(std::make_pair(tmp, ftmp));
8564/// ...
8565/// }
8566///
8567/// Although we already have similar splitting in DAG Combine, we duplicate
8568/// it in CodeGenPrepare to catch the case in which pattern is across
8569/// multiple BBs. The logic in DAG Combine is kept to catch case generated
8570/// during code expansion.
8572 const TargetLowering &TLI) {
8573 // Handle simple but common cases only.
8574 Type *StoreType = SI.getValueOperand()->getType();
8575
8576 // The code below assumes shifting a value by <number of bits>,
8577 // whereas scalable vectors would have to be shifted by
8578 // <2log(vscale) + number of bits> in order to store the
8579 // low/high parts. Bailing out for now.
8580 if (StoreType->isScalableTy())
8581 return false;
8582
// The stored type must have no padding between its bit size and its store
// size, and must not be zero-sized.
8583 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8584 DL.getTypeSizeInBits(StoreType) == 0)
8585 return false;
8586
// Each half is stored as an integer of half the original bit width; that
// integer type must itself have no store-size padding.
8587 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8588 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8589 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8590 return false;
8591
8592 // Don't split the store if it is volatile or atomic.
8593 if (!SI.isSimple())
8594 return false;
8595
8596 // Match the following patterns (shown for an i64 store):
8597 // (store (or (zext LValue to i64),
8598 // (shl (zext HValue to i64), HalfValBitSize)), addr)
8599 // or
8600 // (store (or (shl (zext HValue to i64), HalfValBitSize),
8601 // (zext LValue to i64)), addr)
8602 // Expect both operands of OR and the first operand of SHL have only
8603 // one use.
8604 Value *LValue, *HValue;
8605 if (!match(SI.getValueOperand(),
8608 m_SpecificInt(HalfValBitSize))))))
8609 return false;
8610
8611 // Check that LValue and HValue are integers no wider than HalfValBitSize.
8612 if (!LValue->getType()->isIntegerTy() ||
8613 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8614 !HValue->getType()->isIntegerTy() ||
8615 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8616 return false;
8617
8618 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8619 // as the input of target query.
8620 auto *LBC = dyn_cast<BitCastInst>(LValue);
8621 auto *HBC = dyn_cast<BitCastInst>(HValue);
8622 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8623 : EVT::getEVT(LValue->getType());
8624 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8625 : EVT::getEVT(HValue->getType());
// ForceSplitStore bypasses the target's profitability query.
8626 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8627 return false;
8628
8629 // Start to split store.
8630 IRBuilder<> Builder(SI.getContext());
8631 Builder.SetInsertPoint(&SI);
8632
8633 // If LValue/HValue is a bitcast in another BB, create a new one in current
8634 // BB so it may be merged with the split stores by the DAG combiner.
8635 if (LBC && LBC->getParent() != SI.getParent())
8636 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8637 if (HBC && HBC->getParent() != SI.getParent())
8638 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8639
8640 bool IsLE = SI.getDataLayout().isLittleEndian();
// Emits one half-width store of V. Upper selects the high half of the
// original value.
8641 auto CreateSplitStore = [&](Value *V, bool Upper) {
8642 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8643 Value *Addr = SI.getPointerOperand();
8644 Align Alignment = SI.getAlign();
// The high half sits at the higher address on little-endian targets and the
// low half does on big-endian targets, so exactly one of the two stores is
// offset by one SplitStoreType element.
8645 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8646 if (IsOffsetStore) {
8647 Addr = Builder.CreateGEP(
8648 SplitStoreType, Addr,
8649 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8650
8651 // When splitting the store in half, naturally one half will retain the
8652 // alignment of the original wider store, regardless of whether it was
8653 // over-aligned or not, while the other will require adjustment.
8654 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8655 }
8656 Builder.CreateAlignedStore(V, Addr, Alignment);
8657 };
8658
8659 CreateSplitStore(LValue, false);
8660 CreateSplitStore(HValue, true);
8661
8662 // Delete the old store.
8663 SI.eraseFromParent();
8664 return true;
8665 }
8666
8667 // Return true if the GEP has exactly two operands, the first operand is of a
8668 // sequential type, and the second operand (operand 1) is a ConstantInt.
8671 return GEP->getNumOperands() == 2 && I.isSequential() &&
8672 isa<ConstantInt>(GEP->getOperand(1));
8673 }
8674
8675// Try unmerging GEPs to reduce liveness interference (register pressure) across
8676// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8677// reducing liveness interference across those edges benefits global register
8678// allocation. Currently handles only certain cases.
8679//
8680// For example, unmerge %GEPI and %UGEPI as below.
8681//
8682// ---------- BEFORE ----------
8683// SrcBlock:
8684// ...
8685// %GEPIOp = ...
8686// ...
8687// %GEPI = gep %GEPIOp, Idx
8688// ...
8689// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8690// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8691 // (* %GEPIOp is alive on the indirectbr edges only because it is used by
8692 // %UGEPI)
8693//
8694// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8695// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8696// ...
8697//
8698// DstBi:
8699// ...
8700// %UGEPI = gep %GEPIOp, UIdx
8701// ...
8702// ---------------------------
8703//
8704// ---------- AFTER ----------
8705// SrcBlock:
8706// ... (same as above)
8707// (* %GEPI is still alive on the indirectbr edges)
8708// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8709// unmerging)
8710// ...
8711//
8712// DstBi:
8713// ...
8714// %UGEPI = gep %GEPI, (UIdx-Idx)
8715// ...
8716// ---------------------------
8717//
8718// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8719// no longer alive on them.
8720//
8721 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8722 // of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8723 // not to disable further simplifications and optimizations as a result of GEP
8724// merging.
8725//
8726// Note this unmerging may increase the length of the data flow critical path
8727// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8728// between the register pressure and the length of data-flow critical
8729// path. Restricting this to the uncommon IndirectBr case would minimize the
8730// impact of potentially longer critical path, if any, and the impact on compile
8731// time.
8733 const TargetTransformInfo *TTI) {
8734 BasicBlock *SrcBlock = GEPI->getParent();
8735 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8736 // (non-IndirectBr) cases exit early here.
8737 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8738 return false;
8739 // Check that GEPI is a simple gep with a single constant index.
8740 if (!GEPSequentialConstIndexed(GEPI))
8741 return false;
8742 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8743 // Check that GEPI is a cheap one.
8744 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8747 return false;
8748 Value *GEPIOp = GEPI->getOperand(0);
8749 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8750 if (!isa<Instruction>(GEPIOp))
8751 return false;
8752 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8753 if (GEPIOpI->getParent() != SrcBlock)
8754 return false;
8755 // Check that GEP is used outside the block, meaning it's alive on the
8756 // IndirectBr edge(s).
8757 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8758 if (auto *I = dyn_cast<Instruction>(Usr)) {
8759 if (I->getParent() != SrcBlock) {
8760 return true;
8761 }
8762 }
8763 return false;
8764 }))
8765 return false;
8766 // The second elements of the GEP chains to be unmerged.
8767 std::vector<GetElementPtrInst *> UGEPIs;
8768 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8769 // on IndirectBr edges.
8770 for (User *Usr : GEPIOp->users()) {
8771 if (Usr == GEPI)
8772 continue;
8773 // Check if Usr is an Instruction. If not, give up.
8774 if (!isa<Instruction>(Usr))
8775 return false;
8776 auto *UI = cast<Instruction>(Usr);
8777 // Check if Usr in the same block as GEPIOp, which is fine, skip.
8778 if (UI->getParent() == SrcBlock)
8779 continue;
8780 // Check if Usr is a GEP. If not, give up.
8781 if (!isa<GetElementPtrInst>(Usr))
8782 return false;
8783 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8784 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8785 // the pointer operand to it. If so, record it in the vector. If not, give
8786 // up.
8787 if (!GEPSequentialConstIndexed(UGEPI))
8788 return false;
8789 if (UGEPI->getOperand(0) != GEPIOp)
8790 return false;
// The rewrite below rebases UGEPI on GEPI and replaces its index with
// (UIdx - Idx) of GEPIIdx's type, so both GEPs must agree on the source
// element type and on the index type.
8791 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8792 return false;
8793 if (GEPIIdx->getType() !=
8794 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8795 return false;
8796 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
// Query the immediate cost of UGEPI's own index, mirroring the check on
// GEPI's index above.
8797 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8800 return false;
8801 UGEPIs.push_back(UGEPI);
8802 }
// No out-of-block GEP user was found, so there is nothing to unmerge.
8803 if (UGEPIs.size() == 0)
8804 return false;
8805 // Check the materializing cost of (Uidx-Idx).
8806 for (GetElementPtrInst *UGEPI : UGEPIs) {
8807 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8808 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8810 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8811 if (ImmCost > TargetTransformInfo::TCC_Basic)
8812 return false;
8813 }
8814 // Now unmerge between GEPI and UGEPIs.
8815 for (GetElementPtrInst *UGEPI : UGEPIs) {
8816 UGEPI->setOperand(0, GEPI);
8817 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8818 auto NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8819 Constant *NewUGEPIIdx = ConstantInt::get(GEPIIdx->getType(), NewIdx);
8820 UGEPI->setOperand(1, NewUGEPIIdx);
8821
8822 auto SourceFlags = GEPI->getNoWrapFlags();
8823 // Intersect flags to avoid UB in updated GEP.
8824 auto TargetFlags =
8825 UGEPI->getNoWrapFlags().intersectForOffsetAdd(SourceFlags);
8826 // If UGEPI now has a negative index, drop the nuw flag.
8827 if (NewIdx.isNegative() && TargetFlags.hasNoUnsignedWrap())
8828 TargetFlags = TargetFlags.withoutNoUnsignedWrap();
8829 UGEPI->setNoWrapFlags(TargetFlags);
8830 }
8831 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8832 // alive on IndirectBr edges).
8833 assert(llvm::none_of(GEPIOp->users(),
8834 [&](User *Usr) {
8835 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8836 }) &&
8837 "GEPIOp is used outside SrcBlock");
8838 return true;
8839 }
8840
/// Try to replace the integer compare feeding \p Branch with a compare
/// against zero of another user of the same value (a shift, add, sub or xor
/// by a matching constant). Only done when the target reports that it prefers
/// zero-compare branches.
///
/// \returns true if the branch condition was replaced.
8841 static bool optimizeBranch(CondBrInst *Branch, const TargetLowering &TLI,
8843 bool IsHugeFunc) {
8844 // Try and convert
8845 // %c = icmp ult %x, 8
8846 // br %c, bla, blb
8847 // %tc = lshr %x, 3
8848 // to
8849 // %tc = lshr %x, 3
8850 // %c = icmp eq %tc, 0
8851 // br %c, bla, blb
8852 // Creating the cmp to zero can be better for the backend, especially if the
8853 // lshr produces flags that can be used automatically.
8854 if (!TLI.preferZeroCompareBranch())
8855 return false;
8856
// Only handle an icmp against a constant whose sole user is this branch.
8857 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8858 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8859 return false;
8860
8861 Value *X = Cmp->getOperand(0);
8862 if (!X->hasUseList())
8863 return false;
8864
8865 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8866
8867 for (auto *U : X->users()) {
8869 // A quick dominance check: only consider users that live in the branch's
// own block, or in one of its two successors when that successor has a
// single predecessor.
8870 if (!UI ||
8871 (UI->getParent() != Branch->getParent() &&
8872 UI->getParent() != Branch->getSuccessor(0) &&
8873 UI->getParent() != Branch->getSuccessor(1)) ||
8874 (UI->getParent() != Branch->getParent() &&
8875 !UI->getParent()->getSinglePredecessor()))
8876 continue;
8877
// X u< 2^k holds exactly when (X >> k) == 0, so reuse the shift result.
8878 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8879 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8880 IRBuilder<> Builder(Branch);
// Hoist the user in front of the branch so the new compare can use it.
8881 if (UI->getParent() != Branch->getParent())
8882 UI->moveBefore(Branch->getIterator());
8884 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8885 ConstantInt::get(UI->getType(), 0));
8886 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8887 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8888 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8889 return true;
8890 }
// For an equality compare against C, each of (X + -C), (X - C) and (X ^ C)
// is zero exactly when X == C, so the same predicate can be applied to that
// result and zero.
8891 if (Cmp->isEquality() &&
8892 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8893 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8894 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8895 IRBuilder<> Builder(Branch);
8896 if (UI->getParent() != Branch->getParent())
8897 UI->moveBefore(Branch->getIterator());
8899 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8900 ConstantInt::get(UI->getType(), 0));
8901 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8902 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8903 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8904 return true;
8905 }
8906 }
8907 return false;
8908 }
8909
/// Apply the per-instruction optimizations of this pass to \p I, selecting
/// the transform by the kind of instruction.
///
/// \param I the instruction to optimize; several of the transforms below
/// erase or replace it.
/// \param ModifiedDT forwarded to the helpers that take it (optimizeCmp,
/// optimizeCallInst and the recursive call for GEPs).
/// \returns true if anything was changed, including debug records attached
/// to \p I.
8910 bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8911 bool AnyChange = false;
// First retarget debug records attached to I that refer to sunk addresses.
8912 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8913
8914 // Bail out if we inserted the instruction to prevent optimizations from
8915 // stepping on each other's toes.
8916 if (InsertedInsts.count(I))
8917 return AnyChange;
8918
8919 // TODO: Move into the switch on opcode below here.
8920 if (PHINode *P = dyn_cast<PHINode>(I)) {
8921 // It is possible for very late stage optimizations (such as SimplifyCFG)
8922 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8923 // trivial PHI, go ahead and zap it here.
8924 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8925 LargeOffsetGEPMap.erase(P);
8926 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8927 P->eraseFromParent();
8928 ++NumPHIsElim;
8929 return true;
8930 }
8931 return AnyChange;
8932 }
8933
8934 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8935 // If the source of the cast is a constant, then this should have
8936 // already been constant folded. The only reason NOT to constant fold
8937 // it is if something (e.g. LSR) was careful to place the constant
8938 // evaluation in a block other than then one that uses it (e.g. to hoist
8939 // the address of globals out of a loop). If this is the case, we don't
8940 // want to forward-subst the cast.
8941 if (isa<Constant>(CI->getOperand(0)))
8942 return AnyChange;
8943
8944 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8945 return true;
8946
8948 isa<TruncInst>(I)) &&
8950 I, LI->getLoopFor(I->getParent()), *TTI))
8951 return true;
8952
8953 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8954 /// Sink a zext or sext into its user blocks if the target type doesn't
8955 /// fit in one register
8956 if (TLI->getTypeAction(CI->getContext(),
8957 TLI->getValueType(*DL, CI->getType())) ==
8958 TargetLowering::TypeExpandInteger) {
8959 return SinkCast(CI);
8960 } else {
8962 I, LI->getLoopFor(I->getParent()), *TTI))
8963 return true;
8964
// Non-short-circuiting '|': optimizeExtUses runs regardless of whether
// optimizeExt changed anything.
8965 bool MadeChange = optimizeExt(I);
8966 return MadeChange | optimizeExtUses(I);
8967 }
8968 }
8969 return AnyChange;
8970 }
8971
8972 if (auto *Cmp = dyn_cast<CmpInst>(I))
8973 if (optimizeCmp(Cmp, ModifiedDT))
8974 return true;
8975
8976 if (match(I, m_URem(m_Value(), m_Value())))
8977 if (optimizeURem(I))
8978 return true;
8979
// NOTE: this local LI shadows the LI used for the loop queries in the cast
// handling above.
8980 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
// Drop any !invariant.group metadata before optimizing the access.
8981 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8982 bool Modified = optimizeLoadExt(LI);
8983 unsigned AS = LI->getPointerAddressSpace();
8984 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8985 return Modified;
8986 }
8987
8988 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
// splitMergedValStore erases SI on success, so return before touching it.
8989 if (splitMergedValStore(*SI, *DL, *TLI))
8990 return true;
8991 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8992 unsigned AS = SI->getPointerAddressSpace();
8993 return optimizeMemoryInst(I, SI->getOperand(1),
8994 SI->getOperand(0)->getType(), AS);
8995 }
8996
8997 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8998 unsigned AS = RMW->getPointerAddressSpace();
8999 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
9000 }
9001
9002 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
9003 unsigned AS = CmpX->getPointerAddressSpace();
9004 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
9005 CmpX->getCompareOperand()->getType(), AS);
9006 }
9007
9008 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
9009
9010 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
9011 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
9012 return true;
9013
9014 // TODO: Move this into the switch on opcode - it handles shifts already.
9015 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
9016 BinOp->getOpcode() == Instruction::LShr)) {
9017 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
9018 if (CI && TLI->hasExtractBitsInsn())
9019 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
9020 return true;
9021 }
9022
9023 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
9024 if (GEPI->hasAllZeroIndices()) {
9025 /// The GEP operand must be a pointer, so must its result -> BitCast
9026 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
9027 GEPI->getName(), GEPI->getIterator());
9028 NC->setDebugLoc(GEPI->getDebugLoc());
9029 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
9031 GEPI, TLInfo, nullptr,
9032 [&](Value *V) { removeAllAssertingVHReferences(V); });
9033 ++NumGEPsElim;
// Give the replacement cast a chance to be optimized right away.
9034 optimizeInst(NC, ModifiedDT);
9035 return true;
9036 }
9038 return true;
9039 }
9040 }
9041
9042 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
9043 // freeze(icmp a, const)) -> icmp (freeze a), const
9044 // This helps generate efficient conditional jumps.
9045 Instruction *CmpI = nullptr;
9046 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
9047 CmpI = II;
// An fcmp is only handled when it carries no fast-math flags.
9048 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
9049 CmpI = F->getFastMathFlags().none() ? F : nullptr;
9050
9051 if (CmpI && CmpI->hasOneUse()) {
9052 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
9053 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
9055 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
9057 if (Const0 || Const1) {
// With exactly one constant operand, freeze the other operand instead of the
// compare result; with two constants no new freeze is created.
9058 if (!Const0 || !Const1) {
9059 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
9060 F->takeName(FI);
9061 CmpI->setOperand(Const0 ? 1 : 0, F);
9062 }
9063 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
9064 FI->eraseFromParent();
9065 return true;
9066 }
9067 }
9068 return AnyChange;
9069 }
9070
9071 if (tryToSinkFreeOperands(I))
9072 return true;
9073
// Remaining opcode-specific optimizations.
9074 switch (I->getOpcode()) {
9075 case Instruction::Shl:
9076 case Instruction::LShr:
9077 case Instruction::AShr:
9078 return optimizeShiftInst(cast<BinaryOperator>(I));
9079 case Instruction::Call:
9080 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
9081 case Instruction::Select:
9082 return optimizeSelectInst(cast<SelectInst>(I));
9083 case Instruction::ShuffleVector:
9084 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
9085 case Instruction::Switch:
9086 return optimizeSwitchInst(cast<SwitchInst>(I));
9087 case Instruction::ExtractElement:
9088 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
9089 case Instruction::CondBr:
9090 return optimizeBranch(cast<CondBrInst>(I), *TLI, FreshBBs, IsHugeFunc);
9091 }
9092
9093 return AnyChange;
9094 }
9095
9096 /// Given an instruction (typically the OR at the root of the idiom), check
9097 /// to see if this is a bitreverse idiom. If so, insert the new intrinsic and
/// return true. Only integer-typed instructions are considered.
9098 bool CodeGenPrepare::makeBitReverse(Instruction &I) {
9099 if (!I.getType()->isIntegerTy() ||
9101 TLI->getValueType(*DL, I.getType(), true)))
9102 return false;
9103
9104 SmallVector<Instruction *, 4> Insts;
// NOTE(review): the two boolean arguments presumably disable bswap matching
// and enable bitreverse matching - confirm against the helper's declaration.
9105 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
9106 return false;
// The last instruction recorded by the recognizer is the replacement value.
9107 Instruction *LastInst = Insts.back();
9108 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
9110 &I, TLInfo, nullptr,
9111 [&](Value *V) { removeAllAssertingVHReferences(V); });
9112 return true;
9113 }
9114
9115 // In this pass we look for GEP and cast instructions that are used
9116 // across basic blocks and rewrite them to improve basic-block-at-a-time
9117 // selection.
//
// Runs optimizeInst over every instruction of BB, then the bitreverse idiom
// recognizer, then dupRetToEnableTailCallOpts. Returns true if anything was
// changed; ModifiedDT reports what kind of modification the last processed
// instruction made.
9118 bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
9119 SunkAddrs.clear();
9120 bool MadeChange = false;
9121
9122 do {
9123 CurInstIterator = BB.begin();
9124 ModifiedDT = ModifyDT::NotModifyDT;
9125 while (CurInstIterator != BB.end()) {
// The iterator is advanced before the call, so it already points past the
// instruction being optimized.
9126 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
9127 if (ModifiedDT != ModifyDT::NotModifyDT) {
9128 // For a huge function we tend to quickly go through the inner optimization
9129 // opportunities in the BB. So we go back to the BB head to re-optimize
9130 // each instruction instead of going back to the function head.
9131 if (IsHugeFunc)
9132 break;
9133 return true;
9134 }
9135 }
// Rescan the block from its head only after an instruction-level
// modification was reported.
9136 } while (ModifiedDT == ModifyDT::ModifyInstDT);
9137
// Scan the block bottom-up for bitreverse idioms; every successful rewrite
// restarts the scan, until a full pass finds nothing.
9138 bool MadeBitReverse = true;
9139 while (MadeBitReverse) {
9140 MadeBitReverse = false;
9141 for (auto &I : reverse(BB)) {
9142 if (makeBitReverse(I)) {
9143 MadeBitReverse = MadeChange = true;
9144 break;
9145 }
9146 }
9147 }
9148 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
9149
9150 return MadeChange;
9151 }
9152
/// Run fixupDbgVariableRecord on every debug variable record attached to
/// \p I. Returns true if any of them was changed.
9153 bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
9154 bool AnyChange = false;
9155 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
9156 AnyChange |= fixupDbgVariableRecord(DVR);
9157 return AnyChange;
9158 }
9159
// Replace each location operand of DVR that has a still-live entry in
// SunkAddrs with that sunk address. Only value and assign records are
// considered. Returns true if any operand was replaced.
//
9160 // FIXME: should updating debug-info really cause the "changed" flag to fire,
9161 // which can cause a function to be reprocessed?
9162 bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
9163 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
9164 DVR.Type != DbgVariableRecord::LocationType::Assign)
9165 return false;
9166
9167 // Does this DbgVariableRecord refer to a sunk address calculation?
9168 bool AnyChange = false;
// Work on a de-duplicated copy of the operands; presumably so that replacing
// operands below does not disturb the iteration.
9169 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
9170 DVR.location_ops().end());
9171 for (Value *Location : LocationOps) {
// NOTE(review): operator[] may insert an empty entry for Location when none
// exists - confirm this is intended.
9172 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
9173 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
9174 if (SunkAddr) {
9175 // Point dbg.value at locally computed address, which should give the best
9176 // opportunity to be accurately lowered. This update may change the type
9177 // of pointer being referred to; however this makes no difference to
9178 // debugging information, and we can't generate bitcasts that may affect
9179 // codegen.
9180 DVR.replaceVariableLocationOp(Location, SunkAddr);
9181 AnyChange = true;
9182 }
9183 }
9184 return AnyChange;
9185 }
9186
9188 DVR->removeFromParent();
9189 BasicBlock *VIBB = VI->getParent();
// For a PHI, re-insert the record at the block's first insertion point
// rather than directly after VI; otherwise place it right after VI.
9190 if (isa<PHINode>(VI))
9191 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
9192 else
9193 VIBB->insertDbgRecordAfter(DVR, &*VI);
9194 }
9195
9196 // A llvm.dbg.value may be using a value before its definition, due to
9197 // optimizations in this pass and others. Scan for such dbg.values, and rescue
9198 // them by moving the dbg.value to immediately after the value definition.
9199 // FIXME: Ideally this should never be necessary, and this has the potential
9200 // to re-order dbg.value intrinsics.
//
// Returns true if at least one record was moved.
9201 bool CodeGenPrepare::placeDbgValues(Function &F) {
9202 bool MadeChange = false;
9203 DominatorTree &DT = getDT();
9204
// Handles one debug item that is currently attached at Position.
9205 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
// Collect the instructions this item's location refers to.
9206 SmallVector<Instruction *, 4> VIs;
9207 for (Value *V : DbgItem->location_ops())
9208 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9209 VIs.push_back(VI);
9210
9211 // This item may depend on multiple instructions, complicating any
9212 // potential sink. This block takes the defensive approach, opting to
9213 // "undef" the item if it has more than one instruction and any of them do
9214 // not dominate item.
9215 for (Instruction *VI : VIs) {
9216 if (VI->isTerminator())
9217 continue;
9218
9219 // If VI is a phi in a block with an EHPad terminator, we can't insert
9220 // after it.
9221 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9222 continue;
9223
9224 // If the defining instruction dominates the dbg.value, we do not need
9225 // to move the dbg.value.
9226 if (DT.dominates(VI, Position))
9227 continue;
9228
9229 // If we depend on multiple instructions and any of them doesn't
9230 // dominate this DVI, we probably can't salvage it: moving it to
9231 // after any of the instructions could cause us to lose the others.
9232 if (VIs.size() > 1) {
9233 LLVM_DEBUG(
9234 dbgs()
9235 << "Unable to find valid location for Debug Value, undefing:\n"
9236 << *DbgItem);
9237 DbgItem->setKillLocation();
9238 break;
9239 }
9240
// NOTE: despite the wording of the message below, DbgInserterHelper places
// the item after VI (or at the block's first insertion point for a PHI).
9241 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9242 << *DbgItem << ' ' << *VI);
9243 DbgInserterHelper(DbgItem, VI->getIterator());
9244 MadeChange = true;
9245 ++NumDbgValueMoved;
9246 }
9247 };
9248
// Early-increment ranges are used because DbgProcessor may move the record
// currently being visited.
9249 for (BasicBlock &BB : F) {
9250 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9251 // Process any DbgVariableRecord records attached to this
9252 // instruction.
9253 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9254 filterDbgVars(Insn.getDbgRecordRange()))) {
// Only value-type records are repositioned.
9255 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9256 continue;
9257 DbgProcessor(&DVR, &Insn);
9258 }
9259 }
9260 }
9261
9262 return MadeChange;
9263 }
9264
9265 // Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9266 // probes can be chained dependencies of other regular DAG nodes and block DAG
9267 // combine optimizations.
//
// Returns true if any probe was moved.
9268 bool CodeGenPrepare::placePseudoProbes(Function &F) {
9269 bool MadeChange = false;
9270 for (auto &Block : F) {
9271 // Move the remaining probes to the beginning of the block.
// Skip the leading run of debug/pseudo instructions; FirstInst ends up at the
// first instruction that is neither.
9272 auto FirstInst = Block.getFirstInsertionPt();
9273 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9274 ++FirstInst;
// Scan everything after FirstInst. I is advanced before a probe is moved, so
// it keeps pointing at the next instruction to visit.
9275 BasicBlock::iterator I(FirstInst);
9276 I++;
9277 while (I != Block.end()) {
9278 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9279 II->moveBefore(FirstInst);
9280 MadeChange = true;
9281 }
9282 }
9283 }
9284 return MadeChange;
9285 }
9286
9287/// Some targets prefer to split a conditional branch like:
9288/// \code
9289/// %0 = icmp ne i32 %a, 0
9290/// %1 = icmp ne i32 %b, 0
9291/// %or.cond = or i1 %0, %1
9292/// br i1 %or.cond, label %TrueBB, label %FalseBB
9293/// \endcode
9294/// into multiple branch instructions like:
9295/// \code
9296/// bb1:
9297/// %0 = icmp ne i32 %a, 0
9298/// br i1 %0, label %TrueBB, label %bb2
9299/// bb2:
9300/// %1 = icmp ne i32 %b, 0
9301/// br i1 %1, label %TrueBB, label %FalseBB
9302/// \endcode
9303/// This usually allows instruction selection to do even further optimizations
9304/// and combine the compare with the branch instruction. Currently this is
9305/// applied for targets which have "cheap" jump instructions.
9306///
9307/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9308///
9309 bool CodeGenPrepare::splitBranchCondition(Function &F) {
// Only split when FastISel is enabled and the target does not report jumps
// as expensive.
9310 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9311 return false;
9312
9313 bool MadeChange = false;
9314 for (auto &BB : F) {
9315 // Does this BB end with the following?
9316 // %cond1 = icmp|fcmp|binary instruction ...
9317 // %cond2 = icmp|fcmp|binary instruction ...
9318 // %cond.or = or|and i1 %cond1, cond2
9319 // br i1 %cond.or label %dest1, label %dest2
9320 Instruction *LogicOp;
9321 BasicBlock *TBB, *FBB;
9322 if (!match(BB.getTerminator(),
9323 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9324 continue;
9325
// Leave branches marked !unpredictable untouched.
9326 auto *Br1 = cast<CondBrInst>(BB.getTerminator());
9327 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9328 continue;
9329
9330 // The merging of mostly empty BB can cause a degenerate branch.
9331 if (TBB == FBB)
9332 continue;
9333
// Classify the condition as a logical and / logical or whose two operands
// each have a single use.
9334 unsigned Opc;
9335 Value *Cond1, *Cond2;
9336 if (match(LogicOp,
9337 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9338 Opc = Instruction::And;
9339 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9340 m_OneUse(m_Value(Cond2)))))
9341 Opc = Instruction::Or;
9342 else
9343 continue;
9344
9345 auto IsGoodCond = [](Value *Cond) {
9346 return match(
9347 Cond,
9349 m_LogicalOr(m_Value(), m_Value()))));
9350 };
9351 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9352 continue;
9353
9354 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9355
9356 // Create a new BB.
9357 auto *TmpBB =
9358 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9359 BB.getParent(), BB.getNextNode());
9360 if (IsHugeFunc)
9361 FreshBBs.insert(TmpBB);
9362
9363 // Update original basic block by using the first condition directly by the
9364 // branch instruction and removing the no longer needed and/or instruction.
9365 Br1->setCondition(Cond1);
9366 LogicOp->eraseFromParent();
9367
9368 // Depending on the condition we have to either replace the true or the
9369 // false successor of the original branch instruction.
9370 if (Opc == Instruction::And)
9371 Br1->setSuccessor(0, TmpBB);
9372 else
9373 Br1->setSuccessor(1, TmpBB);
9374
9375 // Fill in the new basic block.
9376 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
// Move the second condition's defining instruction into the new block, right
// before the new branch.
9377 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9378 I->removeFromParent();
9379 I->insertBefore(Br2->getIterator());
9380 }
9381
9382 // Update PHI nodes in both successors. The original BB needs to be
9383 // replaced in one successor's PHI nodes, because the branch comes now from
9384 // the newly generated BB (TmpBB). In the other successor we need to add one
9385 // incoming edge to the PHI nodes, because both branch instructions target
9386 // now the same successor. Depending on the original branch condition
9387 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9388 // we perform the correct update for the PHI nodes.
9389 // This doesn't change the successor order of the just created branch
9390 // instruction (or any other instruction).
9391 if (Opc == Instruction::Or)
9392 std::swap(TBB, FBB);
9393
9394 // Replace the old BB with the new BB.
9395 TBB->replacePhiUsesWith(&BB, TmpBB);
9396
9397 // Add another incoming edge from the new BB.
9398 for (PHINode &PN : FBB->phis()) {
9399 auto *Val = PN.getIncomingValueForBlock(&BB);
9400 PN.addIncoming(Val, TmpBB);
9401 }
9402
// Keep loop info consistent: the new block joins the loop that contains BB,
// if there is one.
9403 if (Loop *L = LI->getLoopFor(&BB))
9404 L->addBasicBlockToLoop(TmpBB, *LI);
9405
9406 // The edge we need to delete starts at BB and ends at whatever TBB ends
9407 // up pointing to.
9408 DTU->applyUpdates({{DominatorTree::Insert, &BB, TmpBB},
9409 {DominatorTree::Insert, TmpBB, TBB},
9410 {DominatorTree::Insert, TmpBB, FBB},
9411 {DominatorTree::Delete, &BB, TBB}});
9412
9413 // Update the branch weights (from SelectionDAGBuilder::
9414 // FindMergedConditions).
9415 if (Opc == Instruction::Or) {
9416 // Codegen X | Y as:
9417 // BB1:
9418 // jmp_if_X TBB
9419 // jmp TmpBB
9420 // TmpBB:
9421 // jmp_if_Y TBB
9422 // jmp FBB
9423 //
9424
9425 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9426 // The requirement is that
9427 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9428 // = TrueProb for original BB.
9429 // Assuming the original weights are A and B, one choice is to set BB1's
9430 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9431 // assumes that
9432 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9433 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9434 // TmpBB, but the math is more complicated.
9435 uint64_t TrueWeight, FalseWeight;
9436 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9437 uint64_t NewTrueWeight = TrueWeight;
9438 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9439 setFittedBranchWeights(*Br1, {NewTrueWeight, NewFalseWeight},
9440 hasBranchWeightOrigin(*Br1));
9441
9442 NewTrueWeight = TrueWeight;
9443 NewFalseWeight = 2 * FalseWeight;
9444 setFittedBranchWeights(*Br2, {NewTrueWeight, NewFalseWeight},
9445 /*IsExpected=*/false);
9446 }
9447 } else {
9448 // Codegen X & Y as:
9449 // BB1:
9450 // jmp_if_X TmpBB
9451 // jmp FBB
9452 // TmpBB:
9453 // jmp_if_Y TBB
9454 // jmp FBB
9455 //
9456 // This requires creation of TmpBB after CurBB.
9457
9458 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9459 // The requirement is that
9460 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9461 // = FalseProb for original BB.
9462 // Assuming the original weights are A and B, one choice is to set BB1's
9463 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9464 // assumes that
9465 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9466 uint64_t TrueWeight, FalseWeight;
9467 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9468 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9469 uint64_t NewFalseWeight = FalseWeight;
// NOTE(review): the Or path passes hasBranchWeightOrigin(*Br1) for Br1,
// while this path passes false - confirm the difference is intentional.
9470 setFittedBranchWeights(*Br1, {NewTrueWeight, NewFalseWeight},
9471 /*IsExpected=*/false);
9472
9473 NewTrueWeight = 2 * TrueWeight;
9474 NewFalseWeight = FalseWeight;
9475 setFittedBranchWeights(*Br2, {NewTrueWeight, NewFalseWeight},
9476 /*IsExpected=*/false);
9477 }
9478 }
9479
9480 MadeChange = true;
9481
9482 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9483 TmpBB->dump());
9484 }
9485 return MadeChange;
9486 }
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If the given PN is an induction variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(CondBrInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, ExtractValueInst *&OverflowExtract)
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool despeculateCountZeros(IntrinsicInst *CountZeros, DomTreeUpdater *DTU, LoopInfo *LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1457
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
ppc ctr loops verify
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static DominatorTree getDomTree(Function &F)
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned logBase2() const
Definition APInt.h:1784
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
LLVM_ABI bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
BinaryOps getOpcode() const
Definition InstrTypes.h:409
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
Analysis pass which computes BranchProbabilityInfo.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:512
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Conditional Branch instruction.
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:332
unsigned size() const
Definition DenseMap.h:114
iterator end()
Definition DenseMap.h:85
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
LLVM_ABI void deleteBB(BasicBlock *DelBB)
Delete DelBB.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:314
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:57
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:869
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
bool isBBPendingDeletion(BasicBlockT *DelBB) const
Returns true if DelBB is awaiting deletion.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:406
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:624
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:612
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
iterator find(const KeyT &Key)
Definition MapVector.h:156
iterator end()
Definition MapVector.h:69
bool empty() const
Definition MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:126
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:210
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
bool erase(const T &V)
Definition SmallSet.h:200
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:774
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return true if the target supports combining a chain like:
virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitWidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:270
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:313
BasicBlock * getSuccessor(unsigned i=0) const
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:552
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:972
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:238
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:712
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
iterator_range< use_iterator > uses()
Definition Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:806
user_iterator_impl< User > user_iterator
Definition Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_Ctpop(const Opnd0 &Op0)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:50
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
std::enable_if_t< std::is_signed_v< T >, T > MulOverflow(T X, T Y, T &Result)
Multiply two signed integers, computing the two's complement truncated result, returning true if an o...
Definition MathExtras.h:753
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2264
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1687
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2142
constexpr from_range_t from_range
LLVM_ABI BasicBlock * splitBlockBefore(BasicBlock *Old, BasicBlock::iterator SplitPt, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, const Twine &BBName="")
Split the specified block at the specified instruction SplitPt.
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr, DomTreeUpdater *DTU=nullptr)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
LLVM_ABI bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2199
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3807
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1398
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instruction selection.
LLVM_ABI ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point condition code.
Definition Analysis.cpp:203
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:53
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
LLVM_ABI bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if, given that the input instruction is in the tail call position, there is an attribute mismatch between the caller and the callee that will inhibit tail call optimizations.
Definition Analysis.cpp:588
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:2011
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool VerifyDomInfo
Enables verification of dominator trees.
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow occurred.
Definition MathExtras.h:701
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with the given operands.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:323
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:271
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
This contains information for each constraint that we are lowering.