//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <memory>
#include <optional>
#include <utility>
#include <vector>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "codegenprepare"

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumMemoryInstsPhiCreated,
          "Number of phis created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumMemoryInstsSelectCreated,
          "Number of selects created when address "
          "computations were sunk to memory instructions");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumAndsAdded,
          "Number of and mask instructions added to form ext loads");
STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");

static cl::opt<bool> DisableBranchOpts(
    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
    cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool>
    DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
                  cl::desc("Disable GC optimizations in CodeGenPrepare"));

static cl::opt<bool>
    DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
                          cl::init(false),
                          cl::desc("Disable select to branch conversion."));

static cl::opt<bool>
    AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
                      cl::desc("Address sinking in CGP using GEPs."));

static cl::opt<bool>
    EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
                        cl::desc("Enable sinking and/cmp into branches."));

static cl::opt<bool> DisableStoreExtract(
    "disable-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> StressStoreExtract(
    "stress-cgp-store-extract", cl::Hidden, cl::init(false),
    cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableExtLdPromotion(
    "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
             "CodeGenPrepare"));

static cl::opt<bool> StressExtLdPromotion(
    "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
    cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
             "optimization in CodeGenPrepare"));

static cl::opt<bool> DisablePreheaderProtect(
    "disable-preheader-prot", cl::Hidden, cl::init(false),
    cl::desc("Disable protection against removing loop preheaders"));

static cl::opt<bool> ProfileGuidedSectionPrefix(
    "profile-guided-section-prefix", cl::Hidden, cl::init(true),
    cl::desc("Use profile info to add section prefix for hot/cold functions"));

static cl::opt<bool> ProfileUnknownInSpecialSection(
    "profile-unknown-in-special-section", cl::Hidden,
    cl::desc("In a profiling mode like sampleFDO, if a function doesn't have "
             "profile, we cannot tell for sure that the function is cold, "
             "because it may be a function newly added without ever being "
             "sampled. With this flag enabled, the compiler can put such "
             "profile-unknown functions into a special section, so the "
             "runtime system can choose to handle it differently than the "
             ".text section, to save RAM for example."));

static cl::opt<bool> BBSectionsGuidedSectionPrefix(
    "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
    cl::desc("Use the basic-block-sections profile to determine the text "
             "section prefix for hot functions. Functions with "
             "basic-block-sections profile will be placed in `.text.hot` "
             "regardless of their FDO profile info. Other functions won't be "
             "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
             "profiles."));

static cl::opt<uint64_t> FreqRatioToSkipMerge(
    "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
    cl::desc("Skip merging empty blocks if (frequency of empty block) / "
             "(frequency of destination block) is greater than this ratio"));

static cl::opt<bool> ForceSplitStore(
    "force-split-store", cl::Hidden, cl::init(false),
    cl::desc("Force store splitting no matter what the target query says."));

static cl::opt<bool> EnableTypePromotionMerge(
    "cgp-type-promotion-merge", cl::Hidden,
    cl::desc("Enable merging of redundant sexts when one is dominating"
             " the other."),
    cl::init(true));

static cl::opt<bool> DisableComplexAddrModes(
    "disable-complex-addr-modes", cl::Hidden, cl::init(false),
    cl::desc("Disables combining addressing modes with different parts "
             "in optimizeMemoryInst."));

static cl::opt<bool>
    AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
                    cl::desc("Allow creation of Phis in Address sinking."));

static cl::opt<bool> AddrSinkNewSelects(
    "addr-sink-new-select", cl::Hidden, cl::init(true),
    cl::desc("Allow creation of selects in Address sinking."));

static cl::opt<bool> AddrSinkCombineBaseReg(
    "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of BaseReg field in Address sinking."));

static cl::opt<bool> AddrSinkCombineBaseGV(
    "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of BaseGV field in Address sinking."));

static cl::opt<bool> AddrSinkCombineBaseOffs(
    "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of BaseOffs field in Address sinking."));

static cl::opt<bool> AddrSinkCombineScaledReg(
    "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Allow combining of ScaledReg field in Address sinking."));

static cl::opt<bool>
    EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
                         cl::init(true),
                         cl::desc("Enable splitting large offset of GEP."));

static cl::opt<bool> EnableICMP_EQToICMP_ST(
    "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
    cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));

static cl::opt<bool>
    VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
                     cl::desc("Enable BFI update verification for "
                              "CodeGenPrepare."));

static cl::opt<bool>
    OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
                     cl::desc("Enable converting phi types in CodeGenPrepare"));

static cl::opt<unsigned>
    HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
                            cl::desc("Least number of BBs for a function to "
                                     "be considered huge."));

static cl::opt<unsigned>
    MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
                          cl::Hidden,
                          cl::desc("Max number of address users to look at"));

static cl::opt<bool>
    DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
                      cl::desc("Disable elimination of dead PHI nodes."));

namespace {

enum ExtType {
  ZeroExtension, // Zero extension has been seen.
  SignExtension, // Sign extension has been seen.
  BothExtension  // This extension type is used if we saw sext after
                 // ZeroExtension had been set, or if we saw zext after
                 // SignExtension had been set. It makes the type
                 // information of a promoted instruction invalid.
};
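
// For example (illustrative IR): if ZeroExtension was recorded for a promoted
// value and we then see a sign extension of the same value,
//   %z = zext i8 %x to i32
//   %s = sext i8 %x to i32
// neither assumption holds anymore, so the state becomes BothExtension.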

enum ModifyDT {
  NotModifyDT, // Do not modify any DT.
  ModifyBBDT,  // Modify the Basic Block Dominator Tree.
  ModifyInstDT // Modify the Instruction Dominators in a Basic Block.
               // This usually means we move/delete/insert instructions
               // in a Basic Block, so we should re-iterate the instructions
               // in such a Basic Block.
};

using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
using SExts = SmallVector<Instruction *, 16>;
using ValueToSExts = MapVector<Value *, SExts>;

class TypePromotionTransaction;

class CodeGenPrepare {
  friend class CodeGenPrepareLegacyPass;
  const TargetMachine *TM = nullptr;
  const TargetSubtargetInfo *SubtargetInfo = nullptr;
  const TargetLowering *TLI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  const TargetTransformInfo *TTI = nullptr;
  const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
  const TargetLibraryInfo *TLInfo = nullptr;
  DomTreeUpdater *DTU = nullptr;
  LoopInfo *LI = nullptr;
  BlockFrequencyInfo *BFI;
  BranchProbabilityInfo *BPI;
  ProfileSummaryInfo *PSI = nullptr;

  /// As we scan instructions optimizing them, this is the next instruction
  /// to optimize. Transforms that can invalidate this should update it.
  BasicBlock::iterator CurInstIterator;

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address. The usage of WeakTrackingVH
  /// enables SunkAddrs to be treated as a cache whose entries can be
  /// invalidated if a sunken address computation has been erased.
  ValueMap<Value *, WeakTrackingVH> SunkAddrs;

  /// Keeps track of all instructions inserted for the current function.
  SetOfInstrs InsertedInsts;

  /// Keeps track of the types of the related instructions before their
  /// promotion for the current function.
  InstrToOrigTy PromotedInsts;

  /// Keep track of instructions removed during promotion.
  SetOfInstrs RemovedInsts;

  /// Keep track of sext chains based on their initial value.
  DenseMap<Value *, Instruction *> SeenChainsForSExt;

  /// Keep track of GEPs accessing the same data structures such as structs or
  /// arrays that are candidates to be split later because of their large
  /// size.
  MapVector<AssertingVH<Value>,
            SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
      LargeOffsetGEPMap;

  /// Keep track of new GEP bases after splitting the GEPs having large offset.
  SmallSet<AssertingVH<Value>, 2> NewGEPBases;

  /// Map serial numbers to Large offset GEPs.
  DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;

  /// Keep track of SExts promoted.
  ValueToSExts ValToSExtendedUses;

  /// True if the function has the OptSize attribute.
  bool OptSize;

  /// DataLayout for the Function being processed.
  const DataLayout *DL = nullptr;

public:
  CodeGenPrepare() = default;
  CodeGenPrepare(const TargetMachine *TM) : TM(TM) {}

  /// If we encounter a huge function, we need to limit the build time.
  bool IsHugeFunc = false;

  /// FreshBBs is like a worklist: it collects the updated BBs which need
  /// to be optimized again.
  /// Note: To limit build time in this pass, when a BB is updated we need
  /// to insert it into FreshBBs for huge functions.
  SmallPtrSet<BasicBlock *, 32> FreshBBs;

  void releaseMemory() {
    // Clear per-function information.
    InsertedInsts.clear();
    PromotedInsts.clear();
    FreshBBs.clear();
  }

  bool run(Function &F, FunctionAnalysisManager &AM);

private:
  template <typename F>
  void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
    // Substituting can cause recursive simplifications, which can invalidate
    // our iterator. Use a WeakTrackingVH to hold onto it in case this
    // happens.
    Value *CurValue = &*CurInstIterator;
    WeakTrackingVH IterHandle(CurValue);

    f();

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurValue) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
  }

  // Get the DominatorTree, updating it if necessary.
  DominatorTree &getDT() { return DTU->getDomTree(); }

  void removeAllAssertingVHReferences(Value *V);
  bool eliminateAssumptions(Function &F);
  bool eliminateFallThrough(Function &F);
  bool eliminateMostlyEmptyBlocks(Function &F, bool &ResetLI);
  BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
  bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
  bool eliminateMostlyEmptyBlock(BasicBlock *BB);
  bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                     bool isPreheader);
  bool makeBitReverse(Instruction &I);
  bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
  bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
  bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
                          unsigned AddrSpace);
  bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
  bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
                               ModifyDT &ModifiedDT);
  bool optimizeInlineAsmInst(CallInst *CS);
  bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
  bool optimizeExt(Instruction *&I);
  bool optimizeExtUses(Instruction *I);
  bool optimizeLoadExt(LoadInst *Load);
  bool optimizeShiftInst(BinaryOperator *BO);
  bool optimizeFunnelShift(IntrinsicInst *Fsh);
  bool optimizeSelectInst(SelectInst *SI);
  bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
  bool optimizeSwitchType(SwitchInst *SI);
  bool optimizeSwitchPhiConstants(SwitchInst *SI);
  bool optimizeSwitchInst(SwitchInst *SI);
  bool optimizeExtractElementInst(Instruction *Inst);
  bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
  bool fixupDbgVariableRecord(DbgVariableRecord &I);
  bool fixupDbgVariableRecordsOnInst(Instruction &I);
  bool placeDbgValues(Function &F);
  bool placePseudoProbes(Function &F);
  bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
                    LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
  bool tryToPromoteExts(TypePromotionTransaction &TPT,
                        const SmallVectorImpl<Instruction *> &Exts,
                        SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                        unsigned CreatedInstsCost = 0);
  bool mergeSExts(Function &F);
  bool splitLargeGEPOffsets();
  bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
                       SmallPtrSetImpl<Instruction *> &DeletedInstrs);
  bool optimizePhiTypes(Function &F);
  bool performAddressTypePromotion(
      Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
      bool HasPromoted, TypePromotionTransaction &TPT,
      SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
  bool splitBranchCondition(Function &F);
  bool simplifyOffsetableRelocate(GCStatepointInst &I);

  bool tryToSinkFreeOperands(Instruction *I);
  bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
                                   CmpInst *Cmp, Intrinsic::ID IID);
  bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
  bool optimizeURem(Instruction *Rem);
  bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
  bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
  bool unfoldPowerOf2Test(CmpInst *Cmp);
  void verifyBFIUpdates(Function &F);
  bool _run(Function &F);
};

class CodeGenPrepareLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  CodeGenPrepareLegacyPass() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "CodeGen Prepare"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // FIXME: When we can selectively preserve passes, preserve the domtree.
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    AU.addRequired<BranchProbabilityInfoWrapperPass>();
    AU.addRequired<BlockFrequencyInfoWrapperPass>();
    AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
  }
};

} // end anonymous namespace

char CodeGenPrepareLegacyPass::ID = 0;

bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;
  auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
  CodeGenPrepare CGP(TM);
  CGP.DL = &F.getDataLayout();
  CGP.SubtargetInfo = TM->getSubtargetImpl(F);
  CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
  CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
  CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  CGP.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
  CGP.BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
  CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  auto BBSPRWP =
      getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
  CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
  DomTreeUpdater DTUpdater(
      &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
      DomTreeUpdater::UpdateStrategy::Lazy);
  CGP.DTU = &DTUpdater;

  return CGP._run(F);
}

INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
                      "Optimize for code generation", false, false)
INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
                    "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
  return new CodeGenPrepareLegacyPass();
}

PreservedAnalyses CodeGenPreparePass::run(Function &F,
                                          FunctionAnalysisManager &AM) {
  CodeGenPrepare CGP(TM);

  bool Changed = CGP.run(F, AM);
  if (!Changed)
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<TargetLibraryAnalysis>();
  PA.preserve<TargetIRAnalysis>();
  return PA;
}

bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
  DL = &F.getDataLayout();
  SubtargetInfo = TM->getSubtargetImpl(F);
  TLI = SubtargetInfo->getTargetLowering();
  TRI = SubtargetInfo->getRegisterInfo();
  TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
  TTI = &AM.getResult<TargetIRAnalysis>(F);
  LI = &AM.getResult<LoopAnalysis>(F);
  BPI = &AM.getResult<BranchProbabilityAnalysis>(F);
  BFI = &AM.getResult<BlockFrequencyAnalysis>(F);
  auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
  PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
  BBSectionsProfileReader =
      AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F);
  DomTreeUpdater DTUpdater(&AM.getResult<DominatorTreeAnalysis>(F),
                           DomTreeUpdater::UpdateStrategy::Lazy);
  DTU = &DTUpdater;
  return _run(F);
}

bool CodeGenPrepare::_run(Function &F) {
  bool EverMadeChange = false;

  OptSize = F.hasOptSize();
  // Use the basic-block-sections profile to promote hot functions to .text.hot
  // if requested.
  if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
      BBSectionsProfileReader->isFunctionHot(F.getName())) {
    (void)F.setSectionPrefix("hot");
  } else if (ProfileGuidedSectionPrefix) {
    // The hot attribute overrides profile-count-based hotness, while
    // profile-count-based hotness overrides the cold attribute.
    // This is conservative behavior.
    if (F.hasFnAttribute(Attribute::Hot) ||
        PSI->isFunctionHotInCallGraph(&F, *BFI))
      (void)F.setSectionPrefix("hot");
    // If PSI shows this function is not hot, we place the function in the
    // unlikely section if (1) PSI shows it is a cold function, or (2) the
    // function has the cold attribute.
    else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
             F.hasFnAttribute(Attribute::Cold))
      (void)F.setSectionPrefix("unlikely");
    else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
             PSI->isFunctionHotnessUnknown(F))
      (void)F.setSectionPrefix("unknown");
  }

  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
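  //
  // For example (illustrative IR, assuming the target reports an i64 -> i32
  // entry in its bypass-width map): a 64-bit division such as
  //   %q = sdiv i64 %a, %b
  // is rewritten to test at run time whether both operands fit in 32 bits,
  // performing a cheap 32-bit divide when they do and falling back to the
  // full 64-bit divide otherwise.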
  if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
    const DenseMap<unsigned int, unsigned int> &BypassWidths =
        TLI->getBypassSlowDivWidths();
    BasicBlock *BB = &*F.begin();
    while (BB != nullptr) {
      // bypassSlowDivision may create new BBs, but we don't want to reapply
      // the optimization to those blocks.
      BasicBlock *Next = BB->getNextNode();
      if (!llvm::shouldOptimizeForSize(BB, PSI, BFI))
        EverMadeChange |= bypassSlowDivision(BB, BypassWidths, DTU, LI);
      BB = Next;
    }
  }

  // Get rid of @llvm.assume builtins before attempting to eliminate empty
  // blocks, since there might be blocks that only contain @llvm.assume calls
  // (plus arguments that we can get rid of).
  EverMadeChange |= eliminateAssumptions(F);

  auto resetLoopInfo = [this]() {
    LI->releaseMemory();
    LI->analyze(DTU->getDomTree());
  };

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  bool ResetLI = false;
  EverMadeChange |= eliminateMostlyEmptyBlocks(F, ResetLI);
  if (ResetLI)
    resetLoopInfo();

  if (!DisableBranchOpts)
    EverMadeChange |= splitBranchCondition(F);

  // Split some critical edges where one of the sources is an indirect branch,
  // to help generate sane code for PHIs involving such edges.
  bool Split = SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true,
                                            BPI, BFI, DTU);
  EverMadeChange |= Split;
  if (Split)
    resetLoopInfo();

#ifndef NDEBUG
  if (VerifyDomInfo)
    assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
           "Incorrect DominatorTree updates in CGP");

  if (VerifyLoopInfo)
    LI->verify(getDT());
#endif

  // If we are optimizing a huge function, we need to consider the build time,
  // because the basic algorithm's complexity is near O(N!).
  IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;

  bool MadeChange = true;
  bool FuncIterated = false;
  while (MadeChange) {
    MadeChange = false;

    // This is required because optimizeBlock() calls getDT() inside the loop
    // below, which flushes pending updates and may delete dead blocks, leading
    // to iterator invalidation.
    DTU->flush();

    for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
      if (FuncIterated && !FreshBBs.contains(&BB))
        continue;

      ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
      bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);

      MadeChange |= Changed;
      if (IsHugeFunc) {
        // If the BB is updated, it may still have a chance to be optimized.
        // This usually happens during sink optimization.
        // For example:
        //
        // bb0:
        // %and = and i32 %a, 4
        // %cmp = icmp eq i32 %and, 0
        //
        // If the %cmp sinks to another BB, the %and will have a chance to
        // sink.
        if (Changed)
          FreshBBs.insert(&BB);
        else if (FuncIterated)
          FreshBBs.erase(&BB);
      } else {
        // For small/normal functions, we restart BB iteration if the dominator
        // tree of the Function was changed.
        if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
          break;
      }
    }
    // We have iterated over all the BBs in the function (this only matters
    // for huge functions).
    FuncIterated = IsHugeFunc;

    if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
      MadeChange |= mergeSExts(F);
    if (!LargeOffsetGEPMap.empty())
      MadeChange |= splitLargeGEPOffsets();
    MadeChange |= optimizePhiTypes(F);

    if (MadeChange)
      eliminateFallThrough(F);

#ifndef NDEBUG
    if (VerifyDomInfo)
      assert(getDT().verify(DominatorTree::VerificationLevel::Fast) &&
             "Incorrect DominatorTree updates in CGP");

    if (VerifyLoopInfo)
      LI->verify(getDT());
#endif

724
725 // Really free removed instructions during promotion.
726 for (Instruction *I : RemovedInsts)
727 I->deleteValue();
728
729 EverMadeChange |= MadeChange;
730 SeenChainsForSExt.clear();
731 ValToSExtendedUses.clear();
732 RemovedInsts.clear();
733 LargeOffsetGEPMap.clear();
734 LargeOffsetGEPID.clear();
735 }
736
737 NewGEPBases.clear();
738 SunkAddrs.clear();
739
740 // LoopInfo is not needed anymore and ConstantFoldTerminator can break it.
741 LI = nullptr;
742
743 if (!DisableBranchOpts) {
744 MadeChange = false;
745 // Use a set vector to get deterministic iteration order. The order the
746 // blocks are removed may affect whether or not PHI nodes in successors
747 // are removed.
748 SmallSetVector<BasicBlock *, 8> WorkList;
749 for (BasicBlock &BB : F) {
751 MadeChange |= ConstantFoldTerminator(&BB, true, nullptr, DTU);
752 if (!MadeChange)
753 continue;
754
755 for (BasicBlock *Succ : Successors)
756 if (pred_empty(Succ))
757 WorkList.insert(Succ);
758 }
759
760 // Delete the dead blocks and any of their dead successors.
761 MadeChange |= !WorkList.empty();
762 while (!WorkList.empty()) {
763 BasicBlock *BB = WorkList.pop_back_val();
765
766 DeleteDeadBlock(BB, DTU);
767
768 for (BasicBlock *Succ : Successors)
769 if (pred_empty(Succ))
770 WorkList.insert(Succ);
771 }
772
773 // Flush pending DT updates in order to finalise deletion of dead blocks.
774 DTU->flush();
775
776 // Merge pairs of basic blocks with unconditional branches, connected by
777 // a single edge.
778 if (EverMadeChange || MadeChange)
779 MadeChange |= eliminateFallThrough(F);
780
781 EverMadeChange |= MadeChange;
782 }
783
784 if (!DisableGCOpts) {
786 for (BasicBlock &BB : F)
787 for (Instruction &I : BB)
788 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
789 Statepoints.push_back(SP);
790 for (auto &I : Statepoints)
791 EverMadeChange |= simplifyOffsetableRelocate(*I);
792 }
793
794 // Do this last to clean up use-before-def scenarios introduced by other
795 // preparatory transforms.
796 EverMadeChange |= placeDbgValues(F);
797 EverMadeChange |= placePseudoProbes(F);
798
799#ifndef NDEBUG
801 verifyBFIUpdates(F);
802#endif
803
804 return EverMadeChange;
805}
806
bool CodeGenPrepare::eliminateAssumptions(Function &F) {
  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    CurInstIterator = BB.begin();
    while (CurInstIterator != BB.end()) {
      Instruction *I = &*(CurInstIterator++);
      if (auto *Assume = dyn_cast<AssumeInst>(I)) {
        MadeChange = true;
        Value *Operand = Assume->getOperand(0);
        Assume->eraseFromParent();

        resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
          RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
        });
      }
    }
  }
  return MadeChange;
}

/// An instruction is about to be deleted, so remove all references to it in
/// our GEP-tracking data structures.
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
  LargeOffsetGEPMap.erase(V);
  NewGEPBases.erase(V);

  auto *GEP = dyn_cast<GetElementPtrInst>(V);
  if (!GEP)
    return;

  LargeOffsetGEPID.erase(GEP);

  auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
  if (VecI == LargeOffsetGEPMap.end())
    return;

  auto &GEPVector = VecI->second;
  llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });

  if (GEPVector.empty())
    LargeOffsetGEPMap.erase(VecI);
}

// Verify BFI has been updated correctly by recomputing BFI and comparing them.
[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
  DominatorTree NewDT(F);
  LoopInfo NewLI(NewDT);
  BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
  BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
  NewBFI.verifyMatch(*BFI);
}

/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
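///
/// A minimal sketch of the handled shape (illustrative IR):
///   bb0:
///     br label %bb1   ; bb0's single successor
///   bb1:              ; bb1's single predecessor
///     ...
/// The two blocks are merged and the unconditional branch disappears.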
bool CodeGenPrepare::eliminateFallThrough(Function &F) {
  bool Changed = false;
  SmallPtrSet<BasicBlock *, 8> Preds;
  // Scan all of the blocks in the function, except for the entry block.
  for (auto &Block : llvm::drop_begin(F)) {
    auto *BB = &Block;
    if (DTU->isBBPendingDeletion(BB))
      continue;
    // If the destination block has a single pred, then this is a trivial
    // edge, just collapse it.
    BasicBlock *SinglePred = BB->getSinglePredecessor();

    // Don't merge if BB's address is taken.
    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
      continue;

    if (isa<UncondBrInst>(SinglePred->getTerminator())) {
      Changed = true;
      LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");

      // Merge BB into SinglePred and delete it.
      MergeBlockIntoPredecessor(BB, DTU, LI);
      Preds.insert(SinglePred);

      if (IsHugeFunc) {
        // Update FreshBBs to optimize the merged BB.
        FreshBBs.insert(SinglePred);
        FreshBBs.erase(BB);
      }
    }
  }

  // (Repeatedly) merging blocks into their predecessors can create redundant
  // debug intrinsics.
  for (auto *Pred : Preds)
    if (!DTU->isBBPendingDeletion(Pred))
      RemoveRedundantDbgInstrs(Pred);

  return Changed;
}

/// Find a destination block for BB if BB is a mergeable empty block.
BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
  // If this block doesn't end with an uncond branch, ignore it.
  UncondBrInst *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
  if (!BI)
    return nullptr;

  // If the instruction before the branch (skipping debug info) isn't a phi
  // node, then other stuff is happening here.
  BasicBlock::iterator BBI = BI->getIterator();
  if (BBI != BB->begin()) {
    --BBI;
    if (!isa<PHINode>(BBI))
      return nullptr;
  }

  // Do not break infinite loops.
  BasicBlock *DestBB = BI->getSuccessor();
  if (DestBB == BB)
    return nullptr;

  if (!canMergeBlocks(BB, DestBB))
    DestBB = nullptr;

  return DestBB;
}

/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
/// blocks so we can split them the way we want them.
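///
/// For example (illustrative IR), a mostly empty block looks like:
///   bb:
///     %p = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
///     br label %dest
/// and is removed by rewiring %pred1/%pred2 directly to %dest and updating
/// the PHIs in %dest.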
bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F, bool &ResetLI) {
  SmallPtrSet<BasicBlock *, 16> Preheaders;
  SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
  while (!LoopList.empty()) {
    Loop *L = LoopList.pop_back_val();
    llvm::append_range(LoopList, *L);
    if (BasicBlock *Preheader = L->getLoopPreheader())
      Preheaders.insert(Preheader);
  }

  ResetLI = false;
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (auto &Block : llvm::drop_begin(F)) {
    // Delete phi nodes that could block deleting other empty blocks.
    if (!DisableDeletePHIs)
      MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
  }

  for (auto &Block : llvm::drop_begin(F)) {
    auto *BB = &Block;
    if (DTU->isBBPendingDeletion(BB))
      continue;
    BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
    if (!DestBB ||
        !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
      continue;

    ResetLI |= eliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
                                                   BasicBlock *DestBB,
                                                   bool isPreheader) {
  // Do not delete loop preheaders if doing so would create a critical edge.
  // Loop preheaders can be good locations to spill registers. If the
  // preheader is deleted and we create a critical edge, registers may be
  // spilled in the loop body instead.
  if (!DisablePreheaderProtect && isPreheader &&
      !(BB->getSinglePredecessor() &&
        BB->getSinglePredecessor()->getSingleSuccessor()))
    return false;

  // Skip merging if the block's successor is also a successor to any callbr
  // that leads to this block.
  // FIXME: Is this really needed? Is this a correctness issue?
  for (BasicBlock *Pred : predecessors(BB)) {
    if (isa<CallBrInst>(Pred->getTerminator()) &&
        llvm::is_contained(successors(Pred), DestBB))
      return false;
  }

  // Try to skip merging if the unique predecessor of BB is terminated by a
  // switch or indirect branch instruction, and BB is used as an incoming block
  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
  // to add COPY instructions in the predecessor of BB instead of BB (if it is
  // not merged). Note that the critical edge created by merging such blocks
  // won't be split in MachineSink because the jump table is not analyzable. By
  // keeping such an empty block (BB), ISel will place COPY instructions in BB,
  // not in the predecessor of BB.
  BasicBlock *Pred = BB->getUniquePredecessor();
  if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
                 isa<IndirectBrInst>(Pred->getTerminator())))
    return true;

  if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
    return true;

  // We use a simple cost heuristic: skipping the merge is profitable if the
  // cost of skipping is less than the cost of merging, i.e.
  // Cost(skipping merging) < Cost(merging BB), where
  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
  // Cost(merging BB) is Freq(Pred) * Cost(Copy).
  // Assuming Cost(Copy) == Cost(Branch), we can simplify this to:
  // Freq(Pred) / Freq(BB) > 2.
  // Note that if there are multiple empty blocks sharing the same incoming
  // value for the PHIs in the DestBB, we consider them together. In such a
  // case, Cost(merging BB) will be the sum of their frequencies.
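  //
  // As a worked example with made-up frequencies: if Freq(Pred) = 300 and
  // Freq(BB) = 100, then Freq(Pred) / Freq(BB) = 3 > 2 (the default of
  // cgp-freq-ratio-to-skip-merge), so skipping the merge is the cheaper
  // option; with Freq(Pred) = 150 the ratio is 1.5 and BB is merged.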

  if (!isa<PHINode>(DestBB->begin()))
    return true;

  SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;

  // Find all other incoming blocks from which incoming values of all PHIs in
  // DestBB are the same as the ones from BB.
  for (BasicBlock *DestBBPred : predecessors(DestBB)) {
    if (DestBBPred == BB)
      continue;

    if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
          return DestPN.getIncomingValueForBlock(BB) ==
                 DestPN.getIncomingValueForBlock(DestBBPred);
        }))
      SameIncomingValueBBs.insert(DestBBPred);
  }

  // See if all of BB's incoming values are the same as the value from Pred.
  // In this case, there is no reason to skip merging because COPYs are
  // expected to be placed in Pred already.
  if (SameIncomingValueBBs.count(Pred))
    return true;

  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
  BlockFrequency BBFreq = BFI->getBlockFreq(BB);

  for (auto *SameValueBB : SameIncomingValueBBs)
    if (SameValueBB->getUniquePredecessor() == Pred &&
        DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
      BBFreq += BFI->getBlockFreq(SameValueBB);

  std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
  return !Limit || PredFreq <= *Limit;
}

/// Return true if we can merge BB into DestBB if there is a single
/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
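///
/// For example (illustrative IR), this is the supported shape:
///   bb:                       ; BB
///     %p = phi i32 [ %x, %pred ]
///     br label %dest
///   dest:                     ; DestBB
///     %q = phi i32 [ %p, %bb ], [ %y, %other ]
/// where %p is used only by PHIs in %dest.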
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there is a more complex condition (e.g. preheaders),
  // don't mess around with it.
  for (const PHINode &PN : BB->phis()) {
    for (const User *U : PN.users()) {
      const Instruction *UI = cast<Instruction>(U);
      if (UI->getParent() != DestBB || !isa<PHINode>(UI))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (UI->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(UI))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN)
    return true; // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock *, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert_range(predecessors(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) { // Common predecessor?
      for (const PHINode &PN : DestBB->phis()) {
        const Value *V1 = PN.getIncomingValueForBlock(Pred);
        const Value *V2 = PN.getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2)
          return false;
      }
    }
  }

  return true;
}

/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static void replaceAllUsesWith(Value *Old, Value *New,
                               SmallPtrSet<BasicBlock *, 32> &FreshBBs,
                               bool IsHuge) {
  auto *OldI = dyn_cast<Instruction>(Old);
  if (OldI) {
    for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
         UI != E; ++UI) {
      Instruction *User = cast<Instruction>(*UI);
      if (IsHuge)
        FreshBBs.insert(User->getParent());
    }
  }
  Old->replaceAllUsesWith(New);
}

/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
/// Returns true if LoopInfo was invalidated without being updated.
bool CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
  UncondBrInst *BI = cast<UncondBrInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor();

  LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
                    << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      assert(SinglePred == BB &&
             "Single predecessor not the same as predecessor");
      // Merge DestBB into SinglePred/BB and delete it.
      MergeBlockIntoPredecessor(DestBB, DTU, LI);
      // Note: BB(=SinglePred) will not be deleted on this path.
      // DestBB(=its single successor) is the one that was deleted.
      LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");

      if (IsHugeFunc) {
        // Update FreshBBs to optimize the merged BB.
        FreshBBs.insert(SinglePred);
        FreshBBs.erase(DestBB);
      }
      return false;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  for (PHINode &PN : DestBB->phis()) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN.removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN.addIncoming(InValPhi->getIncomingValue(i),
                       InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (BasicBlock *Pred : predecessors(BB))
          PN.addIncoming(InVal, Pred);
      }
    }
  }

  // Preserve loop Metadata.
  if (BI->hasMetadata(LLVMContext::MD_loop)) {
    for (auto *Pred : predecessors(BB))
      Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  SmallVector<DominatorTree::UpdateType, 8> DTUpdates;
  SmallPtrSet<BasicBlock *, 8> SeenPreds;
  SmallPtrSet<BasicBlock *, 8> PredOfDestBB(llvm::from_range,
                                            predecessors(DestBB));
  for (auto *Pred : predecessors(BB)) {
    if (!PredOfDestBB.contains(Pred)) {
      if (SeenPreds.insert(Pred).second)
        DTUpdates.push_back({DominatorTree::Insert, Pred, DestBB});
    }
  }
  SeenPreds.clear();
  for (auto *Pred : predecessors(BB)) {
    if (SeenPreds.insert(Pred).second)
      DTUpdates.push_back({DominatorTree::Delete, Pred, BB});
  }
  DTUpdates.push_back({DominatorTree::Delete, BB, DestBB});
  BB->replaceAllUsesWith(DestBB);
  DTU->applyUpdates(DTUpdates);
  DTU->deleteBB(BB);
  ++NumBlocksElim;

  LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
  return true;
}

// Computes a map of base pointer relocation instructions to corresponding
// derived pointer relocation instructions given a vector of all relocate
// calls.
static void computeBaseDerivedRelocateMap(
    const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
    MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
        &RelocateInstMap) {
  // Collect information in two maps: one primarily for locating the base
  // object while filling the second map; the second map is the final structure
  // holding a mapping between Base and corresponding Derived relocate calls.
  DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
  for (auto *ThisRelocate : AllRelocateCalls) {
    auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
                            ThisRelocate->getDerivedPtrIndex());
    RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
  }
  for (auto &Item : RelocateIdxMap) {
    std::pair<unsigned, unsigned> Key = Item.first;
    if (Key.first == Key.second)
      // Base relocation: nothing to insert
      continue;

    GCRelocateInst *I = Item.second;
    auto BaseKey = std::make_pair(Key.first, Key.first);

    // We're iterating over RelocateIdxMap so we cannot modify it.
    auto MaybeBase = RelocateIdxMap.find(BaseKey);
    if (MaybeBase == RelocateIdxMap.end())
      // TODO: We might want to insert a new base object relocate and gep off
      // that, if there are enough derived object relocates.
      continue;

    RelocateInstMap[MaybeBase->second].push_back(I);
  }
}

// Accepts a GEP and extracts the operands into a vector provided they're all
// small integer constants.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
                                          SmallVectorImpl<Value *> &OffsetV) {
  for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
    // Only accept small constant integer operands
    auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
    if (!Op || Op->getZExtValue() > 20)
      return false;
  }

  for (unsigned i = 1; i < GEP->getNumOperands(); i++)
    OffsetV.push_back(GEP->getOperand(i));
  return true;
}

// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
// replace, computes a replacement, and applies it.
static bool
simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
                          const SmallVectorImpl<GCRelocateInst *> &Targets) {
  bool MadeChange = false;
  // We must ensure that the relocation of the derived pointer is defined after
  // the relocation of the base pointer. If we find a relocation corresponding
  // to a base that is defined earlier than the relocation of that base, we
  // move the base's relocation right before the one we found. We consider only
  // relocations in the same basic block as the relocation of the base.
  // Relocations from other basic blocks are skipped by this optimization and
  // we do not care about them.
  for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
       &*R != RelocatedBase; ++R)
    if (auto *RI = dyn_cast<GCRelocateInst>(R))
      if (RI->getStatepoint() == RelocatedBase->getStatepoint())
        if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
          RelocatedBase->moveBefore(RI->getIterator());
          MadeChange = true;
          break;
        }

  for (GCRelocateInst *ToReplace : Targets) {
    assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
           "Not relocating a derived object of the original base object");
    if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
      // A duplicate relocate call. TODO: coalesce duplicates.
      continue;
    }

    if (RelocatedBase->getParent() != ToReplace->getParent()) {
      // Base and derived relocates are in different basic blocks.
      // In this case the transform is only valid when the base dominates the
      // derived relocate. However, it would be too expensive to check
      // dominance for each such relocate, so we skip the whole transformation.
      continue;
    }

    Value *Base = ToReplace->getBasePtr();
    auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
    if (!Derived || Derived->getPointerOperand() != Base)
      continue;

    SmallVector<Value *, 2> OffsetV;
    if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
      continue;

    // Create a Builder and replace the target callsite with a gep
    assert(RelocatedBase->getNextNode() &&
           "Should always have one since it's not a terminator");

    // Insert after RelocatedBase
    IRBuilder<> Builder(RelocatedBase->getNextNode());
    Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());

    // If gc_relocate does not match the actual type, cast it to the right
    // type. In theory, there must be a bitcast after gc_relocate if the type
    // does not match, and we should reuse it to get the derived pointer. But
    // there could be cases like this:
    // bb1:
    //  ...
    //  %g1 = call coldcc i8 addrspace(1)*
    //  @llvm.experimental.gc.relocate.p1i8(...) br label %merge
    //
    // bb2:
    //  ...
    //  %g2 = call coldcc i8 addrspace(1)*
    //  @llvm.experimental.gc.relocate.p1i8(...) br label %merge
    //
    // merge:
    //  %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
    //  %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
    //
    // In this case, we cannot find the bitcast anymore. So we insert a new
    // bitcast whether there is already one or not. This way we can handle all
    // cases, and the extra bitcast should be optimized away in later passes.
    Value *ActualRelocatedBase = RelocatedBase;
    if (RelocatedBase->getType() != Base->getType()) {
      ActualRelocatedBase =
          Builder.CreateBitCast(RelocatedBase, Base->getType());
    }
    Value *Replacement =
        Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
                          ArrayRef(OffsetV));
    Replacement->takeName(ToReplace);
    // If the newly generated derived pointer's type does not match the
    // original derived pointer's type, cast the new derived pointer to match
    // it. Same reasoning as above.
    Value *ActualReplacement = Replacement;
    if (Replacement->getType() != ToReplace->getType()) {
      ActualReplacement =
          Builder.CreateBitCast(Replacement, ToReplace->getType());
    }
    ToReplace->replaceAllUsesWith(ActualReplacement);
    ToReplace->eraseFromParent();

    MadeChange = true;
  }
  return MadeChange;
}

// Turns this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = relocate(%tok, i32 4, i32 4)
// %ptr' = relocate(%tok, i32 4, i32 5)
// %val = load %ptr'
//
// into this:
//
// %base = ...
// %ptr = gep %base + 15
// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
// %base' = gc.relocate(%tok, i32 4, i32 4)
// %ptr' = gep %base' + 15
// %val = load %ptr'
bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
  bool MadeChange = false;
  SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
  for (auto *U : I.users())
    if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
      // Collect all the relocate calls associated with a statepoint
      AllRelocateCalls.push_back(Relocate);

  // We need at least one base pointer relocation + one derived pointer
  // relocation to mangle
  if (AllRelocateCalls.size() < 2)
    return false;

  // RelocateInstMap is a mapping from the base relocate instruction to the
  // corresponding derived relocate instructions
  MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
  computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
  if (RelocateInstMap.empty())
    return false;

  for (auto &Item : RelocateInstMap)
    // Item.first is the RelocatedBase to offset against
    // Item.second is the vector of Targets to replace
    MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
  return MadeChange;
}

/// Sink the specified cast instruction into its user blocks.
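///
/// For example (illustrative IR), with a cast defined in one block and used
/// in another:
///   bb0:
///     %c = trunc i64 %v to i32
///     br i1 %cond, label %bb1, label %bb2
///   bb1:
///     %u = add i32 %c, 1
/// a clone of the trunc is inserted at the top of %bb1 and the use is
/// rewritten to it, so the value no longer lives in a cross-block register.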
static bool SinkCast(CastInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock *, CastInst *> InsertedCasts;

  bool MadeChange = false;
  for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
       UI != E;) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(TheUse);
    }

    // Preincrement the use iterator so we don't invalidate it.
    ++UI;

    // The first insertion point of a block containing an EH pad is after the
    // pad. If the pad is the user, we cannot sink the cast past the pad.
    if (User->isEHPad())
      continue;

    // If the block selected to receive the cast is an EH pad that does not
    // allow non-PHI instructions before the terminator, we can't sink the
    // cast.
    if (UserBB->getTerminator()->isEHPad())
      continue;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB)
      continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      assert(InsertPt != UserBB->end());
      InsertedCast = cast<CastInst>(CI->clone());
      InsertedCast->insertBefore(*UserBB, InsertPt);
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
    MadeChange = true;
    ++NumCastUses;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    salvageDebugInfo(*CI);
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// If the specified cast instruction is a noop copy (e.g. it's casting from
/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
/// reduce the number of virtual registers that must be created and coalesced.
///
/// Return true if any changes are made.
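///
/// For example (illustrative, target-dependent): on a target where both i32
/// and i8 are promoted to the same legal register type, a truncate such as
///   %t = trunc i32 %x to i8
/// becomes a noop copy after type legalization, so it is worth sinking into
/// its user blocks via SinkCast instead of holding a register live across
/// blocks.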
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
                                       const DataLayout &DL) {
  // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
  // than sinking only nop casts, but is helpful on some platforms.
  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
    if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
                                 ASC->getDestAddressSpace()))
      return false;
  }

  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(DL, CI->getType());

  // Is this an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT))
    return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
      TargetLowering::TypePromoteInteger)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
      TargetLowering::TypePromoteInteger)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  return SinkCast(CI);
}

// Match a simple increment by constant operation. Note that if a sub is
// matched, the step is negated (as if the step had been canonicalized to
// an add, even though we leave the instruction alone.)
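//
// For example (illustrative IR), both of these match with LHS = %iv:
//   %iv.next = add i32 %iv, 4   ; Step = 4
//   %iv.next = sub i32 %iv, 4   ; Step = -4 after negation
// as does the overflow-intrinsic form:
//   %pair = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %iv, i32 4)
//   %iv.next = extractvalue { i32, i1 } %pair, 0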
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
                           Constant *&Step) {
  if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
      match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
                       m_Instruction(LHS), m_Constant(Step)))))
    return true;
  if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
      match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
                       m_Instruction(LHS), m_Constant(Step))))) {
    Step = ConstantExpr::getNeg(Step);
    return true;
  }
  return false;
}

/// If the given \p PN is an induction variable with value IVInc coming from
/// the backedge, and on each iteration it gets increased by Step, return the
/// pair <IVInc, Step>. Otherwise, return std::nullopt.
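///
/// A minimal sketch of the matched shape (illustrative IR):
///   header:
///     %pn = phi i32 [ 0, %preheader ], [ %ivinc, %latch ]
///     ...
///   latch:
///     %ivinc = add i32 %pn, 1
///     br label %header
/// Here the returned pair is <%ivinc, i32 1>.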
static std::optional<std::pair<Instruction *, Constant *>>
getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
  const Loop *L = LI->getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
    return std::nullopt;
  auto *IVInc =
      dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
  if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
    return std::nullopt;
  Instruction *LHS = nullptr;
  Constant *Step = nullptr;
  if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
    return std::make_pair(IVInc, Step);
  return std::nullopt;
}

static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
  auto *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;
  Instruction *LHS = nullptr;
  Constant *Step = nullptr;
  if (!matchIncrement(I, LHS, Step))
    return false;
  if (auto *PN = dyn_cast<PHINode>(LHS))
    if (auto IVInc = getIVIncrement(PN, LI))
      return IVInc->first == I;
  return false;
}

1584bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1585 Value *Arg0, Value *Arg1,
1586 CmpInst *Cmp,
1587 Intrinsic::ID IID) {
1588 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1589 if (!isIVIncrement(BO, LI))
1590 return false;
1591 const Loop *L = LI->getLoopFor(BO->getParent());
1592 assert(L && "L should not be null after isIVIncrement()");
1593 // Do not risk moving the increment into a child loop.
1594 if (LI->getLoopFor(Cmp->getParent()) != L)
1595 return false;
1596
1597 // Finally, we need to ensure that the insert point will dominate all
1598 // existing uses of the increment.
1599
1600 auto &DT = getDT();
1601 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1602 // If we're moving up the dom tree, all uses are trivially dominated.
1603 // (This is the common case for code produced by LSR.)
1604 return true;
1605
1606 // Otherwise, special case the single use in the phi recurrence.
1607 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1608 };
1609 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1610 // We used to use a dominator tree here to allow multi-block optimization.
1611 // But that was problematic because:
1612 // 1. It could cause a perf regression by hoisting the math op into the
1613 // critical path.
1614 // 2. It could cause a perf regression by creating a value that was live
1615 // across multiple blocks and increasing register pressure.
1616 // 3. Use of a dominator tree could cause large compile-time regression.
1617 // This is because we recompute the DT on every change in the main CGP
1618 // run-loop. The recomputing is probably unnecessary in many cases, so if
1619 // that was fixed, using a DT here would be ok.
1620 //
1621 // There is one important particular case we still want to handle: if BO is
1622 // the IV increment. Important properties that make it profitable:
1623 // - We can speculate IV increment anywhere in the loop (as long as the
1624 // indvar Phi is its only user);
1625 // - Upon computing Cmp, we effectively compute something equivalent to the
1626 // IV increment (even though it is spelled differently in the IR). So moving
1627 // it up to the cmp point does not really increase register pressure.
1628 return false;
1629 }
1630
1631 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1632 if (BO->getOpcode() == Instruction::Add &&
1633 IID == Intrinsic::usub_with_overflow) {
1634 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1635 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1636 }
1637
1638 // Insert at the first instruction of the pair.
1639 Instruction *InsertPt = nullptr;
1640 for (Instruction &Iter : *Cmp->getParent()) {
1641 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1642 // the overflow intrinsic are defined.
1643 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1644 InsertPt = &Iter;
1645 break;
1646 }
1647 }
1648 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1649
1650 IRBuilder<> Builder(InsertPt);
1651 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1652 if (BO->getOpcode() != Instruction::Xor) {
1653 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1654 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1655 } else
1656 assert(BO->hasOneUse() &&
1657 "Patterns with XOr should use the BO only in the compare");
1658 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1659 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1660 Cmp->eraseFromParent();
1661 BO->eraseFromParent();
1662 return true;
1663}
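
// A sketch of the rewrite performed above, as illustrative IR: the pair
//   %math = add i32 %a, %b
//   %ov   = icmp ult i32 %math, %a
// is replaced by
//   %mo   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %mo, 0
//   %ov   = extractvalue { i32, i1 } %mo, 1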
1664
1665/// Match special-case patterns that check for unsigned add overflow.
1666 static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1667 BinaryOperator *&Add) {
1668 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1669 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1670 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1671
1672 // We are not expecting non-canonical/degenerate code. Just bail out.
1673 if (isa<Constant>(A))
1674 return false;
1675
1676 ICmpInst::Predicate Pred = Cmp->getPredicate();
1677 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1678 B = ConstantInt::get(B->getType(), 1);
1679 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1680 B = Constant::getAllOnesValue(B->getType());
1681 else
1682 return false;
1683
1684 // Check the users of the variable operand of the compare looking for an add
1685 // with the adjusted constant.
1686 for (User *U : A->users()) {
1687 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1688 Add = cast<BinaryOperator>(U);
1689 return true;
1690 }
1691 }
1692 return false;
1693}
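
// For example (illustrative IR), given
//   %add = add i32 %a, 1
//   %cmp = icmp eq i32 %a, -1
// the compare is recognized as the overflow test for %add: it is true
// exactly when %a is the maximum unsigned value.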
1694
1695/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1696/// intrinsic. Return true if any changes were made.
1697bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1698 ModifyDT &ModifiedDT) {
1699 bool EdgeCase = false;
1700 Value *A, *B;
1701 BinaryOperator *Add;
1702 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1703 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1704 return false;
1705 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1706 A = Add->getOperand(0);
1707 B = Add->getOperand(1);
1708 EdgeCase = true;
1709 }
1710
1711 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1712 TLI->getValueType(*DL, Add->getType()),
1713 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1714 return false;
1715
1716 // We don't want to move around uses of condition values this late, so we
1717 // check if it is legal to create the call to the intrinsic in the basic
1718 // block containing the icmp.
1719 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1720 return false;
1721
1722 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1723 Intrinsic::uadd_with_overflow))
1724 return false;
1725
1726 // Reset callers - do not crash by iterating over a dead instruction.
1727 ModifiedDT = ModifyDT::ModifyInstDT;
1728 return true;
1729}
1730
1731bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1732 ModifyDT &ModifiedDT) {
1733 // We are not expecting non-canonical/degenerate code. Just bail out.
1734 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1735 if (isa<Constant>(A) && isa<Constant>(B))
1736 return false;
1737
1738 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1739 ICmpInst::Predicate Pred = Cmp->getPredicate();
1740 if (Pred == ICmpInst::ICMP_UGT) {
1741 std::swap(A, B);
1742 Pred = ICmpInst::ICMP_ULT;
1743 }
1744 // Convert special-case: (A == 0) is the same as (A u< 1).
1745 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1746 B = ConstantInt::get(B->getType(), 1);
1747 Pred = ICmpInst::ICMP_ULT;
1748 }
1749 // Convert special-case: (A != 0) is the same as (0 u< A).
1750 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1751 std::swap(A, B);
1752 Pred = ICmpInst::ICMP_ULT;
1753 }
1754 if (Pred != ICmpInst::ICMP_ULT)
1755 return false;
1756
1757 // Walk the users of a variable operand of a compare looking for a subtract or
1758 // add with that same operand. Also match the 2nd operand of the compare to
1759 // the add/sub, but that may be a negated constant operand of an add.
1760 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1761 BinaryOperator *Sub = nullptr;
1762 for (User *U : CmpVariableOperand->users()) {
1763 // A - B, A u< B --> usubo(A, B)
1764 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1765 Sub = cast<BinaryOperator>(U);
1766 break;
1767 }
1768
1769 // A + (-C), A u< C (canonicalized form of (sub A, C))
1770 const APInt *CmpC, *AddC;
1771 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1772 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1773 Sub = cast<BinaryOperator>(U);
1774 break;
1775 }
1776 }
1777 if (!Sub)
1778 return false;
1779
1780 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1781 TLI->getValueType(*DL, Sub->getType()),
1782 Sub->hasNUsesOrMore(1)))
1783 return false;
1784
1785 // We don't want to move around uses of condition values this late, so we
1786 // check if it is legal to create the call to the intrinsic in the basic
1787 // block containing the icmp.
1788 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1789 return false;
1790
1791 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1792 Cmp, Intrinsic::usub_with_overflow))
1793 return false;
1794
1795 // Reset callers - do not crash by iterating over a dead instruction.
1796 ModifiedDT = ModifyDT::ModifyInstDT;
1797 return true;
1798}
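
// A sketch of the main pattern handled above, as illustrative IR:
//   %sub = sub i32 %a, %b
//   %cmp = icmp ult i32 %a, %b        ; the borrow-out of the subtraction
// becomes
//   %so  = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
//   %sub = extractvalue { i32, i1 } %so, 0
//   %cmp = extractvalue { i32, i1 } %so, 1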
1799
1800// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1801// The same transformation exists in DAG combiner, but we repeat it here because
1802// DAG builder can break the pattern by moving icmp into a successor block.
1803bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1804 CmpPredicate Pred;
1805 Value *X;
1806 const APInt *C;
1807
1808 // (icmp (ctpop x), c)
1809 if (!match(Cmp, m_ICmp(Pred, m_Ctpop(m_Value(X)), m_APIntAllowPoison(C))))
1810 return false;
1811
1812 // We're only interested in "is power of 2 [or zero]" patterns.
1813 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1814 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1815 (Pred == CmpInst::ICMP_UGT && *C == 1);
1816 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1817 return false;
1818
1819 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1820 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1821 // and otherwise expand ctpop into a few simple instructions.
1822 Type *OpTy = X->getType();
1823 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1824 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1825 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1826 return false;
1827
1828 // ctpop(x) == 1 -> ctpop(x) u< 2
1829 // ctpop(x) != 1 -> ctpop(x) u> 1
1830 if (Pred == ICmpInst::ICMP_EQ) {
1831 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1832 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1833 } else {
1834 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1835 }
1836 return true;
1837 }
1838
1839 Value *NewCmp;
1840 if (IsPowerOf2OrZeroTest ||
1841 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1842 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1843 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1844 IRBuilder<> Builder(Cmp);
1845 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1846 Value *And = Builder.CreateAnd(X, Sub);
1847 CmpInst::Predicate NewPred =
1848 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1849 ? CmpInst::ICMP_EQ
1850 : CmpInst::ICMP_NE;
1851 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1852 } else {
1853 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1854 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1855 IRBuilder<> Builder(Cmp);
1856 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1857 Value *Xor = Builder.CreateXor(X, Sub);
1858 CmpInst::Predicate NewPred =
1859 Pred == ICmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1860 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1861 }
1862
1863 Cmp->replaceAllUsesWith(NewCmp);
1864 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1865 return true;
1866}
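
// For example, on a target with slow ctpop (illustrative IR):
//   %ct  = call i32 @llvm.ctpop.i32(i32 %x)
//   %cmp = icmp ult i32 %ct, 2        ; "power of 2 or zero" test
// is expanded to
//   %sub = add i32 %x, -1
//   %and = and i32 %x, %sub
//   %cmp = icmp eq i32 %and, 0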
1867
1868/// Sink the given CmpInst into user blocks to reduce the number of virtual
1869/// registers that must be created and coalesced. This is a clear win except on
1870/// targets with multiple condition code registers (PowerPC), where it might
1871/// lose; some adjustment may be wanted there.
1872///
1873/// Return true if any changes are made.
1874static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1875 const DataLayout &DL) {
1876 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1877 return false;
1878
1879 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1880 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1881 return false;
1882
1883 bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1884 return isa<PHINode>(U) ||
1885 cast<Instruction>(U)->getParent() == Cmp->getParent();
1886 });
1887
1888 // Avoid sinking larger than legal integer comparisons unless the compare is
1889 // ONLY used in another BB.
1890 if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1891 Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1892 DL.getLargestLegalIntTypeSizeInBits())
1893 return false;
1894
1895 // Only insert a cmp in each block once.
1896 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1897
1898 bool MadeChange = false;
1899 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1900 UI != E;) {
1901 Use &TheUse = UI.getUse();
1902 Instruction *User = cast<Instruction>(*UI);
1903
1904 // Preincrement use iterator so we don't invalidate it.
1905 ++UI;
1906
1907 // Don't bother for PHI nodes.
1908 if (isa<PHINode>(User))
1909 continue;
1910
1911 // Figure out which BB this cmp is used in.
1912 BasicBlock *UserBB = User->getParent();
1913 BasicBlock *DefBB = Cmp->getParent();
1914
1915 // If this user is in the same block as the cmp, don't change the cmp.
1916 if (UserBB == DefBB)
1917 continue;
1918
1919 // If we have already inserted a cmp into this block, use it.
1920 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1921
1922 if (!InsertedCmp) {
1923 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1924 assert(InsertPt != UserBB->end());
1925 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1926 Cmp->getOperand(0), Cmp->getOperand(1), "");
1927 InsertedCmp->insertBefore(*UserBB, InsertPt);
1928 // Propagate the debug info.
1929 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1930 }
1931
1932 // Replace a use of the cmp with a use of the new cmp.
1933 TheUse = InsertedCmp;
1934 MadeChange = true;
1935 ++NumCmpUses;
1936 }
1937
1938 // If we removed all uses, nuke the cmp.
1939 if (Cmp->use_empty()) {
1940 Cmp->eraseFromParent();
1941 MadeChange = true;
1942 }
1943
1944 return MadeChange;
1945}
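
// A sketch of the effect, as illustrative IR: a compare defined in bb0 but
// only branched on in bb1
//   bb0:
//     %cmp = icmp ult i32 %a, %b
//   bb1:
//     br i1 %cmp, label %t, label %f
// is re-materialized in the using block, so isel can fold it into the
// branch without keeping a live i1 value across blocks:
//   bb1:
//     %cmp1 = icmp ult i32 %a, %b
//     br i1 %cmp1, label %t, label %f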
1946
1947/// For pattern like:
1948///
1949/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1950/// ...
1951/// DomBB:
1952/// ...
1953/// br DomCond, TrueBB, CmpBB
1954/// CmpBB: (with DomBB being the single predecessor)
1955/// ...
1956/// Cmp = icmp eq CmpOp0, CmpOp1
1957/// ...
1958///
1959 /// This would use two comparisons on targets where the lowering of icmp
1960 /// sgt/slt differs from the lowering of icmp eq (PowerPC). This function tries
1961 /// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1962 /// After that, DomCond and Cmp can use the same comparison, saving one
1963 /// comparison.
1964///
1965/// Return true if any changes are made.
1966 static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1967 const TargetLowering &TLI) {
1968 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1969 return false;
1970
1971 ICmpInst::Predicate Pred = Cmp->getPredicate();
1972 if (Pred != ICmpInst::ICMP_EQ)
1973 return false;
1974
1975 // If icmp eq has users other than CondBrInst and SelectInst, converting it to
1976 // icmp slt/sgt would introduce more redundant LLVM IR.
1977 for (User *U : Cmp->users()) {
1978 if (isa<CondBrInst>(U))
1979 continue;
1980 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1981 continue;
1982 return false;
1983 }
1984
1985 // This is a cheap/incomplete check for dominance - just match a single
1986 // predecessor with a conditional branch.
1987 BasicBlock *CmpBB = Cmp->getParent();
1988 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1989 if (!DomBB)
1990 return false;
1991
1992 // We want to ensure that the only way control gets to the comparison of
1993 // interest is that a less/greater than comparison on the same operands is
1994 // false.
1995 Value *DomCond;
1996 BasicBlock *TrueBB, *FalseBB;
1997 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1998 return false;
1999 if (CmpBB != FalseBB)
2000 return false;
2001
2002 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
2003 CmpPredicate DomPred;
2004 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
2005 return false;
2006 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
2007 return false;
2008
2009 // Convert the equality comparison to the opposite of the dominating
2010 // comparison and swap the direction for all branch/select users.
2011 // We have conceptually converted:
2012 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
2013 // to
2014 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
2015 // And similarly for branches.
2016 for (User *U : Cmp->users()) {
2017 if (auto *BI = dyn_cast<CondBrInst>(U)) {
2018 BI->swapSuccessors();
2019 continue;
2020 }
2021 if (auto *SI = dyn_cast<SelectInst>(U)) {
2022 // Swap operands
2023 SI->swapValues();
2024 SI->swapProfMetadata();
2025 continue;
2026 }
2027 llvm_unreachable("Must be a branch or a select");
2028 }
2029 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
2030 return true;
2031}
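
// Concretely (illustrative IR): with DomBB branching on
//   %dom = icmp sgt i32 %a, %b
// and CmpBB, its false successor, computing
//   %cmp = icmp eq i32 %a, %b
// the equality compare is rewritten to
//   %cmp = icmp slt i32 %a, %b
// and its branch/select users are swapped, so both blocks can reuse one
// sgt/slt comparison result.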
2032
2033/// Many architectures use the same instruction for both subtract and cmp. Try
2034/// to swap cmp operands to match subtract operations to allow for CSE.
2035 static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
2036 Value *Op0 = Cmp->getOperand(0);
2037 Value *Op1 = Cmp->getOperand(1);
2038 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
2039 isa<Constant>(Op1) || Op0 == Op1)
2040 return false;
2041
2042 // If a subtract already has the same operands as a compare, swapping would be
2043 // bad. If a subtract has the same operands as a compare but in reverse order,
2044 // then swapping is good.
2045 int GoodToSwap = 0;
2046 unsigned NumInspected = 0;
2047 for (const User *U : Op0->users()) {
2048 // Avoid walking many users.
2049 if (++NumInspected > 128)
2050 return false;
2051 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2052 GoodToSwap++;
2053 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2054 GoodToSwap--;
2055 }
2056
2057 if (GoodToSwap > 0) {
2058 Cmp->swapOperands();
2059 return true;
2060 }
2061 return false;
2062}
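
// For example (illustrative IR), given
//   %sub = sub i32 %b, %a
//   %cmp = icmp ugt i32 %a, %b
// swapping the compare into "icmp ult i32 %b, %a" lets targets whose
// subtract also sets flags CSE the compare with %sub.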
2063
2064static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2065 const DataLayout &DL) {
2066 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2067 if (!FCmp)
2068 return false;
2069
2070 // Don't fold if the target offers free fabs and the predicate is legal.
2071 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2072 if (TLI.isFAbsFree(VT) &&
2073 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
2074 VT.getSimpleVT()))
2075 return false;
2076
2077 // Reverse the canonicalization if it is an FP class test
2078 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2079 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2080 };
2081 auto [ClassVal, ClassTest] =
2082 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2083 FCmp->getOperand(0), FCmp->getOperand(1));
2084 if (!ClassVal)
2085 return false;
2086
2087 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2088 return false;
2089
2090 IRBuilder<> Builder(Cmp);
2091 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2092 Cmp->replaceAllUsesWith(IsFPClass);
2093 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
2094 return true;
2095}
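
// For example (illustrative IR), an infinity check such as
//   %fabs = call double @llvm.fabs.f64(double %x)
//   %cmp  = fcmp oeq double %fabs, 0x7FF0000000000000
// is turned back into
//   %cmp  = call i1 @llvm.is.fpclass.f64(double %x, i32 516)  ; 516 == fcInf
// when the target has no free fabs or no legal condition code for it.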
2096
2097 static bool isRemOfLoopIncrementWithLoopInvariant(
2098 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2099 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2100 Value *Incr, *RemAmt;
2101 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2102 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2103 return false;
2104
2105 Value *AddInst, *AddOffset;
2106 // Find out loop increment PHI.
2107 auto *PN = dyn_cast<PHINode>(Incr);
2108 if (PN != nullptr) {
2109 AddInst = nullptr;
2110 AddOffset = nullptr;
2111 } else {
2112 // Search through a NUW add on top of the loop increment.
2113 Value *V0, *V1;
2114 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2115 return false;
2116
2117 AddInst = Incr;
2118 PN = dyn_cast<PHINode>(V0);
2119 if (PN != nullptr) {
2120 AddOffset = V1;
2121 } else {
2122 PN = dyn_cast<PHINode>(V1);
2123 AddOffset = V0;
2124 }
2125 }
2126
2127 if (!PN)
2128 return false;
2129
2130 // This isn't strictly necessary; what we really need is one increment and any
2131 // number of initial values that are all the same.
2132 if (PN->getNumIncomingValues() != 2)
2133 return false;
2134
2135 // Only trivially analyzable loops.
2136 Loop *L = LI->getLoopFor(PN->getParent());
2137 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2138 return false;
2139
2140 // Require that the remainder is inside the loop.
2141 if (!L->contains(Rem))
2142 return false;
2143
2144 // Only works if the remainder amount is loop-invariant.
2145 if (!L->isLoopInvariant(RemAmt))
2146 return false;
2147
2148 // Only works if the AddOffset is loop-invariant.
2149 if (AddOffset && !L->isLoopInvariant(AddOffset))
2150 return false;
2151
2152 // Is the PHI a loop increment?
2153 auto LoopIncrInfo = getIVIncrement(PN, LI);
2154 if (!LoopIncrInfo)
2155 return false;
2156
2157 // We need remainder_amount % increment_amount to be zero. Increment of one
2158 // satisfies that without any special logic and is overwhelmingly the common
2159 // case.
2160 if (!match(LoopIncrInfo->second, m_One()))
2161 return false;
2162
2163 // Need the increment to not overflow.
2164 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2165 return false;
2166
2167 // Set output variables.
2168 RemAmtOut = RemAmt;
2169 LoopIncrPNOut = PN;
2170 AddInstOut = AddInst;
2171 AddOffsetOut = AddOffset;
2172
2173 return true;
2174}
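
// The shape being matched, as illustrative IR (%n is loop-invariant):
//   loop:
//     %iv  = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
//     %rem = urem i64 %iv, %n
//     ...
//     %iv.next = add nuw i64 %iv, 1
// optionally with a nuw add of a loop-invariant offset between %iv and the
// urem.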
2175
2176// Try to transform:
2177//
2178// for(i = Start; i < End; ++i)
2179// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2180//
2181// ->
2182//
2183// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2184// for(i = Start; i < End; ++i, ++rem)
2185// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2186 static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2187 const LoopInfo *LI,
2188 SmallSet<BasicBlock *, 32> &FreshBBs,
2189 bool IsHuge) {
2190 Value *AddOffset, *RemAmt, *AddInst;
2191 PHINode *LoopIncrPN;
2192 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2193 AddOffset, LoopIncrPN))
2194 return false;
2195
2196 // Only non-constant remainder as the extra IV is probably not profitable
2197 // in that case.
2198 //
2199 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2200 // we can rule out register pressure and ensure this `urem` is executed each
2201 // iteration, it's probably profitable to handle the const case as well.
2202 //
2203 // Potential TODO(2): Should we have a check for how "nested" this remainder
2204 // operation is? The new code runs every iteration so if the remainder is
2205 // guarded behind unlikely conditions this might not be worth it.
2206 if (match(RemAmt, m_ImmConstant()))
2207 return false;
2208
2209 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2210 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2211 // If we have an add, create the initial value for the remainder.
2212 // The logic here is:
2213 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2214 //
2215 // Only proceed if the expression simplifies (otherwise we can't fully
2216 // optimize out the urem).
2217 if (AddInst) {
2218 assert(AddOffset && "We found an add but missing values");
2219 // Without dom-condition/assumption cache we aren't likely to get much out
2220 // of a context instruction.
2221 Start = simplifyAddInst(Start, AddOffset,
2222 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2223 /*IsNUW=*/true, *DL);
2224 if (!Start)
2225 return false;
2226 }
2227
2228 // If we can't fully optimize out the `rem`, skip this transform.
2229 Start = simplifyURemInst(Start, RemAmt, *DL);
2230 if (!Start)
2231 return false;
2232
2233 // Create new remainder with induction variable.
2234 Type *Ty = Rem->getType();
2235 IRBuilder<> Builder(Rem->getContext());
2236
2237 Builder.SetInsertPoint(LoopIncrPN);
2238 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2239
2240 Builder.SetInsertPoint(cast<Instruction>(
2241 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2242 // `(add (urem x, y), 1)` is always nuw.
2243 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2244 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2245 Value *RemSel =
2246 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2247
2248 NewRem->addIncoming(Start, L->getLoopPreheader());
2249 NewRem->addIncoming(RemSel, L->getLoopLatch());
2250
2251 // Insert all touched BBs.
2252 FreshBBs.insert(LoopIncrPN->getParent());
2253 FreshBBs.insert(L->getLoopLatch());
2254 FreshBBs.insert(Rem->getParent());
2255 if (AddInst)
2256 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2257 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2258 Rem->eraseFromParent();
2259 if (AddInst && AddInst->use_empty())
2260 cast<Instruction>(AddInst)->eraseFromParent();
2261 return true;
2262}
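
// As IR, the rewrite above looks roughly like this (illustrative; it only
// fires when the preheader urem simplifies away, e.g. when %start is 0):
//   preheader:
//     ; the new IV starts at the simplified (urem %start, %n)
//   loop:
//     %rem = phi i64 [ 0, %preheader ], [ %rem.sel, %latch ]
//     ...
//   latch:
//     %rem.add = add nuw i64 %rem, 1
//     %rem.cmp = icmp eq i64 %rem.add, %n
//     %rem.sel = select i1 %rem.cmp, i64 0, i64 %rem.add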
2263
2264bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2265 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2266 return true;
2267 return false;
2268}
2269
2270bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2271 if (sinkCmpExpression(Cmp, *TLI, *DL))
2272 return true;
2273
2274 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2275 return true;
2276
2277 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2278 return true;
2279
2280 if (unfoldPowerOf2Test(Cmp))
2281 return true;
2282
2283 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2284 return true;
2285
2286 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2287 return true;
2288
2289 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2290 return true;
2291
2292 return false;
2293}
2294
2295/// Duplicate and sink the given 'and' instruction into user blocks where it is
2296/// used in a compare to allow isel to generate better code for targets where
2297/// this operation can be combined.
2298///
2299/// Return true if any changes are made.
2300 static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2301 SetOfInstrs &InsertedInsts) {
2302 // Double-check that we're not trying to optimize an instruction that was
2303 // already optimized by some other part of this pass.
2304 assert(!InsertedInsts.count(AndI) &&
2305 "Attempting to optimize already optimized and instruction");
2306 (void)InsertedInsts;
2307
2308 // Nothing to do for single use in same basic block.
2309 if (AndI->hasOneUse() &&
2310 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2311 return false;
2312
2313 // Try to avoid cases where sinking/duplicating is likely to increase register
2314 // pressure.
2315 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2316 !isa<ConstantInt>(AndI->getOperand(1)) &&
2317 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2318 return false;
2319
2320 for (auto *U : AndI->users()) {
2321 Instruction *User = cast<Instruction>(U);
2322
2323 // Only sink 'and' feeding icmp with 0.
2324 if (!isa<ICmpInst>(User))
2325 return false;
2326
2327 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2328 if (!CmpC || !CmpC->isZero())
2329 return false;
2330 }
2331
2332 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2333 return false;
2334
2335 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2336 LLVM_DEBUG(AndI->getParent()->dump());
2337
2338 // Push the 'and' into the same block as the icmp 0. There should only be
2339 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2340 // others, so we don't need to keep track of which BBs we insert into.
2341 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2342 UI != E;) {
2343 Use &TheUse = UI.getUse();
2344 Instruction *User = cast<Instruction>(*UI);
2345
2346 // Preincrement use iterator so we don't invalidate it.
2347 ++UI;
2348
2349 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2350
2351 // Keep the 'and' in the same place if the use is already in the same block.
2352 Instruction *InsertPt =
2353 User->getParent() == AndI->getParent() ? AndI : User;
2354 Instruction *InsertedAnd = BinaryOperator::Create(
2355 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2356 InsertPt->getIterator());
2357 // Propagate the debug info.
2358 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2359
2360 // Replace a use of the 'and' with a use of the new 'and'.
2361 TheUse = InsertedAnd;
2362 ++NumAndUses;
2363 LLVM_DEBUG(User->getParent()->dump());
2364 }
2365
2366 // We removed all uses, nuke the and.
2367 AndI->eraseFromParent();
2368 return true;
2369}
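
// A sketch of the effect, as illustrative IR: an 'and' whose only uses are
// zero-compares in other blocks, e.g.
//   bb0:
//     %and = and i64 %x, 255
//   bb1:
//     %cmp = icmp eq i64 %and, 0
// is duplicated next to each compare so isel can combine the pair into a
// test-under-mask style instruction:
//   bb1:
//     %and1 = and i64 %x, 255
//     %cmp  = icmp eq i64 %and1, 0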
2370
2371/// Check if the candidates could be combined with a shift instruction, which
2372/// includes:
2373/// 1. Truncate instruction
2374/// 2. And instruction and the imm is a mask of the low bits:
2375/// imm & (imm+1) == 0
2376 static bool isExtractBitsCandidateUse(Instruction *User) {
2377 if (!isa<TruncInst>(User)) {
2378 if (User->getOpcode() != Instruction::And ||
2379 !isa<ConstantInt>(User->getOperand(1)))
2380 return false;
2381
2382 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2383
2384 if ((Cimm & (Cimm + 1)).getBoolValue())
2385 return false;
2386 }
2387 return true;
2388}
2389
2390/// Sink both shift and truncate instruction to the use of truncate's BB.
2391static bool
2392 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2393 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2394 const TargetLowering &TLI, const DataLayout &DL) {
2395 BasicBlock *UserBB = User->getParent();
2396 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2397 auto *TruncI = cast<TruncInst>(User);
2398 bool MadeChange = false;
2399
2400 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2401 TruncE = TruncI->user_end();
2402 TruncUI != TruncE;) {
2403
2404 Use &TruncTheUse = TruncUI.getUse();
2405 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2406 // Preincrement use iterator so we don't invalidate it.
2407
2408 ++TruncUI;
2409
2410 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2411 if (!ISDOpcode)
2412 continue;
2413
2414 // If the use is actually a legal node, there will not be an
2415 // implicit truncate.
2416 // FIXME: always querying the result type is just an
2417 // approximation; some nodes' legality is determined by the
2418 // operand or other means. There's no good way to find out though.
2419 if (TLI.isOperationLegalOrCustom(
2420 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2421 continue;
2422
2423 // Don't bother for PHI nodes.
2424 if (isa<PHINode>(TruncUser))
2425 continue;
2426
2427 BasicBlock *TruncUserBB = TruncUser->getParent();
2428
2429 if (UserBB == TruncUserBB)
2430 continue;
2431
2432 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2433 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2434
2435 if (!InsertedShift && !InsertedTrunc) {
2436 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2437 assert(InsertPt != TruncUserBB->end());
2438 // Sink the shift
2439 if (ShiftI->getOpcode() == Instruction::AShr)
2440 InsertedShift =
2441 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2442 else
2443 InsertedShift =
2444 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2445 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2446 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2447
2448 // Sink the trunc
2449 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2450 TruncInsertPt++;
2451 // It will go ahead of any debug-info.
2452 TruncInsertPt.setHeadBit(true);
2453 assert(TruncInsertPt != TruncUserBB->end());
2454
2455 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2456 TruncI->getType(), "");
2457 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2458 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2459
2460 MadeChange = true;
2461
2462 TruncTheUse = InsertedTrunc;
2463 }
2464 }
2465 return MadeChange;
2466}
2467
2468/// Sink the shift *right* instruction into user blocks if the uses could
2469/// potentially be combined with this shift instruction and generate BitExtract
2470/// instruction. It will only be applied if the architecture supports BitExtract
2471/// instruction. Here is an example:
2472/// BB1:
2473/// %x.extract.shift = lshr i64 %arg1, 32
2474/// BB2:
2475/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2476/// ==>
2477///
2478/// BB2:
2479/// %x.extract.shift.1 = lshr i64 %arg1, 32
2480/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2481///
2482/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2483/// instruction.
2484/// Return true if any changes are made.
2485 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2486 const TargetLowering &TLI,
2487 const DataLayout &DL) {
2488 BasicBlock *DefBB = ShiftI->getParent();
2489
2490 /// Only insert instructions in each block once.
2491 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2492
2493 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2494
2495 bool MadeChange = false;
2496 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2497 UI != E;) {
2498 Use &TheUse = UI.getUse();
2499 Instruction *User = cast<Instruction>(*UI);
2500 // Preincrement use iterator so we don't invalidate it.
2501 ++UI;
2502
2503 // Don't bother for PHI nodes.
2504 if (isa<PHINode>(User))
2505 continue;
2506
2507 if (!isExtractBitsCandidateUse(User))
2508 continue;
2509
2510 BasicBlock *UserBB = User->getParent();
2511
2512 if (UserBB == DefBB) {
2513 // If the shift and truncate instructions are in the same BB, the use of
2514 // the truncate (TruncUse) may still introduce another truncate if it is
2515 // not legal. In this case, we would like to sink both the shift and the
2516 // truncate instruction to the BB of TruncUse.
2517 // for example:
2518 // BB1:
2519 // i64 shift.result = lshr i64 opnd, imm
2520 // trunc.result = trunc shift.result to i16
2521 //
2522 // BB2:
2523 // ----> We will have an implicit truncate here if the architecture does
2524 // not have i16 compare.
2525 // cmp i16 trunc.result, opnd2
2526 //
2527 if (isa<TruncInst>(User) &&
2528 shiftIsLegal
2529 // If the type of the truncate is legal, no truncate will be
2530 // introduced in other basic blocks.
2531 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2532 MadeChange =
2533 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2534
2535 continue;
2536 }
2537 // If we have already inserted a shift into this block, use it.
2538 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2539
2540 if (!InsertedShift) {
2541 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2542 assert(InsertPt != UserBB->end());
2543
2544 if (ShiftI->getOpcode() == Instruction::AShr)
2545 InsertedShift =
2546 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2547 else
2548 InsertedShift =
2549 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2550 InsertedShift->insertBefore(*UserBB, InsertPt);
2551 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2552
2553 MadeChange = true;
2554 }
2555
2556 // Replace a use of the shift with a use of the new shift.
2557 TheUse = InsertedShift;
2558 }
2559
2560 // If we removed all uses, or there are none, nuke the shift.
2561 if (ShiftI->use_empty()) {
2562 salvageDebugInfo(*ShiftI);
2563 ShiftI->eraseFromParent();
2564 MadeChange = true;
2565 }
2566
2567 return MadeChange;
2568}
2569
2570/// If counting leading or trailing zeros is an expensive operation and a zero
2571/// input is defined, add a check for zero to avoid calling the intrinsic.
2572///
2573/// We want to transform:
2574/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2575///
2576/// into:
2577/// entry:
2578/// %cmpz = icmp eq i64 %A, 0
2579/// br i1 %cmpz, label %cond.end, label %cond.false
2580/// cond.false:
2581/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2582/// br label %cond.end
2583/// cond.end:
2584/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2585///
2586/// If the transform is performed, return true and set ModifiedDT to true.
2587static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2588 DomTreeUpdater *DTU, LoopInfo *LI,
2589 const TargetLowering *TLI,
2590 const DataLayout *DL, ModifyDT &ModifiedDT,
2591 SmallSet<BasicBlock *, 32> &FreshBBs,
2592 bool IsHugeFunc) {
2593 // If a zero input is undefined, it doesn't make sense to despeculate that.
2594 if (match(CountZeros->getOperand(1), m_One()))
2595 return false;
2596
2597 // If it's cheap to speculate, there's nothing to do.
2598 Type *Ty = CountZeros->getType();
2599 auto IntrinsicID = CountZeros->getIntrinsicID();
2600 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2601 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2602 return false;
2603
2604 // Only handle scalar cases. Anything else requires too much work.
2605 unsigned SizeInBits = Ty->getScalarSizeInBits();
2606 if (Ty->isVectorTy())
2607 return false;
2608
2609 // Bail if the value is never zero.
2610 Use &Op = CountZeros->getOperandUse(0);
2611 if (isKnownNonZero(Op, *DL))
2612 return false;
2613
2614 // The intrinsic will be sunk behind a compare against zero and branch.
2615 BasicBlock *StartBlock = CountZeros->getParent();
2616 BasicBlock *CallBlock = SplitBlock(StartBlock, CountZeros, DTU, LI,
2617 /* MSSAU */ nullptr, "cond.false");
2618 if (IsHugeFunc)
2619 FreshBBs.insert(CallBlock);
2620
2621 // Create another block after the count zero intrinsic. A PHI will be added
2622 // in this block to select the result of the intrinsic or the bit-width
2623 // constant if the input to the intrinsic is zero.
2624 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2625 // Any debug-info after CountZeros should not be included.
2626 SplitPt.setHeadBit(true);
2627 BasicBlock *EndBlock = SplitBlock(CallBlock, &*SplitPt, DTU, LI,
2628 /* MSSAU */ nullptr, "cond.end");
2629 if (IsHugeFunc)
2630 FreshBBs.insert(EndBlock);
2631
2632 // Set up a builder to create a compare, conditional branch, and PHI.
2633 IRBuilder<> Builder(CountZeros->getContext());
2634 Builder.SetInsertPoint(StartBlock->getTerminator());
2635 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2636
2637 // Replace the unconditional branch that was created by the first split with
2638 // a compare against zero and a conditional branch.
2639 Value *Zero = Constant::getNullValue(Ty);
2640 // Avoid introducing branch on poison. This also replaces the ctz operand.
2641 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2642 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2643 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2644 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2645 StartBlock->getTerminator()->eraseFromParent();
2646 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, EndBlock}});
2647
2648 // Create a PHI in the end block to select either the output of the intrinsic
2649 // or the bit width of the operand.
2650 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2651 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2652 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2653 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2654 PN->addIncoming(BitWidth, StartBlock);
2655 PN->addIncoming(CountZeros, CallBlock);
2656
2657 // We are explicitly handling the zero case, so we can set the intrinsic's
2658 // undefined zero argument to 'true'. This will also prevent reprocessing the
2659 // intrinsic; we only despeculate when a zero input is defined.
2660 CountZeros->setArgOperand(1, Builder.getTrue());
2661 ModifiedDT = ModifyDT::ModifyBBDT;
2662 return true;
2663}
2664
2665bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2666 BasicBlock *BB = CI->getParent();
2667
2668 // Sink address computing for memory operands into the block.
2669 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2670 return true;
2671
2672 // Align the pointer arguments to this call if the target thinks it's a good
2673 // idea
2674 unsigned MinSize;
2675 Align PrefAlign;
2676 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2677 for (auto &Arg : CI->args()) {
2678 // We want to align both objects whose address is used directly and
2679 // objects whose address is used in casts and GEPs, though it only makes
2680 // sense for GEPs if the offset is a multiple of the desired alignment and
2681 // if size - offset meets the size threshold.
2682 if (!Arg->getType()->isPointerTy())
2683 continue;
2684 APInt Offset(DL->getIndexSizeInBits(
2685 cast<PointerType>(Arg->getType())->getAddressSpace()),
2686 0);
2687 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2688 uint64_t Offset2 = Offset.getLimitedValue();
2689 if (!isAligned(PrefAlign, Offset2))
2690 continue;
2691 AllocaInst *AI;
2692 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign) {
2693 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(*DL);
2694 if (AllocaSize && AllocaSize->getKnownMinValue() >= MinSize + Offset2)
2695 AI->setAlignment(PrefAlign);
2696 }
2697 // Global variables can only be aligned if they are defined in this
2698 // object (i.e. they are uniquely initialized in this object), and
2699 // over-aligning global variables that have an explicit section is
2700 // forbidden.
2701 GlobalVariable *GV;
2702 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2703 GV->getPointerAlignment(*DL) < PrefAlign &&
2704 GV->getGlobalSize(*DL) >= MinSize + Offset2)
2705 GV->setAlignment(PrefAlign);
2706 }
2707 }
2708 // If this is a memcpy (or similar) then we may be able to improve the
2709 // alignment.
2710 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2711 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2712 MaybeAlign MIDestAlign = MI->getDestAlign();
2713 if (!MIDestAlign || DestAlign > *MIDestAlign)
2714 MI->setDestAlignment(DestAlign);
2715 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2716 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2717 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2718 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2719 MTI->setSourceAlignment(SrcAlign);
2720 }
2721 }
2722
2723 // If we have a cold call site, try to sink addressing computation into the
2724 // cold block. This interacts with our handling for loads and stores to
2725 // ensure that we can fold all uses of a potential addressing computation
2726 // into their uses. TODO: generalize this to work over profiling data
2727 if (CI->hasFnAttr(Attribute::Cold) &&
2728 !llvm::shouldOptimizeForSize(BB, PSI, BFI))
2729 for (auto &Arg : CI->args()) {
2730 if (!Arg->getType()->isPointerTy())
2731 continue;
2732 unsigned AS = Arg->getType()->getPointerAddressSpace();
2733 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2734 return true;
2735 }
2736
2737 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2738 if (II) {
2739 switch (II->getIntrinsicID()) {
2740 default:
2741 break;
2742 case Intrinsic::assume:
2743 llvm_unreachable("llvm.assume should have been removed already");
2744 case Intrinsic::allow_runtime_check:
2745 case Intrinsic::allow_ubsan_check:
2746 case Intrinsic::experimental_widenable_condition: {
2747 // Give up on future widening opportunities so that we can fold away dead
2748 // paths and merge blocks before going into block-local instruction
2749 // selection.
2750 if (II->use_empty()) {
2751 II->eraseFromParent();
2752 return true;
2753 }
2754 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2755 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2756 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2757 });
2758 return true;
2759 }
2760 case Intrinsic::objectsize:
2761 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2762 case Intrinsic::is_constant:
2763 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2764 case Intrinsic::aarch64_stlxr:
2765 case Intrinsic::aarch64_stxr: {
2766 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2767 if (!ExtVal || !ExtVal->hasOneUse() ||
2768 ExtVal->getParent() == CI->getParent())
2769 return false;
2770 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2771 ExtVal->moveBefore(CI->getIterator());
2772 // Mark this instruction as "inserted by CGP", so that other
2773 // optimizations don't touch it.
2774 InsertedInsts.insert(ExtVal);
2775 return true;
2776 }
2777
2778 case Intrinsic::launder_invariant_group:
2779 case Intrinsic::strip_invariant_group: {
2780 Value *ArgVal = II->getArgOperand(0);
2781 auto it = LargeOffsetGEPMap.find(II);
2782 if (it != LargeOffsetGEPMap.end()) {
2783 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2784 // Make sure not to have to deal with iterator invalidation
2785 // after possibly adding ArgVal to LargeOffsetGEPMap.
2786 auto GEPs = std::move(it->second);
2787 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2788 LargeOffsetGEPMap.erase(II);
2789 }
2790
2791 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2792 II->eraseFromParent();
2793 return true;
2794 }
2795 case Intrinsic::cttz:
2796 case Intrinsic::ctlz:
2797 // If counting zeros is expensive, try to avoid it.
2798 return despeculateCountZeros(II, DTU, LI, TLI, DL, ModifiedDT, FreshBBs,
2799 IsHugeFunc);
2800 case Intrinsic::fshl:
2801 case Intrinsic::fshr:
2802 return optimizeFunnelShift(II);
2803 case Intrinsic::masked_gather:
2804 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2805 case Intrinsic::masked_scatter:
2806 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2807 case Intrinsic::masked_load:
2808 // Treat v1X masked load as load X type.
2809 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2810 if (VT->getNumElements() == 1) {
2811 Value *PtrVal = II->getArgOperand(0);
2812 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2813 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2814 return true;
2815 }
2816 }
2817 return false;
2818 case Intrinsic::masked_store:
2819 // Treat v1X masked store as store X type.
2820 if (auto *VT =
2821 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2822 if (VT->getNumElements() == 1) {
2823 Value *PtrVal = II->getArgOperand(1);
2824 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2825 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2826 return true;
2827 }
2828 }
2829 return false;
2830 case Intrinsic::umul_with_overflow:
2831 return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
2832 case Intrinsic::smul_with_overflow:
2833 return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
2834 }
2835
2836 SmallVector<Value *, 2> PtrOps;
2837 Type *AccessTy;
2838 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2839 while (!PtrOps.empty()) {
2840 Value *PtrVal = PtrOps.pop_back_val();
2841 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2842 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2843 return true;
2844 }
2845 }
2846
2847 // From here on out we're working with named functions.
2848 auto *Callee = CI->getCalledFunction();
2849 if (!Callee)
2850 return false;
2851
2852 // Lower all default uses of _chk calls. This is very similar
2853 // to what InstCombineCalls does, but here we are only lowering calls
2854 // to fortified library functions (e.g. __memcpy_chk) that have the default
2855 // "don't know" as the objectsize. Anything else should be left alone.
2856 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2857 IRBuilder<> Builder(CI);
2858 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2859 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2860 CI->eraseFromParent();
2861 return true;
2862 }
2863
2864 // SCCP may have propagated, among other things, C++ static variables across
2865 // calls. If this happens to be the case, we may want to undo it in order to
2866 // avoid redundant pointer computation of the constant, as the function
2867 // returning the constant needs to be executed anyway.
2868 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2869 if (!F->getReturnType()->isPointerTy())
2870 return nullptr;
2871
2872 GlobalVariable *UniformValue = nullptr;
2873 for (auto &BB : *F) {
2874 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2875 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2876 if (!UniformValue)
2877 UniformValue = V;
2878 else if (V != UniformValue)
2879 return nullptr;
2880 } else {
2881 return nullptr;
2882 }
2883 }
2884 }
2885
2886 return UniformValue;
2887 };
2888
2889 if (Callee->hasExactDefinition()) {
2890 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2891 bool MadeChange = false;
2892 for (Use &U : make_early_inc_range(RV->uses())) {
2893 auto *I = dyn_cast<Instruction>(U.getUser());
2894 if (!I || I->getParent() != CI->getParent()) {
2895 // Limit to the same basic block to avoid extending the call-site live
2896 // range, which otherwise could increase register pressure.
2897 continue;
2898 }
2899 if (CI->comesBefore(I)) {
2900 U.set(CI);
2901 MadeChange = true;
2902 }
2903 }
2904
2905 return MadeChange;
2906 }
2907 }
2908
2909 return false;
2910}
2911
2912 static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2913 const CallInst *CI) {
2914 assert(CI && CI->use_empty());
2915
2916 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2917 switch (II->getIntrinsicID()) {
2918 case Intrinsic::memset:
2919 case Intrinsic::memcpy:
2920 case Intrinsic::memmove:
2921 return true;
2922 default:
2923 return false;
2924 }
2925
2926 LibFunc LF;
2927 Function *Callee = CI->getCalledFunction();
2928 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2929 switch (LF) {
2930 case LibFunc_strcpy:
2931 case LibFunc_strncpy:
2932 case LibFunc_strcat:
2933 case LibFunc_strncat:
2934 return true;
2935 default:
2936 return false;
2937 }
2938
2939 return false;
2940}
2941
2942/// Look for opportunities to duplicate return instructions to the predecessor
2943/// to enable tail call optimizations. The case it is currently looking for is
2944 /// the following one. Known intrinsics or library functions that may be tail
2945/// called are taken into account as well.
2946/// @code
2947/// bb0:
2948/// %tmp0 = tail call i32 @f0()
2949/// br label %return
2950/// bb1:
2951/// %tmp1 = tail call i32 @f1()
2952/// br label %return
2953/// bb2:
2954/// %tmp2 = tail call i32 @f2()
2955/// br label %return
2956/// return:
2957/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2958/// ret i32 %retval
2959/// @endcode
2960///
2961/// =>
2962///
2963/// @code
2964/// bb0:
2965/// %tmp0 = tail call i32 @f0()
2966/// ret i32 %tmp0
2967/// bb1:
2968/// %tmp1 = tail call i32 @f1()
2969/// ret i32 %tmp1
2970/// bb2:
2971/// %tmp2 = tail call i32 @f2()
2972/// ret i32 %tmp2
2973/// @endcode
2974bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2975 ModifyDT &ModifiedDT) {
2976 if (!BB->getTerminator())
2977 return false;
2978
2979 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2980 if (!RetI)
2981 return false;
2982
2983 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2984
2985 PHINode *PN = nullptr;
2986 ExtractValueInst *EVI = nullptr;
2987 BitCastInst *BCI = nullptr;
2988 Value *V = RetI->getReturnValue();
2989 if (V) {
2990 BCI = dyn_cast<BitCastInst>(V);
2991 if (BCI)
2992 V = BCI->getOperand(0);
2993
2994 EVI = dyn_cast<ExtractValueInst>(V);
2995 if (EVI) {
2996 V = EVI->getOperand(0);
2997 if (!llvm::all_of(EVI->indices(), equal_to(0)))
2998 return false;
2999 }
3000
3001 PN = dyn_cast<PHINode>(V);
3002 }
3003
3004 if (PN && PN->getParent() != BB)
3005 return false;
3006
3007 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
3008 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
3009 if (BC && BC->hasOneUse())
3010 Inst = BC->user_back();
3011
3012 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
3013 return II->getIntrinsicID() == Intrinsic::lifetime_end;
3014 return false;
3015 };
3016
3017 SmallVector<const IntrinsicInst *, 4> FakeUses;
3018
3019 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
3020 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
3021 II && II->getIntrinsicID() == Intrinsic::fake_use) {
3022 // Record the instruction so it can be preserved when the exit block is
3023 // removed. Do not preserve the fake use that uses the result of the
3024 // PHI instruction.
3025 // Do not copy fake uses that use the result of a PHI node.
3026 // FIXME: If we do want to copy the fake use into the return blocks, we
3027 // have to figure out which of the PHI node operands to use for each
3028 // copy.
3029 if (!isa<PHINode>(II->getOperand(0))) {
3030 FakeUses.push_back(II);
3031 }
3032 return true;
3033 }
3034
3035 return false;
3036 };
3037
3038 // Make sure there are no instructions between the first instruction
3039 // and return.
3040 BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
3041 // Skip over pseudo-probes and the bitcast.
3042 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3043 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3044 BI = std::next(BI);
3045 if (&*BI != RetI)
3046 return false;
3047
3048 // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3049 // call.
3050 auto MayBePermittedAsTailCall = [&](const auto *CI) {
3051 return TLI->mayBeEmittedAsTailCall(CI) &&
3052 attributesPermitTailCall(BB->getParent(), CI, RetI, *TLI);
3053 };
3054
3055 SmallVector<BasicBlock *, 4> TailCallBBs;
3056 // Record the call instructions so we can insert any fake uses
3057 // that need to be preserved before them.
3058 SmallVector<const CallInst *, 4> CallInsts;
3059 if (PN) {
3060 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3061 // Look through bitcasts.
3062 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3063 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3064 BasicBlock *PredBB = PN->getIncomingBlock(I);
3065 // Make sure the phi value is indeed produced by the tail call.
3066 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3067 MayBePermittedAsTailCall(CI)) {
3068 TailCallBBs.push_back(PredBB);
3069 CallInsts.push_back(CI);
3070 } else {
3071 // Consider the cases in which the phi value is indirectly produced by
3072 // the tail call, for example when encountering memset(), memmove(),
3073 // strcpy(), whose return value may have been optimized out. In such
3074 // cases, the value needs to be the first function argument.
3075 //
3076 // bb0:
3077 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3078 // br label %return
3079 // return:
3080 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3081 if (PredBB && PredBB->getSingleSuccessor() == BB)
3082 CI = dyn_cast_or_null<CallInst>(
3083 PredBB->getTerminator()->getPrevNode());
3084
3085 if (CI && CI->use_empty() &&
3086 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3087 IncomingVal == CI->getArgOperand(0) &&
3088 MayBePermittedAsTailCall(CI)) {
3089 TailCallBBs.push_back(PredBB);
3090 CallInsts.push_back(CI);
3091 }
3092 }
3093 }
3094 } else {
3095 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3096 for (BasicBlock *Pred : predecessors(BB)) {
3097 if (!VisitedBBs.insert(Pred).second)
3098 continue;
3099 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3100 CallInst *CI = dyn_cast<CallInst>(I);
3101 if (CI && CI->use_empty() && MayBePermittedAsTailCall(CI)) {
3102 // Either we return void or the return value must be the first
3103 // argument of a known intrinsic or library function.
3104 if (!V || isa<UndefValue>(V) ||
3105 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3106 V == CI->getArgOperand(0))) {
3107 TailCallBBs.push_back(Pred);
3108 CallInsts.push_back(CI);
3109 }
3110 }
3111 }
3112 }
3113 }
3114
3115 bool Changed = false;
3116 for (auto const &TailCallBB : TailCallBBs) {
3117 // Make sure the call instruction is followed by an unconditional branch to
3118 // the return block.
3119 UncondBrInst *BI = dyn_cast<UncondBrInst>(TailCallBB->getTerminator());
3120 if (!BI || BI->getSuccessor() != BB)
3121 continue;
3122
3123 // Duplicate the return into TailCallBB.
3124 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB, DTU);
3125 assert(!VerifyBFIUpdates ||
3126 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3127 BFI->setBlockFreq(BB,
3128 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3129 ModifiedDT = ModifyDT::ModifyBBDT;
3130 Changed = true;
3131 ++NumRetsDup;
3132 }
3133
3134 // If we eliminated all predecessors of the block, delete the block now.
3135 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3136 // Copy the fake uses found in the original return block to all blocks
3137 // that contain tail calls.
3138 for (auto *CI : CallInsts) {
3139 for (auto const *FakeUse : FakeUses) {
3140 auto *ClonedInst = FakeUse->clone();
3141 ClonedInst->insertBefore(CI->getIterator());
3142 }
3143 }
3144 DTU->deleteBB(BB);
3145 }
3146
3147 return Changed;
3148}
3149
3150//===----------------------------------------------------------------------===//
3151// Memory Optimization
3152//===----------------------------------------------------------------------===//
3153
3154namespace {
3155
3156/// This is an extended version of TargetLowering::AddrMode
3157/// which holds actual Value*'s for register values.
3158struct ExtAddrMode : public TargetLowering::AddrMode {
3159 Value *BaseReg = nullptr;
3160 Value *ScaledReg = nullptr;
3161 Value *OriginalValue = nullptr;
3162 bool InBounds = true;
3163
3164 enum FieldName {
3165 NoField = 0x00,
3166 BaseRegField = 0x01,
3167 BaseGVField = 0x02,
3168 BaseOffsField = 0x04,
3169 ScaledRegField = 0x08,
3170 ScaleField = 0x10,
3171 MultipleFields = 0xff
3172 };
3173
3174 ExtAddrMode() = default;
3175
3176 void print(raw_ostream &OS) const;
3177 void dump() const;
3178
3179 // Replace From in ExtAddrMode with To.
3180 // E.g., SExt insts may be promoted and deleted. We should replace them with
3181 // the promoted values.
3182 void replaceWith(Value *From, Value *To) {
3183 if (ScaledReg == From)
3184 ScaledReg = To;
3185 }
3186
3187 FieldName compare(const ExtAddrMode &other) {
3188 // First check that the types are the same on each field, as differing types
3189 // is something we can't cope with later on.
3190 if (BaseReg && other.BaseReg &&
3191 BaseReg->getType() != other.BaseReg->getType())
3192 return MultipleFields;
3193 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3194 return MultipleFields;
3195 if (ScaledReg && other.ScaledReg &&
3196 ScaledReg->getType() != other.ScaledReg->getType())
3197 return MultipleFields;
3198
3199 // Conservatively reject 'inbounds' mismatches.
3200 if (InBounds != other.InBounds)
3201 return MultipleFields;
3202
3203 // Check each field to see if it differs.
3204 unsigned Result = NoField;
3205 if (BaseReg != other.BaseReg)
3206 Result |= BaseRegField;
3207 if (BaseGV != other.BaseGV)
3208 Result |= BaseGVField;
3209 if (BaseOffs != other.BaseOffs)
3210 Result |= BaseOffsField;
3211 if (ScaledReg != other.ScaledReg)
3212 Result |= ScaledRegField;
3213 // Don't count 0 as being a different scale, because that actually means
3214 // unscaled (which will already be counted by having no ScaledReg).
3215 if (Scale && other.Scale && Scale != other.Scale)
3216 Result |= ScaleField;
3217
3218 if (llvm::popcount(Result) > 1)
3219 return MultipleFields;
3220 else
3221 return static_cast<FieldName>(Result);
3222 }
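 // A minimal sketch of how compare() behaves (assuming hypothetical values
 // %a and %b): modes [Base:%a + 8] and [Base:%b + 8] differ only in the base
 // register, so compare() returns BaseRegField. If they differed in the
 // offset as well, popcount(Result) > 1 and the result is MultipleFields.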
3223
3224 // An AddrMode is trivial if it involves no calculation, i.e., it is just a
3225 // base with no offset.
3226 bool isTrivial() {
3227 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3228 // trivial if at most one of these terms is nonzero, except that BaseGV and
3229 // BaseReg both being zero actually means a null pointer value, which we
3230 // consider to be 'non-zero' here.
3231 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3232 }
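 // A brief sketch of the predicate (with hypothetical operands): [GV:@g] and
 // [Base:%p] are trivial (a lone base), while [Base:%p + 4], [4*%x], or
 // [GV:@g + Base:%p] all involve computation and are not.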
3233
3234 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3235 switch (Field) {
3236 default:
3237 return nullptr;
3238 case BaseRegField:
3239 return BaseReg;
3240 case BaseGVField:
3241 return BaseGV;
3242 case ScaledRegField:
3243 return ScaledReg;
3244 case BaseOffsField:
3245 return ConstantInt::getSigned(IntPtrTy, BaseOffs);
3246 }
3247 }
3248
3249 void SetCombinedField(FieldName Field, Value *V,
3250 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3251 switch (Field) {
3252 default:
3253 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3254 break;
3255 case ExtAddrMode::BaseRegField:
3256 BaseReg = V;
3257 break;
3258 case ExtAddrMode::BaseGVField:
3259 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3260 // in the BaseReg field.
3261 assert(BaseReg == nullptr);
3262 BaseReg = V;
3263 BaseGV = nullptr;
3264 break;
3265 case ExtAddrMode::ScaledRegField:
3266 ScaledReg = V;
3267 // If we have a mix of scaled and unscaled addrmodes then we want scale
3268 // to be the scale and not zero.
3269 if (!Scale)
3270 for (const ExtAddrMode &AM : AddrModes)
3271 if (AM.Scale) {
3272 Scale = AM.Scale;
3273 break;
3274 }
3275 break;
3276 case ExtAddrMode::BaseOffsField:
3277 // The offset is no longer a constant, so it goes in ScaledReg with a
3278 // scale of 1.
3279 assert(ScaledReg == nullptr);
3280 ScaledReg = V;
3281 Scale = 1;
3282 BaseOffs = 0;
3283 break;
3284 }
3285 }
3286};
3287
3288#ifndef NDEBUG
3289static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3290 AM.print(OS);
3291 return OS;
3292}
3293#endif
3294
3295#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3296void ExtAddrMode::print(raw_ostream &OS) const {
3297 bool NeedPlus = false;
3298 OS << "[";
3299 if (InBounds)
3300 OS << "inbounds ";
3301 if (BaseGV) {
3302 OS << "GV:";
3303 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3304 NeedPlus = true;
3305 }
3306
3307 if (BaseOffs) {
3308 OS << (NeedPlus ? " + " : "") << BaseOffs;
3309 NeedPlus = true;
3310 }
3311
3312 if (BaseReg) {
3313 OS << (NeedPlus ? " + " : "") << "Base:";
3314 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3315 NeedPlus = true;
3316 }
3317 if (Scale) {
3318 OS << (NeedPlus ? " + " : "") << Scale << "*";
3319 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3320 }
3321
3322 OS << ']';
3323}
3324
3325LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3326 print(dbgs());
3327 dbgs() << '\n';
3328}
3329#endif
3330
3331} // end anonymous namespace
3332
3333namespace {
3334
3335/// This class provides transaction based operation on the IR.
3336/// Every change made through this class is recorded in the internal state and
3337/// can be undone (rollback) until commit is called.
3338/// CGP does not check if instructions could be speculatively executed when
3339/// moved. Preserving the original location would pessimize the debugging
3340/// experience, as well as negatively impact the quality of sample PGO.
3341class TypePromotionTransaction {
3342 /// This represents the common interface of the individual transaction.
3343 /// Each class implements the logic for doing one specific modification on
3344 /// the IR via the TypePromotionTransaction.
3345 class TypePromotionAction {
3346 protected:
3347 /// The Instruction modified.
3348 Instruction *Inst;
3349
3350 public:
3351 /// Constructor of the action.
3352 /// The constructor performs the related action on the IR.
3353 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3354
3355 virtual ~TypePromotionAction() = default;
3356
3357 /// Undo the modification done by this action.
3358 /// When this method is called, the IR must be in the same state as it was
3359 /// before this action was applied.
3360 /// \pre Undoing the action works if and only if the IR is in the exact same
3361 /// state as it was directly after this action was applied.
3362 virtual void undo() = 0;
3363
3364 /// Commit every change made by this action.
3365 /// When the action's results on the IR are to be kept, it is important
3366 /// to call this function; otherwise hidden information may be kept forever.
3367 virtual void commit() {
3368 // Nothing to be done, this action is not doing anything.
3369 }
3370 };
3371
3372 /// Utility to remember the position of an instruction.
3373 class InsertionHandler {
3374 /// Position of an instruction.
3375 /// Either an instruction:
3376 /// - Is the first in a basic block: BB is used.
3377 /// - Has a previous instruction: PrevInst is used.
3378 struct {
3379 BasicBlock::iterator PrevInst;
3380 BasicBlock *BB;
3381 } Point;
3382 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3383
3384 /// Remember whether or not the instruction had a previous instruction.
3385 bool HasPrevInstruction;
3386
3387 public:
3388 /// Record the position of \p Inst.
3389 InsertionHandler(Instruction *Inst) {
3390 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3391 BasicBlock *BB = Inst->getParent();
3392
3393 // Record where we would have to re-insert the instruction in the sequence
3394 // of DbgRecords, if we ended up reinserting.
3395 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3396
3397 if (HasPrevInstruction) {
3398 Point.PrevInst = std::prev(Inst->getIterator());
3399 } else {
3400 Point.BB = BB;
3401 }
3402 }
3403
3404 /// Insert \p Inst at the recorded position.
3405 void insert(Instruction *Inst) {
3406 if (HasPrevInstruction) {
3407 if (Inst->getParent())
3408 Inst->removeFromParent();
3409 Inst->insertAfter(Point.PrevInst);
3410 } else {
3411 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3412 if (Inst->getParent())
3413 Inst->moveBefore(*Point.BB, Position);
3414 else
3415 Inst->insertBefore(*Point.BB, Position);
3416 }
3417
3418 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3419 }
3420 };
3421
3422 /// Move an instruction before another.
3423 class InstructionMoveBefore : public TypePromotionAction {
3424 /// Original position of the instruction.
3425 InsertionHandler Position;
3426
3427 public:
3428 /// Move \p Inst before \p Before.
3429 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3430 : TypePromotionAction(Inst), Position(Inst) {
3431 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3432 << "\n");
3433 Inst->moveBefore(Before);
3434 }
3435
3436 /// Move the instruction back to its original position.
3437 void undo() override {
3438 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3439 Position.insert(Inst);
3440 }
3441 };
3442
3443 /// Set the operand of an instruction with a new value.
3444 class OperandSetter : public TypePromotionAction {
3445 /// Original operand of the instruction.
3446 Value *Origin;
3447
3448 /// Index of the modified operand.
3449 unsigned Idx;
3450
3451 public:
3452 /// Set \p Idx operand of \p Inst with \p NewVal.
3453 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3454 : TypePromotionAction(Inst), Idx(Idx) {
3455 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3456 << "for:" << *Inst << "\n"
3457 << "with:" << *NewVal << "\n");
3458 Origin = Inst->getOperand(Idx);
3459 Inst->setOperand(Idx, NewVal);
3460 }
3461
3462 /// Restore the original value of the instruction.
3463 void undo() override {
3464 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3465 << "for: " << *Inst << "\n"
3466 << "with: " << *Origin << "\n");
3467 Inst->setOperand(Idx, Origin);
3468 }
3469 };
3470
3471 /// Hide the operands of an instruction.
3472 /// Do as if this instruction was not using any of its operands.
3473 class OperandsHider : public TypePromotionAction {
3474 /// The list of original operands.
3475 SmallVector<Value *, 4> OriginalValues;
3476
3477 public:
3478 /// Remove \p Inst from the uses of the operands of \p Inst.
3479 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3480 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3481 unsigned NumOpnds = Inst->getNumOperands();
3482 OriginalValues.reserve(NumOpnds);
3483 for (unsigned It = 0; It < NumOpnds; ++It) {
3484 // Save the current operand.
3485 Value *Val = Inst->getOperand(It);
3486 OriginalValues.push_back(Val);
3487 // Set a dummy one.
3488 // We could use OperandSetter here, but that would imply an overhead
3489 // that we are not willing to pay.
3490 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3491 }
3492 }
3493
3494 /// Restore the original list of uses.
3495 void undo() override {
3496 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3497 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3498 Inst->setOperand(It, OriginalValues[It]);
3499 }
3500 };
3501
3502 /// Build a truncate instruction.
3503 class TruncBuilder : public TypePromotionAction {
3504 Value *Val;
3505
3506 public:
3507 /// Build a truncate instruction of \p Opnd producing a \p Ty
3508 /// result.
3509 /// trunc Opnd to Ty.
3510 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3511 IRBuilder<> Builder(Opnd);
3512 Builder.SetCurrentDebugLocation(DebugLoc());
3513 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3514 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3515 }
3516
3517 /// Get the built value.
3518 Value *getBuiltValue() { return Val; }
3519
3520 /// Remove the built instruction.
3521 void undo() override {
3522 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3523 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3524 IVal->eraseFromParent();
3525 }
3526 };
3527
3528 /// Build a sign extension instruction.
3529 class SExtBuilder : public TypePromotionAction {
3530 Value *Val;
3531
3532 public:
3533 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3534 /// result.
3535 /// sext Opnd to Ty.
3536 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3537 : TypePromotionAction(InsertPt) {
3538 IRBuilder<> Builder(InsertPt);
3539 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3540 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3541 }
3542
3543 /// Get the built value.
3544 Value *getBuiltValue() { return Val; }
3545
3546 /// Remove the built instruction.
3547 void undo() override {
3548 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3549 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3550 IVal->eraseFromParent();
3551 }
3552 };
3553
3554 /// Build a zero extension instruction.
3555 class ZExtBuilder : public TypePromotionAction {
3556 Value *Val;
3557
3558 public:
3559 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3560 /// result.
3561 /// zext Opnd to Ty.
3562 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3563 : TypePromotionAction(InsertPt) {
3564 IRBuilder<> Builder(InsertPt);
3565 Builder.SetCurrentDebugLocation(DebugLoc());
3566 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3567 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3568 }
3569
3570 /// Get the built value.
3571 Value *getBuiltValue() { return Val; }
3572
3573 /// Remove the built instruction.
3574 void undo() override {
3575 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3576 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3577 IVal->eraseFromParent();
3578 }
3579 };
3580
3581 /// Mutate an instruction to another type.
3582 class TypeMutator : public TypePromotionAction {
3583 /// Record the original type.
3584 Type *OrigTy;
3585
3586 public:
3587 /// Mutate the type of \p Inst into \p NewTy.
3588 TypeMutator(Instruction *Inst, Type *NewTy)
3589 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3590 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3591 << "\n");
3592 Inst->mutateType(NewTy);
3593 }
3594
3595 /// Mutate the instruction back to its original type.
3596 void undo() override {
3597 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3598 << "\n");
3599 Inst->mutateType(OrigTy);
3600 }
3601 };
3602
3603 /// Replace the uses of an instruction by another instruction.
3604 class UsesReplacer : public TypePromotionAction {
3605 /// Helper structure to keep track of the replaced uses.
3606 struct InstructionAndIdx {
3607 /// The instruction that uses the replaced instruction.
3608 Instruction *Inst;
3609
3610 /// The operand index at which the replaced instruction is used.
3611 unsigned Idx;
3612
3613 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3614 : Inst(Inst), Idx(Idx) {}
3615 };
3616
3617 /// Keep track of the original uses (pair Instruction, Index).
3618 SmallVector<InstructionAndIdx, 4> OriginalUses;
3619 /// Keep track of the debug users.
3620 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3621
3622 /// Keep track of the new value so that we can undo it by replacing
3623 /// instances of the new value with the original value.
3624 Value *New;
3625
3627
3628 public:
3629 /// Replace all the use of \p Inst by \p New.
3630 UsesReplacer(Instruction *Inst, Value *New)
3631 : TypePromotionAction(Inst), New(New) {
3632 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3633 << "\n");
3634 // Record the original uses.
3635 for (Use &U : Inst->uses()) {
3636 Instruction *UserI = cast<Instruction>(U.getUser());
3637 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3638 }
3639 // Record the debug uses separately. They are not in the instruction's
3640 // use list, but they are replaced by RAUW.
3641 findDbgValues(Inst, DbgVariableRecords);
3642
3643 // Now, we can replace the uses.
3644 Inst->replaceAllUsesWith(New);
3645 }
3646
3647 /// Reassign the original uses of Inst to Inst.
3648 void undo() override {
3649 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3650 for (InstructionAndIdx &Use : OriginalUses)
3651 Use.Inst->setOperand(Use.Idx, Inst);
3652 // RAUW has replaced all original uses with references to the new value,
3653 // including the debug uses. Since we are undoing the replacements,
3654 // the original debug uses must also be reinstated to maintain the
3655 // correctness and utility of debug value records.
3656 for (DbgVariableRecord *DVR : DbgVariableRecords)
3657 DVR->replaceVariableLocationOp(New, Inst);
3658 }
3659 };
3660
3661 /// Remove an instruction from the IR.
3662 class InstructionRemover : public TypePromotionAction {
3663 /// Original position of the instruction.
3664 InsertionHandler Inserter;
3665
3666 /// Helper structure to hide all the links to the instruction. In other
3667 /// words, this helps to behave as if the instruction were removed.
3668 OperandsHider Hider;
3669
3670 /// Keep track of the uses replaced, if any.
3671 UsesReplacer *Replacer = nullptr;
3672
3673 /// Keep track of instructions removed.
3674 SetOfInstrs &RemovedInsts;
3675
3676 public:
3677 /// Remove all references to \p Inst and optionally replace all its
3678 /// uses with New.
3679 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3680 /// \pre If !Inst->use_empty(), then New != nullptr
3681 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3682 Value *New = nullptr)
3683 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3684 RemovedInsts(RemovedInsts) {
3685 if (New)
3686 Replacer = new UsesReplacer(Inst, New);
3687 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3688 RemovedInsts.insert(Inst);
3689 /// The instructions removed here will be freed after completing
3690 /// optimizeBlock() for all blocks as we need to keep track of the
3691 /// removed instructions during promotion.
3692 Inst->removeFromParent();
3693 }
3694
3695 ~InstructionRemover() override { delete Replacer; }
3696
3697 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3698 InstructionRemover(const InstructionRemover &other) = delete;
3699
3700 /// Resurrect the instruction and reassign it to the proper uses if
3701 /// a new value was provided when building this action.
3702 void undo() override {
3703 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3704 Inserter.insert(Inst);
3705 if (Replacer)
3706 Replacer->undo();
3707 Hider.undo();
3708 RemovedInsts.erase(Inst);
3709 }
3710 };
3711
3712public:
3713 /// Restoration point.
3714 /// The restoration point is a pointer to an action instead of an iterator
3715 /// because the iterator may be invalidated but not the pointer.
3716 using ConstRestorationPt = const TypePromotionAction *;
3717
3718 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3719 : RemovedInsts(RemovedInsts) {}
3720
3721 /// Commit every change made in this transaction. Return true if any change
3722 /// happened.
3723 bool commit();
3724
3725 /// Undo all the changes made after the given point.
3726 void rollback(ConstRestorationPt Point);
3727
3728 /// Get the current restoration point.
3729 ConstRestorationPt getRestorationPoint() const;
3730
3731 /// \name API for IR modification with state keeping to support rollback.
3732 /// @{
3733 /// Same as Instruction::setOperand.
3734 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3735
3736 /// Same as Instruction::eraseFromParent.
3737 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3738
3739 /// Same as Value::replaceAllUsesWith.
3740 void replaceAllUsesWith(Instruction *Inst, Value *New);
3741
3742 /// Same as Value::mutateType.
3743 void mutateType(Instruction *Inst, Type *NewTy);
3744
3745 /// Same as IRBuilder::createTrunc.
3746 Value *createTrunc(Instruction *Opnd, Type *Ty);
3747
3748 /// Same as IRBuilder::createSExt.
3749 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3750
3751 /// Same as IRBuilder::createZExt.
3752 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3753
3754private:
3755 /// The ordered list of actions made so far.
3756 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3757
3758 using CommitPt =
3759 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3760
3761 SetOfInstrs &RemovedInsts;
3762};
3763
3764} // end anonymous namespace
3765
3766void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3767 Value *NewVal) {
3768 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3769 Inst, Idx, NewVal));
3770}
3771
3772void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3773 Value *NewVal) {
3774 Actions.push_back(
3775 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3776 Inst, RemovedInsts, NewVal));
3777}
3778
3779void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3780 Value *New) {
3781 Actions.push_back(
3782 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3783}
3784
3785void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3786 Actions.push_back(
3787 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3788}
3789
3790Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3791 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3792 Value *Val = Ptr->getBuiltValue();
3793 Actions.push_back(std::move(Ptr));
3794 return Val;
3795}
3796
3797Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3798 Type *Ty) {
3799 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3800 Value *Val = Ptr->getBuiltValue();
3801 Actions.push_back(std::move(Ptr));
3802 return Val;
3803}
3804
3805Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3806 Type *Ty) {
3807 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3808 Value *Val = Ptr->getBuiltValue();
3809 Actions.push_back(std::move(Ptr));
3810 return Val;
3811}
3812
3813TypePromotionTransaction::ConstRestorationPt
3814TypePromotionTransaction::getRestorationPoint() const {
3815 return !Actions.empty() ? Actions.back().get() : nullptr;
3816}
3817
3818bool TypePromotionTransaction::commit() {
3819 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3820 Action->commit();
3821 bool Modified = !Actions.empty();
3822 Actions.clear();
3823 return Modified;
3824}
3825
3826void TypePromotionTransaction::rollback(
3827 TypePromotionTransaction::ConstRestorationPt Point) {
3828 while (!Actions.empty() && Point != Actions.back().get()) {
3829 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3830 Curr->undo();
3831 }
3832}
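// A minimal usage sketch of the transaction API above (the names Removed,
// Inst, NewVal, and Profitable are hypothetical placeholders):
//   TypePromotionTransaction TPT(Removed);
//   TypePromotionTransaction::ConstRestorationPt RestorePt =
//       TPT.getRestorationPoint();
//   TPT.setOperand(Inst, 0, NewVal); // recorded as an OperandSetter action
//   if (Profitable)
//     TPT.commit();                  // keep every recorded change
//   else
//     TPT.rollback(RestorePt);       // undo actions made after RestorePt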
3833
3834namespace {
3835
3836/// A helper class for matching addressing modes.
3837///
3838/// This encapsulates the logic for matching the target-legal addressing modes.
3839class AddressingModeMatcher {
3840 SmallVectorImpl<Instruction *> &AddrModeInsts;
3841 const TargetLowering &TLI;
3842 const TargetRegisterInfo &TRI;
3843 const DataLayout &DL;
3844 const LoopInfo &LI;
3845 const std::function<const DominatorTree &()> getDTFn;
3846
3847 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3848 /// the memory instruction that we're computing this address for.
3849 Type *AccessTy;
3850 unsigned AddrSpace;
3851 Instruction *MemoryInst;
3852
3853 /// This is the addressing mode that we're building up. This is
3854 /// part of the return value of this addressing mode matching stuff.
3855 ExtAddrMode &AddrMode;
3856
3857 /// The instructions inserted by other CodeGenPrepare optimizations.
3858 const SetOfInstrs &InsertedInsts;
3859
3860 /// A map from the instructions to their type before promotion.
3861 InstrToOrigTy &PromotedInsts;
3862
3863 /// The ongoing transaction where every action should be registered.
3864 TypePromotionTransaction &TPT;
3865
3866 // A GEP whose offset is too large to be folded into the addressing mode.
3867 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3868
3869 /// This is set to true when we should not do profitability checks.
3870 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3871 bool IgnoreProfitability;
3872
3873 /// True if we are optimizing for size.
3874 bool OptSize = false;
3875
3876 ProfileSummaryInfo *PSI;
3877 BlockFrequencyInfo *BFI;
3878
3879 AddressingModeMatcher(
3880 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3881 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3882 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3883 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3884 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3885 TypePromotionTransaction &TPT,
3886 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3887 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3888 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3889 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3890 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3891 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3892 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3893 IgnoreProfitability = false;
3894 }
3895
3896public:
3897 /// Find the maximal addressing mode that a load/store of V can fold,
3898 /// given an access type of AccessTy. This returns a list of involved
3899 /// instructions in AddrModeInsts.
3900 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3901 /// optimizations.
3902 /// \p PromotedInsts maps the instructions to their type before promotion.
3903 /// \p TPT The ongoing transaction where every action should be registered.
3904 static ExtAddrMode
3905 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3906 SmallVectorImpl<Instruction *> &AddrModeInsts,
3907 const TargetLowering &TLI, const LoopInfo &LI,
3908 const std::function<const DominatorTree &()> getDTFn,
3909 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3910 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3911 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3912 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3913 ExtAddrMode Result;
3914
3915 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3916 AccessTy, AS, MemoryInst, Result,
3917 InsertedInsts, PromotedInsts, TPT,
3918 LargeOffsetGEP, OptSize, PSI, BFI)
3919 .matchAddr(V, 0);
3920 (void)Success;
3921 assert(Success && "Couldn't select *anything*?");
3922 return Result;
3923 }
3924
3925private:
3926 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3927 bool matchAddr(Value *Addr, unsigned Depth);
3928 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3929 bool *MovedAway = nullptr);
3930 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3931 ExtAddrMode &AMBefore,
3932 ExtAddrMode &AMAfter);
3933 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3934 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3935 Value *PromotedOperand) const;
3936};
3937
3938class PhiNodeSet;
3939
3940/// An iterator for PhiNodeSet.
3941class PhiNodeSetIterator {
3942 PhiNodeSet *const Set;
3943 size_t CurrentIndex = 0;
3944
3945public:
3946 /// The constructor. Start should point to either a valid element, or be equal
3947 /// to the size of the underlying SmallVector of the PhiNodeSet.
3948 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3949 PHINode *operator*() const;
3950 PhiNodeSetIterator &operator++();
3951 bool operator==(const PhiNodeSetIterator &RHS) const;
3952 bool operator!=(const PhiNodeSetIterator &RHS) const;
3953};
3954
3955/// Keeps a set of PHINodes.
3956///
3957/// This is a minimal set implementation for a specific use case:
3958/// It is very fast when there are very few elements, but also provides good
3959/// performance when there are many. It is similar to SmallPtrSet, but also
3960/// provides iteration by insertion order, which is deterministic and stable
3961/// across runs. It is also similar to SmallSetVector, but provides removing
3962/// elements in O(1) time. This is achieved by not actually removing the element
3963/// from the underlying vector, so comes at the cost of using more memory, but
3964/// that is fine, since PhiNodeSets are used as short lived objects.
3965class PhiNodeSet {
3966 friend class PhiNodeSetIterator;
3967
3968 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3969 using iterator = PhiNodeSetIterator;
3970
3971 /// Keeps the elements in the order of their insertion in the underlying
3972 /// vector. To achieve constant time removal, it never deletes any element.
3973 SmallVector<PHINode *, 32> NodeList;
3974
3975 /// Keeps the elements in the underlying set implementation. This (and not the
3976 /// NodeList defined above) is the source of truth on whether an element
3977 /// is actually in the collection.
3978 MapType NodeMap;
3979
3980 /// Points to the first valid (not deleted) element when the set is not empty
3981 /// and the value is not zero. Equals the size of the underlying vector
3982 /// when the set is empty. When the value is 0, as in the beginning, the
3983 /// first element may or may not be valid.
3984 size_t FirstValidElement = 0;
3985
3986public:
3987 /// Inserts a new element to the collection.
3988 /// \returns true if the element is actually added, i.e. was not in the
3989 /// collection before the operation.
3990 bool insert(PHINode *Ptr) {
3991 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3992 NodeList.push_back(Ptr);
3993 return true;
3994 }
3995 return false;
3996 }
3997
3998 /// Removes the element from the collection.
3999 /// \returns whether the element is actually removed, i.e. was in the
4000 /// collection before the operation.
4001 bool erase(PHINode *Ptr) {
4002 if (NodeMap.erase(Ptr)) {
4003 SkipRemovedElements(FirstValidElement);
4004 return true;
4005 }
4006 return false;
4007 }
4008
4009 /// Removes all elements and clears the collection.
4010 void clear() {
4011 NodeMap.clear();
4012 NodeList.clear();
4013 FirstValidElement = 0;
4014 }
4015
4016 /// \returns an iterator that will iterate the elements in the order of
4017 /// insertion.
4018 iterator begin() {
4019 if (FirstValidElement == 0)
4020 SkipRemovedElements(FirstValidElement);
4021 return PhiNodeSetIterator(this, FirstValidElement);
4022 }
4023
4024 /// \returns an iterator that points to the end of the collection.
4025 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
4026
4027 /// Returns the number of elements in the collection.
4028 size_t size() const { return NodeMap.size(); }
4029
4030 /// \returns 1 if the given element is in the collection, and 0 otherwise.
4031 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
4032
4033private:
4034 /// Updates the CurrentIndex so that it will point to a valid element.
4035 ///
4036 /// If the element of NodeList at CurrentIndex is valid, it does not
4037 /// change it. If there are no more valid elements, it updates CurrentIndex
4038 /// to point to the end of the NodeList.
4039 void SkipRemovedElements(size_t &CurrentIndex) {
4040 while (CurrentIndex < NodeList.size()) {
4041 auto it = NodeMap.find(NodeList[CurrentIndex]);
4042 // If the element has been deleted and added again later, NodeMap will
4043 // point to a different index, so CurrentIndex will still be invalid.
4044 if (it != NodeMap.end() && it->second == CurrentIndex)
4045 break;
4046 ++CurrentIndex;
4047 }
4048 }
4049};
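// A short sketch of the erase semantics (P1 and P2 are hypothetical PHIs):
//   PhiNodeSet S;
//   S.insert(P1); S.insert(P2); S.erase(P1);
// size() is now 1 and iteration yields only P2: erase() removes the entry
// from NodeMap but leaves the stale pointer in NodeList, and
// SkipRemovedElements advances past entries no longer present in NodeMap.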
4050
4051PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4052 : Set(Set), CurrentIndex(Start) {}
4053
4054PHINode *PhiNodeSetIterator::operator*() const {
4055 assert(CurrentIndex < Set->NodeList.size() &&
4056 "PhiNodeSet access out of range");
4057 return Set->NodeList[CurrentIndex];
4058}
4059
4060PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4061 assert(CurrentIndex < Set->NodeList.size() &&
4062 "PhiNodeSet access out of range");
4063 ++CurrentIndex;
4064 Set->SkipRemovedElements(CurrentIndex);
4065 return *this;
4066}
4067
4068bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4069 return CurrentIndex == RHS.CurrentIndex;
4070}
4071
4072bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4073 return !((*this) == RHS);
4074}
4075
4076/// Keep track of simplification of Phi nodes.
4077/// Accept the set of all phi nodes and erase phi node from this set
4078/// if it is simplified.
4079class SimplificationTracker {
4080 DenseMap<Value *, Value *> Storage;
4081 // Tracks newly created Phi nodes. The elements are iterated by insertion
4082 // order.
4083 PhiNodeSet AllPhiNodes;
4084 // Tracks newly created Select nodes.
4085 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4086
4087public:
4088 Value *Get(Value *V) {
4089 do {
4090 auto SV = Storage.find(V);
4091 if (SV == Storage.end())
4092 return V;
4093 V = SV->second;
4094 } while (true);
4095 }
4096
4097 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4098
4099 void ReplacePhi(PHINode *From, PHINode *To) {
4100 Value *OldReplacement = Get(From);
4101 while (OldReplacement != From) {
4102 From = To;
4103 To = dyn_cast<PHINode>(OldReplacement);
4104 OldReplacement = Get(From);
4105 }
4106 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4107 Put(From, To);
4108 From->replaceAllUsesWith(To);
4109 AllPhiNodes.erase(From);
4110 From->eraseFromParent();
4111 }
4112
4113 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4114
4115 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4116
4117 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4118
4119 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4120
4121 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4122
4123 void destroyNewNodes(Type *CommonType) {
4124 // For safe erasing, replace the uses with dummy value first.
4125 auto *Dummy = PoisonValue::get(CommonType);
4126 for (auto *I : AllPhiNodes) {
4127 I->replaceAllUsesWith(Dummy);
4128 I->eraseFromParent();
4129 }
4130 AllPhiNodes.clear();
4131 for (auto *I : AllSelectNodes) {
4132 I->replaceAllUsesWith(Dummy);
4133 I->eraseFromParent();
4134 }
4135 AllSelectNodes.clear();
4136 }
4137};
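// A small sketch of the transitive lookup (A, B, C are hypothetical values):
//   ST.Put(A, B); ST.Put(B, C);
// ST.Get(A) returns C, the final replacement in the chain, which is what
// ReplacePhi relies on when collapsing chains of replaced PHI nodes.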
4138
4139/// A helper class for combining addressing modes.
4140class AddressingModeCombiner {
4141 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4142 typedef std::pair<PHINode *, PHINode *> PHIPair;
4143
4144private:
4145 /// The addressing modes we've collected.
4146 SmallVector<ExtAddrMode, 16> AddrModes;
4147
4148 /// The field in which the AddrModes differ, when we have more than one.
4149 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4150
4151 /// Are the AddrModes that we have all just equal to their original values?
4152 bool AllAddrModesTrivial = true;
4153
4154 /// Common Type for all different fields in addressing modes.
4155 Type *CommonType = nullptr;
4156
4157 const DataLayout &DL;
4158
4159 /// Original Address.
4160 Value *Original;
4161
4162 /// Common value among addresses
4163 Value *CommonValue = nullptr;
4164
4165public:
4166 AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
4167 : DL(DL), Original(OriginalValue) {}
4168
4169 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4170
4171 /// Get the combined AddrMode
4172 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4173
4174 /// Add a new AddrMode if it's compatible with the AddrModes we already
4175 /// have.
4176 /// \return True iff we succeeded in doing so.
4177 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4178 // Take note of whether we have any non-trivial AddrModes: we need to detect
4179 // when all AddrModes are trivial, as then we would introduce a phi or select
4180 // which just duplicates what's already there.
4181 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4182
4183 // If this is the first addrmode then everything is fine.
4184 if (AddrModes.empty()) {
4185 AddrModes.emplace_back(NewAddrMode);
4186 return true;
4187 }
4188
4189 // Figure out how different this is from the other address modes, which we
4190 // can do just by comparing against the first one given that we only care
4191 // about the cumulative difference.
4192 ExtAddrMode::FieldName ThisDifferentField =
4193 AddrModes[0].compare(NewAddrMode);
4194 if (DifferentField == ExtAddrMode::NoField)
4195 DifferentField = ThisDifferentField;
4196 else if (DifferentField != ThisDifferentField)
4197 DifferentField = ExtAddrMode::MultipleFields;
4198
4199 // If NewAddrMode differs in more than one dimension we cannot handle it.
4200 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4201
4202 // If Scale Field is different then we reject.
4203 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4204
4205 // We also must reject the case when the base offset is different and the
4206 // scale reg is not null; we cannot handle this case because the merge of
4207 // the different offsets would have to be used as the ScaledReg.
4208 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4209 !NewAddrMode.ScaledReg);
4210
4211 // We also must reject the case when the GV is different and a BaseReg is
4212 // installed, because we want to use the base reg as a merge of GV values.
4213 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4214 !NewAddrMode.HasBaseReg);
4215
4216 // Even if NewAddrMode is the same, we still need to collect it because its
4217 // original value is different. Later we will need all the original values
4218 // as anchors when finding the common Phi node.
4219 if (CanHandle)
4220 AddrModes.emplace_back(NewAddrMode);
4221 else
4222 AddrModes.clear();
4223
4224 return CanHandle;
4225 }
4226
4227 /// Combine the addressing modes we've collected into a single
4228 /// addressing mode.
4229 /// \return True iff we successfully combined them or we only had one so
4230 /// didn't need to combine them anyway.
4231 bool combineAddrModes() {
4232 // If we have no AddrModes then they can't be combined.
4233 if (AddrModes.size() == 0)
4234 return false;
4235
4236 // A single AddrMode can trivially be combined.
4237 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4238 return true;
4239
4240 // If the AddrModes we collected are all just equal to the value they are
4241 // derived from then combining them wouldn't do anything useful.
4242 if (AllAddrModesTrivial)
4243 return false;
4244
4245 if (!addrModeCombiningAllowed())
4246 return false;
4247
4248 // Build a map between <original value, basic block where we saw it> to
4249 // value of base register.
4250 // Bail out if there is no common type.
4251 FoldAddrToValueMapping Map;
4252 if (!initializeMap(Map))
4253 return false;
4254
4255 CommonValue = findCommon(Map);
4256 if (CommonValue)
4257 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4258 return CommonValue != nullptr;
4259 }
4260
4261private:
4262 /// `CommonValue` may be a placeholder inserted by us.
4263 /// If the placeholder is not used, we should remove this dead instruction.
4264 void eraseCommonValueIfDead() {
4265 if (CommonValue && CommonValue->use_empty())
4266 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4267 CommonInst->eraseFromParent();
4268 }
4269
4270 /// Initialize Map with anchor values. For each address seen,
4271 /// we set the value of the differing field seen in that address.
4272 /// At the same time we find a common type for the differing fields, which we
4273 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
4274 /// Return false if no common type is found.
4275 bool initializeMap(FoldAddrToValueMapping &Map) {
4276 // Keep track of keys where the value is null. We will need to replace it
4277 // with constant null when we know the common type.
4278 SmallVector<Value *, 2> NullValue;
4279 Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4280 for (auto &AM : AddrModes) {
4281 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4282 if (DV) {
4283 auto *Type = DV->getType();
4284 if (CommonType && CommonType != Type)
4285 return false;
4286 CommonType = Type;
4287 Map[AM.OriginalValue] = DV;
4288 } else {
4289 NullValue.push_back(AM.OriginalValue);
4290 }
4291 }
4292 assert(CommonType && "At least one non-null value must be!");
4293 for (auto *V : NullValue)
4294 Map[V] = Constant::getNullValue(CommonType);
4295 return true;
4296 }
4297
4298 /// We have a mapping between value A and another value B, where B was a field
4299 /// in the addressing mode represented by A. We also have an original value C
4300 /// representing the address we start with. Traversing from C through phis and
4301 /// selects, we ended up with the A's in the map. This utility function tries
4302 /// to find a value V which is a field in addressing mode C such that, by
4303 /// traversing through phi nodes and selects, we end up at the corresponding
4304 /// values B in the map. The utility creates new Phi/Select nodes if needed.
4305 // The simple example looks as follows:
4306 // BB1:
4307 // p1 = b1 + 40
4308 // br cond BB2, BB3
4309 // BB2:
4310 // p2 = b2 + 40
4311 // br BB3
4312 // BB3:
4313 // p = phi [p1, BB1], [p2, BB2]
4314 // v = load p
4315 // Map is
4316 // p1 -> b1
4317 // p2 -> b2
4318 // Request is
4319 // p -> ?
4320 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4321 Value *findCommon(FoldAddrToValueMapping &Map) {
4322 // Tracks the simplification of newly created phi nodes. The reason we use
4323 // this mapping is that we will add newly created Phi nodes to AddrToBase.
4324 // Simplification of Phi nodes is recursive, so some Phi node may
4325 // be simplified after we add it to AddrToBase. In reality this
4326 // simplification is possible only if the original phis/selects were not
4327 // simplified yet.
4328 // Using this mapping we can find the current value in AddrToBase.
4329 SimplificationTracker ST;
4330
4331 // First step, DFS to create PHI nodes for all intermediate blocks.
4332 // Also fill traverse order for the second step.
4333 SmallVector<Value *, 32> TraverseOrder;
4334 InsertPlaceholders(Map, TraverseOrder, ST);
4335
4336 // Second Step, fill new nodes by merged values and simplify if possible.
4337 FillPlaceholders(Map, TraverseOrder, ST);
4338
4339 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4340 ST.destroyNewNodes(CommonType);
4341 return nullptr;
4342 }
4343
4344 // Now we'd like to match the new Phi nodes to existing ones.
4345 unsigned PhiNotMatchedCount = 0;
4346 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4347 ST.destroyNewNodes(CommonType);
4348 return nullptr;
4349 }
4350
4351 auto *Result = ST.Get(Map.find(Original)->second);
4352 if (Result) {
4353 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4354 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4355 }
4356 return Result;
4357 }
4358
4359 /// Try to match PHI node to Candidate.
4360 /// Matcher tracks the matched Phi nodes.
4361 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4362 SmallSetVector<PHIPair, 8> &Matcher,
4363 PhiNodeSet &PhiNodesToMatch) {
4364 SmallVector<PHIPair, 8> WorkList;
4365 Matcher.insert({PHI, Candidate});
4366 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4367 MatchedPHIs.insert(PHI);
4368 WorkList.push_back({PHI, Candidate});
4369 SmallSet<PHIPair, 8> Visited;
4370 while (!WorkList.empty()) {
4371 auto Item = WorkList.pop_back_val();
4372 if (!Visited.insert(Item).second)
4373 continue;
4374 // We iterate over all incoming values to Phi to compare them.
4375 // If the values are different, both of them are Phis, the first one is a
4376 // Phi we added (subject to match), and both of them are in the same basic
4377 // block, then we can match our pair if the values match. So we state that
4378 // these values match and add them to the work list to verify that.
4379 for (auto *B : Item.first->blocks()) {
4380 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4381 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4382 if (FirstValue == SecondValue)
4383 continue;
4384
4385 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4386 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4387
4388 // One of them is not Phi or
4389 // The first one is not Phi node from the set we'd like to match or
4390 // Phi nodes from different basic blocks then
4391 // we will not be able to match.
4392 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4393 FirstPhi->getParent() != SecondPhi->getParent())
4394 return false;
4395
4396 // If we already matched them then continue.
4397 if (Matcher.count({FirstPhi, SecondPhi}))
4398 continue;
4399 // So the values are different and do not match. So we need them to
4400 // match. (But we register no more than one match per PHI node, so that
4401 // we won't later try to replace them twice.)
4402 if (MatchedPHIs.insert(FirstPhi).second)
4403 Matcher.insert({FirstPhi, SecondPhi});
4404 // But we must check it.
4405 WorkList.push_back({FirstPhi, SecondPhi});
4406 }
4407 }
4408 return true;
4409 }
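 // A small sketch of a successful match (hypothetical IR):
 //   %p = phi [ %a, %BB1 ], [ %b, %BB2 ] ; pre-existing (Candidate)
 //   %q = phi [ %a, %BB1 ], [ %b, %BB2 ] ; newly created (PHI)
 // Every incoming value already agrees, so (%q, %p) matches immediately.
 // When incoming values are themselves newly created PHIs, the pair is
 // pushed onto the work list and verified transitively.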
4410
4411 /// For the given set of PHI nodes (in the SimplificationTracker) try
4412 /// to find their equivalents.
4413 /// Returns false if this matching fails and creation of new Phi is disabled.
4414 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4415 unsigned &PhiNotMatchedCount) {
4416 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4417 // order, so the replacements (ReplacePhi) are also done in a deterministic
4418 // order.
4419 SmallSetVector<PHIPair, 8> Matched;
4420 SmallPtrSet<PHINode *, 8> WillNotMatch;
4421 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4422 while (PhiNodesToMatch.size()) {
4423 PHINode *PHI = *PhiNodesToMatch.begin();
4424
4425 // Add ourselves: if no Phi node in the basic block matches, we do not match.
4426 WillNotMatch.clear();
4427 WillNotMatch.insert(PHI);
4428
4429 // Traverse all Phis until we found equivalent or fail to do that.
4430 bool IsMatched = false;
4431 for (auto &P : PHI->getParent()->phis()) {
4432 // Skip new Phi nodes.
4433 if (PhiNodesToMatch.count(&P))
4434 continue;
4435 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4436 break;
4437 // If it does not match, collect all Phi nodes from the matcher.
4438 // If we end up with no match, then all these Phi nodes will not match
4439 // later.
4440 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4441 Matched.clear();
4442 }
4443 if (IsMatched) {
4444 // Replace all matched values and erase them.
4445 for (auto MV : Matched)
4446 ST.ReplacePhi(MV.first, MV.second);
4447 Matched.clear();
4448 continue;
4449 }
4450 // If we are not allowed to create new nodes then bail out.
4451 if (!AllowNewPhiNodes)
4452 return false;
4453 // Just remove all seen values in matcher. They will not match anything.
4454 PhiNotMatchedCount += WillNotMatch.size();
4455 for (auto *P : WillNotMatch)
4456 PhiNodesToMatch.erase(P);
4457 }
4458 return true;
4459 }
4460 /// Fill the placeholders with values from predecessors and simplify them.
4461 void FillPlaceholders(FoldAddrToValueMapping &Map,
4462 SmallVectorImpl<Value *> &TraverseOrder,
4463 SimplificationTracker &ST) {
4464 while (!TraverseOrder.empty()) {
4465 Value *Current = TraverseOrder.pop_back_val();
4466 assert(Map.contains(Current) && "No node to fill!!!");
4467 Value *V = Map[Current];
4468
4469 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4470 // CurrentValue also must be Select.
4471 auto *CurrentSelect = cast<SelectInst>(Current);
4472 auto *TrueValue = CurrentSelect->getTrueValue();
4473 assert(Map.contains(TrueValue) && "No True Value!");
4474 Select->setTrueValue(ST.Get(Map[TrueValue]));
4475 auto *FalseValue = CurrentSelect->getFalseValue();
4476 assert(Map.contains(FalseValue) && "No False Value!");
4477 Select->setFalseValue(ST.Get(Map[FalseValue]));
4478 } else {
4479 // Must be a Phi node then.
4480 auto *PHI = cast<PHINode>(V);
4481 // Fill the Phi node with values from predecessors.
4482 for (auto *B : predecessors(PHI->getParent())) {
4483 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4484 assert(Map.contains(PV) && "No predecessor Value!");
4485 PHI->addIncoming(ST.Get(Map[PV]), B);
4486 }
4487 }
4488 }
4489 }
4490
4491 /// Starting from the original value, recursively iterates over the def-use
4492 /// chain up to known ending values represented in a map. For each traversed
4493 /// phi/select inserts a placeholder Phi or Select.
4494 /// Reports all newly created Phi/Select nodes by adding them to the set.
4495 /// Also reports the order in which values have been traversed.
4496 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4497 SmallVectorImpl<Value *> &TraverseOrder,
4498 SimplificationTracker &ST) {
4499 SmallVector<Value *, 32> Worklist;
4500 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4501 "Address must be a Phi or Select node");
4502 auto *Dummy = PoisonValue::get(CommonType);
4503 Worklist.push_back(Original);
4504 while (!Worklist.empty()) {
4505 Value *Current = Worklist.pop_back_val();
4506 // If it is already visited or is an ending value, skip it.
4507 if (Map.contains(Current))
4508 continue;
4509 TraverseOrder.push_back(Current);
4510
4511 // CurrentValue must be a Phi node or select. All others must be covered
4512 // by anchors.
4513 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4514 // Is it OK to get metadata from OrigSelect?!
4515 // Create a Select placeholder with dummy value.
4516 SelectInst *Select =
4517 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4518 CurrentSelect->getName(),
4519 CurrentSelect->getIterator(), CurrentSelect);
4520 Map[Current] = Select;
4521 ST.insertNewSelect(Select);
4522 // We are interested in True and False values.
4523 Worklist.push_back(CurrentSelect->getTrueValue());
4524 Worklist.push_back(CurrentSelect->getFalseValue());
4525 } else {
4526 // It must be a Phi node then.
4527 PHINode *CurrentPhi = cast<PHINode>(Current);
4528 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4529 PHINode *PHI =
4530 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4531 Map[Current] = PHI;
4532 ST.insertNewPhi(PHI);
4533 append_range(Worklist, CurrentPhi->incoming_values());
4534 }
4535 }
4536 }
4537
4538 bool addrModeCombiningAllowed() {
4539 if (DisableComplexAddrModes)
4540 return false;
4541 switch (DifferentField) {
4542 default:
4543 return false;
4544 case ExtAddrMode::BaseRegField:
4545 return AddrSinkCombineBaseReg;
4546 case ExtAddrMode::BaseGVField:
4547 return AddrSinkCombineBaseGV;
4548 case ExtAddrMode::BaseOffsField:
4549 return AddrSinkCombineBaseOffs;
4550 case ExtAddrMode::ScaledRegField:
4551 return AddrSinkCombineScaledReg;
4552 }
4553 }
4554};
4555} // end anonymous namespace
4556
4557/// Try adding ScaleReg*Scale to the current addressing mode.
4558/// Return true and update AddrMode if this addr mode is legal for the target,
4559/// false if not.
4560bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4561 unsigned Depth) {
4562 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4563 // mode. Just process that directly.
4564 if (Scale == 1)
4565 return matchAddr(ScaleReg, Depth);
4566
4567 // If the scale is 0, it takes nothing to add this.
4568 if (Scale == 0)
4569 return true;
4570
4571 // If we already have a scale of this value, we can add to it, otherwise, we
4572 // need an available scale field.
4573 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4574 return false;
4575
4576 ExtAddrMode TestAddrMode = AddrMode;
4577
4578 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4579 // [A+B + A*7] -> [B+A*8].
4580 TestAddrMode.Scale += Scale;
4581 TestAddrMode.ScaledReg = ScaleReg;
4582
4583 // If the new address isn't legal, bail out.
4584 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4585 return false;
4586
4587 // It was legal, so commit it.
4588 AddrMode = TestAddrMode;
4589
4590 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4591 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4592 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4593 // go any further: we can reuse it and cannot eliminate it.
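 // For instance (a sketch with hypothetical values): with
 // ScaleReg = (%x + 7) and Scale = 4, the match below produces
 // ScaledReg = %x and BaseOffs += 28, provided the resulting mode is
 // still legal for the target.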
4594 ConstantInt *CI = nullptr;
4595 Value *AddLHS = nullptr;
4596 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4597 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4598 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4599 TestAddrMode.InBounds = false;
4600 TestAddrMode.ScaledReg = AddLHS;
4601 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4602
4603 // If this addressing mode is legal, commit it and remember that we folded
4604 // this instruction.
4605 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4606 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4607 AddrMode = TestAddrMode;
4608 return true;
4609 }
4610 // Restore status quo.
4611 TestAddrMode = AddrMode;
4612 }
4613
4614 // If this is an add recurrence with a constant step, return the increment
4615 // instruction and the canonicalized step.
4616 auto GetConstantStep =
4617 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4618 auto *PN = dyn_cast<PHINode>(V);
4619 if (!PN)
4620 return std::nullopt;
4621 auto IVInc = getIVIncrement(PN, &LI);
4622 if (!IVInc)
4623 return std::nullopt;
4624 // TODO: The result of the intrinsics above is two's complement. However,
4625 // when the IV inc is expressed as add or sub, iv.next is potentially a poison
4626 // value. If it has nuw or nsw flags, we need to make sure that these flags
4627 // are inferable at the point of the memory instruction. Otherwise we would be
4628 // replacing a well-defined two's-complement computation with poison. To avoid
4629 // the potentially complex analysis needed to prove this, we reject such cases.
4630 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4631 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4632 return std::nullopt;
4633 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4634 return std::make_pair(IVInc->first, ConstantStep->getValue());
4635 return std::nullopt;
4636 };
4637
4638 // Try to account for the following special case:
4639 // 1. ScaleReg is an inductive variable;
4640 // 2. We use it with non-zero offset;
4641 // 3. IV's increment is available at the point of memory instruction.
4642 //
4643 // In this case, we may reuse the IV increment instead of the IV Phi to
4644 // achieve the following advantages:
4645 // 1. If IV step matches the offset, we will have no need in the offset;
4646 // 2. Even if they don't match, we will reduce the overlap of living IV
4647 // and IV increment, that will potentially lead to better register
4648 // assignment.
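 // For example (a sketch in hypothetical IR): given
 //   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
 //   %iv.next = add i64 %iv, 1
 // a load from (%base + 8*%iv + 8) can instead be addressed as
 // (%base + 8*%iv.next), eliminating the constant offset entirely.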
4649 if (AddrMode.BaseOffs) {
4650 if (auto IVStep = GetConstantStep(ScaleReg)) {
4651 Instruction *IVInc = IVStep->first;
4652 // The following assert is important to ensure a lack of infinite loops.
4653 // This transform is (intentionally) the inverse of the one just above.
4654 // If they don't agree on the definition of an increment, we'd alternate
4655 // back and forth indefinitely.
4656 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4657 APInt Step = IVStep->second;
4658 APInt Offset = Step * AddrMode.Scale;
4659 if (Offset.isSignedIntN(64)) {
4660 TestAddrMode.InBounds = false;
4661 TestAddrMode.ScaledReg = IVInc;
4662 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4663 // If this addressing mode is legal, commit it.
4664 // (Note that we defer the (expensive) domtree base legality check
4665 // to the very last possible point.)
4666 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4667 getDTFn().dominates(IVInc, MemoryInst)) {
4668 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4669 AddrMode = TestAddrMode;
4670 return true;
4671 }
4672 // Restore status quo.
4673 TestAddrMode = AddrMode;
4674 }
4675 }
4676 }
4677
4678 // Otherwise, just return what we have.
4679 return true;
4680}
4681
4682/// This is a little filter, which returns true if an addressing computation
4683/// involving I might be folded into a load/store accessing it.
4684/// This doesn't need to be perfect, but needs to accept at least
4685/// the set of instructions that MatchOperationAddr can.
4686 static bool MightBeFoldableInst(Instruction *I) {
4687 switch (I->getOpcode()) {
4688 case Instruction::BitCast:
4689 case Instruction::AddrSpaceCast:
4690 // Don't touch identity bitcasts.
4691 if (I->getType() == I->getOperand(0)->getType())
4692 return false;
4693 return I->getType()->isIntOrPtrTy();
4694 case Instruction::PtrToInt:
4695 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4696 return true;
4697 case Instruction::IntToPtr:
4698 // We know the input is intptr_t, so this is foldable.
4699 return true;
4700 case Instruction::Add:
4701 return true;
4702 case Instruction::Mul:
4703 case Instruction::Shl:
4704 // Can only handle X*C and X << C.
4705 return isa<ConstantInt>(I->getOperand(1));
4706 case Instruction::GetElementPtr:
4707 return true;
4708 default:
4709 return false;
4710 }
4711}
4712
4713/// Check whether or not \p Val is a legal instruction for \p TLI.
4714/// \note \p Val is assumed to be the product of some type promotion.
4715/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4716/// to be legal, as the non-promoted value would have had the same state.
4717 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4718 const DataLayout &DL, Value *Val) {
4719 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4720 if (!PromotedInst)
4721 return false;
4722 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4723 // If the ISDOpcode is undefined, it was undefined before the promotion.
4724 if (!ISDOpcode)
4725 return true;
4726 // Otherwise, check if the promoted instruction is legal or not.
4727 return TLI.isOperationLegalOrCustom(
4728 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4729}
4730
4731namespace {
4732
4733/// Helper class to perform type promotion.
4734class TypePromotionHelper {
4735 /// Utility function to add a promoted instruction \p ExtOpnd to
4736 /// \p PromotedInsts and record the type of extension we have seen.
4737 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4738 Instruction *ExtOpnd, bool IsSExt) {
4739 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4740 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4741 if (!Inserted) {
4742 // If the new extension is same as original, the information in
4743 // PromotedInsts[ExtOpnd] is still correct.
4744 if (It->second.getInt() == ExtTy)
4745 return;
4746
4747 // Now the new extension is different from old extension, we make
4748 // the type information invalid by setting extension type to
4749 // BothExtension.
4750 ExtTy = BothExtension;
4751 }
4752 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4753 }
4754
4755 /// Utility function to query the original type of instruction \p Opnd
4756 /// with a matched extension type. If the extension doesn't match, we
4757 /// cannot use the information we had on the original type.
4758 /// BothExtension doesn't match any extension type.
4759 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4760 Instruction *Opnd, bool IsSExt) {
4761 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4762 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4763 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4764 return It->second.getPointer();
4765 return nullptr;
4766 }
4767
4768 /// Utility function to check whether or not a sign or zero extension
4769 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4770 /// either using the operands of \p Inst or promoting \p Inst.
4771 /// The type of the extension is defined by \p IsSExt.
4772 /// In other words, check if:
4773 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4774 /// #1 Promotion applies:
4775 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4776 /// #2 Operand reuses:
4777 /// ext opnd1 to ConsideredExtType.
4778 /// \p PromotedInsts maps the instructions to their type before promotion.
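/// For example (illustrative IR), for
///   %t = add nsw i8 %a, %b
///   %x = sext i8 %t to i32
/// case #1 rewrites the computation as
///   %pa = sext i8 %a to i32
///   %pb = sext i8 %b to i32
///   %x  = add nsw i32 %pa, %pb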
4779 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4780 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4781
4782 /// Utility function to determine if \p OpIdx should be promoted when
4783 /// promoting \p Inst.
4784 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4785 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4786 }
4787
4788 /// Utility function to promote the operand of \p Ext when this
4789 /// operand is a promotable trunc or sext or zext.
4790 /// \p PromotedInsts maps the instructions to their type before promotion.
4791 /// \p CreatedInstsCost[out] contains the cost of all instructions
4792 /// created to promote the operand of Ext.
4793 /// Newly added extensions are inserted in \p Exts.
4794 /// Newly added truncates are inserted in \p Truncs.
4795 /// Should never be called directly.
4796 /// \return The promoted value which is used instead of Ext.
4797 static Value *promoteOperandForTruncAndAnyExt(
4798 Instruction *Ext, TypePromotionTransaction &TPT,
4799 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4800 SmallVectorImpl<Instruction *> *Exts,
4801 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4802
4803 /// Utility function to promote the operand of \p Ext when this
4804 /// operand is promotable and is not a supported trunc or sext.
4805 /// \p PromotedInsts maps the instructions to their type before promotion.
4806 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4807 /// created to promote the operand of Ext.
4808 /// Newly added extensions are inserted in \p Exts.
4809 /// Newly added truncates are inserted in \p Truncs.
4810 /// Should never be called directly.
4811 /// \return The promoted value which is used instead of Ext.
4812 static Value *promoteOperandForOther(Instruction *Ext,
4813 TypePromotionTransaction &TPT,
4814 InstrToOrigTy &PromotedInsts,
4815 unsigned &CreatedInstsCost,
4816 SmallVectorImpl<Instruction *> *Exts,
4817 SmallVectorImpl<Instruction *> *Truncs,
4818 const TargetLowering &TLI, bool IsSExt);
4819
4820 /// \see promoteOperandForOther.
4821 static Value *signExtendOperandForOther(
4822 Instruction *Ext, TypePromotionTransaction &TPT,
4823 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4824 SmallVectorImpl<Instruction *> *Exts,
4825 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4826 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4827 Exts, Truncs, TLI, true);
4828 }
4829
4830 /// \see promoteOperandForOther.
4831 static Value *zeroExtendOperandForOther(
4832 Instruction *Ext, TypePromotionTransaction &TPT,
4833 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4834 SmallVectorImpl<Instruction *> *Exts,
4835 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4836 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4837 Exts, Truncs, TLI, false);
4838 }
4839
4840public:
4841 /// Type for the utility function that promotes the operand of Ext.
4842 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4843 InstrToOrigTy &PromotedInsts,
4844 unsigned &CreatedInstsCost,
4845 SmallVectorImpl<Instruction *> *Exts,
4846 SmallVectorImpl<Instruction *> *Truncs,
4847 const TargetLowering &TLI);
4848
4849 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4850 /// action to promote the operand of \p Ext instead of using Ext.
4851 /// \return NULL if no promotable action is possible with the current
4852 /// sign extension.
4853 /// \p InsertedInsts keeps track of all the instructions inserted by the
4854 /// other CodeGenPrepare optimizations. This information is important
4855 /// because we do not want to promote these instructions as CodeGenPrepare
4856 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4857 /// \p PromotedInsts maps the instructions to their type before promotion.
4858 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4859 const TargetLowering &TLI,
4860 const InstrToOrigTy &PromotedInsts);
4861};
4862
4863} // end anonymous namespace
4864
4865bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4866 Type *ConsideredExtType,
4867 const InstrToOrigTy &PromotedInsts,
4868 bool IsSExt) {
4869 // The promotion helper does not know how to deal with vector types yet.
4870 // To be able to fix that, we would need to fix the places where we
4871 // statically extend, e.g., constants and such.
4872 if (Inst->getType()->isVectorTy())
4873 return false;
4874
4875 // We can always get through zext.
4876 if (isa<ZExtInst>(Inst))
4877 return true;
4878
4879 // sext(sext) is ok too.
4880 if (IsSExt && isa<SExtInst>(Inst))
4881 return true;
4882
4883 // We can get through a binary operator if it is legal. In other words, the
4884 // binary operator must have a nuw or nsw flag.
4885 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4886 if (isa<OverflowingBinaryOperator>(BinOp) &&
4887 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4888 (IsSExt && BinOp->hasNoSignedWrap())))
4889 return true;
4890
4891 // ext(and/or(opnd, cst)) --> and/or(ext(opnd), ext(cst))
4892 if ((Inst->getOpcode() == Instruction::And ||
4893 Inst->getOpcode() == Instruction::Or))
4894 return true;
4895
4896 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4897 if (Inst->getOpcode() == Instruction::Xor) {
4898 // Make sure it is not a NOT.
4899 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4900 if (!Cst->getValue().isAllOnes())
4901 return true;
4902 }
4903
4904 // zext(lshr(opnd, cst)) --> lshr(zext(opnd), zext(cst))
4905 // It may change a poisoned value into a regular value, like
4906 // zext i32 (lshr i8 %val, 12) --> lshr i32 (zext i8 %val), 12
4907 // (poisoned value) (regular value)
4908 // This should be OK since poison covers any valid value.
4909 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4910 return true;
4911
4912 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4913 // It may change a poisoned value into a regular value, like
4914 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4915 // (poisoned value) (regular value)
4916 // This should be OK since poison covers any valid value.
4917 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4918 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4919 if (ExtInst->hasOneUse()) {
4920 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4921 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4922 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4923 if (Cst &&
4924 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4925 return true;
4926 }
4927 }
4928 }
4929
4930 // Check if we can do the following simplification.
4931 // ext(trunc(opnd)) --> ext(opnd)
4932 if (!isa<TruncInst>(Inst))
4933 return false;
4934
4935 Value *OpndVal = Inst->getOperand(0);
4936 // Check if we can use this operand in the extension.
4937 // If the type is larger than the result type of the extension, we cannot.
4938 if (!OpndVal->getType()->isIntegerTy() ||
4939 OpndVal->getType()->getIntegerBitWidth() >
4940 ConsideredExtType->getIntegerBitWidth())
4941 return false;
4942
4943 // If the operand of the truncate is not an instruction, we will not have
4944 // any information on the dropped bits.
4945 // (Actually we could for constants, but it is not worth the extra logic.)
4946 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4947 if (!Opnd)
4948 return false;
4949
4950 // Check if the source type is narrow enough.
4951 // I.e., check that the trunc just drops extended bits of the same kind
4952 // as the extension.
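// E.g. (illustrative): for
//   %w = zext i8 %v to i32
//   %t = trunc i32 %w to i16
//   %x = zext i16 %t to i32
// the trunc only removes bits that the first zext produced (i16 >= i8), so
// %x can be computed from %v's zero extension directly.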
4953 // #1 get the type of the operand and check the kind of the extended bits.
4954 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4955 if (!OpndType) {
4956 if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4957 OpndType = Opnd->getOperand(0)->getType();
4958 else
4959 return false;
4960 }
4961
4962 // #2 check that the truncate just drops extended bits.
4963 return Inst->getType()->getIntegerBitWidth() >=
4964 OpndType->getIntegerBitWidth();
4965}
4966
4967TypePromotionHelper::Action TypePromotionHelper::getAction(
4968 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4969 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4970 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4971 "Unexpected instruction type");
4972 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4973 Type *ExtTy = Ext->getType();
4974 bool IsSExt = isa<SExtInst>(Ext);
4975 // If the operand of the extension is not an instruction, we cannot
4976 // get through.
4977 // If it is, check whether we can get through.
4978 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4979 return nullptr;
4980
4981 // Do not promote if the operand has been added by codegenprepare.
4982 // Otherwise, it means we are undoing an optimization that is likely to be
4983 // redone, thus causing a potential infinite loop.
4984 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4985 return nullptr;
4986
4987 // SExt, ZExt or Trunc instructions.
4988 // Return the related handler.
4989 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4990 isa<ZExtInst>(ExtOpnd))
4991 return promoteOperandForTruncAndAnyExt;
4992
4993 // Regular instruction.
4994 // Abort early if we will have to insert non-free instructions.
4995 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4996 return nullptr;
4997 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4998}
4999
5000Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
5001 Instruction *SExt, TypePromotionTransaction &TPT,
5002 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5003 SmallVectorImpl<Instruction *> *Exts,
5004 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
5005 // By construction, the operand of SExt is an instruction. Otherwise we cannot
5006 // get through it and this method should not be called.
5007 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
5008 Value *ExtVal = SExt;
5009 bool HasMergedNonFreeExt = false;
5010 if (isa<ZExtInst>(SExtOpnd)) {
5011 // Replace s|zext(zext(opnd))
5012 // => zext(opnd).
5013 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
5014 Value *ZExt =
5015 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
5016 TPT.replaceAllUsesWith(SExt, ZExt);
5017 TPT.eraseInstruction(SExt);
5018 ExtVal = ZExt;
5019 } else {
5020 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
5021 // => z|sext(opnd).
5022 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
5023 }
5024 CreatedInstsCost = 0;
5025
5026 // Remove dead code.
5027 if (SExtOpnd->use_empty())
5028 TPT.eraseInstruction(SExtOpnd);
5029
5030 // Check if the extension is still needed.
5031 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5032 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5033 if (ExtInst) {
5034 if (Exts)
5035 Exts->push_back(ExtInst);
5036 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5037 }
5038 return ExtVal;
5039 }
5040
5041 // At this point we have: ext ty opnd to ty.
5042 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5043 Value *NextVal = ExtInst->getOperand(0);
5044 TPT.eraseInstruction(ExtInst, NextVal);
5045 return NextVal;
5046}
5047
5048Value *TypePromotionHelper::promoteOperandForOther(
5049 Instruction *Ext, TypePromotionTransaction &TPT,
5050 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5051 SmallVectorImpl<Instruction *> *Exts,
5052 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5053 bool IsSExt) {
5054 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5055 // get through it and this method should not be called.
5056 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5057 CreatedInstsCost = 0;
5058 if (!ExtOpnd->hasOneUse()) {
5059 // ExtOpnd will be promoted.
5060 // All its uses, but Ext, will need to use a truncated value of the
5061 // promoted version.
5062 // Create the truncate now.
5063 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5064 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5065 // Insert it just after the definition.
5066 ITrunc->moveAfter(ExtOpnd);
5067 if (Truncs)
5068 Truncs->push_back(ITrunc);
5069 }
5070
5071 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5072 // Restore the operand of Ext (which has been replaced by the previous call
5073 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5074 TPT.setOperand(Ext, 0, ExtOpnd);
5075 }
5076
5077 // Get through the Instruction:
5078 // 1. Update its type.
5079 // 2. Replace the uses of Ext by Inst.
5080 // 3. Extend each operand that needs to be extended.
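// E.g. (an illustrative sketch): promoting %e = sext i8 %op to i32, where
// %op = add nsw i8 %a, 1 also has another use, yields
//   %pa = sext i8 %a to i32            ; step #3
//   %op = add nsw i32 %pa, 1           ; steps #1 and #2 (replaces %e)
//   %tr = trunc i32 %op to i8          ; feeds the other original use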
5081
5082 // Remember the original type of the instruction before promotion.
5083 // This is useful to know that the high bits are sign extended bits.
5084 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5085 // Step #1.
5086 TPT.mutateType(ExtOpnd, Ext->getType());
5087 // Step #2.
5088 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5089 // Step #3.
5090 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5091 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5092 ++OpIdx) {
5093 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5094 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5095 !shouldExtOperand(ExtOpnd, OpIdx)) {
5096 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5097 continue;
5098 }
5099 // Check if we can statically extend the operand.
5100 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5101 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5102 LLVM_DEBUG(dbgs() << "Statically extend\n");
5103 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5104 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5105 : Cst->getValue().zext(BitWidth);
5106 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5107 continue;
5108 }
5109 // UndefValues are typed, so we have to statically extend them.
5110 if (isa<UndefValue>(Opnd)) {
5111 LLVM_DEBUG(dbgs() << "Statically extend\n");
5112 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5113 continue;
5114 }
5115
5116 // Otherwise we have to explicitly sign extend the operand.
5117 Value *ValForExtOpnd = IsSExt
5118 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5119 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5120 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5121 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5122 if (!InstForExtOpnd)
5123 continue;
5124
5125 if (Exts)
5126 Exts->push_back(InstForExtOpnd);
5127
5128 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5129 }
5130 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5131 TPT.eraseInstruction(Ext);
5132 return ExtOpnd;
5133}
5134
5135/// Check whether or not promoting an instruction to a wider type is profitable.
5136/// \p NewCost gives the cost of extension instructions created by the
5137/// promotion.
5138/// \p OldCost gives the cost of extension instructions before the promotion
5139/// plus the number of instructions that have been
5140/// matched in the addressing mode thanks to the promotion.
5141/// \p PromotedOperand is the value that has been promoted.
5142/// \return True if the promotion is profitable, false otherwise.
5143bool AddressingModeMatcher::isPromotionProfitable(
5144 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5145 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5146 << '\n');
5147 // The cost of the new extensions is greater than the cost of the
5148 // old extension plus what we folded.
5149 // This is not profitable.
5150 if (NewCost > OldCost)
5151 return false;
5152 if (NewCost < OldCost)
5153 return true;
5154 // The promotion is neutral but it may help folding the sign extension in
5155 // loads for instance.
5156 // Check that we did not create an illegal instruction.
5157 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5158}
5159
5160/// Given an instruction or constant expr, see if we can fold the operation
5161/// into the addressing mode. If so, update the addressing mode and return
5162/// true, otherwise return false without modifying AddrMode.
5163/// If \p MovedAway is not NULL, it is set to indicate whether or not
5164/// AddrInst has to be folded into the addressing mode on success.
5165/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5166/// mode because it has been moved away.
5167/// Thus AddrInst must not be added in the matched instructions.
5168/// This state can happen when AddrInst is a sext, since it may be moved away.
5169/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5170/// not be referenced anymore.
5171bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5172 unsigned Depth,
5173 bool *MovedAway) {
5174 // Avoid exponential behavior on extremely deep expression trees.
5175 if (Depth >= 5)
5176 return false;
5177
5178 // By default, all matched instructions stay in place.
5179 if (MovedAway)
5180 *MovedAway = false;
5181
5182 switch (Opcode) {
5183 case Instruction::PtrToInt:
5184 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5185 return matchAddr(AddrInst->getOperand(0), Depth);
5186 case Instruction::IntToPtr: {
5187 auto AS = AddrInst->getType()->getPointerAddressSpace();
5188 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5189 // This inttoptr is a no-op if the integer type is pointer sized.
5190 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5191 return matchAddr(AddrInst->getOperand(0), Depth);
5192 return false;
5193 }
5194 case Instruction::BitCast:
5195 // BitCast is always a noop, and we can handle it as long as it is
5196 // int->int or pointer->pointer (we don't want int<->fp or something).
5197 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5198 // Don't touch identity bitcasts. These were probably put here by LSR,
5199 // and we don't want to mess around with them. Assume it knows what it
5200 // is doing.
5201 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5202 return matchAddr(AddrInst->getOperand(0), Depth);
5203 return false;
5204 case Instruction::AddrSpaceCast: {
5205 unsigned SrcAS =
5206 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5207 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5208 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5209 return matchAddr(AddrInst->getOperand(0), Depth);
5210 return false;
5211 }
5212 case Instruction::Add: {
5213 // Check to see if we can merge in one operand, then the other. If so, we
5214 // win.
5215 ExtAddrMode BackupAddrMode = AddrMode;
5216 unsigned OldSize = AddrModeInsts.size();
5217 // Start a transaction at this point.
5218 // The LHS may match but not the RHS.
5219 // Therefore, we need a higher level restoration point to undo a
5220 // partially matched operation.
5221 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5222 TPT.getRestorationPoint();
5223
5224 // Try to match an integer constant second to increase its chance of ending
5225 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
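// E.g. (illustrative): for 'add i64 42, %x' the operands are swapped so
// that %x is matched first and 42 gets a chance to fold into BaseOffs.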
5226 int First = 0, Second = 1;
5227 if (isa<ConstantInt>(AddrInst->getOperand(First))
5228 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5229 std::swap(First, Second);
5230 AddrMode.InBounds = false;
5231 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5232 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5233 return true;
5234
5235 // Restore the old addr mode info.
5236 AddrMode = BackupAddrMode;
5237 AddrModeInsts.resize(OldSize);
5238 TPT.rollback(LastKnownGood);
5239
5240 // Otherwise this was over-aggressive. Try merging operands in the opposite
5241 // order.
5242 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5243 matchAddr(AddrInst->getOperand(First), Depth + 1))
5244 return true;
5245
5246 // Otherwise we definitely can't merge the ADD in.
5247 AddrMode = BackupAddrMode;
5248 AddrModeInsts.resize(OldSize);
5249 TPT.rollback(LastKnownGood);
5250 break;
5251 }
5252 // case Instruction::Or:
5253 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5254 // break;
5255 case Instruction::Mul:
5256 case Instruction::Shl: {
5257 // Can only handle X*C and X << C.
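// E.g. 'shl i64 %x, 3' is matched as %x scaled by 8, just like
// 'mul i64 %x, 8'.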
5258 AddrMode.InBounds = false;
5259 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5260 if (!RHS || RHS->getBitWidth() > 64)
5261 return false;
5262 int64_t Scale = Opcode == Instruction::Shl
5263 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5264 : RHS->getSExtValue();
5265
5266 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5267 }
5268 case Instruction::GetElementPtr: {
5269 // Scan the GEP. We check whether it contains constant offsets and at
5270 // most one variable offset.
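// E.g. (illustrative): in 'getelementptr %struct.S, ptr %p, i64 %i, i32 2'
// the field index contributes a constant offset from the struct layout,
// while %i is a single variable index scaled by the size of %struct.S.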
5271 int VariableOperand = -1;
5272 unsigned VariableScale = 0;
5273
5274 int64_t ConstantOffset = 0;
5275 gep_type_iterator GTI = gep_type_begin(AddrInst);
5276 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5277 if (StructType *STy = GTI.getStructTypeOrNull()) {
5278 const StructLayout *SL = DL.getStructLayout(STy);
5279 unsigned Idx =
5280 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5281 ConstantOffset += SL->getElementOffset(Idx);
5282 } else {
5283 TypeSize TS = GTI.getSequentialElementStride(DL);
5284 if (TS.isNonZero()) {
5285 // The optimisations below currently only work for fixed offsets.
5286 if (TS.isScalable())
5287 return false;
5288 int64_t TypeSize = TS.getFixedValue();
5289 if (ConstantInt *CI =
5290 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5291 const APInt &CVal = CI->getValue();
5292 if (CVal.getSignificantBits() <= 64) {
5293 ConstantOffset += CVal.getSExtValue() * TypeSize;
5294 continue;
5295 }
5296 }
5297 // We only allow one variable index at the moment.
5298 if (VariableOperand != -1)
5299 return false;
5300
5301 // Remember the variable index.
5302 VariableOperand = i;
5303 VariableScale = TypeSize;
5304 }
5305 }
5306 }
5307
5308 // A common case is for the GEP to only do a constant offset. In this case,
5309 // just add it to the disp field and check validity.
5310 if (VariableOperand == -1) {
5311 AddrMode.BaseOffs += ConstantOffset;
5312 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5313 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5314 AddrMode.InBounds = false;
5315 return true;
5316 }
5317 AddrMode.BaseOffs -= ConstantOffset;
5318
5319 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5320 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5321 ConstantOffset > 0) {
5322 // Record GEPs with non-zero offsets as candidates for splitting in
5323 // the event that the offset cannot fit into the r+i addressing mode.
5324 // Simple and common case that only one GEP is used in calculating the
5325 // address for the memory access.
5326 Value *Base = AddrInst->getOperand(0);
5327 auto *BaseI = dyn_cast<Instruction>(Base);
5328 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5329 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5330 (BaseI && !isa<CastInst>(BaseI) &&
5331 !isa<GetElementPtrInst>(BaseI))) {
5332 // Make sure the parent block allows inserting non-PHI instructions
5333 // before the terminator.
5334 BasicBlock *Parent = BaseI ? BaseI->getParent()
5335 : &GEP->getFunction()->getEntryBlock();
5336 if (!Parent->getTerminator()->isEHPad())
5337 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5338 }
5339 }
5340
5341 return false;
5342 }
5343
5344 // Save the valid addressing mode in case we can't match.
5345 ExtAddrMode BackupAddrMode = AddrMode;
5346 unsigned OldSize = AddrModeInsts.size();
5347
5348 // See if the scale and offset amount is valid for this target.
5349 AddrMode.BaseOffs += ConstantOffset;
5350 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5351 AddrMode.InBounds = false;
5352
5353 // Match the base operand of the GEP.
5354 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5355 // If it couldn't be matched, just stuff the value in a register.
5356 if (AddrMode.HasBaseReg) {
5357 AddrMode = BackupAddrMode;
5358 AddrModeInsts.resize(OldSize);
5359 return false;
5360 }
5361 AddrMode.HasBaseReg = true;
5362 AddrMode.BaseReg = AddrInst->getOperand(0);
5363 }
5364
5365 // Match the remaining variable portion of the GEP.
5366 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5367 Depth)) {
5368 // If it couldn't be matched, try stuffing the base into a register
5369 // instead of matching it, and retrying the match of the scale.
5370 AddrMode = BackupAddrMode;
5371 AddrModeInsts.resize(OldSize);
5372 if (AddrMode.HasBaseReg)
5373 return false;
5374 AddrMode.HasBaseReg = true;
5375 AddrMode.BaseReg = AddrInst->getOperand(0);
5376 AddrMode.BaseOffs += ConstantOffset;
5377 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5378 VariableScale, Depth)) {
5379 // If even that didn't work, bail.
5380 AddrMode = BackupAddrMode;
5381 AddrModeInsts.resize(OldSize);
5382 return false;
5383 }
5384 }
5385
5386 return true;
5387 }
5388 case Instruction::SExt:
5389 case Instruction::ZExt: {
5390 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5391 if (!Ext)
5392 return false;
5393
5394 // Try to move this ext out of the way of the addressing mode.
5395 // Ask for a method for doing so.
5396 TypePromotionHelper::Action TPH =
5397 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5398 if (!TPH)
5399 return false;
5400
5401 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5402 TPT.getRestorationPoint();
5403 unsigned CreatedInstsCost = 0;
5404 unsigned ExtCost = !TLI.isExtFree(Ext);
5405 Value *PromotedOperand =
5406 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5407 // SExt has been moved away.
5408 // Thus either it will be rematched later in the recursive calls or it is
5409 // gone. Anyway, we must not fold it into the addressing mode at this point.
5410 // E.g.,
5411 // op = add opnd, 1
5412 // idx = ext op
5413 // addr = gep base, idx
5414 // is now:
5415 // promotedOpnd = ext opnd <- no match here
5416 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5417 // addr = gep base, op <- match
5418 if (MovedAway)
5419 *MovedAway = true;
5420
5421 assert(PromotedOperand &&
5422 "TypePromotionHelper should have filtered out those cases");
5423
5424 ExtAddrMode BackupAddrMode = AddrMode;
5425 unsigned OldSize = AddrModeInsts.size();
5426
5427 if (!matchAddr(PromotedOperand, Depth) ||
5428 // The total of the new cost is equal to the cost of the created
5429 // instructions.
5430 // The total of the old cost is equal to the cost of the extension plus
5431 // what we have saved in the addressing mode.
5432 !isPromotionProfitable(CreatedInstsCost,
5433 ExtCost + (AddrModeInsts.size() - OldSize),
5434 PromotedOperand)) {
5435 AddrMode = BackupAddrMode;
5436 AddrModeInsts.resize(OldSize);
5437 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5438 TPT.rollback(LastKnownGood);
5439 return false;
5440 }
5441
5442 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5443 AddrMode.replaceWith(Ext, PromotedOperand);
5444 return true;
5445 }
5446 case Instruction::Call:
5447 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5448 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5449 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5450 if (TLI.addressingModeSupportsTLS(GV))
5451 return matchAddr(AddrInst->getOperand(0), Depth);
5452 }
5453 }
5454 break;
5455 }
5456 return false;
5457}
5458
5459/// If we can, try to add the value of 'Addr' into the current addressing mode.
5460/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5461/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5462/// for the target.
5463///
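/// E.g. (an illustrative sketch): matching %p where %p = gep i8 %base, 4
/// may end with AddrMode = { BaseReg = %base, BaseOffs = 4 } and the GEP
/// recorded in AddrModeInsts, provided the target accepts [r+4].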
5464bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5465 // Start a transaction at this point that we will rollback if the matching
5466 // fails.
5467 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5468 TPT.getRestorationPoint();
5469 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5470 if (CI->getValue().isSignedIntN(64)) {
5471 // Check if the addition would result in a signed overflow.
5472 int64_t Result;
5473 bool Overflow =
5474 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5475 if (!Overflow) {
5476 // Fold in immediates if legal for the target.
5477 AddrMode.BaseOffs = Result;
5478 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5479 return true;
5480 AddrMode.BaseOffs -= CI->getSExtValue();
5481 }
5482 }
5483 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5484 // If this is a global variable, try to fold it into the addressing mode.
5485 if (!AddrMode.BaseGV) {
5486 AddrMode.BaseGV = GV;
5487 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5488 return true;
5489 AddrMode.BaseGV = nullptr;
5490 }
5491 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5492 ExtAddrMode BackupAddrMode = AddrMode;
5493 unsigned OldSize = AddrModeInsts.size();
5494
5495 // Check to see if it is possible to fold this operation.
5496 bool MovedAway = false;
5497 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5498 // This instruction may have been moved away. If so, there is nothing
5499 // to check here.
5500 if (MovedAway)
5501 return true;
5502 // Okay, it's possible to fold this. Check to see if it is actually
5503 // *profitable* to do so. We use a simple cost model to avoid increasing
5504 // register pressure too much.
5505 if (I->hasOneUse() ||
5506 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5507 AddrModeInsts.push_back(I);
5508 return true;
5509 }
5510
5511 // It isn't profitable to do this, roll back.
5512 AddrMode = BackupAddrMode;
5513 AddrModeInsts.resize(OldSize);
5514 TPT.rollback(LastKnownGood);
5515 }
5516 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5517 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5518 return true;
5519 TPT.rollback(LastKnownGood);
5520 } else if (isa<ConstantPointerNull>(Addr)) {
5521 // Null pointer gets folded without affecting the addressing mode.
5522 return true;
5523 }
5524
5525 // Worst case, the target should support [reg] addressing modes. :)
5526 if (!AddrMode.HasBaseReg) {
5527 AddrMode.HasBaseReg = true;
5528 AddrMode.BaseReg = Addr;
5529 // Still check for legality in case the target supports [imm] but not [i+r].
5530 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5531 return true;
5532 AddrMode.HasBaseReg = false;
5533 AddrMode.BaseReg = nullptr;
5534 }
5535
5536 // If the base register is already taken, see if we can do [r+r].
5537 if (AddrMode.Scale == 0) {
5538 AddrMode.Scale = 1;
5539 AddrMode.ScaledReg = Addr;
5540 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5541 return true;
5542 AddrMode.Scale = 0;
5543 AddrMode.ScaledReg = nullptr;
5544 }
5545 // Couldn't match.
5546 TPT.rollback(LastKnownGood);
5547 return false;
5548}
5549
5550/// Check to see if all uses of OpVal by the specified inline asm call are due
5551/// to memory operands. If so, return true, otherwise return false.
5552static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5553 const TargetLowering &TLI,
5554 const TargetRegisterInfo &TRI) {
5555 const Function *F = CI->getFunction();
5556 TargetLowering::AsmOperandInfoVector TargetConstraints =
5557 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5558
5559 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5560 // Compute the constraint code and ConstraintType to use.
5561 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5562
5563 // If this asm operand is our Value*, and if it isn't an indirect memory
5564 // operand, we can't fold it! TODO: Also handle C_Address?
5565 if (OpInfo.CallOperandVal == OpVal &&
5566 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5567 !OpInfo.isIndirect))
5568 return false;
5569 }
5570
5571 return true;
5572}
5573
5574/// Recursively walk all the uses of I until we find a memory use.
5575/// If we find an obviously non-foldable instruction, return true.
5576/// Add accessed addresses and types to MemoryUses.
5577static bool FindAllMemoryUses(
5578 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5579 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5580 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5581 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5582 // If we already considered this instruction, we're done.
5583 if (!ConsideredInsts.insert(I).second)
5584 return false;
5585
5586 // If this is an obviously unfoldable instruction, bail out.
5587 if (!MightBeFoldableInst(I))
5588 return true;
5589
5590 // Loop over all the uses, recursively processing them.
5591 for (Use &U : I->uses()) {
5592 // Conservatively return true if we're seeing a large number or a deep chain
5593 // of users. This avoids excessive compilation times in pathological cases.
5594 if (SeenInsts++ >= MaxAddressUsersToScan)
5595 return true;
5596
5597 Instruction *UserI = cast<Instruction>(U.getUser());
5598 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5599 MemoryUses.push_back({&U, LI->getType()});
5600 continue;
5601 }
5602
5603 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5604 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5605 return true; // Storing addr, not into addr.
5606 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5607 continue;
5608 }
5609
5610 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5611 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5612 return true; // Storing addr, not into addr.
5613 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5614 continue;
5615 }
5616
5617 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5618 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5619 return true; // Storing addr, not into addr.
5620 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5621 continue;
5622 }
5623
5624 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
5625 SmallVector<Value *, 2> PtrOps;
5626 Type *AccessTy;
5627 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5628 return true;
5629
5630 if (!is_contained(PtrOps, U.get()))
5631 return true;
5632
5633 MemoryUses.push_back({&U, AccessTy});
5634 continue;
5635 }
5636
5637 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5638 if (CI->hasFnAttr(Attribute::Cold)) {
5639 // If this is a cold call, we can sink the addressing calculation into
5640 // the cold path. See optimizeCallInst
5641 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5642 continue;
5643 }
5644
5645 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5646 if (!IA)
5647 return true;
5648
5649 // If this is a memory operand, we're cool, otherwise bail out.
5650 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5651 return true;
5652 continue;
5653 }
5654
5655 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5656 PSI, BFI, SeenInsts))
5657 return true;
5658 }
5659
5660 return false;
5661}
5662
5663static bool FindAllMemoryUses(
5664 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5665 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5666 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5667 unsigned SeenInsts = 0;
5668 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5669 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5670 PSI, BFI, SeenInsts);
5671}
5672
5673
5674/// Return true if Val is already known to be live at the use site that we're
5675/// folding it into. If so, there is no cost to include it in the addressing
5676/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5677/// instruction already.
5678bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5679 Value *KnownLive1,
5680 Value *KnownLive2) {
5681 // If Val is either of the known-live values, we know it is live!
5682 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5683 return true;
5684
5685 // All values other than instructions and arguments (e.g. constants) are live.
5686 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5687 return true;
5688
5689 // If Val is a constant sized alloca in the entry block, it is live. This is
5690 // true because it is just a reference to the stack/frame pointer, which is
5691 // live for the whole function.
5692 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5693 if (AI->isStaticAlloca())
5694 return true;
5695
5696 // Check to see if this value is already used in the memory instruction's
5697 // block. If so, it's already live into the block at the very least, so we
5698 // can reasonably fold it.
5699 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5700}
5701
5702/// It is possible for the addressing mode of the machine to fold the specified
5703/// instruction into a load or store that ultimately uses it.
5704/// However, the specified instruction has multiple uses.
5705/// Given this, it may actually increase register pressure to fold it
5706/// into the load. For example, consider this code:
5707///
5708/// X = ...
5709/// Y = X+1
5710/// use(Y) -> nonload/store
5711/// Z = Y+1
5712/// load Z
5713///
5714/// In this case, Y has multiple uses, and can be folded into the load of Z
5715/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5716/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5717/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5718/// number of computations either.
5719///
5720/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5721/// X was live across 'load Z' for other reasons, we actually *would* want to
5722/// fold the addressing mode in the Z case. This would make Y die earlier.
5723bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5724 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5725 if (IgnoreProfitability)
5726 return true;
5727
5728 // AMBefore is the addressing mode before this instruction was folded into it,
5729 // and AMAfter is the addressing mode after the instruction was folded. Get
5730 // the set of registers referenced by AMAfter and subtract out those
5731 // referenced by AMBefore: this is the set of values which folding in this
5732 // address extends the lifetime of.
5733 //
5734 // Note that there are only two potential values being referenced here,
5735 // BaseReg and ScaleReg (global addresses are always available, as are any
5736 // folded immediates).
5737 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5738
5739 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5740 // lifetime wasn't extended by adding this instruction.
5741 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5742 BaseReg = nullptr;
5743 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5744 ScaledReg = nullptr;
5745
5746 // If folding this instruction (and its subexprs) didn't extend any live
5747 // ranges, we're ok with it.
5748 if (!BaseReg && !ScaledReg)
5749 return true;
5750
5751 // If all uses of this instruction can have the address mode sunk into them,
5752 // we can remove the addressing mode and effectively trade one live register
5753 // for another (at worst.) In this context, folding an addressing mode into
5754 // the use is just a particularly nice way of sinking it.
5755 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5756 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5757 return false; // Has a non-memory, non-foldable use!
5758
5759 // Now that we know that all uses of this instruction are part of a chain of
5760 // computation involving only operations that could theoretically be folded
5761 // into a memory use, loop over each of these memory operation uses and see
5762 // if they could *actually* fold the instruction. The assumption is that
5763 // addressing modes are cheap and that duplicating the computation involved
5764 // many times is worthwhile, even on a fastpath. For sinking candidates
5765 // (i.e. cold call sites), this serves as a way to prevent excessive code
5766 // growth since most architectures have some reasonably small and fast way
5767 // to compute an effective address (e.g. LEA on x86).
5768 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5769 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5770 Value *Address = Pair.first->get();
5771 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5772 Type *AddressAccessTy = Pair.second;
5773 unsigned AS = Address->getType()->getPointerAddressSpace();
5774
5775 // Do a match against the root of this address, ignoring profitability. This
5776 // will tell us if the addressing mode for the memory operation will
5777 // *actually* cover the shared instruction.
5778 ExtAddrMode Result;
5779 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5780 0);
5781 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5782 TPT.getRestorationPoint();
5783 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5784 AddressAccessTy, AS, UserI, Result,
5785 InsertedInsts, PromotedInsts, TPT,
5786 LargeOffsetGEP, OptSize, PSI, BFI);
5787 Matcher.IgnoreProfitability = true;
5788 bool Success = Matcher.matchAddr(Address, 0);
5789 (void)Success;
5790 assert(Success && "Couldn't select *anything*?");
5791
5792 // The match was only to check profitability; the changes made are not
5793 // part of the original matcher. Therefore, they should be dropped,
5794 // otherwise the original matcher will not be in the right state.
5795 TPT.rollback(LastKnownGood);
5796
5797 // If the match didn't cover I, then it won't be shared by it.
5798 if (!is_contained(MatchedAddrModeInsts, I))
5799 return false;
5800
5801 MatchedAddrModeInsts.clear();
5802 }
5803
5804 return true;
5805}
5806
5807/// Return true if the specified values are defined in a
5808/// different basic block than BB.
5809static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5810 if (Instruction *I = dyn_cast<Instruction>(V))
5811 return I->getParent() != BB;
5812 return false;
5813}
5814
5815// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5816// is the first instruction that will use Addr. So we need to find the first
5817 // user of Addr in the current BB.
5818static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5819 Value *SunkAddr) {
5820 if (Addr->hasOneUse())
5821 return MemoryInst->getIterator();
5822
5823 // We already have a SunkAddr in the current BB, but we may need to insert a
5824 // cast instruction after it.
5825 if (SunkAddr) {
5826 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5827 return std::next(AddrInst->getIterator());
5828 }
5829
5830 // Find the first user of Addr in current BB.
5831 Instruction *Earliest = MemoryInst;
5832 for (User *U : Addr->users()) {
5833 Instruction *UserInst = dyn_cast<Instruction>(U);
5834 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5835 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5836 continue;
5837 if (UserInst->comesBefore(Earliest))
5838 Earliest = UserInst;
5839 }
5840 }
5841 return Earliest->getIterator();
5842}
5843
5844/// Sink addressing mode computation immediate before MemoryInst if doing so
5845/// can be done without increasing register pressure. The need for the
5846/// register pressure constraint means this can end up being an all or nothing
5847/// decision for all uses of the same addressing computation.
5848///
5849/// Load and Store Instructions often have addressing modes that can do
5850/// significant amounts of computation. As such, instruction selection will try
5851/// to get the load or store to do as much computation as possible for the
5852/// program. The problem is that isel can only see within a single block. As
5853/// such, we sink as much legal addressing mode work into the block as possible.
5854///
5855/// This method is used to optimize both load/store and inline asms with memory
5856/// operands. It's also used to sink addressing computations feeding into cold
5857/// call sites into their (cold) basic block.
5858///
5859/// The motivation for handling sinking into cold blocks is that doing so can
5860/// both enable other address mode sinking (by satisfying the register pressure
5861/// constraint above), and reduce register pressure globally (by removing the
5862/// addressing mode computation from the fast path entirely.).
5863bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5864 Type *AccessTy, unsigned AddrSpace) {
5865 Value *Repl = Addr;
5866
5867 // Try to collapse single-value PHI nodes. This is necessary to undo
5868 // unprofitable PRE transformations.
5869 SmallVector<Value *, 8> worklist;
5870 SmallPtrSet<Value *, 16> Visited;
5871 worklist.push_back(Addr);
5872
5873 // Use a worklist to iteratively look through PHI and select nodes, and
5874 // ensure that the addressing mode obtained from the non-PHI/select roots of
5875 // the graph are compatible.
5876 bool PhiOrSelectSeen = false;
5877 SmallVector<Instruction *, 16> AddrModeInsts;
5878 AddressingModeCombiner AddrModes(*DL, Addr);
5879 TypePromotionTransaction TPT(RemovedInsts);
5880 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5881 TPT.getRestorationPoint();
5882 while (!worklist.empty()) {
5883 Value *V = worklist.pop_back_val();
5884
5885 // We allow traversing cyclic Phi nodes.
5886 // In case of success after this loop we ensure that traversing through
5887 // Phi nodes ends up with all cases computing an address of the form
5888 // BaseGV + Base + Scale * Index + Offset
5889 // where Scale and Offset are constants and BaseGV, Base and Index
5890 // are exactly the same Values in all cases.
5891 // That means BaseGV, Scale and Offset dominate our memory instruction
5892 // and have the same values as they had in the address computation
5893 // represented as a Phi, so we can safely sink it to the memory instruction.
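// E.g. (illustrative): if both predecessors compute
//   %a = getelementptr i8, ptr %base, i64 %idx
// with the same %base and %idx and feed a phi used as an address, every
// traversal yields identical Base/Index/Scale/Offset, so the computation
// can be rematerialized next to the memory instruction.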
5894 if (!Visited.insert(V).second)
5895 continue;
5896
5897 // For a PHI node, push all of its incoming values.
5898 if (PHINode *P = dyn_cast<PHINode>(V)) {
5899 append_range(worklist, P->incoming_values());
5900 PhiOrSelectSeen = true;
5901 continue;
5902 }
5903 // Similar for select.
5904 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5905 worklist.push_back(SI->getFalseValue());
5906 worklist.push_back(SI->getTrueValue());
5907 PhiOrSelectSeen = true;
5908 continue;
5909 }
5910
5911 // For non-PHIs, determine the addressing mode being computed. Note that
5912 // the result may differ depending on what other uses our candidate
5913 // addressing instructions might have.
5914 AddrModeInsts.clear();
5915 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5916 0);
5917 // Defer the query (and possible computation) of the dom tree to the point
5918 // of actual use. It's expected that most address matches don't actually
5919 // need the domtree.
5920 auto getDTFn = [this]() -> const DominatorTree & { return getDT(); };
5921 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5922 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5923 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5924 BFI);
5925
5926 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5927 if (GEP && !NewGEPBases.count(GEP)) {
5928 // If splitting the underlying data structure can reduce the offset of a
5929 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5930 // previously split data structures.
5931 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5932 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5933 }
5934
5935 NewAddrMode.OriginalValue = V;
5936 if (!AddrModes.addNewAddrMode(NewAddrMode))
5937 break;
5938 }
5939
5940 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5941 // or we have multiple but either couldn't combine them or combining them
5942 // wouldn't do anything useful, bail out now.
5943 if (!AddrModes.combineAddrModes()) {
5944 TPT.rollback(LastKnownGood);
5945 return false;
5946 }
5947 bool Modified = TPT.commit();
5948
5949 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5950 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5951
5952 // If all the instructions matched are already in this BB, don't do anything.
5953 // If we saw a Phi node then it is definitely not local, and if we saw a
5954 // select then we want to push the address calculation past it even if it's
5955 // already in this BB.
5956 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5957 return IsNonLocalValue(V, MemoryInst->getParent());
5958 })) {
5959 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5960 << "\n");
5961 return Modified;
5962 }
5963
5964 // Now that we've determined the addressing expression we want to use, and
5965 // know that we have to sink it into this block, check to see if we have
5966 // already done this for some other load/store instr in this block. If so,
5967 // reuse the computation. Before attempting reuse, check if the address is
5968 // still valid, as it may have been erased.
5969
5970 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5971
5972 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5973 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5974
5975 // The current BB may be optimized multiple times, so we can't guarantee
5976 // that the reuse of Addr happens later; call findInsertPos to find an
5977 // appropriate insert position.
5978 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5979
5980 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5981 if (!SunkAddr) {
5982 auto &DT = getDT();
5983 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5984 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5985 return Modified;
5986 }
5987
5988 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5989
5990 if (SunkAddr) {
5991 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5992 << " for " << *MemoryInst << "\n");
5993 if (SunkAddr->getType() != Addr->getType()) {
5994 if (SunkAddr->getType()->getPointerAddressSpace() !=
5995 Addr->getType()->getPointerAddressSpace() &&
5996 !DL->isNonIntegralPointerType(Addr->getType())) {
5997 // There are two reasons the address spaces might not match: a no-op
5998 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5999 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6000 // TODO: allow bitcast between different address space pointers with the
6001 // same size.
6002 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6003 SunkAddr =
6004 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6005 } else
6006 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6007 }
6008 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
6009 SubtargetInfo->addrSinkUsingGEPs())) {
6010 // By default, we use the GEP-based method when AA is used later. This
6011 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
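// E.g. (illustrative): the sunk address is rebuilt below as
//   %sunkaddr = getelementptr i8, ptr %base, i64 %off
// rather than as a ptrtoint/add/inttoptr sequence, which alias analysis
// could not look through.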
6012 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6013 << " for " << *MemoryInst << "\n");
6014 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
6015
6016 // First, find the pointer.
6017 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
6018 ResultPtr = AddrMode.BaseReg;
6019 AddrMode.BaseReg = nullptr;
6020 }
6021
6022 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
6023 // We can't add more than one pointer together, nor can we scale a
6024 // pointer (both of which seem meaningless).
6025 if (ResultPtr || AddrMode.Scale != 1)
6026 return Modified;
6027
6028 ResultPtr = AddrMode.ScaledReg;
6029 AddrMode.Scale = 0;
6030 }
6031
6032 // It is only safe to sign extend the BaseReg if we know that the math
6033 // required to create it did not overflow before we extend it. Since
6034 // the original IR value was tossed in favor of a constant back when
6035 // the AddrMode was created we need to bail out gracefully if widths
6036 // do not match instead of extending it.
6037 //
6038 // (See below for code to add the scale.)
6039 if (AddrMode.Scale) {
6040 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6041 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6042 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6043 return Modified;
6044 }
6045
6046 GlobalValue *BaseGV = AddrMode.BaseGV;
6047 if (BaseGV != nullptr) {
6048 if (ResultPtr)
6049 return Modified;
6050
6051 if (BaseGV->isThreadLocal()) {
6052 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6053 } else {
6054 ResultPtr = BaseGV;
6055 }
6056 }
6057
6058 // If the real base value actually came from an inttoptr, then the matcher
6059 // will look through it and provide only the integer value. In that case,
6060 // use it here.
6061 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6062 if (!ResultPtr && AddrMode.BaseReg) {
6063 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6064 "sunkaddr");
6065 AddrMode.BaseReg = nullptr;
6066 } else if (!ResultPtr && AddrMode.Scale == 1) {
6067 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6068 "sunkaddr");
6069 AddrMode.Scale = 0;
6070 }
6071 }
6072
6073 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6074 !AddrMode.BaseOffs) {
6075 SunkAddr = Constant::getNullValue(Addr->getType());
6076 } else if (!ResultPtr) {
6077 return Modified;
6078 } else {
6079 Type *I8PtrTy =
6080 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6081
6082 // Start with the base register. Do this first so that subsequent address
6083 // matching finds it last, which will prevent it from trying to match it
6084 // as the scaled value in case it happens to be a mul. That would be
6085 // problematic if we've sunk a different mul for the scale, because then
6086 // we'd end up sinking both muls.
6087 if (AddrMode.BaseReg) {
6088 Value *V = AddrMode.BaseReg;
6089 if (V->getType() != IntPtrTy)
6090 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6091
6092 ResultIndex = V;
6093 }
6094
6095 // Add the scale value.
6096 if (AddrMode.Scale) {
6097 Value *V = AddrMode.ScaledReg;
6098 if (V->getType() == IntPtrTy) {
6099 // done.
6100 } else {
6101 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6102 cast<IntegerType>(V->getType())->getBitWidth() &&
6103 "We can't transform if ScaledReg is too narrow");
6104 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6105 }
6106
6107 if (AddrMode.Scale != 1)
6108 V = Builder.CreateMul(
6109 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6110 if (ResultIndex)
6111 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6112 else
6113 ResultIndex = V;
6114 }
6115
6116 // Add in the Base Offset if present.
6117 if (AddrMode.BaseOffs) {
6118 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6119 if (ResultIndex) {
6120 // We need to add this separately from the scale above to help with
6121 // SDAG consecutive load/store merging.
6122 if (ResultPtr->getType() != I8PtrTy)
6123 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6124 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6125 AddrMode.InBounds);
6126 }
6127
6128 ResultIndex = V;
6129 }
6130
6131 if (!ResultIndex) {
6132 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6133 // We know that we have a pointer without any offsets. If this pointer
6134 // originates from a different basic block than the current one, we
6135 // must be able to recreate it in the current basic block.
6136 // We do not support the recreation of any instructions yet.
6137 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6138 return Modified;
6139 SunkAddr = ResultPtr;
6140 } else {
6141 if (ResultPtr->getType() != I8PtrTy)
6142 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6143 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6144 AddrMode.InBounds);
6145 }
6146
6147 if (SunkAddr->getType() != Addr->getType()) {
6148 if (SunkAddr->getType()->getPointerAddressSpace() !=
6149 Addr->getType()->getPointerAddressSpace() &&
6150 !DL->isNonIntegralPointerType(Addr->getType())) {
6151 // There are two reasons the address spaces might not match: a no-op
6152 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6153 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6154 // TODO: allow bitcast between different address space pointers with
6155 // the same size.
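// For example (an illustrative sketch; value names are hypothetical), if the
// sunk address has type ptr but Addr has type ptr addrspace(1), this emits:
//   %cast = ptrtoint ptr %sunkaddr to i64
//   %addr = inttoptr i64 %cast to ptr addrspace(1)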
6156 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6157 SunkAddr =
6158 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6159 } else
6160 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6161 }
6162 }
6163 } else {
6164 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6165 // non-integral pointers, so in that case bail out now.
6166 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6167 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6168 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6169 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6170 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6171 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6172 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6173 (AddrMode.BaseGV &&
6174 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6175 return Modified;
6176
6177 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6178 << " for " << *MemoryInst << "\n");
6179 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6180 Value *Result = nullptr;
6181
6182 // Start with the base register. Do this first so that subsequent address
6183 // matching finds it last, which will prevent it from trying to match it
6184 // as the scaled value in case it happens to be a mul. That would be
6185 // problematic if we've sunk a different mul for the scale, because then
6186 // we'd end up sinking both muls.
6187 if (AddrMode.BaseReg) {
6188 Value *V = AddrMode.BaseReg;
6189 if (V->getType()->isPointerTy())
6190 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6191 if (V->getType() != IntPtrTy)
6192 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6193 Result = V;
6194 }
6195
6196 // Add the scale value.
6197 if (AddrMode.Scale) {
6198 Value *V = AddrMode.ScaledReg;
6199 if (V->getType() == IntPtrTy) {
6200 // done.
6201 } else if (V->getType()->isPointerTy()) {
6202 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6203 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6204 cast<IntegerType>(V->getType())->getBitWidth()) {
6205 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6206 } else {
6207 // It is only safe to sign extend the BaseReg if we know that the math
6208 // required to create it did not overflow before we extend it. Since
6209 // the original IR value was tossed in favor of a constant back when
6210 // the AddrMode was created we need to bail out gracefully if widths
6211 // do not match instead of extending it.
6212 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6213 if (I && (Result != AddrMode.BaseReg))
6214 I->eraseFromParent();
6215 return Modified;
6216 }
6217 if (AddrMode.Scale != 1)
6218 V = Builder.CreateMul(
6219 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6220 if (Result)
6221 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6222 else
6223 Result = V;
6224 }
6225
6226 // Add in the BaseGV if present.
6227 GlobalValue *BaseGV = AddrMode.BaseGV;
6228 if (BaseGV != nullptr) {
6229 Value *BaseGVPtr;
6230 if (BaseGV->isThreadLocal()) {
6231 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6232 } else {
6233 BaseGVPtr = BaseGV;
6234 }
6235 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6236 if (Result)
6237 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6238 else
6239 Result = V;
6240 }
6241
6242 // Add in the Base Offset if present.
6243 if (AddrMode.BaseOffs) {
6244 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6245 if (Result)
6246 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6247 else
6248 Result = V;
6249 }
6250
6251 if (!Result)
6252 SunkAddr = Constant::getNullValue(Addr->getType());
6253 else
6254 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6255 }
6256
6257 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6258 // Store the newly computed address into the cache. In the case we reused a
6259 // value, this should be idempotent.
6260 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6261
6262 // If we have no uses, recursively delete the value and all dead instructions
6263 // using it.
6264 if (Repl->use_empty()) {
6265 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6266 RecursivelyDeleteTriviallyDeadInstructions(
6267 Repl, TLInfo, nullptr,
6268 [&](Value *V) { removeAllAssertingVHReferences(V); });
6269 });
6270 }
6271 ++NumMemoryInsts;
6272 return true;
6273}
6274
6275/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6276/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6277/// only handle a 2 operand GEP in the same basic block or a splat constant
6278/// vector. The 2 operands to the GEP must be a scalar pointer and a vector
6279/// index.
6280///
6281/// If the existing GEP has a vector base pointer that is splat, we can look
6282/// through the splat to find the scalar pointer. If we can't find a scalar
6283/// pointer there's nothing we can do.
6284///
6285/// If we have a GEP with more than 2 indices where the middle indices are all
6286/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6287///
6288/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6289/// followed by a GEP with an all zeroes vector index. This will enable
6290/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6291/// zero index.
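/// For example (an illustrative sketch; value names are hypothetical), a GEP
/// with an all-zero middle index such as
///   %ptrs = getelementptr [4 x i32], ptr %base, i64 0, <4 x i64> %idx
/// can be rewritten as a scalar GEP feeding a 2-operand vector GEP:
///   %scalar = getelementptr [4 x i32], ptr %base, i64 0, i64 0
///   %ptrs = getelementptr i32, ptr %scalar, <4 x i64> %idx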
6292bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6293 Value *Ptr) {
6294 Value *NewAddr;
6295
6296 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6297 // Don't optimize GEPs that don't have indices.
6298 if (!GEP->hasIndices())
6299 return false;
6300
6301 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6302 // FIXME: We should support this by sinking the GEP.
6303 if (MemoryInst->getParent() != GEP->getParent())
6304 return false;
6305
6306 SmallVector<Value *, 2> Ops(GEP->operands());
6307
6308 bool RewriteGEP = false;
6309
6310 if (Ops[0]->getType()->isVectorTy()) {
6311 Ops[0] = getSplatValue(Ops[0]);
6312 if (!Ops[0])
6313 return false;
6314 RewriteGEP = true;
6315 }
6316
6317 unsigned FinalIndex = Ops.size() - 1;
6318
6319 // Ensure all indices but the last are 0.
6320 // FIXME: This isn't strictly required. All that's required is that they are
6321 // all scalars or splats.
6322 for (unsigned i = 1; i < FinalIndex; ++i) {
6323 auto *C = dyn_cast<Constant>(Ops[i]);
6324 if (!C)
6325 return false;
6326 if (isa<VectorType>(C->getType()))
6327 C = C->getSplatValue();
6328 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6329 if (!CI || !CI->isZero())
6330 return false;
6331 // Scalarize the index if needed.
6332 Ops[i] = CI;
6333 }
6334
6335 // Try to scalarize the final index.
6336 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6337 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6338 auto *C = dyn_cast<ConstantInt>(V);
6339 // Don't scalarize an all-zeros vector.
6340 if (!C || !C->isZero()) {
6341 Ops[FinalIndex] = V;
6342 RewriteGEP = true;
6343 }
6344 }
6345 }
6346
6347 // If we made any changes or we have extra operands, we need to generate
6348 // new instructions.
6349 if (!RewriteGEP && Ops.size() == 2)
6350 return false;
6351
6352 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6353
6354 IRBuilder<> Builder(MemoryInst);
6355
6356 Type *SourceTy = GEP->getSourceElementType();
6357 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6358
6359 // If the final index isn't a vector, emit a scalar GEP containing all ops
6360 // and a vector GEP with an all-zeroes final index.
6361 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6362 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6363 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6364 auto *SecondTy = GetElementPtrInst::getIndexedType(
6365 SourceTy, ArrayRef(Ops).drop_front());
6366 NewAddr =
6367 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6368 } else {
6369 Value *Base = Ops[0];
6370 Value *Index = Ops[FinalIndex];
6371
6372 // Create a scalar GEP if there are more than 2 operands.
6373 if (Ops.size() != 2) {
6374 // Replace the last index with 0.
6375 Ops[FinalIndex] =
6376 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6377 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6378 SourceTy = GetElementPtrInst::getIndexedType(
6379 SourceTy, ArrayRef(Ops).drop_front());
6380 }
6381
6382 // Now create the GEP with scalar pointer and vector index.
6383 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6384 }
6385 } else if (!isa<Constant>(Ptr)) {
6386 // Not a GEP; maybe it's a splat and we can create a GEP to enable
6387 // SelectionDAGBuilder to use it as a uniform base.
6388 Value *V = getSplatValue(Ptr);
6389 if (!V)
6390 return false;
6391
6392 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6393
6394 IRBuilder<> Builder(MemoryInst);
6395
6396 // Emit a vector GEP with a scalar pointer and an all-0s vector index.
6397 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6398 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6399 Type *ScalarTy;
6400 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6401 Intrinsic::masked_gather) {
6402 ScalarTy = MemoryInst->getType()->getScalarType();
6403 } else {
6404 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6405 Intrinsic::masked_scatter);
6406 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6407 }
6408 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6409 } else {
6410 // Constant; SelectionDAGBuilder knows to check if it's a splat.
6411 return false;
6412 }
6413
6414 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6415
6416 // If we have no uses, recursively delete the value and all dead instructions
6417 // using it.
6418 if (Ptr->use_empty())
6419 RecursivelyDeleteTriviallyDeadInstructions(
6420 Ptr, TLInfo, nullptr,
6421 [&](Value *V) { removeAllAssertingVHReferences(V); });
6422
6423 return true;
6424}
6425
6426// This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
6427// Check for the pattern we are interested in, where the intrinsic has at most
6428// 2 uses, both of which are extractvalue instructions.
6429static bool matchOverflowPattern(Instruction *I, ExtractValueInst *&MulExtract,
6430 ExtractValueInst *&OverflowExtract) {
6431 // Bail out if there are more than 2 users:
6432 if (I->hasNUsesOrMore(3))
6433 return false;
6434
6435 for (User *U : I->users()) {
6436 auto *Extract = dyn_cast<ExtractValueInst>(U);
6437 if (!Extract || Extract->getNumIndices() != 1)
6438 return false;
6439
6440 unsigned Index = Extract->getIndices()[0];
6441 if (Index == 0)
6442 MulExtract = Extract;
6443 else if (Index == 1)
6444 OverflowExtract = Extract;
6445 else
6446 return false;
6447 }
6448 return true;
6449}
6450
6451// Rewrite the mul_with_overflow intrinsic by checking whether both operands'
6452// value ranges fit within the legal type. If so, we can optimize the
6453// multiplication algorithm. This rewrite conceptually belongs in type
6454// legalization, but since it needs to reconstruct the IR, which is not doable
6455// there, we do it here.
6456// The IR after the optimization will look like:
6457// entry:
6458// if signed:
6459// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
6460// overflow_no
6461// else:
6462// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
6463// overflow_no:
6464// overflow:
6465// overflow.res:
6466// \returns true if optimization was applied
6467// TODO: This optimization can be further improved to optimize branching on
6468// overflow where the 'overflow_no' BB can branch directly to the false
6469// successor of overflow, but that would add additional complexity so we leave
6470// it for future work.
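// For example (an illustrative sketch of the unsigned i128 case on a target
// whose legal type is i64; value names follow the builder calls below but are
// otherwise hypothetical):
//   %lo.lhs = trunc i128 %a to i64
//   %lhs.lsr = lshr i128 %a, 64
//   %hi.lhs = trunc i128 %lhs.lsr to i64
//   ; ... the same for %b ...
//   %cmp.lhs = icmp ne i64 %hi.lhs, 0
//   %cmp.rhs = icmp ne i64 %hi.rhs, 0
//   %or.lhs.rhs = or i1 %cmp.lhs, %cmp.rhs
//   br i1 %or.lhs.rhs, label %overflow, label %overflow.no
// In overflow.no, the product is a plain mul of the zero-extended low halves
// and the overflow flag is known false; in overflow, the original intrinsic
// is kept, and both paths merge in overflow.res through PHI nodes.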
6471bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
6472 ModifyDT &ModifiedDT) {
6473 // Check if target supports this optimization.
6475 I->getContext(),
6476 TLI->getValueType(*DL, I->getType()->getContainedType(0))))
6477 return false;
6478
6479 ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
6480 if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
6481 return false;
6482
6483 // Keep track of the instruction so we do not optimize it again.
6484 InsertedInsts.insert(I);
6485
6486 Value *LHS = I->getOperand(0);
6487 Value *RHS = I->getOperand(1);
6488 Type *Ty = LHS->getType();
6489 unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
6490 Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
6491
6492 // New BBs:
6493 BasicBlock *OverflowEntryBB =
6494 splitBlockBefore(I->getParent(), I, DTU, LI, nullptr, "");
6495 OverflowEntryBB->takeName(I->getParent());
6496 // Keep the 'br' instruction that is generated as a result of the split to be
6497 // erased/replaced later.
6498 Instruction *OldTerminator = OverflowEntryBB->getTerminator();
6499 BasicBlock *NoOverflowBB =
6500 BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
6501 NoOverflowBB->moveAfter(OverflowEntryBB);
6502 BasicBlock *OverflowBB =
6503 BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
6504 OverflowBB->moveAfter(NoOverflowBB);
6505
6506 // BB overflow.entry:
6507 IRBuilder<> Builder(OverflowEntryBB);
6508 // Extract low and high halves of LHS:
6509 Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
6510 Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
6511 HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
6512
6513 // Extract low and high halves of RHS:
6514 Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
6515 Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
6516 HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
6517
6518 Value *IsAnyBitTrue;
6519 if (IsSigned) {
6520 Value *SignLoLHS =
6521 Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
6522 Value *SignLoRHS =
6523 Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
6524 Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
6525 Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
6526 Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
6527 IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
6528 ConstantInt::getNullValue(Or->getType()));
6529 } else {
6530 Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
6531 ConstantInt::getNullValue(LegalTy));
6532 Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
6533 ConstantInt::getNullValue(LegalTy));
6534 IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
6535 }
6536 Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
6537
6538 // BB overflow.no:
6539 Builder.SetInsertPoint(NoOverflowBB);
6540 Value *ExtLoLHS, *ExtLoRHS;
6541 if (IsSigned) {
6542 ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
6543 ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
6544 } else {
6545 ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
6546 ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
6547 }
6548
6549 Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
6550
6551 // Create the 'overflow.res' BB to merge the results of
6552 // the two paths:
6553 BasicBlock *OverflowResBB = I->getParent();
6554 OverflowResBB->setName("overflow.res");
6555
6556 // BB overflow.no: jump to overflow.res BB
6557 Builder.CreateBr(OverflowResBB);
6558 // Now we don't need the old terminator in the overflow.entry BB; erase it:
6559 OldTerminator->eraseFromParent();
6560
6561 // BB overflow.res:
6562 Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6563 // Create PHI nodes to merge the results from the overflow.no and overflow
6564 // BBs, replacing the extract instructions.
6565 PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
6566 *OverflowFlagPHI =
6567 Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6568
6569 // Add the incoming values from the overflow.no BB and, later, from the overflow BB.
6570 OverflowResPHI->addIncoming(Mul, NoOverflowBB);
6571 OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
6572 NoOverflowBB);
6573
6574 // Replace all uses of MulExtract and OverflowExtract with the PHI nodes.
6575 if (MulExtract) {
6576 MulExtract->replaceAllUsesWith(OverflowResPHI);
6577 MulExtract->eraseFromParent();
6578 }
6579 if (OverflowExtract) {
6580 OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
6581 OverflowExtract->eraseFromParent();
6582 }
6583
6584 // Remove the intrinsic from its parent (the overflow.res BB) as it will
6585 // become part of the overflow BB.
6586 I->removeFromParent();
6587 // BB overflow:
6588 I->insertInto(OverflowBB, OverflowBB->end());
6589 Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6590 Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6591 Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
6592 Builder.CreateBr(OverflowResBB);
6593
6594 // Add the extracted values to the PHI nodes in the overflow.res BB.
6595 OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
6596 OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
6597
6598 DTU->applyUpdates({{DominatorTree::Insert, OverflowEntryBB, OverflowBB},
6599 {DominatorTree::Insert, OverflowEntryBB, NoOverflowBB},
6600 {DominatorTree::Insert, NoOverflowBB, OverflowResBB},
6601 {DominatorTree::Delete, OverflowEntryBB, OverflowResBB},
6602 {DominatorTree::Insert, OverflowBB, OverflowResBB}});
6603
6604 ModifiedDT = ModifyDT::ModifyBBDT;
6605 return true;
6606}
6607
6608/// If there are any memory operands, use OptimizeMemoryInst to sink their
6609/// address computation into the block when possible / profitable.
6610bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6611 bool MadeChange = false;
6612
6613 const TargetRegisterInfo *TRI =
6614 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6615 TargetLowering::AsmOperandInfoVector TargetConstraints =
6616 TLI->ParseConstraints(*DL, TRI, *CS);
6617 unsigned ArgNo = 0;
6618 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6619 // Compute the constraint code and ConstraintType to use.
6620 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6621
6622 // TODO: Also handle C_Address?
6623 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6624 OpInfo.isIndirect) {
6625 Value *OpVal = CS->getArgOperand(ArgNo++);
6626 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6627 } else if (OpInfo.Type == InlineAsm::isInput)
6628 ArgNo++;
6629 }
6630
6631 return MadeChange;
6632}
6633
6634/// Check if all the uses of \p Val are equivalent (or free) zero or
6635/// sign extensions.
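/// For example (an illustrative sketch; value names are hypothetical), with
///   %e1 = zext i8 %val to i32
///   %e2 = zext i8 %val to i64
/// both uses are zero extensions, and if extending from i32 to i64 is free on
/// the target, the larger use can be derived from the smaller one at no cost.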
6636static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6637 assert(!Val->use_empty() && "Input must have at least one use");
6638 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6639 bool IsSExt = isa<SExtInst>(FirstUser);
6640 Type *ExtTy = FirstUser->getType();
6641 for (const User *U : Val->users()) {
6642 const Instruction *UI = cast<Instruction>(U);
6643 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6644 return false;
6645 Type *CurTy = UI->getType();
6646 // Same input and output types: Same instruction after CSE.
6647 if (CurTy == ExtTy)
6648 continue;
6649
6650 // If IsSExt is true, we are in this situation:
6651 // a = Val
6652 // b = sext ty1 a to ty2
6653 // c = sext ty1 a to ty3
6654 // Assuming ty2 is shorter than ty3, this could be turned into:
6655 // a = Val
6656 // b = sext ty1 a to ty2
6657 // c = sext ty2 b to ty3
6658 // However, the last sext is not free.
6659 if (IsSExt)
6660 return false;
6661
6662 // This is a ZExt, maybe this is free to extend from one type to another.
6663 // In that case, we would not account for a different use.
6664 Type *NarrowTy;
6665 Type *LargeTy;
6666 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6667 CurTy->getScalarType()->getIntegerBitWidth()) {
6668 NarrowTy = CurTy;
6669 LargeTy = ExtTy;
6670 } else {
6671 NarrowTy = ExtTy;
6672 LargeTy = CurTy;
6673 }
6674
6675 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6676 return false;
6677 }
6678 // All uses are the same or can be derived from one another for free.
6679 return true;
6680}
6681
6682/// Try to speculatively promote extensions in \p Exts and continue
6683/// promoting through newly promoted operands recursively as far as doing so is
6684/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6685/// When some promotion happened, \p TPT contains the proper state to revert
6686/// them.
6687///
6688/// \return true if some promotion happened, false otherwise.
6689bool CodeGenPrepare::tryToPromoteExts(
6690 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6691 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6692 unsigned CreatedInstsCost) {
6693 bool Promoted = false;
6694
6695 // Iterate over all the extensions to try to promote them.
6696 for (auto *I : Exts) {
6697 // Early check if we directly have ext(load).
6698 if (isa<LoadInst>(I->getOperand(0))) {
6699 ProfitablyMovedExts.push_back(I);
6700 continue;
6701 }
6702
6703 // Check whether or not we want to do any promotion. The reason we have
6704 // this check inside the for loop is to catch the case where an extension
6705 // is directly fed by a load, because in such a case the extension can be
6706 // moved up without any promotion on its operands.
6707 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6708 return false;
6709
6710 // Get the action to perform the promotion.
6711 TypePromotionHelper::Action TPH =
6712 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6713 // Check if we can promote.
6714 if (!TPH) {
6715 // Save the current extension as we cannot move up through its operand.
6716 ProfitablyMovedExts.push_back(I);
6717 continue;
6718 }
6719
6720 // Save the current state.
6721 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6722 TPT.getRestorationPoint();
6723 SmallVector<Instruction *, 4> NewExts;
6724 unsigned NewCreatedInstsCost = 0;
6725 unsigned ExtCost = !TLI->isExtFree(I);
6726 // Promote.
6727 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6728 &NewExts, nullptr, *TLI);
6729 assert(PromotedVal &&
6730 "TypePromotionHelper should have filtered out those cases");
6731
6732 // We would be able to merge only one extension into a load.
6733 // Therefore, if we have more than 1 new extension we heuristically
6734 // cut this search path, because it means we degrade the code quality.
6735 // With exactly 2, the transformation is neutral, because we will merge
6736 // one extension but leave one. However, we optimistically keep going,
6737 // because the new extension may be removed too. Also avoid replacing a
6738 // single free extension with multiple extensions, as this increases the
6739 // number of IR instructions while not providing any savings.
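// For example (an illustrative reading of this heuristic): if one promotion
// step creates three new extensions (NewCreatedInstsCost == 3) while removing
// a single non-free extension (ExtCost == 1), the running total below is 2,
// which exceeds 1, so the path is rolled back.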
6740 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6741 // FIXME: It would be possible to propagate a negative value instead of
6742 // conservatively ceiling it to 0.
6743 TotalCreatedInstsCost =
6744 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6745 if (!StressExtLdPromotion &&
6746 (TotalCreatedInstsCost > 1 ||
6747 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6748 (ExtCost == 0 && NewExts.size() > 1))) {
6749 // This promotion is not profitable, rollback to the previous state, and
6750 // save the current extension in ProfitablyMovedExts as the latest
6751 // speculative promotion turned out to be unprofitable.
6752 TPT.rollback(LastKnownGood);
6753 ProfitablyMovedExts.push_back(I);
6754 continue;
6755 }
6756 // Continue promoting NewExts as far as doing so is profitable.
6757 SmallVector<Instruction *, 2> NewlyMovedExts;
6758 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6759 bool NewPromoted = false;
6760 for (auto *ExtInst : NewlyMovedExts) {
6761 Instruction *MovedExt = cast<Instruction>(ExtInst);
6762 Value *ExtOperand = MovedExt->getOperand(0);
6763 // If we have reached a load, we need this extra profitability check
6764 // as it could potentially be merged into an ext(load).
6765 if (isa<LoadInst>(ExtOperand) &&
6766 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6767 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6768 continue;
6769
6770 ProfitablyMovedExts.push_back(MovedExt);
6771 NewPromoted = true;
6772 }
6773
6774 // If none of the speculative promotions for NewExts is profitable, roll back
6775 // and save the current extension (I) as the last profitable extension.
6776 if (!NewPromoted) {
6777 TPT.rollback(LastKnownGood);
6778 ProfitablyMovedExts.push_back(I);
6779 continue;
6780 }
6781 // The promotion is profitable.
6782 Promoted = true;
6783 }
6784 return Promoted;
6785}
6786
6787/// Merge redundant sexts when one dominates the other.
6788bool CodeGenPrepare::mergeSExts(Function &F) {
6789 bool Changed = false;
6790 for (auto &Entry : ValToSExtendedUses) {
6791 SExts &Insts = Entry.second;
6792 SExts CurPts;
6793 for (Instruction *Inst : Insts) {
6794 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6795 Inst->getOperand(0) != Entry.first)
6796 continue;
6797 bool inserted = false;
6798 for (auto &Pt : CurPts) {
6799 if (getDT().dominates(Inst, Pt)) {
6800 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6801 RemovedInsts.insert(Pt);
6802 Pt->removeFromParent();
6803 Pt = Inst;
6804 inserted = true;
6805 Changed = true;
6806 break;
6807 }
6808 if (!getDT().dominates(Pt, Inst))
6809 // Give up if we need to merge in a common dominator as the
6810 // experiments show it is not profitable.
6811 continue;
6812 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6813 RemovedInsts.insert(Inst);
6814 Inst->removeFromParent();
6815 inserted = true;
6816 Changed = true;
6817 break;
6818 }
6819 if (!inserted)
6820 CurPts.push_back(Inst);
6821 }
6822 }
6823 return Changed;
6824}
6825
6826// Split large data structures so that the GEPs accessing them can have
6827// smaller offsets, allowing them to be sunk to the same blocks as their users.
6828// For example, a large struct starting from %base is split into two parts
6829// where the second part starts from %new_base.
6830//
6831// Before:
6832// BB0:
6833// %base =
6834//
6835// BB1:
6836// %gep0 = gep %base, off0
6837// %gep1 = gep %base, off1
6838// %gep2 = gep %base, off2
6839//
6840// BB2:
6841// %load1 = load %gep0
6842// %load2 = load %gep1
6843// %load3 = load %gep2
6844//
6845// After:
6846// BB0:
6847// %base =
6848// %new_base = gep %base, off0
6849//
6850// BB1:
6851// %new_gep0 = %new_base
6852// %new_gep1 = gep %new_base, off1 - off0
6853// %new_gep2 = gep %new_base, off2 - off0
6854//
6855// BB2:
6856// %load1 = load i32, i32* %new_gep0
6857// %load2 = load i32, i32* %new_gep1
6858// %load3 = load i32, i32* %new_gep2
6859//
6860// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6861// their offsets are small enough to fit into the addressing mode.
6862bool CodeGenPrepare::splitLargeGEPOffsets() {
6863 bool Changed = false;
6864 for (auto &Entry : LargeOffsetGEPMap) {
6865 Value *OldBase = Entry.first;
6866 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6867 &LargeOffsetGEPs = Entry.second;
6868 auto compareGEPOffset =
6869 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6870 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6871 if (LHS.first == RHS.first)
6872 return false;
6873 if (LHS.second != RHS.second)
6874 return LHS.second < RHS.second;
6875 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6876 };
6877 // Sorting all the GEPs of the same data structures based on the offsets.
6878 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6879 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6880 // Skip if all the GEPs have the same offsets.
6881 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6882 continue;
6883 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6884 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6885 Value *NewBaseGEP = nullptr;
6886
6887 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6888 GetElementPtrInst *GEP) {
6889 LLVMContext &Ctx = GEP->getContext();
6890 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6891 Type *I8PtrTy =
6892 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6893
6894 BasicBlock::iterator NewBaseInsertPt;
6895 BasicBlock *NewBaseInsertBB;
6896 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6897 // If the base of the struct is an instruction, the new base will be
6898 // inserted close to it.
6899 NewBaseInsertBB = BaseI->getParent();
6900 if (isa<PHINode>(BaseI))
6901 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6902 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6903 NewBaseInsertBB =
6904 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), &getDT(), LI);
6905 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6906 } else
6907 NewBaseInsertPt = std::next(BaseI->getIterator());
6908 } else {
6909 // If the current base is an argument or global value, the new base
6910 // will be inserted into the entry block.
6911 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6912 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6913 }
6914 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6915 // Create a new base.
6916 // TODO: Avoid implicit trunc?
6917 // See https://github.com/llvm/llvm-project/issues/112510.
6918 Value *BaseIndex =
6919 ConstantInt::getSigned(PtrIdxTy, BaseOffset, /*ImplicitTrunc=*/true);
6920 NewBaseGEP = OldBase;
6921 if (NewBaseGEP->getType() != I8PtrTy)
6922 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6923 NewBaseGEP =
6924 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6925 NewGEPBases.insert(NewBaseGEP);
6926 return;
6927 };
6928
6929 // Check whether all the offsets can be encoded with the preferred common base.
6930 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6931 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6932 BaseOffset = PreferBase;
6933 // Create a new base if the offset of the BaseGEP can be encoded with one
6934 // instruction.
6935 createNewBase(BaseOffset, OldBase, BaseGEP);
6936 }
6937
6938 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6939 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6940 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6941 int64_t Offset = LargeOffsetGEP->second;
6942 if (Offset != BaseOffset) {
6943 TargetLowering::AddrMode AddrMode;
6944 AddrMode.HasBaseReg = true;
6945 AddrMode.BaseOffs = Offset - BaseOffset;
6946 // The result type of the GEP might not be the type of the memory
6947 // access.
6948 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6949 GEP->getResultElementType(),
6950 GEP->getAddressSpace())) {
6951 // We need to create a new base if the offset to the current base is
6952 // too large to fit into the addressing mode. So, a very large struct
6953 // may be split into several parts.
6954 BaseGEP = GEP;
6955 BaseOffset = Offset;
6956 NewBaseGEP = nullptr;
6957 }
6958 }
6959
6960 // Generate a new GEP to replace the current one.
6961 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6962
6963 if (!NewBaseGEP) {
6964 // Create a new base if we don't have one yet. Find the insertion
6965 // point for the new base first.
6966 createNewBase(BaseOffset, OldBase, GEP);
6967 }
6968
6969 IRBuilder<> Builder(GEP);
6970 Value *NewGEP = NewBaseGEP;
6971 if (Offset != BaseOffset) {
6972 // Calculate the new offset for the new GEP.
6973 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6974 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6975 }
6976 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6977 LargeOffsetGEPID.erase(GEP);
6978 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6979 GEP->eraseFromParent();
6980 Changed = true;
6981 }
6982 }
6983 return Changed;
6984}
6985
6986bool CodeGenPrepare::optimizePhiType(
6987 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6988 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6989 // We are looking for a collection of interconnected phi nodes that together
6990 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6991 // are of the same type. Convert the whole set of nodes to the type of the
6992 // bitcast.
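// For example (an illustrative sketch; value names are hypothetical):
//   %bc1 = bitcast i32 %a to float
//   %bc2 = bitcast i32 %b to float
//   %phi = phi float [ %bc1, %bb1 ], [ %bc2, %bb2 ]
//   %res = bitcast float %phi to i32
// Here ConvertTy is i32, so the float phi can be rewritten as
//   %phi = phi i32 [ %a, %bb1 ], [ %b, %bb2 ]
// and all three bitcasts can be removed.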
6993 Type *PhiTy = I->getType();
6994 Type *ConvertTy = nullptr;
6995 if (Visited.count(I) ||
6996 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6997 return false;
6998
6999 SmallVector<Instruction *, 4> Worklist;
7000 Worklist.push_back(cast<Instruction>(I));
7001 SmallPtrSet<PHINode *, 4> PhiNodes;
7002 SmallPtrSet<ConstantData *, 4> Constants;
7003 PhiNodes.insert(I);
7004 Visited.insert(I);
7005 SmallPtrSet<Instruction *, 4> Defs;
7006 SmallPtrSet<Instruction *, 4> Uses;
7007 // This works by adding extra bitcasts between load/stores and removing
7008 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
7009 // we can get in the situation where we remove a bitcast in one iteration
7010 // just to add it again in the next. We need to ensure that at least one
7011 // bitcast we remove is anchored to something that will not change back.
7012 bool AnyAnchored = false;
7013
7014 while (!Worklist.empty()) {
7015 Instruction *II = Worklist.pop_back_val();
7016
7017 if (auto *Phi = dyn_cast<PHINode>(II)) {
7018 // Handle Defs, which might also be PHIs.
7019 for (Value *V : Phi->incoming_values()) {
7020 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7021 if (!PhiNodes.count(OpPhi)) {
7022 if (!Visited.insert(OpPhi).second)
7023 return false;
7024 PhiNodes.insert(OpPhi);
7025 Worklist.push_back(OpPhi);
7026 }
7027 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
7028 if (!OpLoad->isSimple())
7029 return false;
7030 if (Defs.insert(OpLoad).second)
7031 Worklist.push_back(OpLoad);
7032 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
7033 if (Defs.insert(OpEx).second)
7034 Worklist.push_back(OpEx);
7035 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7036 if (!ConvertTy)
7037 ConvertTy = OpBC->getOperand(0)->getType();
7038 if (OpBC->getOperand(0)->getType() != ConvertTy)
7039 return false;
7040 if (Defs.insert(OpBC).second) {
7041 Worklist.push_back(OpBC);
7042 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
7043 !isa<ExtractElementInst>(OpBC->getOperand(0));
7044 }
7045 } else if (auto *OpC = dyn_cast<ConstantData>(V))
7046 Constants.insert(OpC);
7047 else
7048 return false;
7049 }
7050 }
7051
7052 // Handle uses, which might also be phis.
7053 for (User *V : II->users()) {
7054 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7055 if (!PhiNodes.count(OpPhi)) {
7056 if (Visited.count(OpPhi))
7057 return false;
7058 PhiNodes.insert(OpPhi);
7059 Visited.insert(OpPhi);
7060 Worklist.push_back(OpPhi);
7061 }
7062 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
7063 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
7064 return false;
7065 Uses.insert(OpStore);
7066 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7067 if (!ConvertTy)
7068 ConvertTy = OpBC->getType();
7069 if (OpBC->getType() != ConvertTy)
7070 return false;
7071 Uses.insert(OpBC);
7072 AnyAnchored |=
7073 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
7074 } else {
7075 return false;
7076 }
7077 }
7078 }
7079
7080 if (!ConvertTy || !AnyAnchored || PhiTy == ConvertTy ||
7081 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
7082 return false;
7083
7084 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
7085 << *ConvertTy << "\n");
7086
7087 // Create all the new phi nodes of the new type, and bitcast any loads to the
7088 // correct type.
7089 ValueToValueMap ValMap;
7090 for (ConstantData *C : Constants)
7091 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
7092 for (Instruction *D : Defs) {
7093 if (isa<BitCastInst>(D)) {
7094 ValMap[D] = D->getOperand(0);
7095 DeletedInstrs.insert(D);
7096 } else {
7097 BasicBlock::iterator insertPt = std::next(D->getIterator());
7098 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
7099 }
7100 }
7101 for (PHINode *Phi : PhiNodes)
7102 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
7103 Phi->getName() + ".tc", Phi->getIterator());
7104 // Pipe together all the PhiNodes.
7105 for (PHINode *Phi : PhiNodes) {
7106 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
7107 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
7108 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
7109 Phi->getIncomingBlock(i));
7110 Visited.insert(NewPhi);
7111 }
7112 // And finally pipe up the stores and bitcasts
7113 for (Instruction *U : Uses) {
7114 if (isa<BitCastInst>(U)) {
7115 DeletedInstrs.insert(U);
7116 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
7117 } else {
7118 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
7119 U->getIterator()));
7120 }
7121 }
7122
7123 // Save the removed phis to be deleted later.
7124 DeletedInstrs.insert_range(PhiNodes);
7125 return true;
7126}
7127
7128bool CodeGenPrepare::optimizePhiTypes(Function &F) {
7129 if (!OptimizePhiTypes)
7130 return false;
7131
7132 bool Changed = false;
7133 SmallPtrSet<PHINode *, 4> Visited;
7134 SmallPtrSet<Instruction *, 4> DeletedInstrs;
7135
7136 // Attempt to optimize all the phis in the function to the correct type.
7137 for (auto &BB : F)
7138 for (auto &Phi : BB.phis())
7139 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
7140
7141 // Remove any old phis that have been converted.
7142 for (auto *I : DeletedInstrs) {
7143 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
7144 I->eraseFromParent();
7145 }
7146
7147 return Changed;
7148}
7149
7150/// Return true if an ext(load) can be formed from an extension in
7151/// \p MovedExts.
7152bool CodeGenPrepare::canFormExtLd(
7153 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
7154 Instruction *&Inst, bool HasPromoted) {
7155 for (auto *MovedExtInst : MovedExts) {
7156 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
7157 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
7158 Inst = MovedExtInst;
7159 break;
7160 }
7161 }
7162 if (!LI)
7163 return false;
7164
7165 // If they're already in the same block, there's nothing to do.
7166 // Make the cheap checks first if we did not promote.
7167 // If we promoted, we need to check if it is indeed profitable.
7168 if (!HasPromoted && LI->getParent() == Inst->getParent())
7169 return false;
7170
7171 return TLI->isExtLoad(LI, Inst, *DL);
7172}
7173
7174/// Move a zext or sext fed by a load into the same basic block as the load,
7175/// unless conditions are unfavorable. This allows SelectionDAG to fold the
7176/// extend into the load.
7177///
7178/// E.g.,
7179/// \code
7180/// %ld = load i32* %addr
7181/// %add = add nuw i32 %ld, 4
7182/// %zext = zext i32 %add to i64
7183/// \endcode
7184/// =>
7185/// \code
7186/// %ld = load i32* %addr
7187/// %zext = zext i32 %ld to i64
7188/// %add = add nuw i64 %zext, 4
7189/// \endcode
7190/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
7191/// allows us to match zext(load i32*) to i64.
7192///
7193/// Also, try to promote the computations used to obtain a sign extended
7194/// value used in memory accesses.
7195/// E.g.,
7196/// \code
7197/// a = add nsw i32 b, 3
7198/// d = sext i32 a to i64
7199/// e = getelementptr ..., i64 d
7200/// \endcode
7201/// =>
7202/// \code
7203/// f = sext i32 b to i64
7204/// a = add nsw i64 f, 3
7205/// e = getelementptr ..., i64 a
7206/// \endcode
7207///
7208/// \p Inst[in/out] the extension may be modified during the process if some
7209/// promotions apply.
7210bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7211 bool AllowPromotionWithoutCommonHeader = false;
7212 /// See if this is an interesting sext operation for address type promotion
7213 /// before trying to promote it, e.g., one with the right type that is used
7214 /// in memory accesses.
7215 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7216 *Inst, AllowPromotionWithoutCommonHeader);
7217 TypePromotionTransaction TPT(RemovedInsts);
7218 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7219 TPT.getRestorationPoint();
7220 SmallVector<Instruction *, 1> Exts;
7221 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7222 Exts.push_back(Inst);
7223
7224 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7225
7226 // Look for a load being extended.
7227 LoadInst *LI = nullptr;
7228 Instruction *ExtFedByLoad;
7229
7230 // Try to promote a chain of computation if it allows forming an extended
7231 // load.
7232 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7233 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7234 TPT.commit();
7235 // Move the extend into the same block as the load.
7236 ExtFedByLoad->moveAfter(LI);
7237 ++NumExtsMoved;
7238 Inst = ExtFedByLoad;
7239 return true;
7240 }
7241
7242 // Continue promoting SExts if the target considers doing so worthwhile.
7243 if (ATPConsiderable &&
7244 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7245 HasPromoted, TPT, SpeculativelyMovedExts))
7246 return true;
7247
7248 TPT.rollback(LastKnownGood);
7249 return false;
7250}
7251
7252// Perform address type promotion if doing so is profitable.
7253// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7254// instructions that sign extended the same initial value. However, if
7255 // AllowPromotionWithoutCommonHeader == true, we assume that promoting the
7256 // extension is profitable by itself.
7257bool CodeGenPrepare::performAddressTypePromotion(
7258 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7259 bool HasPromoted, TypePromotionTransaction &TPT,
7260 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7261 bool Promoted = false;
7262 SmallPtrSet<Instruction *, 1> UnhandledExts;
7263 bool AllSeenFirst = true;
7264 for (auto *I : SpeculativelyMovedExts) {
7265 Value *HeadOfChain = I->getOperand(0);
7266 auto AlreadySeen = SeenChainsForSExt.find(HeadOfChain);
7267 // If there is an unhandled SExt which has the same header, try to promote
7268 // it as well.
7269 if (AlreadySeen != SeenChainsForSExt.end()) {
7270 if (AlreadySeen->second != nullptr)
7271 UnhandledExts.insert(AlreadySeen->second);
7272 AllSeenFirst = false;
7273 }
7274 }
7275
7276 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7277 SpeculativelyMovedExts.size() == 1)) {
7278 TPT.commit();
7279 if (HasPromoted)
7280 Promoted = true;
7281 for (auto *I : SpeculativelyMovedExts) {
7282 Value *HeadOfChain = I->getOperand(0);
7283 SeenChainsForSExt[HeadOfChain] = nullptr;
7284 ValToSExtendedUses[HeadOfChain].push_back(I);
7285 }
7286 // Update Inst since promotion happened.
7287 Inst = SpeculativelyMovedExts.pop_back_val();
7288 } else {
7289 // This is the first chain visited from the header, keep the current chain
7290 // as unhandled. Defer promoting it until we encounter another SExt
7291 // chain derived from the same header.
7292 for (auto *I : SpeculativelyMovedExts) {
7293 Value *HeadOfChain = I->getOperand(0);
7294 SeenChainsForSExt[HeadOfChain] = Inst;
7295 }
7296 return false;
7297 }
7298
7299 if (!AllSeenFirst && !UnhandledExts.empty())
7300 for (auto *VisitedSExt : UnhandledExts) {
7301 if (RemovedInsts.count(VisitedSExt))
7302 continue;
7303 TypePromotionTransaction TPT(RemovedInsts);
7304 SmallVector<Instruction *, 1> Exts;
7305 SmallVector<Instruction *, 2> Chains;
7306 Exts.push_back(VisitedSExt);
7307 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7308 TPT.commit();
7309 if (HasPromoted)
7310 Promoted = true;
7311 for (auto *I : Chains) {
7312 Value *HeadOfChain = I->getOperand(0);
7313 // Mark this as handled.
7314 SeenChainsForSExt[HeadOfChain] = nullptr;
7315 ValToSExtendedUses[HeadOfChain].push_back(I);
7316 }
7317 }
7318 return Promoted;
7319}
7320
7321bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7322 BasicBlock *DefBB = I->getParent();
7323
7324 // If the result of a {s|z}ext and its source are both live out, rewrite all
7325 // other uses of the source with the result of the extension.
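// For example (an illustrative sketch; value names are hypothetical), given
//   %ext = zext i32 %src to i64
// where both %src and %ext are live out of this block, a use of %src in
// another block is rewritten to use
//   %trunc = trunc i64 %ext to i32
// so that only %ext stays live across the block boundary.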
7326 Value *Src = I->getOperand(0);
7327 if (Src->hasOneUse())
7328 return false;
7329
7330 // Only do this xform if truncating is free.
7331 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7332 return false;
7333
7334 // Only safe to perform the optimization if the source is also defined in
7335 // this block.
7336 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7337 return false;
7338
7339 bool DefIsLiveOut = false;
7340 for (User *U : I->users()) {
7341 Instruction *UI = cast<Instruction>(U);
7342
7343 // Figure out which BB this ext is used in.
7344 BasicBlock *UserBB = UI->getParent();
7345 if (UserBB == DefBB)
7346 continue;
7347 DefIsLiveOut = true;
7348 break;
7349 }
7350 if (!DefIsLiveOut)
7351 return false;
7352
7353 // Make sure none of the uses are PHI nodes.
7354 for (User *U : Src->users()) {
7355 Instruction *UI = cast<Instruction>(U);
7356 BasicBlock *UserBB = UI->getParent();
7357 if (UserBB == DefBB)
7358 continue;
7359 // Be conservative. We don't want this xform to end up introducing
7360 // reloads just before load / store instructions.
7361 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7362 return false;
7363 }
7364
7365 // InsertedTruncs - Insert only one trunc in each block.
7366 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7367
7368 bool MadeChange = false;
7369 for (Use &U : Src->uses()) {
7370 Instruction *User = cast<Instruction>(U.getUser());
7371
7372 // Figure out which BB this ext is used in.
7373 BasicBlock *UserBB = User->getParent();
7374 if (UserBB == DefBB)
7375 continue;
7376
7377 // Both src and def are live in this block. Rewrite the use.
7378 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7379
7380 if (!InsertedTrunc) {
7381 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7382 assert(InsertPt != UserBB->end());
7383 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7384 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7385 InsertedInsts.insert(InsertedTrunc);
7386 }
7387
7388 // Replace a use of the {s|z}ext source with a use of the result.
7389 U = InsertedTrunc;
7390 ++NumExtUses;
7391 MadeChange = true;
7392 }
7393
7394 return MadeChange;
7395}
7396
7397// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7398// just after the load if the target can fold this into one extload instruction,
7399// with the hope of eliminating some of the other later "and" instructions using
7400// the loaded value. "and"s that are made trivially redundant by the insertion
7401// of the new "and" are removed by this function, while others (e.g. those whose
7402// path from the load goes through a phi) are left for isel to potentially
7403// remove.
7404//
7405// For example:
7406//
7407// b0:
7408// x = load i32
7409// ...
7410// b1:
7411// y = and x, 0xff
7412// z = use y
7413//
7414// becomes:
7415//
7416// b0:
7417// x = load i32
7418// x' = and x, 0xff
7419// ...
7420// b1:
7421// z = use x'
7422//
7423// whereas:
7424//
7425// b0:
7426// x1 = load i32
7427// ...
7428// b1:
7429// x2 = load i32
7430// ...
7431// b2:
7432// x = phi x1, x2
7433// y = and x, 0xff
7434//
7435// becomes (after a call to optimizeLoadExt for each load):
7436//
7437// b0:
7438// x1 = load i32
7439// x1' = and x1, 0xff
7440// ...
7441// b1:
7442// x2 = load i32
7443// x2' = and x2, 0xff
7444// ...
7445// b2:
7446// x = phi x1', x2'
7447// y = and x, 0xff
7448bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7449 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7450 return false;
7451
7452 // Skip loads we've already transformed.
7453 if (Load->hasOneUse() &&
7454 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7455 return false;
7456
7457 // Look at all uses of Load, looking through phis, to determine how many bits
7458 // of the loaded value are needed.
7459 SmallVector<Instruction *, 8> WorkList;
7460 SmallPtrSet<Instruction *, 16> Visited;
7461 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7462 SmallVector<Instruction *, 8> DropFlags;
7463 for (auto *U : Load->users())
7464 WorkList.push_back(cast<Instruction>(U));
7465
7466 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7467 unsigned BitWidth = LoadResultVT.getSizeInBits();
7468 // If the BitWidth is 0, do not try to optimize the type.
7469 if (BitWidth == 0)
7470 return false;
7471
7472 APInt DemandBits(BitWidth, 0);
7473 APInt WidestAndBits(BitWidth, 0);
7474
7475 while (!WorkList.empty()) {
7476 Instruction *I = WorkList.pop_back_val();
7477
7478 // Break use-def graph loops.
7479 if (!Visited.insert(I).second)
7480 continue;
7481
7482 // For a PHI node, push all of its users.
7483 if (auto *Phi = dyn_cast<PHINode>(I)) {
7484 for (auto *U : Phi->users())
7485 WorkList.push_back(cast<Instruction>(U));
7486 continue;
7487 }
7488
7489 switch (I->getOpcode()) {
7490 case Instruction::And: {
7491 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7492 if (!AndC)
7493 return false;
7494 APInt AndBits = AndC->getValue();
7495 DemandBits |= AndBits;
7496 // Keep track of the widest and mask we see.
7497 if (AndBits.ugt(WidestAndBits))
7498 WidestAndBits = AndBits;
7499 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7500 AndsToMaybeRemove.push_back(I);
7501 break;
7502 }
7503
7504 case Instruction::Shl: {
7505 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7506 if (!ShlC)
7507 return false;
7508 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7509 DemandBits.setLowBits(BitWidth - ShiftAmt);
7510 DropFlags.push_back(I);
7511 break;
7512 }
7513
7514 case Instruction::Trunc: {
7515 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7516 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7517 DemandBits.setLowBits(TruncBitWidth);
7518 DropFlags.push_back(I);
7519 break;
7520 }
7521
7522 default:
7523 return false;
7524 }
7525 }
7526
7527 uint32_t ActiveBits = DemandBits.getActiveBits();
7528 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7529 // target even if isLoadLegal says an i1 EXTLOAD is valid. For example,
7530 // for the AArch64 target isLoadLegal(i32, i1, ..., ZEXTLOAD, false) returns
7531 // true, but (and (load x) 1) is not matched as a single instruction, rather
7532 // as a LDR followed by an AND.
7533 // TODO: Look into removing this restriction by fixing backends to either
7534 // return false for isLoadLegal for i1 or have them select this pattern to
7535 // a single instruction.
7536 //
7537 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7538 // mask, since these are the only ands that will be removed by isel.
7539 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7540 WidestAndBits != DemandBits)
7541 return false;
7542
7543 LLVMContext &Ctx = Load->getType()->getContext();
7544 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7545 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7546
7547 // Reject cases that won't be matched as extloads.
7548 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7549 !TLI->isLoadLegal(LoadResultVT, TruncVT, Load->getAlign(),
7550 Load->getPointerAddressSpace(), ISD::ZEXTLOAD, false))
7551 return false;
7552
7553 IRBuilder<> Builder(Load->getNextNode());
7554 auto *NewAnd = cast<Instruction>(
7555 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7556 // Mark this instruction as "inserted by CGP", so that other
7557 // optimizations don't touch it.
7558 InsertedInsts.insert(NewAnd);
7559
7560 // Replace all uses of load with new and (except for the use of load in the
7561 // new and itself).
7562 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7563 NewAnd->setOperand(0, Load);
7564
7565 // Remove any and instructions that are now redundant.
7566 for (auto *And : AndsToMaybeRemove)
7567 // Check that the and mask is the same as the one we decided to put on the
7568 // new and.
7569 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7570 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7571 if (&*CurInstIterator == And)
7572 CurInstIterator = std::next(And->getIterator());
7573 And->eraseFromParent();
7574 ++NumAndUses;
7575 }
7576
7577 // NSW flags may no longer hold.
7578 for (auto *Inst : DropFlags)
7579 Inst->setHasNoSignedWrap(false);
7580
7581 ++NumAndsAdded;
7582 return true;
7583}
7584
7585/// Check if V (an operand of a select instruction) is an expensive instruction
7586/// that is only used once.
7587static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7588 auto *I = dyn_cast<Instruction>(V);
7589 // If it's safe to speculatively execute, then it should not have side
7590 // effects; therefore, it's safe to sink and possibly *not* execute.
7591 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7592 TTI->isExpensiveToSpeculativelyExecute(I);
7593}
7594
7595/// Returns true if a SelectInst should be turned into an explicit branch.
7596static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7597 const TargetLowering *TLI,
7598 SelectInst *SI) {
7599 // If even a predictable select is cheap, then a branch can't be cheaper.
7600 if (!TLI->isPredictableSelectExpensive())
7601 return false;
7602
7603 // FIXME: This should use the same heuristics as IfConversion to determine
7604 // whether a select is better represented as a branch.
7605
7606 // If metadata tells us that the select condition is obviously predictable,
7607 // then we want to replace the select with a branch.
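// For example (an illustrative reading): branch weights of 2000 and 1 give
// the hot side a probability of roughly 99.95%; if that exceeds the target's
// predictable branch threshold, the select is expanded to a branch.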
7608 uint64_t TrueWeight, FalseWeight;
7609 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7610 uint64_t Max = std::max(TrueWeight, FalseWeight);
7611 uint64_t Sum = TrueWeight + FalseWeight;
7612 if (Sum != 0) {
7613 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7614 if (Probability > TTI->getPredictableBranchThreshold())
7615 return true;
7616 }
7617 }
7618
7619 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7620
7621 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7622 // comparison condition. If the compare has more than one use, there's
7623 // probably another cmov or setcc around, so it's not worth emitting a branch.
7624 if (!Cmp || !Cmp->hasOneUse())
7625 return false;
7626
7627 // If either operand of the select is expensive and only needed on one side
7628 // of the select, we should form a branch.
7629 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7630 sinkSelectOperand(TTI, SI->getFalseValue()))
7631 return true;
7632
7633 return false;
7634}
7635
7636/// If \p isTrue is true, return the true value of \p SI, otherwise return
7637/// false value of \p SI. If the true/false value of \p SI is defined by any
7638/// select instructions in \p Selects, look through the defining select
7639/// instruction until the true/false value is not defined in \p Selects.
7640static Value *
7641getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7642 const SmallPtrSet<const Instruction *, 2> &Selects) {
7643 Value *V = nullptr;
7644
7645 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7646 DefSI = dyn_cast<SelectInst>(V)) {
7647 assert(DefSI->getCondition() == SI->getCondition() &&
7648 "The condition of DefSI does not match with SI");
7649 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7650 }
7651
7652 assert(V && "Failed to get select true/false value");
7653 return V;
7654}
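// Illustrative example (not from the original source): with
//   %s1 = select i1 %c, i32 %a, i32 %b
//   %s2 = select i1 %c, i32 %s1, i32 %d
// and Selects = {%s1, %s2}, getTrueOrFalseValue(%s2, true) looks through %s1
// and returns %a, while getTrueOrFalseValue(%s2, false) returns %d.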
7655
7656bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7657 assert(Shift->isShift() && "Expected a shift");
7658
7659 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7660 // general vector shifts, and (3) the shift amount is a select-of-splatted
7661 // values, hoist the shifts before the select:
7662 // shift Op0, (select Cond, TVal, FVal) -->
7663 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7664 //
7665 // This is inverting a generic IR transform when we know that the cost of a
7666 // general vector shift is more than the cost of 2 shift-by-scalars.
7667 // We can't do this effectively in SDAG because we may not be able to
7668 // determine if the select operands are splats from within a basic block.
7669 Type *Ty = Shift->getType();
7670 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7671 return false;
7672 Value *Cond, *TVal, *FVal;
7673 if (!match(Shift->getOperand(1),
7674 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7675 return false;
7676 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7677 return false;
7678
7679 IRBuilder<> Builder(Shift);
7680 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7681 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7682 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7683 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7684 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7685 Shift->eraseFromParent();
7686 return true;
7687}
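// Illustrative before/after (a sketch, assuming the target reports vector
// shift-by-scalar as cheap; not from the original source):
//   %amt = select i1 %c, <4 x i32> splat (i32 2), <4 x i32> splat (i32 3)
//   %r = shl <4 x i32> %x, %amt
// becomes
//   %t = shl <4 x i32> %x, splat (i32 2)
//   %f = shl <4 x i32> %x, splat (i32 3)
//   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f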
7688
7689bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7690 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7691 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7692 "Expected a funnel shift");
7693
7694 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7695 // than general vector shifts, and (3) the shift amount is select-of-splatted
7696 // values, hoist the funnel shifts before the select:
7697 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7698 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7699 //
7700 // This is inverting a generic IR transform when we know that the cost of a
7701 // general vector shift is more than the cost of 2 shift-by-scalars.
7702 // We can't do this effectively in SDAG because we may not be able to
7703 // determine if the select operands are splats from within a basic block.
7704 Type *Ty = Fsh->getType();
7705 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7706 return false;
7707 Value *Cond, *TVal, *FVal;
7708 if (!match(Fsh->getOperand(2),
7709 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7710 return false;
7711 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7712 return false;
7713
7714 IRBuilder<> Builder(Fsh);
7715 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7716 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7717 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7718 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7719 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7720 Fsh->eraseFromParent();
7721 return true;
7722}
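// The analogous rewrite for a funnel shift (illustrative, not from the
// original source):
//   fshl %x, %y, (select %c, splat (1), splat (2))
//     --> select %c, (fshl %x, %y, splat (1)), (fshl %x, %y, splat (2))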
7723
7724/// If we have a SelectInst that will likely profit from branch prediction,
7725/// turn it into a branch.
7726bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7727 if (DisableSelectToBranch)
7728 return false;
7729
7730 // If the SelectOptimize pass is enabled, selects have already been optimized.
7731 if (!getCGPassBuilderOption().DisableSelectOptimize)
7732 return false;
7733
7734 // Find all consecutive select instructions that share the same condition.
7735 SmallVector<SelectInst *, 2> ASI;
7736 ASI.push_back(SI);
7737 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7738 It != SI->getParent()->end(); ++It) {
7739 SelectInst *I = dyn_cast<SelectInst>(&*It);
7740 if (I && SI->getCondition() == I->getCondition()) {
7741 ASI.push_back(I);
7742 } else {
7743 break;
7744 }
7745 }
7746
7747 SelectInst *LastSI = ASI.back();
7748 // Increment the current iterator to skip the rest of the select
7749 // instructions: the whole group is either lowered to branches or left alone.
7750 CurInstIterator = std::next(LastSI->getIterator());
7751 // Examine debug-info attached to the consecutive select instructions. They
7752 // won't be individually optimised by optimizeInst, so we need to perform
7753 // DbgVariableRecord maintenance here instead.
7754 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7755 fixupDbgVariableRecordsOnInst(*SI);
7756
7757 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7758
7759 // Can we convert the 'select' to CF?
7760 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7761 return false;
7762
7763 TargetLowering::SelectSupportKind SelectKind;
7764 if (SI->getType()->isVectorTy())
7765 SelectKind = TargetLowering::ScalarCondVectorVal;
7766 else
7767 SelectKind = TargetLowering::ScalarValSelect;
7768
7769 if (TLI->isSelectSupported(SelectKind) &&
7770 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
7771 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI)))
7772 return false;
7773
7774 // Transform a sequence like this:
7775 // start:
7776 // %cmp = cmp uge i32 %a, %b
7777 // %sel = select i1 %cmp, i32 %c, i32 %d
7778 //
7779 // Into:
7780 // start:
7781 // %cmp = cmp uge i32 %a, %b
7782 // %cmp.frozen = freeze %cmp
7783 // br i1 %cmp.frozen, label %select.true, label %select.false
7784 // select.true:
7785 // br label %select.end
7786 // select.false:
7787 // br label %select.end
7788 // select.end:
7789 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7790 //
7791 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7792 // In addition, we may sink instructions that produce %c or %d from
7793 // the entry block into the destination(s) of the new branch.
7794 // If the true or false blocks do not contain a sunken instruction, that
7795 // block and its branch may be optimized away. In that case, one side of the
7796 // first branch will point directly to select.end, and the corresponding PHI
7797 // predecessor block will be the start block.
7798 // The CFG is altered here and we update the DominatorTree and the LoopInfo,
7799 // but we don't set a ModifiedDT flag to avoid restarting the function walk in
7800 // runOnFunction for each select optimized.
7801
7802 // Collect values that go on the true side and the values that go on the false
7803 // side.
7804 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7805 for (SelectInst *SI : ASI) {
7806 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7807 TrueInstrs.push_back(cast<Instruction>(V));
7808 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7809 FalseInstrs.push_back(cast<Instruction>(V));
7810 }
7811
7812 // Split the select block, according to how many (if any) values go on each
7813 // side.
7814 BasicBlock *StartBlock = SI->getParent();
7815 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7816 // We should split before any debug-info.
7817 SplitPt.setHeadBit(true);
7818
7819 IRBuilder<> IB(SI);
7820 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7821
7822 BasicBlock *TrueBlock = nullptr;
7823 BasicBlock *FalseBlock = nullptr;
7824 BasicBlock *EndBlock = nullptr;
7825 BranchInst *TrueBranch = nullptr;
7826 BranchInst *FalseBranch = nullptr;
7827 if (TrueInstrs.size() == 0) {
7828 FalseBranch = cast<BranchInst>(
7829 SplitBlockAndInsertIfElse(CondFr, SplitPt, false, nullptr, DTU, LI));
7830 FalseBlock = FalseBranch->getParent();
7831 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7832 } else if (FalseInstrs.size() == 0) {
7833 TrueBranch = cast<BranchInst>(
7834 SplitBlockAndInsertIfThen(CondFr, SplitPt, false, nullptr, DTU, LI));
7835 TrueBlock = TrueBranch->getParent();
7836 EndBlock = TrueBranch->getSuccessor(0);
7837 } else {
7838 Instruction *ThenTerm = nullptr;
7839 Instruction *ElseTerm = nullptr;
7840 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7841 nullptr, DTU, LI);
7842 TrueBranch = cast<BranchInst>(ThenTerm);
7843 FalseBranch = cast<BranchInst>(ElseTerm);
7844 TrueBlock = TrueBranch->getParent();
7845 FalseBlock = FalseBranch->getParent();
7846 EndBlock = TrueBranch->getSuccessor(0);
7847 }
7848
7849 EndBlock->setName("select.end");
7850 if (TrueBlock)
7851 TrueBlock->setName("select.true.sink");
7852 if (FalseBlock)
7853 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7854 : "select.false.sink");
7855
7856 if (IsHugeFunc) {
7857 if (TrueBlock)
7858 FreshBBs.insert(TrueBlock);
7859 if (FalseBlock)
7860 FreshBBs.insert(FalseBlock);
7861 FreshBBs.insert(EndBlock);
7862 }
7863
7864 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7865
7866 static const unsigned MD[] = {
7867 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7868 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7869 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7870
7871 // Sink expensive instructions into the conditional blocks to avoid executing
7872 // them speculatively.
7873 for (Instruction *I : TrueInstrs)
7874 I->moveBefore(TrueBranch->getIterator());
7875 for (Instruction *I : FalseInstrs)
7876 I->moveBefore(FalseBranch->getIterator());
7877
7878 // If we did not create a new block for one of the 'true' or 'false' paths
7879 // of the condition, it means that side of the branch goes to the end block
7880 // directly and the path originates from the start block from the point of
7881 // view of the new PHI.
7882 if (TrueBlock == nullptr)
7883 TrueBlock = StartBlock;
7884 else if (FalseBlock == nullptr)
7885 FalseBlock = StartBlock;
7886
7887 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7888 // Use a reverse iterator because a later select may use the value of an
7889 // earlier select, and we need to propagate the value through the earlier
7890 // select to get the PHI operand.
7891 for (SelectInst *SI : llvm::reverse(ASI)) {
7892 // The select itself is replaced with a PHI Node.
7893 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7894 PN->insertBefore(EndBlock->begin());
7895 PN->takeName(SI);
7896 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7897 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7898 PN->setDebugLoc(SI->getDebugLoc());
7899
7900 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7901 SI->eraseFromParent();
7902 INS.erase(SI);
7903 ++NumSelectsExpanded;
7904 }
7905
7906 // Instruct OptimizeBlock to skip to the next block.
7907 CurInstIterator = StartBlock->end();
7908 return true;
7909}
7910
7911/// Some targets only accept certain types for splat inputs. For example a VDUP
7912/// in MVE takes a GPR (integer) register, and instructions that incorporate a
7913/// VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7914bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7915 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7916 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7917 m_Undef(), m_ZeroMask())))
7918 return false;
7919 Type *NewType = TLI->shouldConvertSplatType(SVI);
7920 if (!NewType)
7921 return false;
7922
7923 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7924 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7925 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7926 "Expected a type of the same size!");
7927 auto *NewVecType =
7928 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7929
7930 // Create a bitcast (shuffle (insert (bitcast(..))))
7931 IRBuilder<> Builder(SVI->getContext());
7932 Builder.SetInsertPoint(SVI);
7933 Value *BC1 = Builder.CreateBitCast(
7934 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7935 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7936 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7937
7938 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7939 RecursivelyDeleteTriviallyDeadInstructions(
7940 SVI, TLInfo, nullptr,
7941 [&](Value *V) { removeAllAssertingVHReferences(V); });
7942
7943 // Also hoist the bitcast up to its operand if they are not in the same
7944 // block.
7945 if (auto *BCI = dyn_cast<Instruction>(BC1))
7946 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7947 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7948 !Op->isTerminator() && !Op->isEHPad())
7949 BCI->moveAfter(Op);
7950
7951 return true;
7952}
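// Illustrative example (a sketch for an MVE-like target where
// shouldConvertSplatType() returns i32 for a <4 x float> splat; not from the
// original source):
//   %ins = insertelement <4 x float> poison, float %f, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison, zeroinitializer
// becomes
//   %bc = bitcast float %f to i32
//   %splat.i32 = ...vector splat of i32 %bc...
//   %res = bitcast <4 x i32> %splat.i32 to <4 x float>
// so the splat input can be kept in a GPR.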
7953
7954bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7955 // If the operands of I can be folded into a target instruction together with
7956 // I, duplicate and sink them.
7957 SmallVector<Use *, 4> OpsToSink;
7958 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7959 return false;
7960
7961 // OpsToSink can contain multiple uses in a use chain (e.g.
7962 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7963 // uses must come first, so we process the ops in reverse order so as to not
7964 // create invalid IR.
7965 BasicBlock *TargetBB = I->getParent();
7966 bool Changed = false;
7967 SmallVector<Use *, 4> ToReplace;
7968 Instruction *InsertPoint = I;
7969 for (Use *U : reverse(OpsToSink)) {
7970 auto *UI = cast<Instruction>(U->get());
7971 if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
7972 continue;
7973 if (UI->getParent() == TargetBB) {
7974 if (UI->comesBefore(InsertPoint))
7975 InsertPoint = UI;
7976 continue;
7977 }
7978 ToReplace.push_back(U);
7979 }
7980
7981 SetVector<Instruction *> MaybeDead;
7982 DenseMap<Instruction *, Instruction *> NewInstructions;
7983 for (Use *U : ToReplace) {
7984 auto *UI = cast<Instruction>(U->get());
7985 Instruction *NI = UI->clone();
7986
7987 if (IsHugeFunc) {
7988 // Now that we have cloned an instruction, its operands' defs may sink into
7989 // this BB, so add the operands' defining BBs to FreshBBs for optimization.
7990 for (Value *Op : NI->operands())
7991 if (auto *OpDef = dyn_cast<Instruction>(Op))
7992 FreshBBs.insert(OpDef->getParent());
7993 }
7994
7995 NewInstructions[UI] = NI;
7996 MaybeDead.insert(UI);
7997 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7998 NI->insertBefore(InsertPoint->getIterator());
7999 InsertPoint = NI;
8000 InsertedInsts.insert(NI);
8001
8002 // Update the use for the new instruction, making sure that we update the
8003 // sunk instruction uses, if it is part of a chain that has already been
8004 // sunk.
8005 Instruction *OldI = cast<Instruction>(U->getUser());
8006 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
8007 It->second->setOperand(U->getOperandNo(), NI);
8008 else
8009 U->set(NI);
8010 Changed = true;
8011 }
8012
8013 // Remove instructions that are dead after sinking.
8014 for (auto *I : MaybeDead) {
8015 if (!I->hasNUsesOrMore(1)) {
8016 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
8017 I->eraseFromParent();
8018 }
8019 }
8020
8021 return Changed;
8022}
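// Illustrative example (not from the original source): if BB0 defines
//   %s = shufflevector ...            ; a splat
//   %z = zext <8 x i8> %s to <8 x i16>
// and their only user is a mul in BB1 that the target can fold with the
// extend, clones of %s and %z are inserted in BB1 directly before the mul,
// the mul's use is rewired to the clones, and the originals are erased once
// they become dead.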
8023
8024bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
8025 Value *Cond = SI->getCondition();
8026 Type *OldType = Cond->getType();
8027 LLVMContext &Context = Cond->getContext();
8028 EVT OldVT = TLI->getValueType(*DL, OldType);
8029 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
8030 unsigned RegWidth = RegType.getSizeInBits();
8031
8032 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
8033 return false;
8034
8035 // If the register width is greater than the type width, expand the condition
8036 // of the switch instruction and each case constant to the width of the
8037 // register. By widening the type of the switch condition, subsequent
8038 // comparisons (for case comparisons) will not need to be extended to the
8039 // preferred register width, so we will potentially eliminate N-1 extends,
8040 // where N is the number of cases in the switch.
8041 auto *NewType = Type::getIntNTy(Context, RegWidth);
8042
8043 // Extend the switch condition and case constants using the target preferred
8044 // extend unless the switch condition is a function argument with an extend
8045 // attribute. In that case, we can avoid an unnecessary mask/extension by
8046 // matching the argument extension instead.
8047 Instruction::CastOps ExtType = Instruction::ZExt;
8048 // Some targets prefer SExt over ZExt.
8049 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
8050 ExtType = Instruction::SExt;
8051
8052 if (auto *Arg = dyn_cast<Argument>(Cond)) {
8053 if (Arg->hasSExtAttr())
8054 ExtType = Instruction::SExt;
8055 if (Arg->hasZExtAttr())
8056 ExtType = Instruction::ZExt;
8057 }
8058
8059 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
8060 ExtInst->insertBefore(SI->getIterator());
8061 ExtInst->setDebugLoc(SI->getDebugLoc());
8062 SI->setCondition(ExtInst);
8063 for (auto Case : SI->cases()) {
8064 const APInt &NarrowConst = Case.getCaseValue()->getValue();
8065 APInt WideConst = (ExtType == Instruction::ZExt)
8066 ? NarrowConst.zext(RegWidth)
8067 : NarrowConst.sext(RegWidth);
8068 Case.setValue(ConstantInt::get(Context, WideConst));
8069 }
8070
8071 return true;
8072}
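// Illustrative example (not from the original source): with a 32-bit
// preferred switch-condition register and zero-extension,
//   switch i8 %x, label %def [ i8 1, label %bb1 ]
// becomes
//   %x.ext = zext i8 %x to i32
//   switch i32 %x.ext, label %def [ i32 1, label %bb1 ]
// so the backend no longer extends %x separately for each case comparison.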
8073
8074bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
8075 // The SCCP optimization tends to produce code like this:
8076 // switch(x) { case 42: phi(42, ...) }
8077 // Materializing the constant for the phi-argument needs instructions; so we
8078 // change the code to:
8079 // switch(x) { case 42: phi(x, ...) }
8080
8081 Value *Condition = SI->getCondition();
8082 // Avoid endless loop in degenerate case.
8083 if (isa<ConstantInt>(*Condition))
8084 return false;
8085
8086 bool Changed = false;
8087 BasicBlock *SwitchBB = SI->getParent();
8088 Type *ConditionType = Condition->getType();
8089
8090 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
8091 ConstantInt *CaseValue = Case.getCaseValue();
8092 BasicBlock *CaseBB = Case.getCaseSuccessor();
8093 // Set to true if we previously checked that `CaseBB` is only reached by
8094 // a single case from this switch.
8095 bool CheckedForSinglePred = false;
8096 for (PHINode &PHI : CaseBB->phis()) {
8097 Type *PHIType = PHI.getType();
8098 // If ZExt is free then we can also catch patterns like this:
8099 // switch((i32)x) { case 42: phi((i64)42, ...); }
8100 // and replace `(i64)42` with `zext i32 %x to i64`.
8101 bool TryZExt =
8102 PHIType->isIntegerTy() &&
8103 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
8104 TLI->isZExtFree(ConditionType, PHIType);
8105 if (PHIType == ConditionType || TryZExt) {
8106 // Set to true to skip this case because of multiple preds.
8107 bool SkipCase = false;
8108 Value *Replacement = nullptr;
8109 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
8110 Value *PHIValue = PHI.getIncomingValue(I);
8111 if (PHIValue != CaseValue) {
8112 if (!TryZExt)
8113 continue;
8114 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
8115 if (!PHIValueInt ||
8116 PHIValueInt->getValue() !=
8117 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
8118 continue;
8119 }
8120 if (PHI.getIncomingBlock(I) != SwitchBB)
8121 continue;
8122 // We cannot optimize if there are multiple case labels jumping to
8123 // this block. This check may get expensive when there are many
8124 // case labels so we test for it last.
8125 if (!CheckedForSinglePred) {
8126 CheckedForSinglePred = true;
8127 if (SI->findCaseDest(CaseBB) == nullptr) {
8128 SkipCase = true;
8129 break;
8130 }
8131 }
8132
8133 if (Replacement == nullptr) {
8134 if (PHIValue == CaseValue) {
8135 Replacement = Condition;
8136 } else {
8137 IRBuilder<> Builder(SI);
8138 Replacement = Builder.CreateZExt(Condition, PHIType);
8139 }
8140 }
8141 PHI.setIncomingValue(I, Replacement);
8142 Changed = true;
8143 }
8144 if (SkipCase)
8145 break;
8146 }
8147 }
8148 }
8149 return Changed;
8150}
8151
8152bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
8153 bool Changed = optimizeSwitchType(SI);
8154 Changed |= optimizeSwitchPhiConstants(SI);
8155 return Changed;
8156}
8157
8158namespace {
8159
8160/// Helper class to promote a scalar operation to a vector one.
8161/// This class is used to move an extractelement transition downward.
8162/// E.g.,
8163/// a = vector_op <2 x i32>
8164/// b = extractelement <2 x i32> a, i32 0
8165/// c = scalar_op b
8166/// store c
8167///
8168/// =>
8169/// a = vector_op <2 x i32>
8170/// c = vector_op a (equivalent to scalar_op on the related lane)
8171/// * d = extractelement <2 x i32> c, i32 0
8172/// * store d
8173/// Assuming both extractelement and store can be combined, we get rid of the
8174/// transition.
8175class VectorPromoteHelper {
8176 /// DataLayout associated with the current module.
8177 const DataLayout &DL;
8178
8179 /// Used to perform some checks on the legality of vector operations.
8180 const TargetLowering &TLI;
8181
8182 /// Used to estimate the cost of the promoted chain.
8183 const TargetTransformInfo &TTI;
8184
8185 /// The transition being moved downwards.
8186 Instruction *Transition;
8187
8188 /// The sequence of instructions to be promoted.
8189 SmallVector<Instruction *, 4> InstsToBePromoted;
8190
8191 /// Cost of combining a store and an extract.
8192 unsigned StoreExtractCombineCost;
8193
8194 /// Instruction that will be combined with the transition.
8195 Instruction *CombineInst = nullptr;
8196
8197 /// The instruction that represents the current end of the transition.
8198 /// Since we are faking the promotion until we reach the end of the chain
8199 /// of computation, we need a way to get the current end of the transition.
8200 Instruction *getEndOfTransition() const {
8201 if (InstsToBePromoted.empty())
8202 return Transition;
8203 return InstsToBePromoted.back();
8204 }
8205
8206 /// Return the index of the original value in the transition.
8207 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8208 /// c, is at index 0.
8209 unsigned getTransitionOriginalValueIdx() const {
8210 assert(isa<ExtractElementInst>(Transition) &&
8211 "Other kind of transitions are not supported yet");
8212 return 0;
8213 }
8214
8215 /// Return the index of the index in the transition.
8216 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8217 /// is at index 1.
8218 unsigned getTransitionIdx() const {
8219 assert(isa<ExtractElementInst>(Transition) &&
8220 "Other kind of transitions are not supported yet");
8221 return 1;
8222 }
8223
8224 /// Get the type of the transition.
8225 /// This is the type of the original value.
8226 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8227 /// transition is <2 x i32>.
8228 Type *getTransitionType() const {
8229 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8230 }
8231
8232 /// Promote \p ToBePromoted by moving \p Def downward through it.
8233 /// I.e., we have the following sequence:
8234 /// Def = Transition <ty1> a to <ty2>
8235 /// b = ToBePromoted <ty2> Def, ...
8236 /// =>
8237 /// b = ToBePromoted <ty1> a, ...
8238 /// Def = Transition <ty1> ToBePromoted to <ty2>
8239 void promoteImpl(Instruction *ToBePromoted);
8240
8241 /// Check whether or not it is profitable to promote all the
8242 /// instructions enqueued to be promoted.
8243 bool isProfitableToPromote() {
8244 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8245 unsigned Index = isa<ConstantInt>(ValIdx)
8246 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8247 : -1;
8248 Type *PromotedType = getTransitionType();
8249
8250 StoreInst *ST = cast<StoreInst>(CombineInst);
8251 unsigned AS = ST->getPointerAddressSpace();
8252 // Check if this store is supported.
8253 if (!TLI.allowsMisalignedMemoryAccesses(
8254 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8255 ST->getAlign())) {
8256 // If this is not supported, there is no way we can combine
8257 // the extract with the store.
8258 return false;
8259 }
8260
8261 // The scalar chain of computation has to pay for the transition from
8262 // scalar to vector.
8263 // The vector chain has to account for the combining cost.
8264 enum TargetTransformInfo::TargetCostKind CostKind =
8265 TargetTransformInfo::TCK_RecipThroughput;
8266 InstructionCost ScalarCost =
8267 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8268 InstructionCost VectorCost = StoreExtractCombineCost;
8269 for (const auto &Inst : InstsToBePromoted) {
8270 // Compute the cost.
8271 // By construction, all instructions being promoted are arithmetic ones.
8272 // Moreover, one argument is a constant that can be viewed as a splat
8273 // constant.
8274 Value *Arg0 = Inst->getOperand(0);
8275 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8276 isa<ConstantFP>(Arg0);
8277 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8278 if (IsArg0Constant)
8279 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8280 else
8281 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8282
8283 ScalarCost += TTI.getArithmeticInstrCost(
8284 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8285 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8286 CostKind, Arg0Info, Arg1Info);
8287 }
8288 LLVM_DEBUG(
8289 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8290 << ScalarCost << "\nVector: " << VectorCost << '\n');
8291 return ScalarCost > VectorCost;
8292 }
8293
8294 /// Generate a constant vector with \p Val with the same
8295 /// number of elements as the transition.
8296 /// \p UseSplat defines whether or not \p Val should be replicated
8297 /// across the whole vector.
8298 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8299 /// otherwise we generate a vector with as many poison elements as possible:
8300 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8301 /// used at the index of the extract.
8302 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8303 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8304 if (!UseSplat) {
8305 // If we cannot determine where the constant must be, we have to
8306 // use a splat constant.
8307 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8308 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8309 ExtractIdx = CstVal->getSExtValue();
8310 else
8311 UseSplat = true;
8312 }
8313
8314 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8315 if (UseSplat)
8316 return ConstantVector::getSplat(EC, Val);
8317
8318 if (!EC.isScalable()) {
8319 SmallVector<Constant *, 4> ConstVec;
8320 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8321 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8322 if (Idx == ExtractIdx)
8323 ConstVec.push_back(Val);
8324 else
8325 ConstVec.push_back(PoisonVal);
8326 }
8327 return ConstantVector::get(ConstVec);
8328 } else
8330 "Generate scalable vector for non-splat is unimplemented");
8331 }
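// Illustrative results (not from the original source): for Val = i32 7 on a
// <4 x i32> transition that extracts index 2,
//   UseSplat == true  yields <i32 7, i32 7, i32 7, i32 7>
//   UseSplat == false yields <i32 poison, i32 poison, i32 7, i32 poison>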
8332
8333 /// Check if promoting the operand at \p OperandIdx of \p Use to a vector
8334 /// type can trigger undefined behavior.
8335 static bool canCauseUndefinedBehavior(const Instruction *Use,
8336 unsigned OperandIdx) {
8337 // It is not safe to introduce undef when the operand is on
8338 // the right-hand side of a division-like instruction.
8339 if (OperandIdx != 1)
8340 return false;
8341 switch (Use->getOpcode()) {
8342 default:
8343 return false;
8344 case Instruction::SDiv:
8345 case Instruction::UDiv:
8346 case Instruction::SRem:
8347 case Instruction::URem:
8348 return true;
8349 case Instruction::FDiv:
8350 case Instruction::FRem:
8351 return !Use->hasNoNaNs();
8352 }
8353 llvm_unreachable(nullptr);
8354 }
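// For example (illustrative, not from the original source): promoting the
// divisor of "udiv i32 %x, %d" with poison padding would create divisions by
// poison in the unused lanes, so operand 1 of division-like instructions is
// either rejected by shouldPromote() or forced to a splat constant.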
8355
8356public:
8357 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8358 const TargetTransformInfo &TTI, Instruction *Transition,
8359 unsigned CombineCost)
8360 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8361 StoreExtractCombineCost(CombineCost) {
8362 assert(Transition && "Do not know how to promote null");
8363 }
8364
8365 /// Check if we can promote \p ToBePromoted to \p Type.
8366 bool canPromote(const Instruction *ToBePromoted) const {
8367 // We could support CastInst too.
8368 return isa<BinaryOperator>(ToBePromoted);
8369 }
8370
8371 /// Check if it is profitable to promote \p ToBePromoted
8372 /// by moving the transition downward through it.
8373 bool shouldPromote(const Instruction *ToBePromoted) const {
8374 // Promote only if all the operands can be statically expanded.
8375 // Indeed, we do not want to introduce any new kind of transitions.
8376 for (const Use &U : ToBePromoted->operands()) {
8377 const Value *Val = U.get();
8378 if (Val == getEndOfTransition()) {
8379 // If the use is a division and the transition is on the rhs,
8380 // we cannot promote the operation, otherwise we may create a
8381 // division by zero.
8382 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8383 return false;
8384 continue;
8385 }
8386 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8387 !isa<ConstantFP>(Val))
8388 return false;
8389 }
8390 // Check that the resulting operation is legal.
8391 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8392 if (!ISDOpcode)
8393 return false;
8394 return StressStoreExtract ||
8395 TLI.isOperationLegalOrCustom(
8396 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8397 }
8398
8399 /// Check whether or not \p Use can be combined
8400 /// with the transition.
8401 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8402 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8403
8404 /// Record \p ToBePromoted as part of the chain to be promoted.
8405 void enqueueForPromotion(Instruction *ToBePromoted) {
8406 InstsToBePromoted.push_back(ToBePromoted);
8407 }
8408
8409 /// Set the instruction that will be combined with the transition.
8410 void recordCombineInstruction(Instruction *ToBeCombined) {
8411 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8412 CombineInst = ToBeCombined;
8413 }
8414
8415 /// Promote all the instructions enqueued for promotion if it is
8416 /// profitable.
8417 /// \return True if the promotion happened, false otherwise.
8418 bool promote() {
8419 // Check if there is something to promote.
8420 // Right now, if we do not have anything to combine with,
8421 // we assume the promotion is not profitable.
8422 if (InstsToBePromoted.empty() || !CombineInst)
8423 return false;
8424
8425 // Check cost.
8426 if (!StressStoreExtract && !isProfitableToPromote())
8427 return false;
8428
8429 // Promote.
8430 for (auto &ToBePromoted : InstsToBePromoted)
8431 promoteImpl(ToBePromoted);
8432 InstsToBePromoted.clear();
8433 return true;
8434 }
8435};
8436
8437} // end anonymous namespace
8438
8439void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8440 // At this point, we know that all the operands of ToBePromoted but Def
8441 // can be statically promoted.
8442 // For Def, we need to use its parameter in ToBePromoted:
8443 // b = ToBePromoted ty1 a
8444 // Def = Transition ty1 b to ty2
8445 // Move the transition down.
8446 // 1. Replace all uses of the promoted operation by the transition.
8447 // = ... b => = ... Def.
8448 assert(ToBePromoted->getType() == Transition->getType() &&
8449 "The type of the result of the transition does not match "
8450 "the final type");
8451 ToBePromoted->replaceAllUsesWith(Transition);
8452 // 2. Update the type of the uses.
8453 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8454 Type *TransitionTy = getTransitionType();
8455 ToBePromoted->mutateType(TransitionTy);
8456 // 3. Update all the operands of the promoted operation with promoted
8457 // operands.
8458 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8459 for (Use &U : ToBePromoted->operands()) {
8460 Value *Val = U.get();
8461 Value *NewVal = nullptr;
8462 if (Val == Transition)
8463 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8464 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8465 isa<ConstantFP>(Val)) {
8466 // Use a splat constant if it is not safe to use undef.
8467 NewVal = getConstantVector(
8468 cast<Constant>(Val),
8469 isa<UndefValue>(Val) ||
8470 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8471 } else
8472 llvm_unreachable("Did you modify shouldPromote and forget to update "
8473 "this?");
8474 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8475 }
8476 Transition->moveAfter(ToBePromoted);
8477 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8478}
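// Illustrative before/after for promoteImpl (a sketch, not from the original
// source):
//   %Def = extractelement <2 x i32> %a, i32 0
//   %b = udiv i32 %Def, 7
// becomes (the constant is splatted because udiv's RHS must not be poison):
//   %b = udiv <2 x i32> %a, splat (i32 7)
//   %Def = extractelement <2 x i32> %b, i32 0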
8479
8480/// Some targets can do store(extractelement) with one instruction.
8481/// Try to push the extractelement towards the stores when the target
8482/// has this feature and this is profitable.
8483bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8484 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8485 if (DisableStoreExtract ||
8486 (!StressStoreExtract &&
8487 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8488 Inst->getOperand(1), CombineCost)))
8489 return false;
8490
8491 // At this point we know that Inst is a vector to scalar transition.
8492 // Try to move it down the def-use chain, until:
8493 // - We can combine the transition with its single use
8494 // => we got rid of the transition.
8495 // - We escape the current basic block
8496 // => we would need to check that we are moving it at a cheaper place and
8497 // we do not do that for now.
8498 BasicBlock *Parent = Inst->getParent();
8499 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8500 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8501 // If the transition has more than one use, assume this is not going to be
8502 // beneficial.
8503 while (Inst->hasOneUse()) {
8504 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8505 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8506
8507 if (ToBePromoted->getParent() != Parent) {
8508 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8509 << ToBePromoted->getParent()->getName()
8510 << ") than the transition (" << Parent->getName()
8511 << ").\n");
8512 return false;
8513 }
8514
8515 if (VPH.canCombine(ToBePromoted)) {
8516 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8517 << "will be combined with: " << *ToBePromoted << '\n');
8518 VPH.recordCombineInstruction(ToBePromoted);
8519 bool Changed = VPH.promote();
8520 NumStoreExtractExposed += Changed;
8521 return Changed;
8522 }
8523
8524 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8525 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8526 return false;
8527
8528 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8529
8530 VPH.enqueueForPromotion(ToBePromoted);
8531 Inst = ToBePromoted;
8532 }
8533 return false;
8534}
8535
8536/// For the instruction sequence of store below, F and I values
8537/// are bundled together as an i64 value before being stored into memory.
8538/// Sometimes it is more efficient to generate separate stores for F and I,
8539/// which can remove the bitwise instructions or sink them to colder places.
8540///
8541/// (store (or (zext (bitcast F to i32) to i64),
8542/// (shl (zext I to i64), 32)), addr) -->
8543/// (store F, addr) and (store I, addr+4)
8544///
8545/// Similarly, splitting for other merged store can also be beneficial, like:
8546/// For pair of {i32, i32}, i64 store --> two i32 stores.
8547/// For pair of {i32, i16}, i64 store --> two i32 stores.
8548/// For pair of {i16, i16}, i32 store --> two i16 stores.
8549/// For pair of {i16, i8}, i32 store --> two i16 stores.
8550/// For pair of {i8, i8}, i16 store --> two i8 stores.
8551///
8552/// We allow each target to determine specifically which kind of splitting is
8553/// supported.
8554///
8555/// The store patterns are commonly seen from the simple code snippet below
8556/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8557/// void goo(const std::pair<int, float> &);
8558/// hoo() {
8559/// ...
8560/// goo(std::make_pair(tmp, ftmp));
8561/// ...
8562/// }
8563///
8564/// Although we already have similar splitting in DAG Combine, we duplicate
8565/// it in CodeGenPrepare to catch the case in which the pattern is spread
8566/// across multiple BBs. The logic in DAG Combine is kept to catch cases
8567/// generated during code expansion.
8568static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8569 const TargetLowering &TLI) {
8570 // Handle simple but common cases only.
8571 Type *StoreType = SI.getValueOperand()->getType();
8572
8573 // The code below assumes shifting a value by <number of bits>,
8574 // whereas scalable vectors would have to be shifted by
8575 // <2log(vscale) + number of bits> in order to store the
8576 // low/high parts. Bailing out for now.
8577 if (StoreType->isScalableTy())
8578 return false;
8579
8580 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8581 DL.getTypeSizeInBits(StoreType) == 0)
8582 return false;
8583
8584 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8585 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8586 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8587 return false;
8588
8589 // Don't split the store if it is volatile.
8590 if (SI.isVolatile())
8591 return false;
8592
8593 // Match the following patterns:
8594 // (store (or (zext LValue to i64),
8595 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8596 // or
8597 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8598 // (zext LValue to i64),
8599 // Expect both operands of OR and the first operand of SHL have only
8600 // one use.
8601 Value *LValue, *HValue;
8602 if (!match(SI.getValueOperand(),
8603 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8604 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8605 m_SpecificInt(HalfValBitSize))))))
8606 return false;
8607
8608 // Check that LValue and HValue are integers no wider than HalfValBitSize.
8609 if (!LValue->getType()->isIntegerTy() ||
8610 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8611 !HValue->getType()->isIntegerTy() ||
8612 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8613 return false;
8614
8615 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8616 // as the input of target query.
8617 auto *LBC = dyn_cast<BitCastInst>(LValue);
8618 auto *HBC = dyn_cast<BitCastInst>(HValue);
8619 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8620 : EVT::getEVT(LValue->getType());
8621 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8622 : EVT::getEVT(HValue->getType());
8623 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8624 return false;
8625
8626 // Start to split store.
8627 IRBuilder<> Builder(SI.getContext());
8628 Builder.SetInsertPoint(&SI);
8629
8630 // If LValue/HValue is a bitcast in another BB, create a new one in current
8631 // BB so it may be merged with the split stores by the DAG combiner.
8632 if (LBC && LBC->getParent() != SI.getParent())
8633 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8634 if (HBC && HBC->getParent() != SI.getParent())
8635 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8636
8637 bool IsLE = SI.getDataLayout().isLittleEndian();
8638 auto CreateSplitStore = [&](Value *V, bool Upper) {
8639 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8640 Value *Addr = SI.getPointerOperand();
8641 Align Alignment = SI.getAlign();
8642 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8643 if (IsOffsetStore) {
8644 Addr = Builder.CreateGEP(
8645 SplitStoreType, Addr,
8646 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8647
8648 // When splitting the store in half, naturally one half will retain the
8649 // alignment of the original wider store, regardless of whether it was
8650 // over-aligned or not, while the other will require adjustment.
8651 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8652 }
8653 Builder.CreateAlignedStore(V, Addr, Alignment);
8654 };
8655
8656 CreateSplitStore(LValue, false);
8657 CreateSplitStore(HValue, true);
8658
8659 // Delete the old store.
8660 SI.eraseFromParent();
8661 return true;
8662}
8663
8664// Return true if the GEP has two operands, the first operand is of a sequential
8665// type, and the second operand is a constant.
8666static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8667 const gep_type_iterator I = gep_type_begin(*GEP);
8668 return GEP->getNumOperands() == 2 && I.isSequential() &&
8669 isa<ConstantInt>(GEP->getOperand(1));
8670}
8671
8672// Try unmerging GEPs to reduce liveness interference (register pressure) across
8673// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8674// reducing liveness interference across those edges benefits global register
8675// allocation. Currently handles only certain cases.
8676//
8677// For example, unmerge %GEPI and %UGEPI as below.
8678//
8679// ---------- BEFORE ----------
8680// SrcBlock:
8681// ...
8682// %GEPIOp = ...
8683// ...
8684// %GEPI = gep %GEPIOp, Idx
8685// ...
8686// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8687// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8688// (* %GEPIOp is alive on the indirectbr edges only because it's used by
8689// %UGEPI)
8690//
8691// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8692// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8693// ...
8694//
8695// DstBi:
8696// ...
8697// %UGEPI = gep %GEPIOp, UIdx
8698// ...
8699// ---------------------------
8700//
8701// ---------- AFTER ----------
8702// SrcBlock:
8703// ... (same as above)
8704// (* %GEPI is still alive on the indirectbr edges)
8705// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8706// unmerging)
8707// ...
8708//
8709// DstBi:
8710// ...
8711// %UGEPI = gep %GEPI, (UIdx-Idx)
8712// ...
8713// ---------------------------
8714//
8715// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8716// no longer alive on them.
8717//
8718// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8719// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8720// not to disable further simplifications and optimizations as a result of GEP
8721// merging.
8722//
8723// Note this unmerging may increase the length of the data flow critical path
8724// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8725// between the register pressure and the length of the data-flow critical
8726// path. Restricting this to the uncommon IndirectBr case would minimize the
8727// impact of a potentially longer critical path, if any, and the impact on
8728// compile time.
8729static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8730 const TargetTransformInfo *TTI) {
8731 BasicBlock *SrcBlock = GEPI->getParent();
8732 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8733 // (non-IndirectBr) cases exit early here.
8734 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8735 return false;
8736 // Check that GEPI is a simple gep with a single constant index.
8737 if (!GEPSequentialConstIndexed(GEPI))
8738 return false;
8739 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8740 // Check that GEPI is a cheap one.
8741 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8742 TargetTransformInfo::TCK_SizeAndLatency) >
8743 TargetTransformInfo::TCC_Basic)
8744 return false;
8745 Value *GEPIOp = GEPI->getOperand(0);
8746 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8747 if (!isa<Instruction>(GEPIOp))
8748 return false;
8749 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8750 if (GEPIOpI->getParent() != SrcBlock)
8751 return false;
8752 // Check that GEP is used outside the block, meaning it's alive on the
8753 // IndirectBr edge(s).
8754 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8755 if (auto *I = dyn_cast<Instruction>(Usr)) {
8756 if (I->getParent() != SrcBlock) {
8757 return true;
8758 }
8759 }
8760 return false;
8761 }))
8762 return false;
8763 // The second elements of the GEP chains to be unmerged.
8764 std::vector<GetElementPtrInst *> UGEPIs;
8765 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8766 // on IndirectBr edges.
8767 for (User *Usr : GEPIOp->users()) {
8768 if (Usr == GEPI)
8769 continue;
8770 // Check if Usr is an Instruction. If not, give up.
8771 if (!isa<Instruction>(Usr))
8772 return false;
8773 auto *UI = cast<Instruction>(Usr);
8774 // If Usr is in the same block as GEPIOp, that is fine; skip it.
8775 if (UI->getParent() == SrcBlock)
8776 continue;
8777 // Check if Usr is a GEP. If not, give up.
8778 if (!isa<GetElementPtrInst>(Usr))
8779 return false;
8780 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8781 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8782 // the pointer operand to it. If so, record it in the vector. If not, give
8783 // up.
8784 if (!GEPSequentialConstIndexed(UGEPI))
8785 return false;
8786 if (UGEPI->getOperand(0) != GEPIOp)
8787 return false;
8788 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8789 return false;
8790 if (GEPIIdx->getType() !=
8791 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8792 return false;
8793 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8794 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8795 TargetTransformInfo::TCK_SizeAndLatency) >
8796 TargetTransformInfo::TCC_Basic)
8797 return false;
8798 UGEPIs.push_back(UGEPI);
8799 }
8800 if (UGEPIs.size() == 0)
8801 return false;
8802 // Check the materializing cost of (Uidx-Idx).
8803 for (GetElementPtrInst *UGEPI : UGEPIs) {
8804 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8805 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8806 InstructionCost ImmCost = TTI->getIntImmCost(
8807 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8808 if (ImmCost > TargetTransformInfo::TCC_Basic)
8809 return false;
8810 }
8811 // Now unmerge between GEPI and UGEPIs.
8812 for (GetElementPtrInst *UGEPI : UGEPIs) {
8813 UGEPI->setOperand(0, GEPI);
8814 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8815 auto NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8816 Constant *NewUGEPIIdx = ConstantInt::get(GEPIIdx->getType(), NewIdx);
8817 UGEPI->setOperand(1, NewUGEPIIdx);
8818
8819 auto SourceFlags = GEPI->getNoWrapFlags();
8820 // Intersect flags to avoid UB in updated GEP.
8821 auto TargetFlags =
8822 UGEPI->getNoWrapFlags().intersectForOffsetAdd(SourceFlags);
8823 // If UGEPI now has a negative index, drop the nuw flag.
8824 if (NewIdx.isNegative() && TargetFlags.hasNoUnsignedWrap())
8825 TargetFlags = TargetFlags.withoutNoUnsignedWrap();
8826 UGEPI->setNoWrapFlags(TargetFlags);
8827 }
8828 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8829 // alive on IndirectBr edges).
8830 assert(llvm::none_of(GEPIOp->users(),
8831 [&](User *Usr) {
8832 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8833 }) &&
8834 "GEPIOp is used outside SrcBlock");
8835 return true;
8836}
8837
8838static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8839 SmallSet<BasicBlock *, 32> &FreshBBs,
8840 bool IsHugeFunc) {
8841 // Try and convert
8842 // %c = icmp ult %x, 8
8843 // br %c, bla, blb
8844 // %tc = lshr %x, 3
8845 // to
8846 // %tc = lshr %x, 3
8847 // %c = icmp eq %tc, 0
8848 // br %c, bla, blb
8849 // Creating the cmp to zero can be better for the backend, especially if the
8850 // lshr produces flags that can be used automatically.
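// Similarly (illustrative, not from the original source), for an equality
// compare the arithmetic already present can supply the zero flag:
//   %c = icmp eq %x, 7 ... %s = sub %x, 7
// can become
//   %s = sub %x, 7
//   %c = icmp eq %s, 0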
8851 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8852 return false;
8853
8854 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8855 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8856 return false;
8857
8858 Value *X = Cmp->getOperand(0);
8859 if (!X->hasUseList())
8860 return false;
8861
8862 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8863
8864 for (auto *U : X->users()) {
8865 Instruction *UI = dyn_cast<Instruction>(U);
8866 // A quick dominance check
8867 if (!UI ||
8868 (UI->getParent() != Branch->getParent() &&
8869 UI->getParent() != Branch->getSuccessor(0) &&
8870 UI->getParent() != Branch->getSuccessor(1)) ||
8871 (UI->getParent() != Branch->getParent() &&
8872 !UI->getParent()->getSinglePredecessor()))
8873 continue;
8874
8875 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8876 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8877 IRBuilder<> Builder(Branch);
8878 if (UI->getParent() != Branch->getParent())
8879 UI->moveBefore(Branch->getIterator());
8880 UI->dropPoisonGeneratingFlags();
8881 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8882 ConstantInt::get(UI->getType(), 0));
8883 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8884 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8885 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8886 return true;
8887 }
8888 if (Cmp->isEquality() &&
8889 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8890 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8891 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8892 IRBuilder<> Builder(Branch);
8893 if (UI->getParent() != Branch->getParent())
8894 UI->moveBefore(Branch->getIterator());
8895 UI->dropPoisonGeneratingFlags();
8896 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8897 ConstantInt::get(UI->getType(), 0));
8898 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8899 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8900 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8901 return true;
8902 }
8903 }
8904 return false;
8905}
8906
8907bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8908 bool AnyChange = false;
8909 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8910
8911 // Bail out if we inserted the instruction to prevent optimizations from
8912 // stepping on each other's toes.
8913 if (InsertedInsts.count(I))
8914 return AnyChange;
8915
8916 // TODO: Move into the switch on opcode below here.
8917 if (PHINode *P = dyn_cast<PHINode>(I)) {
8918 // It is possible for very late stage optimizations (such as SimplifyCFG)
8919 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8920 // trivial PHI, go ahead and zap it here.
8921 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8922 LargeOffsetGEPMap.erase(P);
8923 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8924 P->eraseFromParent();
8925 ++NumPHIsElim;
8926 return true;
8927 }
8928 return AnyChange;
8929 }
8930
8931 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8932 // If the source of the cast is a constant, then this should have
8933 // already been constant folded. The only reason NOT to constant fold
8934 // it is if something (e.g. LSR) was careful to place the constant
8935 // evaluation in a block other than the one that uses it (e.g. to hoist
8936 // the address of globals out of a loop). If this is the case, we don't
8937 // want to forward-subst the cast.
8938 if (isa<Constant>(CI->getOperand(0)))
8939 return AnyChange;
8940
8941 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8942 return true;
8943
8944 if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) ||
8945 isa<TruncInst>(I)) &&
8946 TLI->optimizeExtendOrTruncateConversion(
8947 I, LI->getLoopFor(I->getParent()), *TTI))
8948 return true;
8949
8950 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8951 /// Sink a zext or sext into its user blocks if the target type doesn't
8952 /// fit in one register
8953 if (TLI->getTypeAction(CI->getContext(),
8954 TLI->getValueType(*DL, CI->getType())) ==
8955 TargetLowering::TypeExpandInteger) {
8956 return SinkCast(CI);
8957 } else {
8958 if (TLI->optimizeExtendOrTruncateConversion(
8959 I, LI->getLoopFor(I->getParent()), *TTI))
8960 return true;
8961
8962 bool MadeChange = optimizeExt(I);
8963 return MadeChange | optimizeExtUses(I);
8964 }
8965 }
8966 return AnyChange;
8967 }
8968
8969 if (auto *Cmp = dyn_cast<CmpInst>(I))
8970 if (optimizeCmp(Cmp, ModifiedDT))
8971 return true;
8972
8973 if (match(I, m_URem(m_Value(), m_Value())))
8974 if (optimizeURem(I))
8975 return true;
8976
8977 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8978 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8979 bool Modified = optimizeLoadExt(LI);
8980 unsigned AS = LI->getPointerAddressSpace();
8981 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8982 return Modified;
8983 }
8984
8985 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8986 if (splitMergedValStore(*SI, *DL, *TLI))
8987 return true;
8988 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8989 unsigned AS = SI->getPointerAddressSpace();
8990 return optimizeMemoryInst(I, SI->getOperand(1),
8991 SI->getOperand(0)->getType(), AS);
8992 }
8993
8994 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8995 unsigned AS = RMW->getPointerAddressSpace();
8996 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8997 }
8998
8999 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
9000 unsigned AS = CmpX->getPointerAddressSpace();
9001 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
9002 CmpX->getCompareOperand()->getType(), AS);
9003 }
9004
9005 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
9006
9007 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
9008 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
9009 return true;
9010
9011 // TODO: Move this into the switch on opcode - it handles shifts already.
9012 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
9013 BinOp->getOpcode() == Instruction::LShr)) {
9014 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
9015 if (CI && TLI->hasExtractBitsInsn())
9016 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
9017 return true;
9018 }
9019
9020 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
9021 if (GEPI->hasAllZeroIndices()) {
9022 /// The GEP operand must be a pointer, so must its result -> BitCast
9023 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
9024 GEPI->getName(), GEPI->getIterator());
9025 NC->setDebugLoc(GEPI->getDebugLoc());
9026 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
9027 RecursivelyDeleteTriviallyDeadInstructions(
9028 GEPI, TLInfo, nullptr,
9029 [&](Value *V) { removeAllAssertingVHReferences(V); });
9030 ++NumGEPsElim;
9031 optimizeInst(NC, ModifiedDT);
9032 return true;
9033 }
9034 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
9035 return true;
9036 }
9037 }
9038
9039 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
9040 // freeze(icmp a, const)) -> icmp (freeze a), const
9041 // This helps generate efficient conditional jumps.
9042 Instruction *CmpI = nullptr;
9043 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
9044 CmpI = II;
9045 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
9046 CmpI = F->getFastMathFlags().none() ? F : nullptr;
9047
9048 if (CmpI && CmpI->hasOneUse()) {
9049 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
9050 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
9051 isa<ConstantPointerNull>(Op0);
9052 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
9053 isa<ConstantPointerNull>(Op1);
9054 if (Const0 || Const1) {
9055 if (!Const0 || !Const1) {
9056 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
9057 F->takeName(FI);
9058 CmpI->setOperand(Const0 ? 1 : 0, F);
9059 }
9060 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
9061 FI->eraseFromParent();
9062 return true;
9063 }
9064 }
9065 return AnyChange;
9066 }
9067
9068 if (tryToSinkFreeOperands(I))
9069 return true;
9070
9071 switch (I->getOpcode()) {
9072 case Instruction::Shl:
9073 case Instruction::LShr:
9074 case Instruction::AShr:
9075 return optimizeShiftInst(cast<BinaryOperator>(I));
9076 case Instruction::Call:
9077 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
9078 case Instruction::Select:
9079 return optimizeSelectInst(cast<SelectInst>(I));
9080 case Instruction::ShuffleVector:
9081 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
9082 case Instruction::Switch:
9083 return optimizeSwitchInst(cast<SwitchInst>(I));
9084 case Instruction::ExtractElement:
9085 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
9086 case Instruction::Br:
9087 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
9088 }
9089
9090 return AnyChange;
9091}
9092
9093/// Given an OR instruction, check to see if this is a bitreverse
9094/// idiom. If so, insert the new intrinsic and return true.
9095bool CodeGenPrepare::makeBitReverse(Instruction &I) {
9096 if (!I.getType()->isIntegerTy() ||
9097 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
9098 TLI->getValueType(*DL, I.getType(), true)))
9099 return false;
9100
9101 SmallVector<Instruction *, 4> Insts;
9102 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
9103 return false;
9104 Instruction *LastInst = Insts.back();
9105 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
9106 RecursivelyDeleteTriviallyDeadInstructions(
9107 &I, TLInfo, nullptr,
9108 [&](Value *V) { removeAllAssertingVHReferences(V); });
9109 return true;
9110}
9111
9112// In this pass we look for GEP and cast instructions that are used
9113// across basic blocks and rewrite them to improve basic-block-at-a-time
9114// selection.
9115bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
9116 SunkAddrs.clear();
9117 bool MadeChange = false;
9118
9119 do {
9120 CurInstIterator = BB.begin();
9121 ModifiedDT = ModifyDT::NotModifyDT;
9122 while (CurInstIterator != BB.end()) {
9123 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
9124 if (ModifiedDT != ModifyDT::NotModifyDT) {
9125 // For a huge function we tend to quickly go through the inner optimization
9126 // opportunities in the BB. So we go back to the BB head to re-optimize
9127 // each instruction instead of going back to the function head.
9128 if (IsHugeFunc)
9129 break;
9130 return true;
9131 }
9132 }
9133 } while (ModifiedDT == ModifyDT::ModifyInstDT);
9134
9135 bool MadeBitReverse = true;
9136 while (MadeBitReverse) {
9137 MadeBitReverse = false;
9138 for (auto &I : reverse(BB)) {
9139 if (makeBitReverse(I)) {
9140 MadeBitReverse = MadeChange = true;
9141 break;
9142 }
9143 }
9144 }
9145 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
9146
9147 return MadeChange;
9148}
9149
9150bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
9151 bool AnyChange = false;
9152 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
9153 AnyChange |= fixupDbgVariableRecord(DVR);
9154 return AnyChange;
9155}
9156
9157// FIXME: should updating debug-info really cause the "changed" flag to fire,
9158// which can cause a function to be reprocessed?
9159bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
9160 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
9161 DVR.Type != DbgVariableRecord::LocationType::Assign)
9162 return false;
9163
9164 // Does this DbgVariableRecord refer to a sunk address calculation?
9165 bool AnyChange = false;
9166 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
9167 DVR.location_ops().end());
9168 for (Value *Location : LocationOps) {
9169 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
9170 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
9171 if (SunkAddr) {
9172 // Point dbg.value at locally computed address, which should give the best
9173 // opportunity to be accurately lowered. This update may change the type
9174 // of pointer being referred to; however this makes no difference to
9175 // debugging information, and we can't generate bitcasts that may affect
9176 // codegen.
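// E.g. (names illustrative) if %addr, defined in another block, was sunk
// here as %sunkaddr, a #dbg_value(ptr %addr, ...) record is retargeted to
// #dbg_value(ptr %sunkaddr, ...).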
9177 DVR.replaceVariableLocationOp(Location, SunkAddr);
9178 AnyChange = true;
9179 }
9180 }
9181 return AnyChange;
9182}
9183
9184 static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
9185 DVR->removeFromParent();
9186 BasicBlock *VIBB = VI->getParent();
9187 if (isa<PHINode>(VI))
9188 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
9189 else
9190 VIBB->insertDbgRecordAfter(DVR, &*VI);
9191}
9192
9193 // An llvm.dbg.value may be using a value before its definition, due to
9194// optimizations in this pass and others. Scan for such dbg.values, and rescue
9195// them by moving the dbg.value to immediately after the value definition.
9196// FIXME: Ideally this should never be necessary, and this has the potential
9197// to re-order dbg.value intrinsics.
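// For example (illustrative IR, names hypothetical): if earlier sinking left
//     #dbg_value(i32 %v, ...)
//   %v = add i32 %a, %b
// where the record refers to %v before its definition, the record is rescued
// by moving it to immediately after the add:
//   %v = add i32 %a, %b
//     #dbg_value(i32 %v, ...)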
9198bool CodeGenPrepare::placeDbgValues(Function &F) {
9199 bool MadeChange = false;
9200 DominatorTree &DT = getDT();
9201
9202 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
9203 SmallVector<Instruction *, 4> VIs;
9204 for (Value *V : DbgItem->location_ops())
9205 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9206 VIs.push_back(VI);
9207
9208 // This item may depend on multiple instructions, complicating any
9209 // potential sink. This block takes the defensive approach, opting to
9210 // "undef" the item if it has more than one instruction and any of them do
9211 // not dominate iem.
9212 for (Instruction *VI : VIs) {
9213 if (VI->isTerminator())
9214 continue;
9215
9216 // If VI is a phi in a block with an EHPad terminator, we can't insert
9217 // after it.
9218 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9219 continue;
9220
9221 // If the defining instruction dominates the dbg.value, we do not need
9222 // to move the dbg.value.
9223 if (DT.dominates(VI, Position))
9224 continue;
9225
9226 // If we depend on multiple instructions and any of them doesn't
9227 // dominate this DVI, we probably can't salvage it: moving it to
9228 // after any of the instructions could cause us to lose the others.
9229 if (VIs.size() > 1) {
9230 LLVM_DEBUG(
9231 dbgs()
9232 << "Unable to find valid location for Debug Value, undefing:\n"
9233 << *DbgItem);
9234 DbgItem->setKillLocation();
9235 break;
9236 }
9237
9238 LLVM_DEBUG(dbgs() << "Moving Debug Value before:\n"
9239 << *DbgItem << ' ' << *VI);
9240 DbgInserterHelper(DbgItem, VI->getIterator());
9241 MadeChange = true;
9242 ++NumDbgValueMoved;
9243 }
9244 };
9245
9246 for (BasicBlock &BB : F) {
9247 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9248 // Process any DbgVariableRecord records attached to this
9249 // instruction.
9250 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9251 filterDbgVars(Insn.getDbgRecordRange()))) {
9252 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9253 continue;
9254 DbgProcessor(&DVR, &Insn);
9255 }
9256 }
9257 }
9258
9259 return MadeChange;
9260}
9261
9262// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9263// probes can be chained dependencies of other regular DAG nodes and block DAG
9264// combine optimizations.
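// Sketch of the effect (illustrative IR): a block like
//   %a = ...
//   call void @llvm.pseudoprobe(i64 %guid, i64 1, i32 0, i64 -1)
//   %b = ...
//   call void @llvm.pseudoprobe(i64 %guid, i64 2, i32 0, i64 -1)
// becomes one where both probe calls sit together at the top of the block,
// ahead of %a and %b, so they no longer split the DAG between them.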
9265bool CodeGenPrepare::placePseudoProbes(Function &F) {
9266 bool MadeChange = false;
9267 for (auto &Block : F) {
9268 // Move the remaining probes to the beginning of the block.
9269 auto FirstInst = Block.getFirstInsertionPt();
9270 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9271 ++FirstInst;
9272 BasicBlock::iterator I(FirstInst);
9273 I++;
9274 while (I != Block.end()) {
9275 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9276 II->moveBefore(FirstInst);
9277 MadeChange = true;
9278 }
9279 }
9280 }
9281 return MadeChange;
9282}
9283
9284/// Scale down both weights to fit into uint32_t.
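/// Worked example (illustrative numbers): for NewTrue = 2^33 and
/// NewFalse = 2^31, Scale = 2^33 / (2^32 - 1) + 1 = 3, producing
/// 2863311530 and 715827882; both fit in uint32_t and keep the ratio.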
9285static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9286 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9287 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9288 NewTrue = NewTrue / Scale;
9289 NewFalse = NewFalse / Scale;
9290}
9291
9292/// Some targets prefer to split a conditional branch like:
9293/// \code
9294/// %0 = icmp ne i32 %a, 0
9295/// %1 = icmp ne i32 %b, 0
9296/// %or.cond = or i1 %0, %1
9297/// br i1 %or.cond, label %TrueBB, label %FalseBB
9298/// \endcode
9299/// into multiple branch instructions like:
9300/// \code
9301/// bb1:
9302/// %0 = icmp ne i32 %a, 0
9303/// br i1 %0, label %TrueBB, label %bb2
9304/// bb2:
9305/// %1 = icmp ne i32 %b, 0
9306/// br i1 %1, label %TrueBB, label %FalseBB
9307/// \endcode
9308/// This usually allows instruction selection to do even further optimizations
9309/// and combine the compare with the branch instruction. Currently this is
9310/// applied for targets which have "cheap" jump instructions.
9311///
9312/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9313///
9314bool CodeGenPrepare::splitBranchCondition(Function &F) {
9315 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9316 return false;
9317
9318 bool MadeChange = false;
9319 for (auto &BB : F) {
9320 // Does this BB end with the following?
9321 // %cond1 = icmp|fcmp|binary instruction ...
9322 // %cond2 = icmp|fcmp|binary instruction ...
9323 // %cond.or = or|and i1 %cond1, %cond2
9324 // br i1 %cond.or, label %dest1, label %dest2
9325 Instruction *LogicOp;
9326 BasicBlock *TBB, *FBB;
9327 if (!match(BB.getTerminator(),
9328 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9329 continue;
9330
9331 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9332 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9333 continue;
9334
9335 // The merging of mostly empty BBs can cause a degenerate branch.
9336 if (TBB == FBB)
9337 continue;
9338
9339 unsigned Opc;
9340 Value *Cond1, *Cond2;
9341 if (match(LogicOp,
9342 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9343 Opc = Instruction::And;
9344 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9345 m_OneUse(m_Value(Cond2)))))
9346 Opc = Instruction::Or;
9347 else
9348 continue;
9349
9350 auto IsGoodCond = [](Value *Cond) {
9351 return match(
9352 Cond,
9353 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9354 m_LogicalOr(m_Value(), m_Value()))));
9355 };
9356 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9357 continue;
9358
9359 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9360
9361 // Create a new BB.
9362 auto *TmpBB =
9363 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9364 BB.getParent(), BB.getNextNode());
9365 if (IsHugeFunc)
9366 FreshBBs.insert(TmpBB);
9367
9368 // Update the original basic block to use the first condition directly in
9369 // the branch instruction, and remove the no-longer-needed and/or instruction.
9370 Br1->setCondition(Cond1);
9371 LogicOp->eraseFromParent();
9372
9373 // Depending on the condition we have to either replace the true or the
9374 // false successor of the original branch instruction.
9375 if (Opc == Instruction::And)
9376 Br1->setSuccessor(0, TmpBB);
9377 else
9378 Br1->setSuccessor(1, TmpBB);
9379
9380 // Fill in the new basic block.
9381 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9382 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9383 I->removeFromParent();
9384 I->insertBefore(Br2->getIterator());
9385 }
9386
9387 // Update PHI nodes in both successors. The original BB needs to be
9388 // replaced in one successor's PHI nodes, because the branch now comes from
9389 // the newly generated BB (TmpBB). In the other successor we need to add one
9390 // incoming edge to the PHI nodes, because both branch instructions now
9391 // target the same successor. Depending on the original branch condition
9392 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9393 // we perform the correct update for the PHI nodes.
9394 // This doesn't change the successor order of the just created branch
9395 // instruction (or any other instruction).
9396 if (Opc == Instruction::Or)
9397 std::swap(TBB, FBB);
9398
9399 // Replace the old BB with the new BB.
9400 TBB->replacePhiUsesWith(&BB, TmpBB);
9401
9402 // Add another incoming edge from the new BB.
9403 for (PHINode &PN : FBB->phis()) {
9404 auto *Val = PN.getIncomingValueForBlock(&BB);
9405 PN.addIncoming(Val, TmpBB);
9406 }
9407
9408 if (Loop *L = LI->getLoopFor(&BB))
9409 L->addBasicBlockToLoop(TmpBB, *LI);
9410
9411 // The edge we need to delete starts at BB and ends at whatever TBB ends
9412 // up pointing to.
9413 DTU->applyUpdates({{DominatorTree::Insert, &BB, TmpBB},
9414 {DominatorTree::Insert, TmpBB, TBB},
9415 {DominatorTree::Insert, TmpBB, FBB},
9416 {DominatorTree::Delete, &BB, TBB}});
9417
9418 // Update the branch weights (from SelectionDAGBuilder::
9419 // FindMergedConditions).
9420 if (Opc == Instruction::Or) {
9421 // Codegen X | Y as:
9422 // BB1:
9423 // jmp_if_X TBB
9424 // jmp TmpBB
9425 // TmpBB:
9426 // jmp_if_Y TBB
9427 // jmp FBB
9428 //
9429
9430 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9431 // The requirement is that
9432 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9433 // = TrueProb for original BB.
9434 // Assuming the original weights are A and B, one choice is to set BB1's
9435 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9436 // assumes that
9437 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9438 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9439 // TmpBB, but the math is more complicated.
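// Numeric check (illustrative): with A = 3 and B = 1 the original TrueProb
// is 3/4; BB1 gets weights 3 and 5, TmpBB gets 3 and 2, and indeed
// 3/8 + (5/8) * (3/5) = 3/4.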
9440 uint64_t TrueWeight, FalseWeight;
9441 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9442 uint64_t NewTrueWeight = TrueWeight;
9443 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9444 scaleWeights(NewTrueWeight, NewFalseWeight);
9445 Br1->setMetadata(LLVMContext::MD_prof,
9446 MDBuilder(Br1->getContext())
9447 .createBranchWeights(NewTrueWeight, NewFalseWeight,
9448 hasBranchWeightOrigin(*Br1)));
9449
9450 NewTrueWeight = TrueWeight;
9451 NewFalseWeight = 2 * FalseWeight;
9452 scaleWeights(NewTrueWeight, NewFalseWeight);
9453 Br2->setMetadata(LLVMContext::MD_prof,
9454 MDBuilder(Br2->getContext())
9455 .createBranchWeights(NewTrueWeight, NewFalseWeight));
9456 }
9457 } else {
9458 // Codegen X & Y as:
9459 // BB1:
9460 // jmp_if_X TmpBB
9461 // jmp FBB
9462 // TmpBB:
9463 // jmp_if_Y TBB
9464 // jmp FBB
9465 //
9466 // This requires creation of TmpBB after CurBB.
9467
9468 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9469 // The requirement is that
9470 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9471 // = FalseProb for original BB.
9472 // Assuming the original weights are A and B, one choice is to set BB1's
9473 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9474 // assumes that
9475 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
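// Numeric check (illustrative): with A = 3 and B = 1 the original FalseProb
// is 1/4; BB1 gets weights 7 and 1, TmpBB gets 6 and 1, and indeed
// 1/8 + (7/8) * (1/7) = 1/4.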
9476 uint64_t TrueWeight, FalseWeight;
9477 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9478 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9479 uint64_t NewFalseWeight = FalseWeight;
9480 scaleWeights(NewTrueWeight, NewFalseWeight);
9481 Br1->setMetadata(LLVMContext::MD_prof,
9482 MDBuilder(Br1->getContext())
9483 .createBranchWeights(NewTrueWeight, NewFalseWeight));
9484
9485 NewTrueWeight = 2 * TrueWeight;
9486 NewFalseWeight = FalseWeight;
9487 scaleWeights(NewTrueWeight, NewFalseWeight);
9488 Br2->setMetadata(LLVMContext::MD_prof,
9489 MDBuilder(Br2->getContext())
9490 .createBranchWeights(NewTrueWeight, NewFalseWeight));
9491 }
9492 }
9493
9494 MadeChange = true;
9495
9496 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9497 TmpBB->dump());
9498 }
9499 return MadeChange;
9500}
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(CondBrInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, ExtractValueInst *&OverflowExtract)
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool despeculateCountZeros(IntrinsicInst *CountZeros, DomTreeUpdater *DTU, LoopInfo *LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
IRTranslator LLVM IR MI
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1448
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
ppc ctr loops verify
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static DominatorTree getDomTree(Function &F)
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
unsigned logBase2() const
Definition APInt.h:1784
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1028
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1585
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Analysis pass which computes BranchProbabilityInfo.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Conditional Branch instruction.
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:328
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
LLVM_ABI void deleteBB(BasicBlock *DelBB)
Delete DelBB.
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:314
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:60
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
DomTreeT & getDomTree()
Flush DomTree updates and return DomTree.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
void flush()
Apply all pending updates to available trees and flush all BasicBlocks awaiting deletion.
bool isBBPendingDeletion(BasicBlockT *DelBB) const
Returns true if DelBB is awaiting deletion.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:351
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:569
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:587
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void analyze(const DominatorTreeBase< BlockT, false > &DomTree)
Create the loop forest using a stable algorithm.
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:612
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:38
iterator find(const KeyT &Key)
Definition MapVector.h:156
iterator end()
Definition MapVector.h:69
bool empty() const
Definition MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:126
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:210
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
bool erase(const T &V)
Definition SmallSet.h:200
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:774
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
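A minimal query sketch (the helper name and the particular mode are invented for illustration): describe "base + 4*index + 16" in an AddrMode and ask the target whether it is legal for a load of Ty:

```cpp
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Sketch: is "BaseReg + 4*IndexReg + 16" a legal mode for loads of Ty?
static bool isBaseScale4Off16Legal(const TargetLowering &TLI,
                                   const DataLayout &DL, Type *Ty,
                                   unsigned AddrSpace) {
  TargetLowering::AddrMode AM;
  AM.HasBaseReg = true; // a base register is present
  AM.Scale = 4;         // plus a scaled index register
  AM.BaseOffs = 16;     // plus a constant displacement
  return TLI.isLegalAddressingMode(DL, AM, Ty, AddrSpace);
}
```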
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:65
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
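A small sketch combining these Type helpers (the function is invented and assumes an integer or integer-vector input): round the lane width up to the next power of two while keeping the lane count unchanged:

```cpp
#include "llvm/ADT/bit.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Sketch: i24 -> i32, <4 x i17> -> <4 x i32>, and so on.
static Type *widenToPow2(Type *Ty) {
  unsigned Bits = Ty->getScalarSizeInBits();
  return Ty->getWithNewBitWidth(bit_ceil(Bits));
}
```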
BasicBlock * getSuccessor(unsigned i=0) const
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
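These members support a rewrite idiom that recurs throughout IR transforms; a minimal sketch (helper name invented):

```cpp
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Sketch: hand the old instruction's name and uses to its replacement,
// then erase the now-dead original.
static void replaceAndErase(Instruction *Old, Instruction *New) {
  New->takeName(Old);
  Old->replaceAllUsesWith(New);
  Old->eraseFromParent();
}
```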
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:964
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:238
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
iterator_range< use_iterator > uses()
Definition Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:816
user_iterator_impl< User > user_iterator
Definition Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
auto m_Cmp()
Matches any compare instruction and ignores it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr and ashr).
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_Constant()
Match an arbitrary Constant and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
m_Intrinsic_Ty< Opnd0 >::Ty m_Ctpop(const Opnd0 &Op0)
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
brc_match< Cond_t, match_bind< BasicBlock >, match_bind< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
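These matchers compose into declarative recognizers; a short sketch (the bound names X, AddC and ShAmtC are invented) that matches "(X + AddC) >> ShAmtC" in a single call instead of a chain of dyn_casts:

```cpp
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Sketch: capture the addend and shift amount of "(X + AddC) >> ShAmtC";
// m_Shr accepts both lshr and ashr.
static bool matchShiftedAdd(Value *V, Value *&X, const APInt *&AddC,
                            const APInt *&ShAmtC) {
  return match(V, m_Shr(m_Add(m_Value(X), m_APInt(AddC)), m_APInt(ShAmtC)));
}
```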
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:50
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
std::enable_if_t< std::is_signed_v< T >, T > MulOverflow(T X, T Y, T &Result)
Multiply two signed integers, computing the two's complement truncated result, returning true if an o...
Definition MathExtras.h:753
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
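A sketch of the style these range wrappers enable (helper name invented; it assumes every user of V is an instruction):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Sketch: do all users of V live inside block BB?
static bool allUsersInBlock(const Value *V, const BasicBlock *BB) {
  return all_of(V->users(), [BB](const User *U) {
    return cast<Instruction>(U)->getParent() == BB;
  });
}
```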
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
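A hedged driver sketch (function name invented): fold every terminator whose condition has become constant; DeleteDeadConditions also erases the dead condition value itself:

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Sketch: constant-fold terminators across a whole function.
static bool foldConstantTerminators(Function &F) {
  bool Changed = false;
  for (BasicBlock &BB : make_early_inc_range(F))
    Changed |= ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true);
  return Changed;
}
```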
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2264
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1683
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2142
constexpr from_range_t from_range
LLVM_ABI BasicBlock * splitBlockBefore(BasicBlock *Old, BasicBlock::iterator SplitPt, DomTreeUpdater *DTU, LoopInfo *LI, MemorySSAUpdater *MSSAU, const Twine &BBName="")
Split the specified block at the specified instruction SplitPt.
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr, DomTreeUpdater *DTU=nullptr)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2133
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2172
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
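The standard call pattern, sketched (helper name invented; the SimplifyQuery is assumed to be prepared by the caller):

```cpp
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

// Sketch: if I folds to an existing value, forward its uses; erasing the
// dead instruction is left to later cleanup.
static bool trySimplify(Instruction *I, const SimplifyQuery &SQ) {
  if (Value *V = simplifyInstruction(I, SQ.getWithInstruction(I))) {
    I->replaceAllUsesWith(V);
    return true;
  }
  return false;
}
```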
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2199
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3785
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1398
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition Analysis.cpp:203
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:53
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test whether, given that the input instruction is in the tail call position, there is an attribute misma...
Definition Analysis.cpp:588
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2011
DWARFExpression::Operation Op
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
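The consuming side, sketched against the two TargetLowering hooks listed earlier (helper name invented):

```cpp
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

using namespace llvm;

// Sketch: rewrite eligible div/rem in BB using the target's width map.
static bool maybeBypassSlowDiv(BasicBlock &BB, const TargetLowering &TLI) {
  if (!TLI.isSlowDivBypassed())
    return false;
  return bypassSlowDivision(&BB, TLI.getBypassSlowDivWidths());
}
```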
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI bool VerifyDomInfo
Enables verification of dominator trees.
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow ...
Definition MathExtras.h:701
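A sketch of the checked-arithmetic idiom both helpers support (function name invented); like the helpers themselves, it reports true on overflow:

```cpp
#include "llvm/Support/MathExtras.h"

#include <cstdint>

// Sketch: compute Out = Base + Idx*Scale, reporting overflow at either
// step instead of silently wrapping.
static bool addScaledOffset(int64_t Base, int64_t Idx, int64_t Scale,
                            int64_t &Out) {
  int64_t Scaled;
  if (llvm::MulOverflow(Idx, Scale, Scaled))
    return true;
  return llvm::AddOverflow(Base, Scaled, Out);
}
```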
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
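The usual call shape, sketched (helper name invented): materialize "if (Cond) { ... }" around an insertion point:

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Sketch: code after SplitBefore moves to a continuation block; ThenTerm
// is the branch ending the new "then" block.
static void emitGuarded(Value *Cond, BasicBlock::iterator SplitBefore) {
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, SplitBefore, /*Unreachable=*/false);
  IRBuilder<> B(ThenTerm);
  // ... emit the conditionally executed instructions via B here ...
}
```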
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:308
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:256
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
This contains information for each constraint that we are lowering.