LLVM 19.0.0git
CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
43#include "llvm/Config/llvm-config.h"
44#include "llvm/IR/Argument.h"
45#include "llvm/IR/Attributes.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/Constant.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DebugInfo.h"
52#include "llvm/IR/Dominators.h"
53#include "llvm/IR/Function.h"
55#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/InlineAsm.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
63#include "llvm/IR/Intrinsics.h"
64#include "llvm/IR/IntrinsicsAArch64.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/MDBuilder.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
71#include "llvm/IR/Statepoint.h"
72#include "llvm/IR/Type.h"
73#include "llvm/IR/Use.h"
74#include "llvm/IR/User.h"
75#include "llvm/IR/Value.h"
76#include "llvm/IR/ValueHandle.h"
77#include "llvm/IR/ValueMap.h"
79#include "llvm/Pass.h"
85#include "llvm/Support/Debug.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <optional>
103#include <utility>
104#include <vector>
105
106using namespace llvm;
107using namespace llvm::PatternMatch;
108
109#define DEBUG_TYPE "codegenprepare"
110
111STATISTIC(NumBlocksElim, "Number of blocks eliminated");
112STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
113STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
114STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
115 "sunken Cmps");
116STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
117 "of sunken Casts");
118STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
119 "computations were sunk");
120STATISTIC(NumMemoryInstsPhiCreated,
121 "Number of phis created when address "
122 "computations were sunk to memory instructions");
123STATISTIC(NumMemoryInstsSelectCreated,
124 "Number of select created when address "
125 "computations were sunk to memory instructions");
126STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
127STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
128STATISTIC(NumAndsAdded,
129 "Number of and mask instructions added to form ext loads");
130STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
131STATISTIC(NumRetsDup, "Number of return instructions duplicated");
132STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
133STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
134STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
135
137 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
138 cl::desc("Disable branch optimizations in CodeGenPrepare"));
139
140static cl::opt<bool>
141 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
142 cl::desc("Disable GC optimizations in CodeGenPrepare"));
143
144static cl::opt<bool>
145 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
146 cl::init(false),
147 cl::desc("Disable select to branch conversion."));
148
149static cl::opt<bool>
150 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
151 cl::desc("Address sinking in CGP using GEPs."));
152
153static cl::opt<bool>
154 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
155                         cl::desc("Enable sinking and/cmp into branches."));
156
158 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
159 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
160
162 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
163 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
164
166 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
167 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
168 "CodeGenPrepare"));
169
171 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
172 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
173 "optimization in CodeGenPrepare"));
174
176 "disable-preheader-prot", cl::Hidden, cl::init(false),
177 cl::desc("Disable protection against removing loop preheaders"));
178
180 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
181 cl::desc("Use profile info to add section prefix for hot/cold functions"));
182
184 "profile-unknown-in-special-section", cl::Hidden,
185 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
186 "profile, we cannot tell the function is cold for sure because "
187 "it may be a function newly added without ever being sampled. "
188 "With the flag enabled, compiler can put such profile unknown "
189 "functions into a special section, so runtime system can choose "
190 "to handle it in a different way than .text section, to save "
191 "RAM for example. "));
192
194 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
195 cl::desc("Use the basic-block-sections profile to determine the text "
196 "section prefix for hot functions. Functions with "
197 "basic-block-sections profile will be placed in `.text.hot` "
198 "regardless of their FDO profile info. Other functions won't be "
199 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
200 "profiles."));
201
203 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
204 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
205 "(frequency of destination block) is greater than this ratio"));
206
208 "force-split-store", cl::Hidden, cl::init(false),
209 cl::desc("Force store splitting no matter what the target query says."));
210
212 "cgp-type-promotion-merge", cl::Hidden,
213 cl::desc("Enable merging of redundant sexts when one is dominating"
214 " the other."),
215 cl::init(true));
216
218 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
219 cl::desc("Disables combining addressing modes with different parts "
220 "in optimizeMemoryInst."));
221
222static cl::opt<bool>
223 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
224 cl::desc("Allow creation of Phis in Address sinking."));
225
227 "addr-sink-new-select", cl::Hidden, cl::init(true),
228 cl::desc("Allow creation of selects in Address sinking."));
229
231 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
232 cl::desc("Allow combining of BaseReg field in Address sinking."));
233
235 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
236 cl::desc("Allow combining of BaseGV field in Address sinking."));
237
239 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
240 cl::desc("Allow combining of BaseOffs field in Address sinking."));
241
243 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
244 cl::desc("Allow combining of ScaledReg field in Address sinking."));
245
246static cl::opt<bool>
247 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
248 cl::init(true),
249 cl::desc("Enable splitting large offset of GEP."));
250
252 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
253 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
254
255static cl::opt<bool>
256 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
257 cl::desc("Enable BFI update verification for "
258 "CodeGenPrepare."));
259
260static cl::opt<bool>
261 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
262 cl::desc("Enable converting phi types in CodeGenPrepare"));
263
265 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
266                             cl::desc("Minimum number of basic blocks for a function to be considered huge."));
267
269 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::desc("Max number of address users to look at"));
272
273static cl::opt<bool>
274 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
275 cl::desc("Disable elimination of dead PHI nodes."));
276
277namespace {
278
279enum ExtType {
280 ZeroExtension, // Zero extension has been seen.
281 SignExtension, // Sign extension has been seen.
282 BothExtension // This extension type is used if we saw sext after
283 // ZeroExtension had been set, or if we saw zext after
284 // SignExtension had been set. It makes the type
285 // information of a promoted instruction invalid.
286};
287
288enum ModifyDT {
289  NotModifyDT,  // Do not modify any dominator tree.
290  ModifyBBDT,   // Modify the basic block dominator tree.
291  ModifyInstDT  // Modify instruction dominance within a basic block.
292                // This usually means we move/delete/insert instructions
293                // in a basic block, so we should re-iterate the instructions
294                // in such a basic block.
295};
296
297using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
298using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
299using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using ValueToSExts = MapVector<Value *, SExts>;
302
303class TypePromotionTransaction;
304
305class CodeGenPrepare {
306 friend class CodeGenPrepareLegacyPass;
307 const TargetMachine *TM = nullptr;
308 const TargetSubtargetInfo *SubtargetInfo = nullptr;
309 const TargetLowering *TLI = nullptr;
310 const TargetRegisterInfo *TRI = nullptr;
311 const TargetTransformInfo *TTI = nullptr;
312 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
313 const TargetLibraryInfo *TLInfo = nullptr;
314 LoopInfo *LI = nullptr;
315 std::unique_ptr<BlockFrequencyInfo> BFI;
316 std::unique_ptr<BranchProbabilityInfo> BPI;
317 ProfileSummaryInfo *PSI = nullptr;
318
319 /// As we scan instructions optimizing them, this is the next instruction
320 /// to optimize. Transforms that can invalidate this should update it.
321 BasicBlock::iterator CurInstIterator;
322
323 /// Keeps track of non-local addresses that have been sunk into a block.
324 /// This allows us to avoid inserting duplicate code for blocks with
325 /// multiple load/stores of the same address. The usage of WeakTrackingVH
326 /// enables SunkAddrs to be treated as a cache whose entries can be
327 /// invalidated if a sunken address computation has been erased.
329
330 /// Keeps track of all instructions inserted for the current function.
331 SetOfInstrs InsertedInsts;
332
333  /// Keeps track of the original type of each promoted instruction, for the
334  /// current function.
335 InstrToOrigTy PromotedInsts;
336
337 /// Keep track of instructions removed during promotion.
338 SetOfInstrs RemovedInsts;
339
340 /// Keep track of sext chains based on their initial value.
341 DenseMap<Value *, Instruction *> SeenChainsForSExt;
342
343 /// Keep track of GEPs accessing the same data structures such as structs or
344 /// arrays that are candidates to be split later because of their large
345 /// size.
348 LargeOffsetGEPMap;
349
350 /// Keep track of new GEP base after splitting the GEPs having large offset.
351 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
352
353 /// Map serial numbers to Large offset GEPs.
354 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
355
356 /// Keep track of SExt promoted.
357 ValueToSExts ValToSExtendedUses;
358
359 /// True if the function has the OptSize attribute.
360 bool OptSize;
361
362 /// DataLayout for the Function being processed.
363 const DataLayout *DL = nullptr;
364
365 /// Building the dominator tree can be expensive, so we only build it
366 /// lazily and update it when required.
367 std::unique_ptr<DominatorTree> DT;
368
369public:
370 CodeGenPrepare(){};
371 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
372  /// If we encounter a huge function, we need to limit the build time.
373 bool IsHugeFunc = false;
374
375  /// FreshBBs is like a worklist: it collects the updated BBs that need
376  /// to be optimized again.
377  /// Note: to keep the build time of this pass in check, when a BB is updated
378  /// we need to insert it into FreshBBs for a huge function.
380
381 void releaseMemory() {
382 // Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 BPI.reset();
387 BFI.reset();
388 }
389
391
392private:
393 template <typename F>
394 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
395 // Substituting can cause recursive simplifications, which can invalidate
396 // our iterator. Use a WeakTrackingVH to hold onto it in case this
397 // happens.
398 Value *CurValue = &*CurInstIterator;
399 WeakTrackingVH IterHandle(CurValue);
400
401 f();
402
403 // If the iterator instruction was recursively deleted, start over at the
404 // start of the block.
405 if (IterHandle != CurValue) {
406 CurInstIterator = BB->begin();
407 SunkAddrs.clear();
408 }
409 }
410
411 // Get the DominatorTree, building if necessary.
412 DominatorTree &getDT(Function &F) {
413 if (!DT)
414 DT = std::make_unique<DominatorTree>(F);
415 return *DT;
416 }
417
418 void removeAllAssertingVHReferences(Value *V);
419 bool eliminateAssumptions(Function &F);
420 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
421 bool eliminateMostlyEmptyBlocks(Function &F);
422 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
423 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
424 void eliminateMostlyEmptyBlock(BasicBlock *BB);
425 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
426 bool isPreheader);
427 bool makeBitReverse(Instruction &I);
428 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
429 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
430 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
431 unsigned AddrSpace);
432 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
433 bool optimizeInlineAsmInst(CallInst *CS);
434 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
435 bool optimizeExt(Instruction *&I);
436 bool optimizeExtUses(Instruction *I);
437 bool optimizeLoadExt(LoadInst *Load);
438 bool optimizeShiftInst(BinaryOperator *BO);
439 bool optimizeFunnelShift(IntrinsicInst *Fsh);
440 bool optimizeSelectInst(SelectInst *SI);
441 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
442 bool optimizeSwitchType(SwitchInst *SI);
443 bool optimizeSwitchPhiConstants(SwitchInst *SI);
444 bool optimizeSwitchInst(SwitchInst *SI);
445 bool optimizeExtractElementInst(Instruction *Inst);
446 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
447 bool fixupDbgValue(Instruction *I);
448 bool fixupDPValue(DPValue &I);
449 bool fixupDPValuesOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
475 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 void verifyBFIUpdates(Function &F);
477 bool _run(Function &F);
478};
479
480class CodeGenPrepareLegacyPass : public FunctionPass {
481public:
482 static char ID; // Pass identification, replacement for typeid
483
484 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
486 }
487
488 bool runOnFunction(Function &F) override;
489
490 StringRef getPassName() const override { return "CodeGen Prepare"; }
491
492 void getAnalysisUsage(AnalysisUsage &AU) const override {
493 // FIXME: When we can selectively preserve passes, preserve the domtree.
500 }
501};
502
503} // end anonymous namespace
504
505char CodeGenPrepareLegacyPass::ID = 0;
506
507bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
508 if (skipFunction(F))
509 return false;
510 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
511 CodeGenPrepare CGP(TM);
512 CGP.DL = &F.getParent()->getDataLayout();
513 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
514 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
515 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
516 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
517 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
518 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
519 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
520 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
521 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
522 auto BBSPRWP =
523 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
524 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
525
526 return CGP._run(F);
527}
528
529INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
530 "Optimize for code generation", false, false)
537INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
538 "Optimize for code generation", false, false)
539
541 return new CodeGenPrepareLegacyPass();
542}
543
546 CodeGenPrepare CGP(TM);
547
548 bool Changed = CGP.run(F, AM);
549 if (!Changed)
550 return PreservedAnalyses::all();
551
556 return PA;
557}
558
559bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
560 DL = &F.getParent()->getDataLayout();
561 SubtargetInfo = TM->getSubtargetImpl(F);
562 TLI = SubtargetInfo->getTargetLowering();
563 TRI = SubtargetInfo->getRegisterInfo();
564 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
566 LI = &AM.getResult<LoopAnalysis>(F);
567 BPI.reset(new BranchProbabilityInfo(F, *LI));
568 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
569 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
570 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
571 BBSectionsProfileReader =
573 return _run(F);
574}
575
576bool CodeGenPrepare::_run(Function &F) {
577 bool EverMadeChange = false;
578
579 OptSize = F.hasOptSize();
580 // Use the basic-block-sections profile to promote hot functions to .text.hot
581 // if requested.
582 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
583 BBSectionsProfileReader->isFunctionHot(F.getName())) {
584 F.setSectionPrefix("hot");
585 } else if (ProfileGuidedSectionPrefix) {
586    // The hot attribute takes precedence over profile-count-based hotness,
587    // while profile-count-based hotness takes precedence over the cold attribute.
588    // This is conservative behavior.
589 if (F.hasFnAttribute(Attribute::Hot) ||
590 PSI->isFunctionHotInCallGraph(&F, *BFI))
591 F.setSectionPrefix("hot");
592    // If PSI shows this function is not hot, we will place the function
593    // into the unlikely section if (1) PSI shows this is a cold function, or
594    // (2) the function has the cold attribute.
595 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
596 F.hasFnAttribute(Attribute::Cold))
597 F.setSectionPrefix("unlikely");
598 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
599 PSI->isFunctionHotnessUnknown(F))
600 F.setSectionPrefix("unknown");
601 }
602
603 /// This optimization identifies DIV instructions that can be
604 /// profitably bypassed and carried out with a shorter, faster divide.
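  /// For example (illustrative only), on a target that reports 64-bit
  /// division as slow with a 32-bit bypass width, bypassSlowDivision
  /// roughly rewrites
  ///   %q = udiv i64 %a, %b
  /// into a runtime check of the high bits of %a and %b that selects a
  /// 32-bit udiv on the fast path and keeps the original 64-bit udiv on
  /// the slow path.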
605 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
606 const DenseMap<unsigned int, unsigned int> &BypassWidths =
608 BasicBlock *BB = &*F.begin();
609 while (BB != nullptr) {
610 // bypassSlowDivision may create new BBs, but we don't want to reapply the
611 // optimization to those blocks.
612 BasicBlock *Next = BB->getNextNode();
613 // F.hasOptSize is already checked in the outer if statement.
614 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
615 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620 // Get rid of @llvm.assume builtins before attempting to eliminate empty
621 // blocks, since there might be blocks that only contain @llvm.assume calls
622 // (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625 // Eliminate blocks that contain only PHI nodes and an
626 // unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633 // Split some critical edges where one of the sources is an indirect branch,
634 // to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
637
638  // If we are optimizing a huge function, we need to consider the build time,
639  // because the basic algorithm's complexity is near O(N!).
640 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
641
642 // Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
647 bool MadeChange = true;
648 bool FuncIterated = false;
649 while (MadeChange) {
650 MadeChange = false;
651
653 if (FuncIterated && !FreshBBs.contains(&BB))
654 continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
658
659 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663 if (IsHugeFunc) {
664        // If the BB is updated, it may still have a chance to be optimized.
665        // This usually happens during sink optimization.
666 // For example:
667 //
668 // bb0:
669 // %and = and i32 %a, 4
670 // %cmp = icmp eq i32 %and, 0
671 //
672        // If %cmp is sunk into another BB, %and will also have a chance to sink.
673 if (Changed)
674 FreshBBs.insert(&BB);
675 else if (FuncIterated)
676 FreshBBs.erase(&BB);
677 } else {
678 // For small/normal functions, we restart BB iteration if the dominator
679 // tree of the Function was changed.
680 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681 break;
682 }
683 }
684    // We have iterated over all the BBs in the function (only relevant for huge functions).
685 FuncIterated = IsHugeFunc;
686
687 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689 if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693 if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697 if (MadeChange && VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701 // Really free removed instructions during promotion.
702 for (Instruction *I : RemovedInsts)
703 I->deleteValue();
704
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716 if (!DisableBranchOpts) {
717 MadeChange = false;
718 // Use a set vector to get deterministic iteration order. The order the
719 // blocks are removed may affect whether or not PHI nodes in successors
720 // are removed.
722 for (BasicBlock &BB : F) {
724 MadeChange |= ConstantFoldTerminator(&BB, true);
725 if (!MadeChange)
726 continue;
727
728 for (BasicBlock *Succ : Successors)
729 if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733 // Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735 while (!WorkList.empty()) {
736 BasicBlock *BB = WorkList.pop_back_val();
738
739 DeleteDeadBlock(BB);
740
741 for (BasicBlock *Succ : Successors)
742 if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746 // Merge pairs of basic blocks with unconditional branches, connected by
747 // a single edge.
748 if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754 if (!DisableGCOpts) {
756 for (BasicBlock &BB : F)
757 for (Instruction &I : BB)
758 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760 for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764 // Do this last to clean up use-before-def scenarios introduced by other
765 // preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
771 verifyBFIUpdates(F);
772#endif
773
774 return EverMadeChange;
775}
776
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778 bool MadeChange = false;
779 for (BasicBlock &BB : F) {
780 CurInstIterator = BB.begin();
781 while (CurInstIterator != BB.end()) {
782 Instruction *I = &*(CurInstIterator++);
783 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange = true;
785 Value *Operand = Assume->getOperand(0);
786 Assume->eraseFromParent();
787
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
790 });
791 }
792 }
793 }
794 return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data structures.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
803 auto GEP = dyn_cast<GetElementPtrInst>(V);
804 if (!GEP)
805 return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810 if (VecI == LargeOffsetGEPMap.end())
811 return;
812
813 auto &GEPVector = VecI->second;
814 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
815
816 if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
821void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
822 DominatorTree NewDT(F);
823 LoopInfo NewLI(NewDT);
824 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
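/// For example (illustrative only): if bb1 ends in an unconditional
/// 'br label %bb2' and bb2 has bb1 as its single predecessor, bb2 is folded
/// into bb1 and deleted.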
832bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
833 bool Changed = false;
834 // Scan all of the blocks in the function, except for the entry block.
835  // Use a temporary array to avoid the iterator being invalidated when
836 // deleting blocks.
838 for (auto &Block : llvm::drop_begin(F))
839 Blocks.push_back(&Block);
840
842 for (auto &Block : Blocks) {
843 auto *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is a mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
896 if (BBI != BB->begin()) {
897 --BBI;
898 while (isa<DbgInfoIntrinsic>(BBI)) {
899 if (BBI == BB->begin())
900 break;
901 --BBI;
902 }
903 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
904 return nullptr;
905 }
906
907 // Do not break infinite loops.
908 BasicBlock *DestBB = BI->getSuccessor(0);
909 if (DestBB == BB)
910 return nullptr;
911
912 if (!canMergeBlocks(BB, DestBB))
913 DestBB = nullptr;
914
915 return DestBB;
916}
917
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
920/// edges in ways that are non-optimal for isel. Start by eliminating these
921/// blocks so we can split them the way we want them.
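/// A typical candidate looks roughly like this (illustrative only):
///   bb:
///     %p = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
///     br label %dest
/// i.e. everything before the unconditional branch is a PHI or debug
/// intrinsic; such a block is merged into %dest when canMergeBlocks() and
/// the profitability check below allow it.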
922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
924 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
925 while (!LoopList.empty()) {
926 Loop *L = LoopList.pop_back_val();
927 llvm::append_range(LoopList, *L);
928 if (BasicBlock *Preheader = L->getLoopPreheader())
929 Preheaders.insert(Preheader);
930 }
931
932 bool MadeChange = false;
933 // Copy blocks into a temporary array to avoid iterator invalidation issues
934 // as we remove them.
935 // Note that this intentionally skips the entry block.
937 for (auto &Block : llvm::drop_begin(F)) {
938 // Delete phi nodes that could block deleting other empty blocks.
940 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
941 Blocks.push_back(&Block);
942 }
943
944 for (auto &Block : Blocks) {
945 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
946 if (!BB)
947 continue;
948 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
949 if (!DestBB ||
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
951 continue;
952
953 eliminateMostlyEmptyBlock(BB);
954 MadeChange = true;
955 }
956 return MadeChange;
957}
958
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
960 BasicBlock *DestBB,
961 bool isPreheader) {
962 // Do not delete loop preheaders if doing so would create a critical edge.
963 // Loop preheaders can be good locations to spill registers. If the
964 // preheader is deleted and we create a critical edge, registers may be
965 // spilled in the loop body instead.
966 if (!DisablePreheaderProtect && isPreheader &&
967 !(BB->getSinglePredecessor() &&
969 return false;
970
971 // Skip merging if the block's successor is also a successor to any callbr
972 // that leads to this block.
973 // FIXME: Is this really needed? Is this a correctness issue?
974 for (BasicBlock *Pred : predecessors(BB)) {
975 if (isa<CallBrInst>(Pred->getTerminator()) &&
976 llvm::is_contained(successors(Pred), DestBB))
977 return false;
978 }
979
980 // Try to skip merging if the unique predecessor of BB is terminated by a
981 // switch or indirect branch instruction, and BB is used as an incoming block
982  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
983  // add COPY instructions in the predecessor of BB instead of BB (if it is not
984  // merged). Note that the critical edge created by merging such blocks won't be
985 // split in MachineSink because the jump table is not analyzable. By keeping
986 // such empty block (BB), ISel will place COPY instructions in BB, not in the
987 // predecessor of BB.
988 BasicBlock *Pred = BB->getUniquePredecessor();
989 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
990 isa<IndirectBrInst>(Pred->getTerminator())))
991 return true;
992
993 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
994 return true;
995
996  // We use a simple cost heuristic which determines that skipping merging is
997 // profitable if the cost of skipping merging is less than the cost of
998 // merging : Cost(skipping merging) < Cost(merging BB), where the
999 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1000 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1001 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1002 // Freq(Pred) / Freq(BB) > 2.
1003 // Note that if there are multiple empty blocks sharing the same incoming
1004  // value for the PHIs in the DestBB, we consider them together. In such a
1005  // case, Cost(merging BB) will be the sum of their frequencies.
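  // As a rough numeric illustration (not part of the heuristic itself): with
  // the default FreqRatioToSkipMerge of 2, Freq(Pred) = 600 and Freq(BB) = 200
  // give a ratio of 3 > 2, so we keep BB; with Freq(BB) = 400 the ratio is
  // 1.5 <= 2 and we merge.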
1006
1007 if (!isa<PHINode>(DestBB->begin()))
1008 return true;
1009
1010 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1011
1012 // Find all other incoming blocks from which incoming values of all PHIs in
1013 // DestBB are the same as the ones from BB.
1014 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1015 if (DestBBPred == BB)
1016 continue;
1017
1018 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1021 }))
1022 SameIncomingValueBBs.insert(DestBBPred);
1023 }
1024
1025  // See if all of BB's incoming values are the same as the value from Pred. In
1026  // this case, there is no reason to skip merging because COPYs are expected to
1027  // be placed in Pred already.
1028 if (SameIncomingValueBBs.count(Pred))
1029 return true;
1030
1031 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1032 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1033
1034 for (auto *SameValueBB : SameIncomingValueBBs)
1035 if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1037 BBFreq += BFI->getBlockFreq(SameValueBB);
1038
1039 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1040 return !Limit || PredFreq <= *Limit;
1041}
1042
1043/// Return true if we can merge BB into DestBB if there is a single
1044/// unconditional branch between them, and BB contains no other non-phi
1045/// instructions.
1046bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1047 const BasicBlock *DestBB) const {
1048 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1049  // the successor. If there are more complex conditions (e.g. preheaders),
1050 // don't mess around with them.
1051 for (const PHINode &PN : BB->phis()) {
1052 for (const User *U : PN.users()) {
1053 const Instruction *UI = cast<Instruction>(U);
1054 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1055 return false;
1056 // If User is inside DestBB block and it is a PHINode then check
1057 // incoming value. If incoming value is not from BB then this is
1058 // a complex condition (e.g. preheaders) we want to avoid here.
1059 if (UI->getParent() == DestBB) {
1060 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1061 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1062 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1063 if (Insn && Insn->getParent() == BB &&
1064 Insn->getParent() != UPN->getIncomingBlock(I))
1065 return false;
1066 }
1067 }
1068 }
1069 }
1070
1071 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1072 // and DestBB may have conflicting incoming values for the block. If so, we
1073 // can't merge the block.
1074 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1075 if (!DestBBPN)
1076 return true; // no conflict.
1077
1078 // Collect the preds of BB.
1080 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1081 // It is faster to get preds from a PHI than with pred_iterator.
1082 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.insert(BBPN->getIncomingBlock(i));
1084 } else {
1085 BBPreds.insert(pred_begin(BB), pred_end(BB));
1086 }
1087
1088 // Walk the preds of DestBB.
1089 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1090 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1091 if (BBPreds.count(Pred)) { // Common predecessor?
1092 for (const PHINode &PN : DestBB->phis()) {
1093 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1094 const Value *V2 = PN.getIncomingValueForBlock(BB);
1095
1096 // If V2 is a phi node in BB, look up what the mapped value will be.
1097 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1098 if (V2PN->getParent() == BB)
1099 V2 = V2PN->getIncomingValueForBlock(Pred);
1100
1101 // If there is a conflict, bail out.
1102 if (V1 != V2)
1103 return false;
1104 }
1105 }
1106 }
1107
1108 return true;
1109}
1110
1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1112static void replaceAllUsesWith(Value *Old, Value *New,
1114 bool IsHuge) {
1115 auto *OldI = dyn_cast<Instruction>(Old);
1116 if (OldI) {
1117 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1118 UI != E; ++UI) {
1119 Instruction *User = cast<Instruction>(*UI);
1120 if (IsHuge)
1121 FreshBBs.insert(User->getParent());
1122 }
1123 }
1124 Old->replaceAllUsesWith(New);
1125}
1126
1127/// Eliminate a basic block that has only phi's and an unconditional branch in
1128/// it.
1129void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1130 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1131 BasicBlock *DestBB = BI->getSuccessor(0);
1132
1133 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1134 << *BB << *DestBB);
1135
1136 // If the destination block has a single pred, then this is a trivial edge,
1137 // just collapse it.
1138 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1139 if (SinglePred != DestBB) {
1140 assert(SinglePred == BB &&
1141 "Single predecessor not the same as predecessor");
1142 // Merge DestBB into SinglePred/BB and delete it.
1144 // Note: BB(=SinglePred) will not be deleted on this path.
1145 // DestBB(=its single successor) is the one that was deleted.
1146 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1147
1148 if (IsHugeFunc) {
1149 // Update FreshBBs to optimize the merged BB.
1150 FreshBBs.insert(SinglePred);
1151 FreshBBs.erase(DestBB);
1152 }
1153 return;
1154 }
1155 }
1156
1157 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1158 // to handle the new incoming edges it is about to have.
1159 for (PHINode &PN : DestBB->phis()) {
1160 // Remove the incoming value for BB, and remember it.
1161 Value *InVal = PN.removeIncomingValue(BB, false);
1162
1163 // Two options: either the InVal is a phi node defined in BB or it is some
1164 // value that dominates BB.
1165 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166 if (InValPhi && InValPhi->getParent() == BB) {
1167 // Add all of the input values of the input PHI as inputs of this phi.
1168 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1169 PN.addIncoming(InValPhi->getIncomingValue(i),
1170 InValPhi->getIncomingBlock(i));
1171 } else {
1172 // Otherwise, add one instance of the dominating value for each edge that
1173 // we will be adding.
1174 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1175 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1177 } else {
1178 for (BasicBlock *Pred : predecessors(BB))
1179 PN.addIncoming(InVal, Pred);
1180 }
1181 }
1182 }
1183
1184 // The PHIs are now updated, change everything that refers to BB to use
1185 // DestBB and remove BB.
1186 BB->replaceAllUsesWith(DestBB);
1187 BB->eraseFromParent();
1188 ++NumBlocksElim;
1189
1190 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1191}
1192
1193// Computes a map of base pointer relocation instructions to corresponding
1194// derived pointer relocation instructions given a vector of all relocate calls
1196 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1198 &RelocateInstMap) {
1199 // Collect information in two maps: one primarily for locating the base object
1200 // while filling the second map; the second map is the final structure holding
1201 // a mapping between Base and corresponding Derived relocate calls
1203 for (auto *ThisRelocate : AllRelocateCalls) {
1204 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1205 ThisRelocate->getDerivedPtrIndex());
1206 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1207 }
1208 for (auto &Item : RelocateIdxMap) {
1209 std::pair<unsigned, unsigned> Key = Item.first;
1210 if (Key.first == Key.second)
1211 // Base relocation: nothing to insert
1212 continue;
1213
1214 GCRelocateInst *I = Item.second;
1215 auto BaseKey = std::make_pair(Key.first, Key.first);
1216
1217 // We're iterating over RelocateIdxMap so we cannot modify it.
1218 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1219 if (MaybeBase == RelocateIdxMap.end())
1220 // TODO: We might want to insert a new base object relocate and gep off
1221 // that, if there are enough derived object relocates.
1222 continue;
1223
1224 RelocateInstMap[MaybeBase->second].push_back(I);
1225 }
1226}
1227
1228// Accepts a GEP and extracts the operands into a vector provided they're all
1229// small integer constants
1231 SmallVectorImpl<Value *> &OffsetV) {
1232 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1233 // Only accept small constant integer operands
1234 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1235 if (!Op || Op->getZExtValue() > 20)
1236 return false;
1237 }
1238
1239 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1240 OffsetV.push_back(GEP->getOperand(i));
1241 return true;
1242}
1243
1244// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1245// replace, computes a replacement, and applies it.
1246static bool
1248 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1249 bool MadeChange = false;
1250  // We must ensure that the relocation of a derived pointer is defined after
1251  // the relocation of its base pointer. If we find a relocation that uses this
1252  // base and is defined earlier than the relocation of the base, we move the
1253  // relocation of the base right before that relocation. We only consider
1254  // relocations in the same basic block as the relocation of the base;
1255  // relocations from other basic blocks are skipped by the optimization.
1256 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1257 &*R != RelocatedBase; ++R)
1258 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1259 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1260 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1261 RelocatedBase->moveBefore(RI);
1262 MadeChange = true;
1263 break;
1264 }
1265
1266 for (GCRelocateInst *ToReplace : Targets) {
1267 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1268 "Not relocating a derived object of the original base object");
1269 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1270 // A duplicate relocate call. TODO: coalesce duplicates.
1271 continue;
1272 }
1273
1274 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1275 // Base and derived relocates are in different basic blocks.
1276 // In this case transform is only valid when base dominates derived
1277 // relocate. However it would be too expensive to check dominance
1278 // for each such relocate, so we skip the whole transformation.
1279 continue;
1280 }
1281
1282 Value *Base = ToReplace->getBasePtr();
1283 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1284 if (!Derived || Derived->getPointerOperand() != Base)
1285 continue;
1286
1288 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1289 continue;
1290
1291 // Create a Builder and replace the target callsite with a gep
1292 assert(RelocatedBase->getNextNode() &&
1293 "Should always have one since it's not a terminator");
1294
1295 // Insert after RelocatedBase
1296 IRBuilder<> Builder(RelocatedBase->getNextNode());
1297 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1298
1299 // If gc_relocate does not match the actual type, cast it to the right type.
1300 // In theory, there must be a bitcast after gc_relocate if the type does not
1301    // match, and we should reuse it to get the derived pointer. But there could
1302    // be cases like this:
1303 // bb1:
1304 // ...
1305 // %g1 = call coldcc i8 addrspace(1)*
1306 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1307 //
1308 // bb2:
1309 // ...
1310 // %g2 = call coldcc i8 addrspace(1)*
1311 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1312 //
1313 // merge:
1314 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1315    //   %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1316 //
1317    // In this case, we cannot find the bitcast anymore. So we insert a new
1318    // bitcast whether or not there is already one. In this way, we can handle
1319 // all cases, and the extra bitcast should be optimized away in later
1320 // passes.
1321 Value *ActualRelocatedBase = RelocatedBase;
1322 if (RelocatedBase->getType() != Base->getType()) {
1323 ActualRelocatedBase =
1324 Builder.CreateBitCast(RelocatedBase, Base->getType());
1325 }
1326 Value *Replacement =
1327 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1328 ArrayRef(OffsetV));
1329 Replacement->takeName(ToReplace);
1330 // If the newly generated derived pointer's type does not match the original
1331 // derived pointer's type, cast the new derived pointer to match it. Same
1332 // reasoning as above.
1333 Value *ActualReplacement = Replacement;
1334 if (Replacement->getType() != ToReplace->getType()) {
1335 ActualReplacement =
1336 Builder.CreateBitCast(Replacement, ToReplace->getType());
1337 }
1338 ToReplace->replaceAllUsesWith(ActualReplacement);
1339 ToReplace->eraseFromParent();
1340
1341 MadeChange = true;
1342 }
1343 return MadeChange;
1344}
1345
1346// Turns this:
1347//
1348// %base = ...
1349// %ptr = gep %base + 15
1350// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1351// %base' = relocate(%tok, i32 4, i32 4)
1352// %ptr' = relocate(%tok, i32 4, i32 5)
1353// %val = load %ptr'
1354//
1355// into this:
1356//
1357// %base = ...
1358// %ptr = gep %base + 15
1359// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1360// %base' = gc.relocate(%tok, i32 4, i32 4)
1361// %ptr' = gep %base' + 15
1362// %val = load %ptr'
1363bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1364 bool MadeChange = false;
1365 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1366 for (auto *U : I.users())
1367 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1368 // Collect all the relocate calls associated with a statepoint
1369 AllRelocateCalls.push_back(Relocate);
1370
1371 // We need at least one base pointer relocation + one derived pointer
1372 // relocation to mangle
1373 if (AllRelocateCalls.size() < 2)
1374 return false;
1375
1376 // RelocateInstMap is a mapping from the base relocate instruction to the
1377 // corresponding derived relocate instructions
1379 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1380 if (RelocateInstMap.empty())
1381 return false;
1382
1383 for (auto &Item : RelocateInstMap)
1384 // Item.first is the RelocatedBase to offset against
1385 // Item.second is the vector of Targets to replace
1386 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1387 return MadeChange;
1388}
1389
1390/// Sink the specified cast instruction into its user blocks.
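/// For instance (illustrative only), a cast defined in one block and used
/// only in other blocks gets a copy inserted at the first insertion point of
/// each user block; the uses are rewired to those local copies and the
/// original cast is erased once it has no remaining uses.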
1391static bool SinkCast(CastInst *CI) {
1392 BasicBlock *DefBB = CI->getParent();
1393
1394 /// InsertedCasts - Only insert a cast in each block once.
1396
1397 bool MadeChange = false;
1398 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1399 UI != E;) {
1400 Use &TheUse = UI.getUse();
1401 Instruction *User = cast<Instruction>(*UI);
1402
1403 // Figure out which BB this cast is used in. For PHI's this is the
1404 // appropriate predecessor block.
1405 BasicBlock *UserBB = User->getParent();
1406 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1407 UserBB = PN->getIncomingBlock(TheUse);
1408 }
1409
1410 // Preincrement use iterator so we don't invalidate it.
1411 ++UI;
1412
1413 // The first insertion point of a block containing an EH pad is after the
1414 // pad. If the pad is the user, we cannot sink the cast past the pad.
1415 if (User->isEHPad())
1416 continue;
1417
1418 // If the block selected to receive the cast is an EH pad that does not
1419 // allow non-PHI instructions before the terminator, we can't sink the
1420 // cast.
1421 if (UserBB->getTerminator()->isEHPad())
1422 continue;
1423
1424 // If this user is in the same block as the cast, don't change the cast.
1425 if (UserBB == DefBB)
1426 continue;
1427
1428 // If we have already inserted a cast into this block, use it.
1429 CastInst *&InsertedCast = InsertedCasts[UserBB];
1430
1431 if (!InsertedCast) {
1432 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1433 assert(InsertPt != UserBB->end());
1434 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1435 CI->getType(), "");
1436 InsertedCast->insertBefore(*UserBB, InsertPt);
1437 InsertedCast->setDebugLoc(CI->getDebugLoc());
1438 }
1439
1440 // Replace a use of the cast with a use of the new cast.
1441 TheUse = InsertedCast;
1442 MadeChange = true;
1443 ++NumCastUses;
1444 }
1445
1446 // If we removed all uses, nuke the cast.
1447 if (CI->use_empty()) {
1448 salvageDebugInfo(*CI);
1449 CI->eraseFromParent();
1450 MadeChange = true;
1451 }
1452
1453 return MadeChange;
1454}
1455
1456/// If the specified cast instruction is a noop copy (e.g. it's casting from
1457/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1458/// reduce the number of virtual registers that must be created and coalesced.
1459///
1460/// Return true if any changes are made.
1462 const DataLayout &DL) {
1463 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1464 // than sinking only nop casts, but is helpful on some platforms.
1465 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1466 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1467 ASC->getDestAddressSpace()))
1468 return false;
1469 }
1470
1471 // If this is a noop copy,
1472 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1473 EVT DstVT = TLI.getValueType(DL, CI->getType());
1474
1475  // Is this an fp<->int conversion?
1476 if (SrcVT.isInteger() != DstVT.isInteger())
1477 return false;
1478
1479 // If this is an extension, it will be a zero or sign extension, which
1480 // isn't a noop.
1481 if (SrcVT.bitsLT(DstVT))
1482 return false;
1483
1484 // If these values will be promoted, find out what they will be promoted
1485 // to. This helps us consider truncates on PPC as noop copies when they
1486 // are.
1487 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1489 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1490 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1492 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1493
1494 // If, after promotion, these are the same types, this is a noop copy.
1495 if (SrcVT != DstVT)
1496 return false;
1497
1498 return SinkCast(CI);
1499}
1500
1501// Match a simple increment by constant operation. Note that if a sub is
1502// matched, the step is negated (as if the step had been canonicalized to
1503// an add, even though we leave the instruction alone.)
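// For example (illustrative only), both of the following match with
// LHS = %iv:
//   %iv.next = add i32 %iv, 4   ; Step = 4
//   %iv.next = sub i32 %iv, 4   ; Step = -4 (negated as described above)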
1504bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1505 Constant *&Step) {
1506 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1507 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1508 m_Instruction(LHS), m_Constant(Step)))))
1509 return true;
1510 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1511 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1512 m_Instruction(LHS), m_Constant(Step))))) {
1513 Step = ConstantExpr::getNeg(Step);
1514 return true;
1515 }
1516 return false;
1517}
1518
1519/// If given \p PN is an inductive variable with value IVInc coming from the
1520/// backedge, and on each iteration it gets increased by Step, return pair
1521/// <IVInc, Step>. Otherwise, return std::nullopt.
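/// A matching induction variable looks roughly like this (illustrative only):
///   loop:
///     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
///     ...
///     %iv.next = add i32 %iv, 1
///     br i1 %cond, label %loop, label %exit
/// for which this returns the pair <%iv.next, i32 1>.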
1522static std::optional<std::pair<Instruction *, Constant *>>
1523getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1524 const Loop *L = LI->getLoopFor(PN->getParent());
1525 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1526 return std::nullopt;
1527 auto *IVInc =
1528 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1529 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1530 return std::nullopt;
1531 Instruction *LHS = nullptr;
1532 Constant *Step = nullptr;
1533 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1534 return std::make_pair(IVInc, Step);
1535 return std::nullopt;
1536}
1537
1538static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1539 auto *I = dyn_cast<Instruction>(V);
1540 if (!I)
1541 return false;
1542 Instruction *LHS = nullptr;
1543 Constant *Step = nullptr;
1544 if (!matchIncrement(I, LHS, Step))
1545 return false;
1546 if (auto *PN = dyn_cast<PHINode>(LHS))
1547 if (auto IVInc = getIVIncrement(PN, LI))
1548 return IVInc->first == I;
1549 return false;
1550}
1551
1552bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1553 Value *Arg0, Value *Arg1,
1554 CmpInst *Cmp,
1555 Intrinsic::ID IID) {
1556 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1557 if (!isIVIncrement(BO, LI))
1558 return false;
1559 const Loop *L = LI->getLoopFor(BO->getParent());
1560 assert(L && "L should not be null after isIVIncrement()");
1561    // Do not risk moving the increment into a child loop.
1562 if (LI->getLoopFor(Cmp->getParent()) != L)
1563 return false;
1564
1565 // Finally, we need to ensure that the insert point will dominate all
1566 // existing uses of the increment.
1567
1568 auto &DT = getDT(*BO->getParent()->getParent());
1569 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1570 // If we're moving up the dom tree, all uses are trivially dominated.
1571 // (This is the common case for code produced by LSR.)
1572 return true;
1573
1574 // Otherwise, special case the single use in the phi recurrence.
1575 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1576 };
1577 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1578 // We used to use a dominator tree here to allow multi-block optimization.
1579 // But that was problematic because:
1580 // 1. It could cause a perf regression by hoisting the math op into the
1581 // critical path.
1582 // 2. It could cause a perf regression by creating a value that was live
1583 // across multiple blocks and increasing register pressure.
1584 // 3. Use of a dominator tree could cause large compile-time regression.
1585 // This is because we recompute the DT on every change in the main CGP
1586 // run-loop. The recomputing is probably unnecessary in many cases, so if
1587 // that was fixed, using a DT here would be ok.
1588 //
1589 // There is one important particular case we still want to handle: when BO
1590 // is the IV increment. Important properties that make it profitable:
1591 // - We can speculate the IV increment anywhere in the loop (as long as the
1592 // indvar Phi is its only user);
1593 // - Upon computing Cmp, we effectively compute something equivalent to the
1594 // IV increment (even though it looks different in the IR). So moving it up
1595 // to the cmp point does not really increase register pressure.
1596 return false;
1597 }
1598
1599 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1600 if (BO->getOpcode() == Instruction::Add &&
1601 IID == Intrinsic::usub_with_overflow) {
1602 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1603 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1604 }
1605
1606 // Insert at the first instruction of the pair.
1607 Instruction *InsertPt = nullptr;
1608 for (Instruction &Iter : *Cmp->getParent()) {
1609 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1610 // the overflow intrinsic are defined.
1611 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1612 InsertPt = &Iter;
1613 break;
1614 }
1615 }
1616 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1617
1618 IRBuilder<> Builder(InsertPt);
1619 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1620 if (BO->getOpcode() != Instruction::Xor) {
1621 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1622 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1623 } else
1624 assert(BO->hasOneUse() &&
1625 "Patterns with XOr should use the BO only in the compare");
1626 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1627 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1628 Cmp->eraseFromParent();
1629 BO->eraseFromParent();
1630 return true;
1631}
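// For illustration only (names and types are hypothetical), the rewrite above
// replaces a separate add/compare pair with a single overflow intrinsic:
//
//   %math = add i64 %a, %b
//   %ov   = icmp ult i64 %math, %a
//     ==>
//   %m    = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
//   %math = extractvalue { i64, i1 } %m, 0
//   %ov   = extractvalue { i64, i1 } %m, 1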
1632
1633/// Match special-case patterns that check for unsigned add overflow.
1634 static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1635 BinaryOperator *&Add) {
1636 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1637 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1638 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1639
1640 // We are not expecting non-canonical/degenerate code. Just bail out.
1641 if (isa<Constant>(A))
1642 return false;
1643
1644 ICmpInst::Predicate Pred = Cmp->getPredicate();
1645 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1646 B = ConstantInt::get(B->getType(), 1);
1647 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1648 B = ConstantInt::get(B->getType(), -1);
1649 else
1650 return false;
1651
1652 // Check the users of the variable operand of the compare looking for an add
1653 // with the adjusted constant.
1654 for (User *U : A->users()) {
1655 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1656 Add = cast<BinaryOperator>(U);
1657 return true;
1658 }
1659 }
1660 return false;
1661}
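// A hypothetical instance of the first special case matched above: the add
// wraps exactly when %a is the unsigned maximum value.
//
//   %inc = add i8 %a, 1
//   %ov  = icmp eq i8 %a, -1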
1662
1663/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1664/// intrinsic. Return true if any changes were made.
1665bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1666 ModifyDT &ModifiedDT) {
1667 bool EdgeCase = false;
1668 Value *A, *B;
1669 BinaryOperator *Add;
1670 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1671 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1672 return false;
1673 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1674 A = Add->getOperand(0);
1675 B = Add->getOperand(1);
1676 EdgeCase = true;
1677 }
1678
1679 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1680 TLI->getValueType(*DL, Add->getType()),
1681 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1682 return false;
1683
1684 // We don't want to move around uses of condition values this late, so we
1685 // check if it is legal to create the call to the intrinsic in the basic
1686 // block containing the icmp.
1687 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1688 return false;
1689
1690 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1691 Intrinsic::uadd_with_overflow))
1692 return false;
1693
1694 // Reset callers - do not crash by iterating over a dead instruction.
1695 ModifiedDT = ModifyDT::ModifyInstDT;
1696 return true;
1697}
1698
1699bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1700 ModifyDT &ModifiedDT) {
1701 // We are not expecting non-canonical/degenerate code. Just bail out.
1702 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1703 if (isa<Constant>(A) && isa<Constant>(B))
1704 return false;
1705
1706 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1707 ICmpInst::Predicate Pred = Cmp->getPredicate();
1708 if (Pred == ICmpInst::ICMP_UGT) {
1709 std::swap(A, B);
1710 Pred = ICmpInst::ICMP_ULT;
1711 }
1712 // Convert special-case: (A == 0) is the same as (A u< 1).
1713 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1714 B = ConstantInt::get(B->getType(), 1);
1715 Pred = ICmpInst::ICMP_ULT;
1716 }
1717 // Convert special-case: (A != 0) is the same as (0 u< A).
1718 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1719 std::swap(A, B);
1720 Pred = ICmpInst::ICMP_ULT;
1721 }
1722 if (Pred != ICmpInst::ICMP_ULT)
1723 return false;
1724
1725 // Walk the users of a variable operand of a compare looking for a subtract or
1726 // add with that same operand. Also match the 2nd operand of the compare to
1727 // the add/sub, but that may be a negated constant operand of an add.
1728 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1729 BinaryOperator *Sub = nullptr;
1730 for (User *U : CmpVariableOperand->users()) {
1731 // A - B, A u< B --> usubo(A, B)
1732 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1733 Sub = cast<BinaryOperator>(U);
1734 break;
1735 }
1736
1737 // A + (-C), A u< C (canonicalized form of (sub A, C))
1738 const APInt *CmpC, *AddC;
1739 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1740 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1741 Sub = cast<BinaryOperator>(U);
1742 break;
1743 }
1744 }
1745 if (!Sub)
1746 return false;
1747
1748 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1749 TLI->getValueType(*DL, Sub->getType()),
1750 Sub->hasNUsesOrMore(1)))
1751 return false;
1752
1753 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1754 Cmp, Intrinsic::usub_with_overflow))
1755 return false;
1756
1757 // Reset callers - do not crash by iterating over a dead instruction.
1758 ModifiedDT = ModifyDT::ModifyInstDT;
1759 return true;
1760}
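// Illustrative only: the canonical add-of-negative-constant form that the user
// walk above maps back to a usub.with.overflow call (constants are made up):
//
//   %sub = add i32 %a, -42        ; canonical IR for (sub %a, 42)
//   %ov  = icmp ult i32 %a, 42
//     ==>
//   %m   = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 42)
//   %sub = extractvalue { i32, i1 } %m, 0
//   %ov  = extractvalue { i32, i1 } %m, 1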
1761
1762/// Sink the given CmpInst into user blocks to reduce the number of virtual
1763/// registers that must be created and coalesced. This is a clear win except on
1764/// targets with multiple condition code registers (PowerPC), where it might
1765/// lose; some adjustment may be wanted there.
1766///
1767/// Return true if any changes are made.
1768static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1769 if (TLI.hasMultipleConditionRegisters())
1770 return false;
1771
1772 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1773 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1774 return false;
1775
1776 // Only insert a cmp in each block once.
1777 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1778
1779 bool MadeChange = false;
1780 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1781 UI != E;) {
1782 Use &TheUse = UI.getUse();
1783 Instruction *User = cast<Instruction>(*UI);
1784
1785 // Preincrement use iterator so we don't invalidate it.
1786 ++UI;
1787
1788 // Don't bother for PHI nodes.
1789 if (isa<PHINode>(User))
1790 continue;
1791
1792 // Figure out which BB this cmp is used in.
1793 BasicBlock *UserBB = User->getParent();
1794 BasicBlock *DefBB = Cmp->getParent();
1795
1796 // If this user is in the same block as the cmp, don't change the cmp.
1797 if (UserBB == DefBB)
1798 continue;
1799
1800 // If we have already inserted a cmp into this block, use it.
1801 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1802
1803 if (!InsertedCmp) {
1804 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1805 assert(InsertPt != UserBB->end());
1806 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1807 Cmp->getOperand(0), Cmp->getOperand(1), "");
1808 InsertedCmp->insertBefore(*UserBB, InsertPt);
1809 // Propagate the debug info.
1810 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1811 }
1812
1813 // Replace a use of the cmp with a use of the new cmp.
1814 TheUse = InsertedCmp;
1815 MadeChange = true;
1816 ++NumCmpUses;
1817 }
1818
1819 // If we removed all uses, nuke the cmp.
1820 if (Cmp->use_empty()) {
1821 Cmp->eraseFromParent();
1822 MadeChange = true;
1823 }
1824
1825 return MadeChange;
1826}
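// A minimal sketch (hypothetical blocks) of the sinking performed above; the
// compare is re-created in the user's block so the condition stays local:
//
//   DefBB:
//     %c = icmp eq i32 %x, %y
//     br label %UserBB
//   UserBB:
//     br i1 %c, label %t, label %f
//     ==>
//   UserBB:
//     %c1 = icmp eq i32 %x, %y
//     br i1 %c1, label %t, label %f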
1827
1828/// For pattern like:
1829///
1830/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1831/// ...
1832/// DomBB:
1833/// ...
1834/// br DomCond, TrueBB, CmpBB
1835/// CmpBB: (with DomBB being the single predecessor)
1836/// ...
1837/// Cmp = icmp eq CmpOp0, CmpOp1
1838/// ...
1839///
1840/// This would use two comparisons on targets where the lowering of icmp
1841/// sgt/slt differs from the lowering of icmp eq (PowerPC). This function tries
1842/// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' into 'Cmp = icmp slt/sgt CmpOp0,
1843/// CmpOp1'. After that, DomCond and Cmp can use the same comparison, saving
1844/// one comparison.
1845///
1846/// Return true if any changes are made.
1847 static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1848 const TargetLowering &TLI) {
1849 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1850 return false;
1851
1852 ICmpInst::Predicate Pred = Cmp->getPredicate();
1853 if (Pred != ICmpInst::ICMP_EQ)
1854 return false;
1855
1856 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1857 // icmp slt/sgt would introduce more redundant LLVM IR.
1858 for (User *U : Cmp->users()) {
1859 if (isa<BranchInst>(U))
1860 continue;
1861 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1862 continue;
1863 return false;
1864 }
1865
1866 // This is a cheap/incomplete check for dominance - just match a single
1867 // predecessor with a conditional branch.
1868 BasicBlock *CmpBB = Cmp->getParent();
1869 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1870 if (!DomBB)
1871 return false;
1872
1873 // We want to ensure that the only way control gets to the comparison of
1874 // interest is that a less/greater than comparison on the same operands is
1875 // false.
1876 Value *DomCond;
1877 BasicBlock *TrueBB, *FalseBB;
1878 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1879 return false;
1880 if (CmpBB != FalseBB)
1881 return false;
1882
1883 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1884 ICmpInst::Predicate DomPred;
1885 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1886 return false;
1887 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1888 return false;
1889
1890 // Convert the equality comparison to the opposite of the dominating
1891 // comparison and swap the direction for all branch/select users.
1892 // We have conceptually converted:
1893 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1894 // to
1895 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1896 // And similarly for branches.
1897 for (User *U : Cmp->users()) {
1898 if (auto *BI = dyn_cast<BranchInst>(U)) {
1899 assert(BI->isConditional() && "Must be conditional");
1900 BI->swapSuccessors();
1901 continue;
1902 }
1903 if (auto *SI = dyn_cast<SelectInst>(U)) {
1904 // Swap operands
1905 SI->swapValues();
1906 SI->swapProfMetadata();
1907 continue;
1908 }
1909 llvm_unreachable("Must be a branch or a select");
1910 }
1911 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1912 return true;
1913}
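// A concrete, made-up instance of the transform above: with DomPred == slt and
// control reaching CmpBB only when %a >= %b, flipping the equality to sgt and
// swapping the select arms preserves the result.
//
//   DomBB:
//     %lt = icmp slt i32 %a, %b
//     br i1 %lt, label %TrueBB, label %CmpBB
//   CmpBB:
//     %eq = icmp eq i32 %a, %b
//     %r  = select i1 %eq, i32 %x, i32 %y
//     ==>
//   CmpBB:
//     %eq = icmp sgt i32 %a, %b
//     %r  = select i1 %eq, i32 %y, i32 %x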
1914
1915/// Many architectures use the same instruction for both subtract and cmp. Try
1916/// to swap cmp operands to match subtract operations to allow for CSE.
1917 static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1918 Value *Op0 = Cmp->getOperand(0);
1919 Value *Op1 = Cmp->getOperand(1);
1920 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1921 isa<Constant>(Op1) || Op0 == Op1)
1922 return false;
1923
1924 // If a subtract already has the same operands as a compare, swapping would be
1925 // bad. If a subtract has the same operands as a compare but in reverse order,
1926 // then swapping is good.
1927 int GoodToSwap = 0;
1928 unsigned NumInspected = 0;
1929 for (const User *U : Op0->users()) {
1930 // Avoid walking many users.
1931 if (++NumInspected > 128)
1932 return false;
1933 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
1934 GoodToSwap++;
1935 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
1936 GoodToSwap--;
1937 }
1938
1939 if (GoodToSwap > 0) {
1940 Cmp->swapOperands();
1941 return true;
1942 }
1943 return false;
1944}
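// Hypothetical example of the swap above: reversing the compare's operands
// lets it share an instruction with the subtract on such targets.
//
//   %d = sub i32 %b, %a
//   %c = icmp ult i32 %a, %b
//     ==>
//   %d = sub i32 %b, %a
//   %c = icmp ugt i32 %b, %a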
1945
1946bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
1947 if (sinkCmpExpression(Cmp, *TLI))
1948 return true;
1949
1950 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1951 return true;
1952
1953 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1954 return true;
1955
1956 if (foldICmpWithDominatingICmp(Cmp, *TLI))
1957 return true;
1958
1959 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
1960 return true;
1961
1962 return false;
1963}
1964
1965/// Duplicate and sink the given 'and' instruction into user blocks where it is
1966/// used in a compare to allow isel to generate better code for targets where
1967/// this operation can be combined.
1968///
1969/// Return true if any changes are made.
1970 static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
1971 SetOfInstrs &InsertedInsts) {
1972 // Double-check that we're not trying to optimize an instruction that was
1973 // already optimized by some other part of this pass.
1974 assert(!InsertedInsts.count(AndI) &&
1975 "Attempting to optimize already optimized and instruction");
1976 (void)InsertedInsts;
1977
1978 // Nothing to do for single use in same basic block.
1979 if (AndI->hasOneUse() &&
1980 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1981 return false;
1982
1983 // Try to avoid cases where sinking/duplicating is likely to increase register
1984 // pressure.
1985 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1986 !isa<ConstantInt>(AndI->getOperand(1)) &&
1987 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1988 return false;
1989
1990 for (auto *U : AndI->users()) {
1991 Instruction *User = cast<Instruction>(U);
1992
1993 // Only sink 'and' feeding icmp with 0.
1994 if (!isa<ICmpInst>(User))
1995 return false;
1996
1997 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1998 if (!CmpC || !CmpC->isZero())
1999 return false;
2000 }
2001
2002 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2003 return false;
2004
2005 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2006 LLVM_DEBUG(AndI->getParent()->dump());
2007
2008 // Push the 'and' into the same block as the icmp 0. There should only be
2009 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2010 // others, so we don't need to keep track of which BBs we insert into.
2011 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2012 UI != E;) {
2013 Use &TheUse = UI.getUse();
2014 Instruction *User = cast<Instruction>(*UI);
2015
2016 // Preincrement use iterator so we don't invalidate it.
2017 ++UI;
2018
2019 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2020
2021 // Keep the 'and' in the same place if the use is already in the same block.
2022 Instruction *InsertPt =
2023 User->getParent() == AndI->getParent() ? AndI : User;
2024 Instruction *InsertedAnd =
2025 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
2026 AndI->getOperand(1), "", InsertPt);
2027 // Propagate the debug info.
2028 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2029
2030 // Replace a use of the 'and' with a use of the new 'and'.
2031 TheUse = InsertedAnd;
2032 ++NumAndUses;
2033 LLVM_DEBUG(User->getParent()->dump());
2034 }
2035
2036 // We removed all uses, nuke the and.
2037 AndI->eraseFromParent();
2038 return true;
2039}
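// Illustrative shape handled above (values invented): the 'and' feeds only
// compares against zero, so it is duplicated next to each such compare.
//
//   DefBB:
//     %m = and i64 %x, 255
//     br label %UserBB
//   UserBB:
//     %z = icmp eq i64 %m, 0
//     ==>
//   UserBB:
//     %m1 = and i64 %x, 255
//     %z  = icmp eq i64 %m1, 0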
2040
2041/// Check if the candidates could be combined with a shift instruction, which
2042/// includes:
2043/// 1. Truncate instruction
2044/// 2. And instruction where the immediate is a mask of the low bits:
2045/// imm & (imm+1) == 0
2046 static bool isExtractBitsCandidateUse(Instruction *User) {
2047 if (!isa<TruncInst>(User)) {
2048 if (User->getOpcode() != Instruction::And ||
2049 !isa<ConstantInt>(User->getOperand(1)))
2050 return false;
2051
2052 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2053
2054 if ((Cimm & (Cimm + 1)).getBoolValue())
2055 return false;
2056 }
2057 return true;
2058}
2059
2060/// Sink both shift and truncate instruction to the use of truncate's BB.
2061static bool
2062 SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2063 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2064 const TargetLowering &TLI, const DataLayout &DL) {
2065 BasicBlock *UserBB = User->getParent();
2066 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2067 auto *TruncI = cast<TruncInst>(User);
2068 bool MadeChange = false;
2069
2070 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2071 TruncE = TruncI->user_end();
2072 TruncUI != TruncE;) {
2073
2074 Use &TruncTheUse = TruncUI.getUse();
2075 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2076 // Preincrement use iterator so we don't invalidate it.
2077
2078 ++TruncUI;
2079
2080 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2081 if (!ISDOpcode)
2082 continue;
2083
2084 // If the use is actually a legal node, there will not be an
2085 // implicit truncate.
2086 // FIXME: always querying the result type is just an
2087 // approximation; some nodes' legality is determined by the
2088 // operand or other means. There's no good way to find out though.
2089 if (TLI.isOperationLegalOrCustom(
2090 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2091 continue;
2092
2093 // Don't bother for PHI nodes.
2094 if (isa<PHINode>(TruncUser))
2095 continue;
2096
2097 BasicBlock *TruncUserBB = TruncUser->getParent();
2098
2099 if (UserBB == TruncUserBB)
2100 continue;
2101
2102 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2103 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2104
2105 if (!InsertedShift && !InsertedTrunc) {
2106 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2107 assert(InsertPt != TruncUserBB->end());
2108 // Sink the shift
2109 if (ShiftI->getOpcode() == Instruction::AShr)
2110 InsertedShift =
2111 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2112 else
2113 InsertedShift =
2114 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2115 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2116 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2117
2118 // Sink the trunc
2119 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2120 TruncInsertPt++;
2121 // It will go ahead of any debug-info.
2122 TruncInsertPt.setHeadBit(true);
2123 assert(TruncInsertPt != TruncUserBB->end());
2124
2125 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2126 TruncI->getType(), "");
2127 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2128 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2129
2130 MadeChange = true;
2131
2132 TruncTheUse = InsertedTrunc;
2133 }
2134 }
2135 return MadeChange;
2136}
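// A rough sketch (hypothetical) of the end state after the sinking above: both
// the shift and the truncate are materialized in the truncate user's block so
// isel can fold them with the user.
//
//   TruncUserBB:
//     %s = lshr i64 %x, 32
//     %t = trunc i64 %s to i16
//     %c = icmp eq i16 %t, %y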
2137
2138/// Sink the shift *right* instruction into user blocks if the uses could
2139/// potentially be combined with this shift instruction to generate a
2140/// BitExtract instruction. This is applied only if the architecture supports
2141/// BitExtract instructions. Here is an example:
2142/// BB1:
2143/// %x.extract.shift = lshr i64 %arg1, 32
2144/// BB2:
2145/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2146/// ==>
2147///
2148/// BB2:
2149/// %x.extract.shift.1 = lshr i64 %arg1, 32
2150/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2151///
2152/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2153/// instruction.
2154/// Return true if any changes are made.
2155 static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2156 const TargetLowering &TLI,
2157 const DataLayout &DL) {
2158 BasicBlock *DefBB = ShiftI->getParent();
2159
2160 /// Only insert instructions in each block once.
2161 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2162
2163 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2164
2165 bool MadeChange = false;
2166 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2167 UI != E;) {
2168 Use &TheUse = UI.getUse();
2169 Instruction *User = cast<Instruction>(*UI);
2170 // Preincrement use iterator so we don't invalidate it.
2171 ++UI;
2172
2173 // Don't bother for PHI nodes.
2174 if (isa<PHINode>(User))
2175 continue;
2176
2177 if (!isExtractBitsCandidateUse(User))
2178 continue;
2179
2180 BasicBlock *UserBB = User->getParent();
2181
2182 if (UserBB == DefBB) {
2183 // If the shift and truncate instructions are in the same BB, the use of
2184 // the truncate (TruncUse) may still introduce another truncate if it is
2185 // not legal. In this case, we would like to sink both the shift and the
2186 // truncate to the BB of TruncUse.
2187 // for example:
2188 // BB1:
2189 // i64 shift.result = lshr i64 opnd, imm
2190 // trunc.result = trunc shift.result to i16
2191 //
2192 // BB2:
2193 // ----> We will have an implicit truncate here if the architecture does
2194 // not have i16 compare.
2195 // cmp i16 trunc.result, opnd2
2196 //
2197 if (isa<TruncInst>(User) &&
2198 shiftIsLegal
2199 // If the type of the truncate is legal, no truncate will be
2200 // introduced in other basic blocks.
2201 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2202 MadeChange =
2203 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2204
2205 continue;
2206 }
2207 // If we have already inserted a shift into this block, use it.
2208 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2209
2210 if (!InsertedShift) {
2211 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2212 assert(InsertPt != UserBB->end());
2213
2214 if (ShiftI->getOpcode() == Instruction::AShr)
2215 InsertedShift =
2216 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2217 else
2218 InsertedShift =
2219 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2220 InsertedShift->insertBefore(*UserBB, InsertPt);
2221 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2222
2223 MadeChange = true;
2224 }
2225
2226 // Replace a use of the shift with a use of the new shift.
2227 TheUse = InsertedShift;
2228 }
2229
2230 // If we removed all uses, or there are none, nuke the shift.
2231 if (ShiftI->use_empty()) {
2232 salvageDebugInfo(*ShiftI);
2233 ShiftI->eraseFromParent();
2234 MadeChange = true;
2235 }
2236
2237 return MadeChange;
2238}
2239
2240/// If counting leading or trailing zeros is an expensive operation and a zero
2241/// input is defined, add a check for zero to avoid calling the intrinsic.
2242///
2243/// We want to transform:
2244/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2245///
2246/// into:
2247/// entry:
2248/// %cmpz = icmp eq i64 %A, 0
2249/// br i1 %cmpz, label %cond.end, label %cond.false
2250/// cond.false:
2251/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2252/// br label %cond.end
2253/// cond.end:
2254/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2255///
2256/// If the transform is performed, return true and set ModifiedDT to true.
2257static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2258 LoopInfo &LI,
2259 const TargetLowering *TLI,
2260 const DataLayout *DL, ModifyDT &ModifiedDT,
2261 SmallSet<BasicBlock *, 32> &FreshBBs,
2262 bool IsHugeFunc) {
2263 // If a zero input is undefined, it doesn't make sense to despeculate that.
2264 if (match(CountZeros->getOperand(1), m_One()))
2265 return false;
2266
2267 // If it's cheap to speculate, there's nothing to do.
2268 Type *Ty = CountZeros->getType();
2269 auto IntrinsicID = CountZeros->getIntrinsicID();
2270 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2271 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2272 return false;
2273
2274 // Only handle legal scalar cases. Anything else requires too much work.
2275 unsigned SizeInBits = Ty->getScalarSizeInBits();
2276 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2277 return false;
2278
2279 // Bail if the value is never zero.
2280 Use &Op = CountZeros->getOperandUse(0);
2281 if (isKnownNonZero(Op, *DL))
2282 return false;
2283
2284 // The intrinsic will be sunk behind a compare against zero and branch.
2285 BasicBlock *StartBlock = CountZeros->getParent();
2286 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2287 if (IsHugeFunc)
2288 FreshBBs.insert(CallBlock);
2289
2290 // Create another block after the count zero intrinsic. A PHI will be added
2291 // in this block to select the result of the intrinsic or the bit-width
2292 // constant if the input to the intrinsic is zero.
2293 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2294 // Any debug-info after CountZeros should not be included.
2295 SplitPt.setHeadBit(true);
2296 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2297 if (IsHugeFunc)
2298 FreshBBs.insert(EndBlock);
2299
2300 // Update the LoopInfo. The new blocks are in the same loop as the start
2301 // block.
2302 if (Loop *L = LI.getLoopFor(StartBlock)) {
2303 L->addBasicBlockToLoop(CallBlock, LI);
2304 L->addBasicBlockToLoop(EndBlock, LI);
2305 }
2306
2307 // Set up a builder to create a compare, conditional branch, and PHI.
2308 IRBuilder<> Builder(CountZeros->getContext());
2309 Builder.SetInsertPoint(StartBlock->getTerminator());
2310 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2311
2312 // Replace the unconditional branch that was created by the first split with
2313 // a compare against zero and a conditional branch.
2314 Value *Zero = Constant::getNullValue(Ty);
2315 // Avoid introducing branch on poison. This also replaces the ctz operand.
2316 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2317 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2318 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2319 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2320 StartBlock->getTerminator()->eraseFromParent();
2321
2322 // Create a PHI in the end block to select either the output of the intrinsic
2323 // or the bit width of the operand.
2324 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2325 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2326 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2327 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2328 PN->addIncoming(BitWidth, StartBlock);
2329 PN->addIncoming(CountZeros, CallBlock);
2330
2331 // We are explicitly handling the zero case, so we can set the intrinsic's
2332 // undefined zero argument to 'true'. This will also prevent reprocessing the
2333 // intrinsic; we only despeculate when a zero input is defined.
2334 CountZeros->setArgOperand(1, Builder.getTrue());
2335 ModifiedDT = ModifyDT::ModifyBBDT;
2336 return true;
2337}
2338
2339bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2340 BasicBlock *BB = CI->getParent();
2341
2342 // Lower inline assembly if we can.
2343 // If we found an inline asm expression, and if the target knows how to
2344 // lower it to normal LLVM code, do so now.
2345 if (CI->isInlineAsm()) {
2346 if (TLI->ExpandInlineAsm(CI)) {
2347 // Avoid invalidating the iterator.
2348 CurInstIterator = BB->begin();
2349 // Avoid processing instructions out of order, which could cause
2350 // reuse before a value is defined.
2351 SunkAddrs.clear();
2352 return true;
2353 }
2354 // Sink address computing for memory operands into the block.
2355 if (optimizeInlineAsmInst(CI))
2356 return true;
2357 }
2358
2359 // Align the pointer arguments to this call if the target thinks it's a good
2360 // idea
2361 unsigned MinSize;
2362 Align PrefAlign;
2363 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2364 for (auto &Arg : CI->args()) {
2365 // We want to align both objects whose address is used directly and
2366 // objects whose address is used in casts and GEPs, though it only makes
2367 // sense for GEPs if the offset is a multiple of the desired alignment and
2368 // if size - offset meets the size threshold.
2369 if (!Arg->getType()->isPointerTy())
2370 continue;
2371 APInt Offset(DL->getIndexSizeInBits(
2372 cast<PointerType>(Arg->getType())->getAddressSpace()),
2373 0);
2374 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2375 uint64_t Offset2 = Offset.getLimitedValue();
2376 if (!isAligned(PrefAlign, Offset2))
2377 continue;
2378 AllocaInst *AI;
2379 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2380 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2381 AI->setAlignment(PrefAlign);
2382 // Global variables can only be aligned if they are defined in this
2383 // object (i.e. they are uniquely initialized in this object), and
2384 // over-aligning global variables that have an explicit section is
2385 // forbidden.
2386 GlobalVariable *GV;
2387 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2388 GV->getPointerAlignment(*DL) < PrefAlign &&
2389 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2390 GV->setAlignment(PrefAlign);
2391 }
2392 }
2393 // If this is a memcpy (or similar) then we may be able to improve the
2394 // alignment.
2395 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2396 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2397 MaybeAlign MIDestAlign = MI->getDestAlign();
2398 if (!MIDestAlign || DestAlign > *MIDestAlign)
2399 MI->setDestAlignment(DestAlign);
2400 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2401 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2402 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2403 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2404 MTI->setSourceAlignment(SrcAlign);
2405 }
2406 }
2407
2408 // If we have a cold call site, try to sink addressing computation into the
2409 // cold block. This interacts with our handling for loads and stores to
2410 // ensure that we can fold all uses of a potential addressing computation
2411 // into their uses. TODO: generalize this to work over profiling data
2412 if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
2413 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2414 for (auto &Arg : CI->args()) {
2415 if (!Arg->getType()->isPointerTy())
2416 continue;
2417 unsigned AS = Arg->getType()->getPointerAddressSpace();
2418 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2419 return true;
2420 }
2421
2422 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2423 if (II) {
2424 switch (II->getIntrinsicID()) {
2425 default:
2426 break;
2427 case Intrinsic::assume:
2428 llvm_unreachable("llvm.assume should have been removed already");
2429 case Intrinsic::experimental_widenable_condition: {
2430 // Give up on future widening opportunities so that we can fold away dead
2431 // paths and merge blocks before going into block-local instruction
2432 // selection.
2433 if (II->use_empty()) {
2434 II->eraseFromParent();
2435 return true;
2436 }
2437 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2438 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2439 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2440 });
2441 return true;
2442 }
2443 case Intrinsic::objectsize:
2444 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2445 case Intrinsic::is_constant:
2446 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2447 case Intrinsic::aarch64_stlxr:
2448 case Intrinsic::aarch64_stxr: {
2449 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2450 if (!ExtVal || !ExtVal->hasOneUse() ||
2451 ExtVal->getParent() == CI->getParent())
2452 return false;
2453 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2454 ExtVal->moveBefore(CI);
2455 // Mark this instruction as "inserted by CGP", so that other
2456 // optimizations don't touch it.
2457 InsertedInsts.insert(ExtVal);
2458 return true;
2459 }
2460
2461 case Intrinsic::launder_invariant_group:
2462 case Intrinsic::strip_invariant_group: {
2463 Value *ArgVal = II->getArgOperand(0);
2464 auto it = LargeOffsetGEPMap.find(II);
2465 if (it != LargeOffsetGEPMap.end()) {
2466 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2467 // Make sure not to have to deal with iterator invalidation
2468 // after possibly adding ArgVal to LargeOffsetGEPMap.
2469 auto GEPs = std::move(it->second);
2470 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2471 LargeOffsetGEPMap.erase(II);
2472 }
2473
2474 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2475 II->eraseFromParent();
2476 return true;
2477 }
2478 case Intrinsic::cttz:
2479 case Intrinsic::ctlz:
2480 // If counting zeros is expensive, try to avoid it.
2481 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2482 IsHugeFunc);
2483 case Intrinsic::fshl:
2484 case Intrinsic::fshr:
2485 return optimizeFunnelShift(II);
2486 case Intrinsic::dbg_assign:
2487 case Intrinsic::dbg_value:
2488 return fixupDbgValue(II);
2489 case Intrinsic::masked_gather:
2490 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2491 case Intrinsic::masked_scatter:
2492 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2493 }
2494
2495 SmallVector<Value *, 2> PtrOps;
2496 Type *AccessTy;
2497 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2498 while (!PtrOps.empty()) {
2499 Value *PtrVal = PtrOps.pop_back_val();
2500 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2501 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2502 return true;
2503 }
2504 }
2505
2506 // From here on out we're working with named functions.
2507 if (!CI->getCalledFunction())
2508 return false;
2509
2510 // Lower all default uses of _chk calls. This is very similar
2511 // to what InstCombineCalls does, but here we are only lowering calls
2512 // to fortified library functions (e.g. __memcpy_chk) that have the default
2513 // "don't know" as the objectsize. Anything else should be left alone.
2514 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2515 IRBuilder<> Builder(CI);
2516 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2517 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2518 CI->eraseFromParent();
2519 return true;
2520 }
2521
2522 return false;
2523}
2524
2525 static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2526 const CallInst *CI) {
2527 assert(CI && CI->use_empty());
2528
2529 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2530 switch (II->getIntrinsicID()) {
2531 case Intrinsic::memset:
2532 case Intrinsic::memcpy:
2533 case Intrinsic::memmove:
2534 return true;
2535 default:
2536 return false;
2537 }
2538
2539 LibFunc LF;
2540 Function *Callee = CI->getCalledFunction();
2541 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2542 switch (LF) {
2543 case LibFunc_strcpy:
2544 case LibFunc_strncpy:
2545 case LibFunc_strcat:
2546 case LibFunc_strncat:
2547 return true;
2548 default:
2549 return false;
2550 }
2551
2552 return false;
2553}
2554
2555/// Look for opportunities to duplicate return instructions to the predecessor
2556/// to enable tail call optimizations. The case it is currently looking for is
2557/// the following one. Known intrinsics or library functions that may be tail
2558/// called are taken into account as well.
2559/// @code
2560/// bb0:
2561/// %tmp0 = tail call i32 @f0()
2562/// br label %return
2563/// bb1:
2564/// %tmp1 = tail call i32 @f1()
2565/// br label %return
2566/// bb2:
2567/// %tmp2 = tail call i32 @f2()
2568/// br label %return
2569/// return:
2570/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2571/// ret i32 %retval
2572/// @endcode
2573///
2574/// =>
2575///
2576/// @code
2577/// bb0:
2578/// %tmp0 = tail call i32 @f0()
2579/// ret i32 %tmp0
2580/// bb1:
2581/// %tmp1 = tail call i32 @f1()
2582/// ret i32 %tmp1
2583/// bb2:
2584/// %tmp2 = tail call i32 @f2()
2585/// ret i32 %tmp2
2586/// @endcode
2587bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2588 ModifyDT &ModifiedDT) {
2589 if (!BB->getTerminator())
2590 return false;
2591
2592 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2593 if (!RetI)
2594 return false;
2595
2596 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2597
2598 PHINode *PN = nullptr;
2599 ExtractValueInst *EVI = nullptr;
2600 BitCastInst *BCI = nullptr;
2601 Value *V = RetI->getReturnValue();
2602 if (V) {
2603 BCI = dyn_cast<BitCastInst>(V);
2604 if (BCI)
2605 V = BCI->getOperand(0);
2606
2607 EVI = dyn_cast<ExtractValueInst>(V);
2608 if (EVI) {
2609 V = EVI->getOperand(0);
2610 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2611 return false;
2612 }
2613
2614 PN = dyn_cast<PHINode>(V);
2615 }
2616
2617 if (PN && PN->getParent() != BB)
2618 return false;
2619
2620 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2621 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2622 if (BC && BC->hasOneUse())
2623 Inst = BC->user_back();
2624
2625 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2626 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2627 return false;
2628 };
2629
2630 // Make sure there are no instructions between the first instruction
2631 // and return.
2632 const Instruction *BI = BB->getFirstNonPHI();
2633 // Skip over debug and the bitcast.
2634 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2635 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
2636 BI = BI->getNextNode();
2637 if (BI != RetI)
2638 return false;
2639
2640 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2641 /// call.
2642 const Function *F = BB->getParent();
2643 SmallVector<BasicBlock *, 4> TailCallBBs;
2644 if (PN) {
2645 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2646 // Look through bitcasts.
2647 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2648 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2649 BasicBlock *PredBB = PN->getIncomingBlock(I);
2650 // Make sure the phi value is indeed produced by the tail call.
2651 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2652 TLI->mayBeEmittedAsTailCall(CI) &&
2653 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2654 TailCallBBs.push_back(PredBB);
2655 } else {
2656 // Consider the cases in which the phi value is indirectly produced by
2657 // the tail call, for example when encountering memset(), memmove(),
2658 // strcpy(), whose return value may have been optimized out. In such
2659 // cases, the value needs to be the first function argument.
2660 //
2661 // bb0:
2662 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2663 // br label %return
2664 // return:
2665 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2666 if (PredBB && PredBB->getSingleSuccessor() == BB)
2667 CI = dyn_cast_or_null<CallInst>(
2668 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2669
2670 if (CI && CI->use_empty() &&
2671 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2672 IncomingVal == CI->getArgOperand(0) &&
2673 TLI->mayBeEmittedAsTailCall(CI) &&
2674 attributesPermitTailCall(F, CI, RetI, *TLI))
2675 TailCallBBs.push_back(PredBB);
2676 }
2677 }
2678 } else {
2679 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
2680 for (BasicBlock *Pred : predecessors(BB)) {
2681 if (!VisitedBBs.insert(Pred).second)
2682 continue;
2683 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2684 CallInst *CI = dyn_cast<CallInst>(I);
2685 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2686 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2687 // Either we return void or the return value must be the first
2688 // argument of a known intrinsic or library function.
2689 if (!V || isa<UndefValue>(V) ||
2690 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2691 V == CI->getArgOperand(0))) {
2692 TailCallBBs.push_back(Pred);
2693 }
2694 }
2695 }
2696 }
2697 }
2698
2699 bool Changed = false;
2700 for (auto const &TailCallBB : TailCallBBs) {
2701 // Make sure the call instruction is followed by an unconditional branch to
2702 // the return block.
2703 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2704 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2705 continue;
2706
2707 // Duplicate the return into TailCallBB.
2708 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2709 assert(!VerifyBFIUpdates ||
2710 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
2711 BFI->setBlockFreq(BB,
2712 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
2713 ModifiedDT = ModifyDT::ModifyBBDT;
2714 Changed = true;
2715 ++NumRetsDup;
2716 }
2717
2718 // If we eliminated all predecessors of the block, delete the block now.
2719 if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
2720 BB->eraseFromParent();
2721
2722 return Changed;
2723}
2724
2725//===----------------------------------------------------------------------===//
2726// Memory Optimization
2727//===----------------------------------------------------------------------===//
2728
2729namespace {
2730
2731/// This is an extended version of TargetLowering::AddrMode
2732/// which holds actual Value*'s for register values.
2733struct ExtAddrMode : public TargetLowering::AddrMode {
2734 Value *BaseReg = nullptr;
2735 Value *ScaledReg = nullptr;
2736 Value *OriginalValue = nullptr;
2737 bool InBounds = true;
2738
2739 enum FieldName {
2740 NoField = 0x00,
2741 BaseRegField = 0x01,
2742 BaseGVField = 0x02,
2743 BaseOffsField = 0x04,
2744 ScaledRegField = 0x08,
2745 ScaleField = 0x10,
2746 MultipleFields = 0xff
2747 };
2748
2749 ExtAddrMode() = default;
2750
2751 void print(raw_ostream &OS) const;
2752 void dump() const;
2753
2754 FieldName compare(const ExtAddrMode &other) {
2755 // First check that the types are the same on each field, as differing types
2756 // are something we can't cope with later on.
2757 if (BaseReg && other.BaseReg &&
2758 BaseReg->getType() != other.BaseReg->getType())
2759 return MultipleFields;
2760 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
2761 return MultipleFields;
2762 if (ScaledReg && other.ScaledReg &&
2763 ScaledReg->getType() != other.ScaledReg->getType())
2764 return MultipleFields;
2765
2766 // Conservatively reject 'inbounds' mismatches.
2767 if (InBounds != other.InBounds)
2768 return MultipleFields;
2769
2770 // Check each field to see if it differs.
2771 unsigned Result = NoField;
2772 if (BaseReg != other.BaseReg)
2773 Result |= BaseRegField;
2774 if (BaseGV != other.BaseGV)
2775 Result |= BaseGVField;
2776 if (BaseOffs != other.BaseOffs)
2777 Result |= BaseOffsField;
2778 if (ScaledReg != other.ScaledReg)
2779 Result |= ScaledRegField;
2780 // Don't count 0 as being a different scale, because that actually means
2781 // unscaled (which will already be counted by having no ScaledReg).
2782 if (Scale && other.Scale && Scale != other.Scale)
2783 Result |= ScaleField;
2784
2785 if (llvm::popcount(Result) > 1)
2786 return MultipleFields;
2787 else
2788 return static_cast<FieldName>(Result);
2789 }
2790
2791 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2792 // with no offset.
2793 bool isTrivial() {
2794 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2795 // trivial if at most one of these terms is nonzero, except that BaseGV and
2796 // BaseReg both being zero actually means a null pointer value, which we
2797 // consider to be 'non-zero' here.
2798 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2799 }
2800
2801 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2802 switch (Field) {
2803 default:
2804 return nullptr;
2805 case BaseRegField:
2806 return BaseReg;
2807 case BaseGVField:
2808 return BaseGV;
2809 case ScaledRegField:
2810 return ScaledReg;
2811 case BaseOffsField:
2812 return ConstantInt::get(IntPtrTy, BaseOffs);
2813 }
2814 }
2815
2816 void SetCombinedField(FieldName Field, Value *V,
2817 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2818 switch (Field) {
2819 default:
2820 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2821 break;
2822 case ExtAddrMode::BaseRegField:
2823 BaseReg = V;
2824 break;
2825 case ExtAddrMode::BaseGVField:
2826 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2827 // in the BaseReg field.
2828 assert(BaseReg == nullptr);
2829 BaseReg = V;
2830 BaseGV = nullptr;
2831 break;
2832 case ExtAddrMode::ScaledRegField:
2833 ScaledReg = V;
2834 // If we have a mix of scaled and unscaled addrmodes then we want scale
2835 // to be the scale and not zero.
2836 if (!Scale)
2837 for (const ExtAddrMode &AM : AddrModes)
2838 if (AM.Scale) {
2839 Scale = AM.Scale;
2840 break;
2841 }
2842 break;
2843 case ExtAddrMode::BaseOffsField:
2844 // The offset is no longer a constant, so it goes in ScaledReg with a
2845 // scale of 1.
2846 assert(ScaledReg == nullptr);
2847 ScaledReg = V;
2848 Scale = 1;
2849 BaseOffs = 0;
2850 break;
2851 }
2852 }
2853};
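// For intuition only (not from the source): the address computed by
//   %p = getelementptr inbounds i32, ptr %base, i64 %i
//   %v = load i32, ptr %p
// could be described by an ExtAddrMode with BaseReg = %base, ScaledReg = %i,
// Scale = 4, BaseOffs = 0 and InBounds = true.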
2854
2855#ifndef NDEBUG
2856static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2857 AM.print(OS);
2858 return OS;
2859}
2860#endif
2861
2862#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2863void ExtAddrMode::print(raw_ostream &OS) const {
2864 bool NeedPlus = false;
2865 OS << "[";
2866 if (InBounds)
2867 OS << "inbounds ";
2868 if (BaseGV) {
2869 OS << "GV:";
2870 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2871 NeedPlus = true;
2872 }
2873
2874 if (BaseOffs) {
2875 OS << (NeedPlus ? " + " : "") << BaseOffs;
2876 NeedPlus = true;
2877 }
2878
2879 if (BaseReg) {
2880 OS << (NeedPlus ? " + " : "") << "Base:";
2881 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2882 NeedPlus = true;
2883 }
2884 if (Scale) {
2885 OS << (NeedPlus ? " + " : "") << Scale << "*";
2886 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2887 }
2888
2889 OS << ']';
2890}
2891
2892LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2893 print(dbgs());
2894 dbgs() << '\n';
2895}
2896#endif
2897
2898} // end anonymous namespace
2899
2900namespace {
2901
2902/// This class provides transaction based operation on the IR.
2903/// Every change made through this class is recorded in the internal state and
2904/// can be undone (rollback) until commit is called.
2905/// CGP does not check if instructions could be speculatively executed when
2906/// moved. Preserving the original location would pessimize the debugging
2907/// experience, as well as negatively impact the quality of sample PGO.
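///
/// A minimal usage sketch (hypothetical; Inst, NewVal and Profitable are
/// placeholders, not names from this file):
/// @code
///   TypePromotionTransaction TPT(RemovedInsts);
///   auto Point = TPT.getRestorationPoint();
///   TPT.setOperand(Inst, 0, NewVal);
///   if (!Profitable)
///     TPT.rollback(Point);
///   else
///     TPT.commit();
/// @endcode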
2908class TypePromotionTransaction {
2909 /// This represents the common interface of the individual transaction.
2910 /// Each class implements the logic for doing one specific modification on
2911 /// the IR via the TypePromotionTransaction.
2912 class TypePromotionAction {
2913 protected:
2914 /// The Instruction modified.
2915 Instruction *Inst;
2916
2917 public:
2918 /// Constructor of the action.
2919 /// The constructor performs the related action on the IR.
2920 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2921
2922 virtual ~TypePromotionAction() = default;
2923
2924 /// Undo the modification done by this action.
2925 /// When this method is called, the IR must be in the same state as it was
2926 /// before this action was applied.
2927 /// \pre Undoing the action works if and only if the IR is in the exact same
2928 /// state as it was directly after this action was applied.
2929 virtual void undo() = 0;
2930
2931 /// Commit every change made by this action.
2932 /// When the results on the IR of the action are to be kept, it is important
2933 /// to call this function, otherwise hidden information may be kept forever.
2934 virtual void commit() {
2935 // Nothing to be done, this action is not doing anything.
2936 }
2937 };
2938
2939 /// Utility to remember the position of an instruction.
2940 class InsertionHandler {
2941 /// Position of an instruction.
2942 /// Either an instruction:
2943 /// - Is the first in a basic block: BB is used.
2944 /// - Has a previous instruction: PrevInst is used.
2945 union {
2946 Instruction *PrevInst;
2947 BasicBlock *BB;
2948 } Point;
2949 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
2950
2951 /// Remember whether or not the instruction had a previous instruction.
2952 bool HasPrevInstruction;
2953
2954 public:
2955 /// Record the position of \p Inst.
2956 InsertionHandler(Instruction *Inst) {
2957 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
2958 BasicBlock *BB = Inst->getParent();
2959
2960 // Record where we would have to re-insert the instruction in the sequence
2961 // of DbgRecords, if we ended up reinserting.
2962 if (BB->IsNewDbgInfoFormat)
2963 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
2964
2965 if (HasPrevInstruction) {
2966 Point.PrevInst = &*std::prev(Inst->getIterator());
2967 } else {
2968 Point.BB = BB;
2969 }
2970 }
2971
2972 /// Insert \p Inst at the recorded position.
2973 void insert(Instruction *Inst) {
2974 if (HasPrevInstruction) {
2975 if (Inst->getParent())
2976 Inst->removeFromParent();
2977 Inst->insertAfter(&*Point.PrevInst);
2978 } else {
2979 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
2980 if (Inst->getParent())
2981 Inst->moveBefore(*Point.BB, Position);
2982 else
2983 Inst->insertBefore(*Point.BB, Position);
2984 }
2985
2986 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
2987 }
2988 };
2989
2990 /// Move an instruction before another.
2991 class InstructionMoveBefore : public TypePromotionAction {
2992 /// Original position of the instruction.
2993 InsertionHandler Position;
2994
2995 public:
2996 /// Move \p Inst before \p Before.
2997 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2998 : TypePromotionAction(Inst), Position(Inst) {
2999 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3000 << "\n");
3001 Inst->moveBefore(Before);
3002 }
3003
3004 /// Move the instruction back to its original position.
3005 void undo() override {
3006 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3007 Position.insert(Inst);
3008 }
3009 };
3010
3011 /// Set the operand of an instruction with a new value.
3012 class OperandSetter : public TypePromotionAction {
3013 /// Original operand of the instruction.
3014 Value *Origin;
3015
3016 /// Index of the modified instruction.
3017 unsigned Idx;
3018
3019 public:
3020 /// Set \p Idx operand of \p Inst with \p NewVal.
3021 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3022 : TypePromotionAction(Inst), Idx(Idx) {
3023 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3024 << "for:" << *Inst << "\n"
3025 << "with:" << *NewVal << "\n");
3026 Origin = Inst->getOperand(Idx);
3027 Inst->setOperand(Idx, NewVal);
3028 }
3029
3030 /// Restore the original value of the instruction.
3031 void undo() override {
3032 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3033 << "for: " << *Inst << "\n"
3034 << "with: " << *Origin << "\n");
3035 Inst->setOperand(Idx, Origin);
3036 }
3037 };
3038
3039 /// Hide the operands of an instruction.
3040 /// Do as if this instruction was not using any of its operands.
3041 class OperandsHider : public TypePromotionAction {
3042 /// The list of original operands.
3043 SmallVector<Value *, 4> OriginalValues;
3044
3045 public:
3046 /// Remove \p Inst from the uses of the operands of \p Inst.
3047 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3048 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3049 unsigned NumOpnds = Inst->getNumOperands();
3050 OriginalValues.reserve(NumOpnds);
3051 for (unsigned It = 0; It < NumOpnds; ++It) {
3052 // Save the current operand.
3053 Value *Val = Inst->getOperand(It);
3054 OriginalValues.push_back(Val);
3055 // Set a dummy one.
3056 // We could use OperandSetter here, but that would imply an overhead
3057 // that we are not willing to pay.
3058 Inst->setOperand(It, UndefValue::get(Val->getType()));
3059 }
3060 }
3061
3062 /// Restore the original list of uses.
3063 void undo() override {
3064 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3065 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3066 Inst->setOperand(It, OriginalValues[It]);
3067 }
3068 };
3069
3070 /// Build a truncate instruction.
3071 class TruncBuilder : public TypePromotionAction {
3072 Value *Val;
3073
3074 public:
3075 /// Build a truncate instruction of \p Opnd producing a \p Ty
3076 /// result.
3077 /// trunc Opnd to Ty.
3078 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3079 IRBuilder<> Builder(Opnd);
3080 Builder.SetCurrentDebugLocation(DebugLoc());
3081 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3082 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3083 }
3084
3085 /// Get the built value.
3086 Value *getBuiltValue() { return Val; }
3087
3088 /// Remove the built instruction.
3089 void undo() override {
3090 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3091 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3092 IVal->eraseFromParent();
3093 }
3094 };
3095
3096 /// Build a sign extension instruction.
3097 class SExtBuilder : public TypePromotionAction {
3098 Value *Val;
3099
3100 public:
3101 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3102 /// result.
3103 /// sext Opnd to Ty.
3104 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3105 : TypePromotionAction(InsertPt) {
3106 IRBuilder<> Builder(InsertPt);
3107 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3108 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3109 }
3110
3111 /// Get the built value.
3112 Value *getBuiltValue() { return Val; }
3113
3114 /// Remove the built instruction.
3115 void undo() override {
3116 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3117 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3118 IVal->eraseFromParent();
3119 }
3120 };
3121
3122 /// Build a zero extension instruction.
3123 class ZExtBuilder : public TypePromotionAction {
3124 Value *Val;
3125
3126 public:
3127 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3128 /// result.
3129 /// zext Opnd to Ty.
3130 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3131 : TypePromotionAction(InsertPt) {
3132 IRBuilder<> Builder(InsertPt);
3133 Builder.SetCurrentDebugLocation(DebugLoc());
3134 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3135 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3136 }
3137
3138 /// Get the built value.
3139 Value *getBuiltValue() { return Val; }
3140
3141 /// Remove the built instruction.
3142 void undo() override {
3143 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3144 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3145 IVal->eraseFromParent();
3146 }
3147 };
3148
3149 /// Mutate an instruction to another type.
3150 class TypeMutator : public TypePromotionAction {
3151 /// Record the original type.
3152 Type *OrigTy;
3153
3154 public:
3155 /// Mutate the type of \p Inst into \p NewTy.
3156 TypeMutator(Instruction *Inst, Type *NewTy)
3157 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3158 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3159 << "\n");
3160 Inst->mutateType(NewTy);
3161 }
3162
3163 /// Mutate the instruction back to its original type.
3164 void undo() override {
3165 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3166 << "\n");
3167 Inst->mutateType(OrigTy);
3168 }
3169 };
3170
3171 /// Replace the uses of an instruction by another instruction.
3172 class UsesReplacer : public TypePromotionAction {
3173 /// Helper structure to keep track of the replaced uses.
3174 struct InstructionAndIdx {
3175 /// The instruction using the instruction.
3176 Instruction *Inst;
3177
3178 /// The index where this instruction is used for Inst.
3179 unsigned Idx;
3180
3181 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3182 : Inst(Inst), Idx(Idx) {}
3183 };
3184
3185 /// Keep track of the original uses (pair Instruction, Index).
3186 SmallVector<InstructionAndIdx, 4> OriginalUses;
3187 /// Keep track of the debug users.
3188 SmallVector<DbgValueInst *, 1> DbgValues;
3189 /// And non-instruction debug-users too.
3190 SmallVector<DPValue *, 1> DPValues;
3191
3192 /// Keep track of the new value so that we can undo it by replacing
3193 /// instances of the new value with the original value.
3194 Value *New;
3195
3197
3198 public:
3199 /// Replace all the use of \p Inst by \p New.
3200 UsesReplacer(Instruction *Inst, Value *New)
3201 : TypePromotionAction(Inst), New(New) {
3202 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3203 << "\n");
3204 // Record the original uses.
3205 for (Use &U : Inst->uses()) {
3206 Instruction *UserI = cast<Instruction>(U.getUser());
3207 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3208 }
3209 // Record the debug uses separately. They are not in the instruction's
3210 // use list, but they are replaced by RAUW.
3211 findDbgValues(DbgValues, Inst, &DPValues);
3212
3213 // Now, we can replace the uses.
3214 Inst->replaceAllUsesWith(New);
3215 }
3216
3217 /// Reassign the original uses of Inst to Inst.
3218 void undo() override {
3219 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3220 for (InstructionAndIdx &Use : OriginalUses)
3221 Use.Inst->setOperand(Use.Idx, Inst);
3222 // RAUW has replaced all original uses with references to the new value,
3223 // including the debug uses. Since we are undoing the replacements,
3224 // the original debug uses must also be reinstated to maintain the
3225 // correctness and utility of debug value instructions.
3226 for (auto *DVI : DbgValues)
3227 DVI->replaceVariableLocationOp(New, Inst);
3228 // Similar story with DPValues, the non-instruction representation of
3229 // dbg.values.
3230 for (DPValue *DPV : DPValues) // tested by transaction-test I'm adding
3231 DPV->replaceVariableLocationOp(New, Inst);
3232 }
3233 };
3234
3235 /// Remove an instruction from the IR.
3236 class InstructionRemover : public TypePromotionAction {
3237 /// Original position of the instruction.
3238 InsertionHandler Inserter;
3239
3240 /// Helper structure to hide all the links to the instruction. In other
3241 /// words, this helps pretend that the instruction was removed.
3242 OperandsHider Hider;
3243
3244 /// Keep track of the uses replaced, if any.
3245 UsesReplacer *Replacer = nullptr;
3246
3247 /// Keep track of instructions removed.
3248 SetOfInstrs &RemovedInsts;
3249
3250 public:
3251 /// Remove all references to \p Inst and optionally replace all its
3252 /// uses with New.
3253 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3254 /// \pre If !Inst->use_empty(), then New != nullptr
3255 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3256 Value *New = nullptr)
3257 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3258 RemovedInsts(RemovedInsts) {
3259 if (New)
3260 Replacer = new UsesReplacer(Inst, New);
3261 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3262 RemovedInsts.insert(Inst);
3263 /// The instructions removed here will be freed after completing
3264 /// optimizeBlock() for all blocks as we need to keep track of the
3265 /// removed instructions during promotion.
3266 Inst->removeFromParent();
3267 }
3268
3269 ~InstructionRemover() override { delete Replacer; }
3270
3271 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3272 InstructionRemover(const InstructionRemover &other) = delete;
3273
3274 /// Resurrect the instruction and reassign it to the proper uses if a
3275 /// new value was provided when building this action.
3276 void undo() override {
3277 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3278 Inserter.insert(Inst);
3279 if (Replacer)
3280 Replacer->undo();
3281 Hider.undo();
3282 RemovedInsts.erase(Inst);
3283 }
3284 };
3285
3286public:
3287 /// Restoration point.
3288 /// The restoration point is a pointer to an action instead of an iterator
3289 /// because the iterator may be invalidated but not the pointer.
3290 using ConstRestorationPt = const TypePromotionAction *;
3291
3292 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3293 : RemovedInsts(RemovedInsts) {}
3294
3295 /// Commit every change made in this transaction. Return true if any change
3296 /// happened.
3297 bool commit();
3298
3299 /// Undo all the changes made after the given point.
3300 void rollback(ConstRestorationPt Point);
3301
3302 /// Get the current restoration point.
3303 ConstRestorationPt getRestorationPoint() const;
3304
3305 /// \name API for IR modification with state keeping to support rollback.
3306 /// @{
3307 /// Same as Instruction::setOperand.
3308 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3309
3310 /// Same as Instruction::eraseFromParent.
3311 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3312
3313 /// Same as Value::replaceAllUsesWith.
3314 void replaceAllUsesWith(Instruction *Inst, Value *New);
3315
3316 /// Same as Value::mutateType.
3317 void mutateType(Instruction *Inst, Type *NewTy);
3318
3319 /// Same as IRBuilder::createTrunc.
3320 Value *createTrunc(Instruction *Opnd, Type *Ty);
3321
3322 /// Same as IRBuilder::createSExt.
3323 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3324
3325 /// Same as IRBuilder::createZExt.
3326 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3327
3328private:
3329 /// The ordered list of actions made so far.
3330 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3331
3332 using CommitPt =
3333 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3334
3335 SetOfInstrs &RemovedInsts;
3336};
3337
3338} // end anonymous namespace
3339
3340void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3341 Value *NewVal) {
3342 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3343 Inst, Idx, NewVal));
3344}
3345
3346void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3347 Value *NewVal) {
3348 Actions.push_back(
3349 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3350 Inst, RemovedInsts, NewVal));
3351}
3352
3353void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3354 Value *New) {
3355 Actions.push_back(
3356 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3357}
3358
3359void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3360 Actions.push_back(
3361 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3362}
3363
3364Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3365 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3366 Value *Val = Ptr->getBuiltValue();
3367 Actions.push_back(std::move(Ptr));
3368 return Val;
3369}
3370
3371Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3372 Type *Ty) {
3373 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3374 Value *Val = Ptr->getBuiltValue();
3375 Actions.push_back(std::move(Ptr));
3376 return Val;
3377}
3378
3379Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3380 Type *Ty) {
3381 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3382 Value *Val = Ptr->getBuiltValue();
3383 Actions.push_back(std::move(Ptr));
3384 return Val;
3385}
3386
3387TypePromotionTransaction::ConstRestorationPt
3388TypePromotionTransaction::getRestorationPoint() const {
3389 return !Actions.empty() ? Actions.back().get() : nullptr;
3390}
3391
3392bool TypePromotionTransaction::commit() {
3393 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3394 Action->commit();
3395 bool Modified = !Actions.empty();
3396 Actions.clear();
3397 return Modified;
3398}
3399
3400void TypePromotionTransaction::rollback(
3401 TypePromotionTransaction::ConstRestorationPt Point) {
3402 while (!Actions.empty() && Point != Actions.back().get()) {
3403 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3404 Curr->undo();
3405 }
3406}
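// A minimal usage sketch (illustrative only; the variable names below are
// hypothetical and not taken from this file): the transaction records every
// IR mutation as an action so that speculative promotion can be undone.
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt Restore =
//       TPT.getRestorationPoint();
//   Value *Promoted = TPT.createZExt(UserInst, NarrowVal, WideTy);
//   TPT.replaceAllUsesWith(NarrowInst, Promoted);
//   if (NotProfitable)
//     TPT.rollback(Restore);   // undo only the actions made after Restore
//   else
//     TPT.commit();            // make all recorded actions permanent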
3407
3408namespace {
3409
3410/// A helper class for matching addressing modes.
3411///
3412/// This encapsulates the logic for matching the target-legal addressing modes.
3413class AddressingModeMatcher {
3414 SmallVectorImpl<Instruction *> &AddrModeInsts;
3415 const TargetLowering &TLI;
3416 const TargetRegisterInfo &TRI;
3417 const DataLayout &DL;
3418 const LoopInfo &LI;
3419 const std::function<const DominatorTree &()> getDTFn;
3420
3421 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3422 /// the memory instruction that we're computing this address for.
3423 Type *AccessTy;
3424 unsigned AddrSpace;
3425 Instruction *MemoryInst;
3426
3427 /// This is the addressing mode that we're building up. This is
3428 /// part of the return value of this addressing mode matching stuff.
3429 ExtAddrMode &AddrMode;
3430
3431 /// The instructions inserted by other CodeGenPrepare optimizations.
3432 const SetOfInstrs &InsertedInsts;
3433
3434 /// A map from the instructions to their type before promotion.
3435 InstrToOrigTy &PromotedInsts;
3436
3437 /// The ongoing transaction where every action should be registered.
3438 TypePromotionTransaction &TPT;
3439
3440 // A GEP whose offset is too large to be folded into the addressing mode.
3441 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3442
3443 /// This is set to true when we should not do profitability checks.
3444 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3445 bool IgnoreProfitability;
3446
3447 /// True if we are optimizing for size.
3448 bool OptSize = false;
3449
3450 ProfileSummaryInfo *PSI;
3451 BlockFrequencyInfo *BFI;
3452
3453 AddressingModeMatcher(
3454 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3455 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3456 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3457 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3458 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3459 TypePromotionTransaction &TPT,
3460 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3461 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3462 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3463 DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
3464 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3465 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3466 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3467 IgnoreProfitability = false;
3468 }
3469
3470public:
3471 /// Find the maximal addressing mode that a load/store of V can fold,
3472 /// given an access type of AccessTy. This returns a list of involved
3473 /// instructions in AddrModeInsts.
3474 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3475 /// optimizations.
3476 /// \p PromotedInsts maps the instructions to their type before promotion.
3477 /// \p TPT The ongoing transaction where every action should be registered.
3478 static ExtAddrMode
3479 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3480 SmallVectorImpl<Instruction *> &AddrModeInsts,
3481 const TargetLowering &TLI, const LoopInfo &LI,
3482 const std::function<const DominatorTree &()> getDTFn,
3483 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3484 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3485 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3486 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3487 ExtAddrMode Result;
3488
3489 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3490 AccessTy, AS, MemoryInst, Result,
3491 InsertedInsts, PromotedInsts, TPT,
3492 LargeOffsetGEP, OptSize, PSI, BFI)
3493 .matchAddr(V, 0);
3494 (void)Success;
3495 assert(Success && "Couldn't select *anything*?");
3496 return Result;
3497 }
3498
3499private:
3500 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3501 bool matchAddr(Value *Addr, unsigned Depth);
3502 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3503 bool *MovedAway = nullptr);
3504 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3505 ExtAddrMode &AMBefore,
3506 ExtAddrMode &AMAfter);
3507 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3508 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3509 Value *PromotedOperand) const;
3510};
3511
3512class PhiNodeSet;
3513
3514/// An iterator for PhiNodeSet.
3515class PhiNodeSetIterator {
3516 PhiNodeSet *const Set;
3517 size_t CurrentIndex = 0;
3518
3519public:
3520 /// The constructor. Start should either point to a valid element or be equal
3521 /// to the size of the underlying SmallVector of the PhiNodeSet.
3522 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3523 PHINode *operator*() const;
3524 PhiNodeSetIterator &operator++();
3525 bool operator==(const PhiNodeSetIterator &RHS) const;
3526 bool operator!=(const PhiNodeSetIterator &RHS) const;
3527};
3528
3529/// Keeps a set of PHINodes.
3530///
3531/// This is a minimal set implementation for a specific use case:
3532/// It is very fast when there are very few elements, but also provides good
3533/// performance when there are many. It is similar to SmallPtrSet, but also
3534/// provides iteration by insertion order, which is deterministic and stable
3535 /// across runs. It is also similar to SmallSetVector, but provides O(1)
3536 /// element removal. This is achieved by not actually removing the element
3537 /// from the underlying vector, so it comes at the cost of using more memory,
3538 /// which is fine since PhiNodeSet objects are short lived.
3539class PhiNodeSet {
3540 friend class PhiNodeSetIterator;
3541
3542 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3543 using iterator = PhiNodeSetIterator;
3544
3545 /// Keeps the elements in the order of their insertion in the underlying
3546 /// vector. To achieve constant time removal, it never deletes any element.
3547 SmallVector<PHINode *, 32> NodeList;
3548
3549 /// Keeps the elements in the underlying set implementation. This (and not the
3550 /// NodeList defined above) is the source of truth on whether an element
3551 /// is actually in the collection.
3552 MapType NodeMap;
3553
3554 /// Points to the first valid (not deleted) element when the set is not empty
3555 /// and the value is not zero. Equals the size of the underlying vector
3556 /// when the set is empty. When the value is 0, as in the beginning, the
3557 /// first element may or may not be valid.
3558 size_t FirstValidElement = 0;
3559
3560public:
3561 /// Inserts a new element to the collection.
3562 /// \returns true if the element is actually added, i.e. was not in the
3563 /// collection before the operation.
3564 bool insert(PHINode *Ptr) {
3565 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3566 NodeList.push_back(Ptr);
3567 return true;
3568 }
3569 return false;
3570 }
3571
3572 /// Removes the element from the collection.
3573 /// \returns whether the element is actually removed, i.e. was in the
3574 /// collection before the operation.
3575 bool erase(PHINode *Ptr) {
3576 if (NodeMap.erase(Ptr)) {
3577 SkipRemovedElements(FirstValidElement);
3578 return true;
3579 }
3580 return false;
3581 }
3582
3583 /// Removes all elements and clears the collection.
3584 void clear() {
3585 NodeMap.clear();
3586 NodeList.clear();
3587 FirstValidElement = 0;
3588 }
3589
3590 /// \returns an iterator that will iterate the elements in the order of
3591 /// insertion.
3592 iterator begin() {
3593 if (FirstValidElement == 0)
3594 SkipRemovedElements(FirstValidElement);
3595 return PhiNodeSetIterator(this, FirstValidElement);
3596 }
3597
3598 /// \returns an iterator that points to the end of the collection.
3599 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3600
3601 /// Returns the number of elements in the collection.
3602 size_t size() const { return NodeMap.size(); }
3603
3604 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3605 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3606
3607private:
3608 /// Updates the CurrentIndex so that it will point to a valid element.
3609 ///
3610 /// If the element of NodeList at CurrentIndex is valid, it does not
3611 /// change it. If there are no more valid elements, it updates CurrentIndex
3612 /// to point to the end of the NodeList.
3613 void SkipRemovedElements(size_t &CurrentIndex) {
3614 while (CurrentIndex < NodeList.size()) {
3615 auto it = NodeMap.find(NodeList[CurrentIndex]);
3616 // If the element has been deleted and added again later, NodeMap will
3617 // point to a different index, so CurrentIndex will still be invalid.
3618 if (it != NodeMap.end() && it->second == CurrentIndex)
3619 break;
3620 ++CurrentIndex;
3621 }
3622 }
3623};
3624
3625PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3626 : Set(Set), CurrentIndex(Start) {}
3627
3628PHINode *PhiNodeSetIterator::operator*() const {
3629 assert(CurrentIndex < Set->NodeList.size() &&
3630 "PhiNodeSet access out of range");
3631 return Set->NodeList[CurrentIndex];
3632}
3633
3634PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3635 assert(CurrentIndex < Set->NodeList.size() &&
3636 "PhiNodeSet access out of range");
3637 ++CurrentIndex;
3638 Set->SkipRemovedElements(CurrentIndex);
3639 return *this;
3640}
3641
3642bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3643 return CurrentIndex == RHS.CurrentIndex;
3644}
3645
3646bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3647 return !((*this) == RHS);
3648}
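// Illustrative sketch of how the set above behaves (hypothetical names):
// insertion order is preserved for iteration, and erase() only drops the
// entry from NodeMap, leaving a tombstone slot in NodeList.
//
//   PhiNodeSet Phis;
//   Phis.insert(P1);          // NodeList = [P1],     NodeMap = {P1:0}
//   Phis.insert(P2);          // NodeList = [P1, P2], NodeMap = {P1:0, P2:1}
//   Phis.erase(P1);           // NodeList unchanged,  NodeMap = {P2:1}
//   for (PHINode *P : Phis)   // visits P2 only; P1's stale slot is skipped
//     ;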
3649
3650 /// Keep track of the simplification of Phi nodes.
3651 /// Accept the set of all phi nodes and erase a phi node from this set
3652 /// when it is simplified.
3653class SimplificationTracker {
3654 DenseMap<Value *, Value *> Storage;
3655 const SimplifyQuery &SQ;
3656 // Tracks newly created Phi nodes. The elements are iterated by insertion
3657 // order.
3658 PhiNodeSet AllPhiNodes;
3659 // Tracks newly created Select nodes.
3660 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3661
3662public:
3663 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
3664
3665 Value *Get(Value *V) {
3666 do {
3667 auto SV = Storage.find(V);
3668 if (SV == Storage.end())
3669 return V;
3670 V = SV->second;
3671 } while (true);
3672 }
3673
3674 Value *Simplify(Value *Val) {
3675 SmallVector<Value *, 32> WorkList;
3676 SmallPtrSet<Value *, 32> Visited;
3677 WorkList.push_back(Val);
3678 while (!WorkList.empty()) {
3679 auto *P = WorkList.pop_back_val();
3680 if (!Visited.insert(P).second)
3681 continue;
3682 if (auto *PI = dyn_cast<Instruction>(P))
3683 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
3684 for (auto *U : PI->users())
3685 WorkList.push_back(cast<Value>(U));
3686 Put(PI, V);
3687 PI->replaceAllUsesWith(V);
3688 if (auto *PHI = dyn_cast<PHINode>(PI))
3689 AllPhiNodes.erase(PHI);
3690 if (auto *Select = dyn_cast<SelectInst>(PI))
3691 AllSelectNodes.erase(Select);
3692 PI->eraseFromParent();
3693 }
3694 }
3695 return Get(Val);
3696 }
3697
3698 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
3699
3700 void ReplacePhi(PHINode *From, PHINode *To) {
3701 Value *OldReplacement = Get(From);
3702 while (OldReplacement != From) {
3703 From = To;
3704 To = dyn_cast<PHINode>(OldReplacement);
3705 OldReplacement = Get(From);
3706 }
3707 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
3708 Put(From, To);
3709 From->replaceAllUsesWith(To);
3710 AllPhiNodes.erase(From);
3711 From->eraseFromParent();
3712 }
3713
3714 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
3715
3716 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3717
3718 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3719
3720 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3721
3722 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3723
3724 void destroyNewNodes(Type *CommonType) {
3725 // For safe erasing, replace the uses with dummy value first.
3726 auto *Dummy = PoisonValue::get(CommonType);
3727 for (auto *I : AllPhiNodes) {
3728 I->replaceAllUsesWith(Dummy);
3729 I->eraseFromParent();
3730 }
3731 AllPhiNodes.clear();
3732 for (auto *I : AllSelectNodes) {
3733 I->replaceAllUsesWith(Dummy);
3734 I->eraseFromParent();
3735 }
3736 AllSelectNodes.clear();
3737 }
3738};
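// Illustrative sketch (hypothetical values): Get() follows the replacement
// chain recorded by Put()/ReplacePhi() until it reaches a value with no
// further replacement, so callers always see the most up-to-date node.
//
//   ST.Put(A, B);   // A was simplified to B
//   ST.Put(B, C);   // B was later simplified to C
//   ST.Get(A);      // returns C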
3739
3740/// A helper class for combining addressing modes.
3741class AddressingModeCombiner {
3742 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3743 typedef std::pair<PHINode *, PHINode *> PHIPair;
3744
3745private:
3746 /// The addressing modes we've collected.
3747 SmallVector<ExtAddrMode, 16> AddrModes;
3748
3749 /// The field in which the AddrModes differ, when we have more than one.
3750 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3751
3752 /// Are the AddrModes that we have all just equal to their original values?
3753 bool AllAddrModesTrivial = true;
3754
3755 /// Common Type for all different fields in addressing modes.
3756 Type *CommonType = nullptr;
3757
3758 /// SimplifyQuery for simplifyInstruction utility.
3759 const SimplifyQuery &SQ;
3760
3761 /// Original Address.
3762 Value *Original;
3763
3764 /// Common value among addresses
3765 Value *CommonValue = nullptr;
3766
3767public:
3768 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3769 : SQ(_SQ), Original(OriginalValue) {}
3770
3771 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
3772
3773 /// Get the combined AddrMode
3774 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
3775
3776 /// Add a new AddrMode if it's compatible with the AddrModes we already
3777 /// have.
3778 /// \return True iff we succeeded in doing so.
3779 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3780 // Note whether we have any non-trivial AddrModes: we need to detect when
3781 // all AddrModes are trivial, because then a phi or select would just
3782 // duplicate what's already there.
3783 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3784
3785 // If this is the first addrmode then everything is fine.
3786 if (AddrModes.empty()) {
3787 AddrModes.emplace_back(NewAddrMode);
3788 return true;
3789 }
3790
3791 // Figure out how different this is from the other address modes, which we
3792 // can do just by comparing against the first one given that we only care
3793 // about the cumulative difference.
3794 ExtAddrMode::FieldName ThisDifferentField =
3795 AddrModes[0].compare(NewAddrMode);
3796 if (DifferentField == ExtAddrMode::NoField)
3797 DifferentField = ThisDifferentField;
3798 else if (DifferentField != ThisDifferentField)
3799 DifferentField = ExtAddrMode::MultipleFields;
3800
3801 // If NewAddrMode differs in more than one dimension we cannot handle it.
3802 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3803
3804 // If Scale Field is different then we reject.
3805 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3806
3807 // We must also reject the case when the base offset is different and the
3808 // scale reg is not null: we cannot handle it because the merge of the
3809 // different offsets would have to be used as the ScaleReg.
3810 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3811 !NewAddrMode.ScaledReg);
3812
3813 // We must also reject the case when the GV is different and a BaseReg is
3814 // installed, because we want to use the base reg as the merge of GV values.
3815 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3816 !NewAddrMode.HasBaseReg);
3817
3818 // Even if NewAddrMode is the same we still need to collect it, because the
3819 // original value is different, and later we will need all original values
3820 // as anchors when finding the common Phi node.
3821 if (CanHandle)
3822 AddrModes.emplace_back(NewAddrMode);
3823 else
3824 AddrModes.clear();
3825
3826 return CanHandle;
3827 }
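  // Illustrative example (hypothetical values, not from a real test) of the
  // single-field difference addNewAddrMode accepts: two incoming addressing
  // modes that differ only in their base register can later be merged
  // through one phi of the bases.
  //
  //   AddrMode #1: BaseReg = %b1, BaseOffs = 40
  //   AddrMode #2: BaseReg = %b2, BaseOffs = 40
  //   -> DifferentField == BaseRegField, still combinable.
  //
  // A mode that additionally differed in Scale would flip DifferentField to
  // MultipleFields and clear the collection.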
3828
3829 /// Combine the addressing modes we've collected into a single
3830 /// addressing mode.
3831 /// \return True iff we successfully combined them or we only had one so
3832 /// didn't need to combine them anyway.
3833 bool combineAddrModes() {
3834 // If we have no AddrModes then they can't be combined.
3835 if (AddrModes.size() == 0)
3836 return false;
3837
3838 // A single AddrMode can trivially be combined.
3839 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
3840 return true;
3841
3842 // If the AddrModes we collected are all just equal to the value they are
3843 // derived from then combining them wouldn't do anything useful.
3844 if (AllAddrModesTrivial)
3845 return false;
3846
3847 if (!addrModeCombiningAllowed())
3848 return false;
3849
3850 // Build a map between <original value, basic block where we saw it> to
3851 // value of base register.
3852 // Bail out if there is no common type.
3853 FoldAddrToValueMapping Map;
3854 if (!initializeMap(Map))
3855 return false;
3856
3857 CommonValue = findCommon(Map);
3858 if (CommonValue)
3859 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3860 return CommonValue != nullptr;
3861 }
3862
3863private:
3864 /// `CommonValue` may be a placeholder inserted by us.
3865 /// If the placeholder is not used, we should remove this dead instruction.
3866 void eraseCommonValueIfDead() {
3867 if (CommonValue && CommonValue->getNumUses() == 0)
3868 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
3869 CommonInst->eraseFromParent();
3870 }
3871
3872 /// Initialize Map with anchor values. For each address seen,
3873 /// we record the value of the differing field in that address.
3874 /// At the same time we find a common type for the differing fields, which we
3875 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3876 /// Return false if no common type is found.
3877 bool initializeMap(FoldAddrToValueMapping &Map) {
3878 // Keep track of keys where the value is null. We will need to replace it
3879 // with constant null when we know the common type.
3880 SmallVector<Value *, 2> NullValue;
3881 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3882 for (auto &AM : AddrModes) {
3883 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3884 if (DV) {
3885 auto *Type = DV->getType();
3886 if (CommonType && CommonType != Type)
3887 return false;
3888 CommonType = Type;
3889 Map[AM.OriginalValue] = DV;
3890 } else {
3891 NullValue.push_back(AM.OriginalValue);
3892 }
3893 }
3894 assert(CommonType && "At least one non-null value must be!");
3895 for (auto *V : NullValue)
3896 Map[V] = Constant::getNullValue(CommonType);
3897 return true;
3898 }
3899
3900 /// We have a mapping from value A to value B, where B was a field in the
3901 /// addressing mode represented by A. We also have an original value C
3902 /// representing the address we start with. Traversing from C through phis and
3903 /// selects we end up with the A's in the map. This utility function tries to
3904 /// find a value V which is a field in addressing mode C such that, traversing
3905 /// through phi nodes and selects, we end up in the corresponding B values of
3906 /// the map. The utility creates new Phi/Select nodes if needed.
3907 // The simple example looks as follows:
3908 // BB1:
3909 // p1 = b1 + 40
3910 // br cond BB2, BB3
3911 // BB2:
3912 // p2 = b2 + 40
3913 // br BB3
3914 // BB3:
3915 // p = phi [p1, BB1], [p2, BB2]
3916 // v = load p
3917 // Map is
3918 // p1 -> b1
3919 // p2 -> b2
3920 // Request is
3921 // p -> ?
3922 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3923 Value *findCommon(FoldAddrToValueMapping &Map) {
3924 // Tracks the simplification of newly created phi nodes. We use this
3925 // mapping because we will add newly created Phi nodes to AddrToBase.
3926 // Simplification of Phi nodes is recursive, so some Phi node may
3927 // be simplified after we added it to AddrToBase. In reality this
3928 // simplification is possible only if the original phis/selects were not
3929 // simplified yet.
3930 // Using this mapping we can find the current value in AddrToBase.
3931 SimplificationTracker ST(SQ);
3932
3933 // First step, DFS to create PHI nodes for all intermediate blocks.
3934 // Also fill traverse order for the second step.
3935 SmallVector<Value *, 32> TraverseOrder;
3936 InsertPlaceholders(Map, TraverseOrder, ST);
3937
3938 // Second Step, fill new nodes by merged values and simplify if possible.
3939 FillPlaceholders(Map, TraverseOrder, ST);
3940
3941 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
3942 ST.destroyNewNodes(CommonType);
3943 return nullptr;
3944 }
3945
3946 // Now we'd like to match the new Phi nodes to existing ones.
3947 unsigned PhiNotMatchedCount = 0;
3948 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
3949 ST.destroyNewNodes(CommonType);
3950 return nullptr;
3951 }
3952
3953 auto *Result = ST.Get(Map.find(Original)->second);
3954 if (Result) {
3955 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3956 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3957 }
3958 return Result;
3959 }
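  // Continuing the example in the comment above findCommon (illustrative IR,
  // not from a real test): for Map = {p1 -> b1, p2 -> b2} and the request
  // p -> ?, InsertPlaceholders/FillPlaceholders materialize
  //
  //   BB3:
  //     sunk_phi = phi [b1, BB1], [b2, BB2]
  //
  // and findCommon returns that phi (or an existing equivalent one found by
  // MatchPhiSet), which then becomes the combined base of the address.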
3960
3961 /// Try to match PHI node to Candidate.
3962 /// Matcher tracks the matched Phi nodes.
3963 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3964 SmallSetVector<PHIPair, 8> &Matcher,
3965 PhiNodeSet &PhiNodesToMatch) {
3966 SmallVector<PHIPair, 8> WorkList;
3967 Matcher.insert({PHI, Candidate});
3968 SmallSet<PHINode *, 8> MatchedPHIs;
3969 MatchedPHIs.insert(PHI);
3970 WorkList.push_back({PHI, Candidate});
3971 SmallSet<PHIPair, 8> Visited;
3972 while (!WorkList.empty()) {
3973 auto Item = WorkList.pop_back_val();
3974 if (!Visited.insert(Item).second)
3975 continue;
3976 // We iterate over all incoming values of the Phi to compare them.
3977 // If the values are different, both of them are Phis, the first one is a
3978 // Phi we added (subject to match), and both of them are in the same basic
3979 // block, then we can match our pair if their values match. So we state that
3980 // these values match and add them to the work list to verify that.
3981 for (auto *B : Item.first->blocks()) {
3982 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3983 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3984 if (FirstValue == SecondValue)
3985 continue;
3986
3987 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3988 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3989
3990 // If one of them is not a Phi, or
3991 // the first one is not a Phi node from the set we'd like to match, or
3992 // the Phi nodes are from different basic blocks, then
3993 // we will not be able to match.
3994 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3995 FirstPhi->getParent() != SecondPhi->getParent())
3996 return false;
3997
3998 // If we already matched them then continue.
3999 if (Matcher.count({FirstPhi, SecondPhi}))
4000 continue;
4001 // So the values are different and do not match. So we need them to
4002 // match. (But we register no more than one match per PHI node, so that
4003 // we won't later try to replace them twice.)
4004 if (MatchedPHIs.insert(FirstPhi).second)
4005 Matcher.insert({FirstPhi, SecondPhi});
4006 // But we must check it.
4007 WorkList.push_back({FirstPhi, SecondPhi});
4008 }
4009 }
4010 return true;
4011 }
4012
4013 /// For the given set of PHI nodes (in the SimplificationTracker) try
4014 /// to find their equivalents.
4015 /// Returns false if this matching fails and creation of new Phi is disabled.
4016 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4017 unsigned &PhiNotMatchedCount) {
4018 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4019 // order, so the replacements (ReplacePhi) are also done in a deterministic
4020 // order.
4021 SmallSetVector<PHIPair, 8> Matched;
4022 SmallPtrSet<PHINode *, 8> WillNotMatch;
4023 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4024 while (PhiNodesToMatch.size()) {
4025 PHINode *PHI = *PhiNodesToMatch.begin();
4026
4027 // Record PHI itself: if no Phi node in the basic block matches, it will not match.
4028 WillNotMatch.clear();
4029 WillNotMatch.insert(PHI);
4030
4031 // Traverse all Phis until we find an equivalent or fail to do so.
4032 bool IsMatched = false;
4033 for (auto &P : PHI->getParent()->phis()) {
4034 // Skip new Phi nodes.
4035 if (PhiNodesToMatch.count(&P))
4036 continue;
4037 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4038 break;
4039 // If it does not match, collect all Phi nodes from the matcher.
4040 // If we end up with no match, then all these Phi nodes will not match
4041 // later.
4042 for (auto M : Matched)
4043 WillNotMatch.insert(M.first);
4044 Matched.clear();
4045 }
4046 if (IsMatched) {
4047 // Replace all matched values and erase them.
4048 for (auto MV : Matched)
4049 ST.ReplacePhi(MV.first, MV.second);
4050 Matched.clear();
4051 continue;
4052 }
4053 // If we are not allowed to create new nodes then bail out.
4054 if (!AllowNewPhiNodes)
4055 return false;
4056 // Just remove all seen values in matcher. They will not match anything.
4057 PhiNotMatchedCount += WillNotMatch.size();
4058 for (auto *P : WillNotMatch)
4059 PhiNodesToMatch.erase(P);
4060 }
4061 return true;
4062 }
4063 /// Fill the placeholders with values from predecessors and simplify them.
4064 void FillPlaceholders(FoldAddrToValueMapping &Map,
4065 SmallVectorImpl<Value *> &TraverseOrder,
4066 SimplificationTracker &ST) {
4067 while (!TraverseOrder.empty()) {
4068 Value *Current = TraverseOrder.pop_back_val();
4069 assert(Map.contains(Current) && "No node to fill!!!");
4070 Value *V = Map[Current];
4071
4072 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4073 // CurrentValue also must be Select.
4074 auto *CurrentSelect = cast<SelectInst>(Current);
4075 auto *TrueValue = CurrentSelect->getTrueValue();
4076 assert(Map.contains(TrueValue) && "No True Value!");
4077 Select->setTrueValue(ST.Get(Map[TrueValue]));
4078 auto *FalseValue = CurrentSelect->getFalseValue();
4079 assert(Map.contains(FalseValue) && "No False Value!");
4080 Select->setFalseValue(ST.Get(Map[FalseValue]));
4081 } else {
4082 // Must be a Phi node then.
4083 auto *PHI = cast<PHINode>(V);
4084 // Fill the Phi node with values from predecessors.
4085 for (auto *B : predecessors(PHI->getParent())) {
4086 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4087 assert(Map.contains(PV) && "No predecessor Value!");
4088 PHI->addIncoming(ST.Get(Map[PV]), B);
4089 }
4090 }
4091 Map[Current] = ST.Simplify(V);
4092 }
4093 }
4094
4095 /// Starting from the original value, recursively iterate over the def-use
4096 /// chain up to known ending values represented in the map. For each traversed
4097 /// phi/select, insert a placeholder Phi or Select.
4098 /// Report all newly created Phi/Select nodes by adding them to the set.
4099 /// Also report the order in which the values have been traversed.
4100 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4101 SmallVectorImpl<Value *> &TraverseOrder,
4102 SimplificationTracker &ST) {
4103 SmallVector<Value *, 32> Worklist;
4104 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4105 "Address must be a Phi or Select node");
4106 auto *Dummy = PoisonValue::get(CommonType);
4107 Worklist.push_back(Original);
4108 while (!Worklist.empty()) {
4109 Value *Current = Worklist.pop_back_val();
4110 // If it is already visited or is an ending value, then skip it.
4111 if (Map.contains(Current))
4112 continue;
4113 TraverseOrder.push_back(Current);
4114
4115 // CurrentValue must be a Phi node or select. All others must be covered
4116 // by anchors.
4117 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4118 // Is it OK to get metadata from OrigSelect?!
4119 // Create a Select placeholder with dummy value.
4120 SelectInst *Select = SelectInst::Create(
4121 CurrentSelect->getCondition(), Dummy, Dummy,
4122 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
4123 Map[Current] = Select;
4124 ST.insertNewSelect(Select);
4125 // We are interested in True and False values.
4126 Worklist.push_back(CurrentSelect->getTrueValue());
4127 Worklist.push_back(CurrentSelect->getFalseValue());
4128 } else {
4129 // It must be a Phi node then.
4130 PHINode *CurrentPhi = cast<PHINode>(Current);
4131 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4132 PHINode *PHI =
4133 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4134 Map[Current] = PHI;
4135 ST.insertNewPhi(PHI);
4136 append_range(Worklist, CurrentPhi->incoming_values());
4137 }
4138 }
4139 }
4140
4141 bool addrModeCombiningAllowed() {
4142 if (DisableComplexAddrModes)
4143 return false;
4144 switch (DifferentField) {
4145 default:
4146 return false;
4147 case ExtAddrMode::BaseRegField:
4148 return AddrSinkCombineBaseReg;
4149 case ExtAddrMode::BaseGVField:
4150 return AddrSinkCombineBaseGV;
4151 case ExtAddrMode::BaseOffsField:
4152 return AddrSinkCombineBaseOffs;
4153 case ExtAddrMode::ScaledRegField:
4154 return AddrSinkCombineScaledReg;
4155 }
4156 }
4157};
4158} // end anonymous namespace
4159
4160/// Try adding ScaleReg*Scale to the current addressing mode.
4161/// Return true and update AddrMode if this addr mode is legal for the target,
4162/// false if not.
4163bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4164 unsigned Depth) {
4165 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4166 // mode. Just process that directly.
4167 if (Scale == 1)
4168 return matchAddr(ScaleReg, Depth);
4169
4170 // If the scale is 0, it takes nothing to add this.
4171 if (Scale == 0)
4172 return true;
4173
4174 // If we already have a scale of this value, we can add to it, otherwise, we
4175 // need an available scale field.
4176 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4177 return false;
4178
4179 ExtAddrMode TestAddrMode = AddrMode;
4180
4181 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4182 // [A+B + A*7] -> [B+A*8].
4183 TestAddrMode.Scale += Scale;
4184 TestAddrMode.ScaledReg = ScaleReg;
4185
4186 // If the new address isn't legal, bail out.
4187 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4188 return false;
4189
4190 // It was legal, so commit it.
4191 AddrMode = TestAddrMode;
4192
4193 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4194 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4195 // X*Scale + C*Scale to the addr mode. If we found an available IV increment,
4196 // do not go any further: we can reuse it and cannot eliminate it.
4197 ConstantInt *CI = nullptr;
4198 Value *AddLHS = nullptr;
4199 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4200 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4201 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4202 TestAddrMode.InBounds = false;
4203 TestAddrMode.ScaledReg = AddLHS;
4204 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4205
4206 // If this addressing mode is legal, commit it and remember that we folded
4207 // this instruction.
4208 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4209 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4210 AddrMode = TestAddrMode;
4211 return true;
4212 }
4213 // Restore status quo.
4214 TestAddrMode = AddrMode;
4215 }
4216
4217 // If this is an add recurrence with a constant step, return the increment
4218 // instruction and the canonicalized step.
4219 auto GetConstantStep =
4220 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4221 auto *PN = dyn_cast<PHINode>(V);
4222 if (!PN)
4223 return std::nullopt;
4224 auto IVInc = getIVIncrement(PN, &LI);
4225 if (!IVInc)
4226 return std::nullopt;
4227 // TODO: The result of the intrinsics above is two's complement. However, when
4228 // the IV inc is expressed as an add or sub, iv.next is potentially a poison
4229 // value. If it has nuw or nsw flags, we need to make sure that these flags
4230 // are inferable at the point of the memory instruction. Otherwise we would be
4231 // replacing a well-defined two's-complement computation with poison. To avoid
4232 // the potentially complex analysis needed to prove this, we reject such cases.
4233 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4234 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4235 return std::nullopt;
4236 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4237 return std::make_pair(IVInc->first, ConstantStep->getValue());
4238 return std::nullopt;
4239 };
4240
4241 // Try to account for the following special case:
4242 // 1. ScaleReg is an inductive variable;
4243 // 2. We use it with non-zero offset;
4244 // 3. IV's increment is available at the point of memory instruction.
4245 //
4246 // In this case, we may reuse the IV increment instead of the IV Phi to
4247 // achieve the following advantages:
4248 // 1. If IV step matches the offset, we will have no need in the offset;
4249 // 2. Even if they don't match, we will reduce the overlap of living IV
4250 // and IV increment, that will potentially lead to better register
4251 // assignment.
4252 if (AddrMode.BaseOffs) {
4253 if (auto IVStep = GetConstantStep(ScaleReg)) {
4254 Instruction *IVInc = IVStep->first;
4255 // The following assert is important to ensure a lack of infinite loops.
4256 // This transform is (intentionally) the inverse of the one just above.
4257 // If they don't agree on the definition of an increment, we'd alternate
4258 // back and forth indefinitely.
4259 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4260 APInt Step = IVStep->second;
4261 APInt Offset = Step * AddrMode.Scale;
4262 if (Offset.isSignedIntN(64)) {
4263 TestAddrMode.InBounds = false;
4264 TestAddrMode.ScaledReg = IVInc;
4265 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4266 // If this addressing mode is legal, commit it.
4267 // (Note that we defer the (expensive) domtree-based legality check
4268 // to the very last possible point.)
4269 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4270 getDTFn().dominates(IVInc, MemoryInst)) {
4271 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4272 AddrMode = TestAddrMode;
4273 return true;
4274 }
4275 // Restore status quo.
4276 TestAddrMode = AddrMode;
4277 }
4278 }
4279 }
4280
4281 // Otherwise, just return what we have.
4282 return true;
4283}
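// Illustrative example (hypothetical IR) of the ScaleReg == X + C fold handled
// above: the constant is moved into the immediate so the add no longer has to
// be materialized, assuming the target reports the resulting mode as legal.
//
//   %idx  = add i64 %i, 3
//   %addr = getelementptr i32, ptr %base, i64 %idx
//   ; with Scale = 4 coming from the gep, matchScaledValue can rewrite this as
//   ; BaseReg = %base, ScaledReg = %i, Scale = 4, BaseOffs = 12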
4284
4285/// This is a little filter, which returns true if an addressing computation
4286/// involving I might be folded into a load/store accessing it.
4287/// This doesn't need to be perfect, but needs to accept at least
4288/// the set of instructions that MatchOperationAddr can.
4289 static bool MightBeFoldableInst(Instruction *I) {
4290 switch (I->getOpcode()) {
4291 case Instruction::BitCast:
4292 case Instruction::AddrSpaceCast:
4293 // Don't touch identity bitcasts.
4294 if (I->getType() == I->getOperand(0)->getType())
4295 return false;
4296 return I->getType()->isIntOrPtrTy();
4297 case Instruction::PtrToInt:
4298 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4299 return true;
4300 case Instruction::IntToPtr:
4301 // We know the input is intptr_t, so this is foldable.
4302 return true;
4303 case Instruction::Add:
4304 return true;
4305 case Instruction::Mul:
4306 case Instruction::Shl:
4307 // Can only handle X*C and X << C.
4308 return isa<ConstantInt>(I->getOperand(1));
4309 case Instruction::GetElementPtr:
4310 return true;
4311 default:
4312 return false;
4313 }
4314}
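// For example (illustrative IR), every instruction in the chain
//   %s = shl i64 %i, 2
//   %a = add i64 %p.int, %s
//   %q = inttoptr i64 %a to ptr
// passes this filter, so an addressing computation built from it may be
// considered for folding into the memory access that uses %q.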
4315
4316/// Check whether or not \p Val is a legal instruction for \p TLI.
4317/// \note \p Val is assumed to be the product of some type promotion.
4318/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4319/// to be legal, as the non-promoted value would have had the same state.
4320 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4321 const DataLayout &DL, Value *Val) {
4322 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4323 if (!PromotedInst)
4324 return false;
4325 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4326 // If the ISDOpcode is undefined, it was undefined before the promotion.
4327 if (!ISDOpcode)
4328 return true;
4329 // Otherwise, check if the promoted instruction is legal or not.
4330 return TLI.isOperationLegalOrCustom(
4331 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4332}
4333
4334namespace {
4335
4336/// Helper class to perform type promotion.
4337class TypePromotionHelper {
4338 /// Utility function to add a promoted instruction \p ExtOpnd to
4339 /// \p PromotedInsts and record the type of extension we have seen.
4340 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4341 Instruction *ExtOpnd, bool IsSExt) {
4342 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4343 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4344 if (It != PromotedInsts.end()) {
4345 // If the new extension is same as original, the information in
4346 // PromotedInsts[ExtOpnd] is still correct.
4347 if (It->second.getInt() == ExtTy)
4348 return;
4349
4350 // Now the new extension is different from old extension, we make
4351 // the type information invalid by setting extension type to
4352 // BothExtension.
4353 ExtTy = BothExtension;
4354 }
4355 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4356 }
4357
4358 /// Utility function to query the original type of instruction \p Opnd
4359 /// with a matched extension type. If the extension doesn't match, we
4360 /// cannot use the information we had on the original type.
4361 /// BothExtension doesn't match any extension type.
4362 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4363 Instruction *Opnd, bool IsSExt) {
4364 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4365 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4366 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4367 return It->second.getPointer();
4368 return nullptr;
4369 }
4370
4371 /// Utility function to check whether or not a sign or zero extension
4372 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4373 /// either using the operands of \p Inst or promoting \p Inst.
4374 /// The type of the extension is defined by \p IsSExt.
4375 /// In other words, check if:
4376 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4377 /// #1 Promotion applies:
4378 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4379 /// #2 Operand reuses:
4380 /// ext opnd1 to ConsideredExtType.
4381 /// \p PromotedInsts maps the instructions to their type before promotion.
4382 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4383 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4384
4385 /// Utility function to determine if \p OpIdx should be promoted when
4386 /// promoting \p Inst.
4387 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4388 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4389 }
4390
4391 /// Utility function to promote the operand of \p Ext when this
4392 /// operand is a promotable trunc or sext or zext.
4393 /// \p PromotedInsts maps the instructions to their type before promotion.
4394 /// \p CreatedInstsCost[out] contains the cost of all instructions
4395 /// created to promote the operand of Ext.
4396 /// Newly added extensions are inserted in \p Exts.
4397 /// Newly added truncates are inserted in \p Truncs.
4398 /// Should never be called directly.
4399 /// \return The promoted value which is used instead of Ext.
4400 static Value *promoteOperandForTruncAndAnyExt(
4401 Instruction *Ext, TypePromotionTransaction &TPT,
4402 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4403 SmallVectorImpl<Instruction *> *Exts,
4404 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4405
4406 /// Utility function to promote the operand of \p Ext when this
4407 /// operand is promotable and is not a supported trunc or sext.
4408 /// \p PromotedInsts maps the instructions to their type before promotion.
4409 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4410 /// created to promote the operand of Ext.
4411 /// Newly added extensions are inserted in \p Exts.
4412 /// Newly added truncates are inserted in \p Truncs.
4413 /// Should never be called directly.
4414 /// \return The promoted value which is used instead of Ext.
4415 static Value *promoteOperandForOther(Instruction *Ext,
4416 TypePromotionTransaction &TPT,
4417 InstrToOrigTy &PromotedInsts,
4418 unsigned &CreatedInstsCost,
4419 SmallVectorImpl<Instruction *> *Exts,
4420 SmallVectorImpl<Instruction *> *Truncs,
4421 const TargetLowering &TLI, bool IsSExt);
4422
4423 /// \see promoteOperandForOther.
4424 static Value *signExtendOperandForOther(
4425 Instruction *Ext, TypePromotionTransaction &TPT,
4426 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4427 SmallVectorImpl<Instruction *> *Exts,
4428 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4429 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4430 Exts, Truncs, TLI, true);
4431 }
4432
4433 /// \see promoteOperandForOther.
4434 static Value *zeroExtendOperandForOther(
4435 Instruction *Ext, TypePromotionTransaction &TPT,
4436 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4437 SmallVectorImpl<Instruction *> *Exts,
4438 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4439 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4440 Exts, Truncs, TLI, false);
4441 }
4442
4443public:
4444 /// Type for the utility function that promotes the operand of Ext.
4445 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4446 InstrToOrigTy &PromotedInsts,
4447 unsigned &CreatedInstsCost,
4448 SmallVectorImpl<Instruction *> *Exts,
4449 SmallVectorImpl<Instruction *> *Truncs,
4450 const TargetLowering &TLI);
4451
4452 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4453 /// action to promote the operand of \p Ext instead of using Ext.
4454 /// \return NULL if no promotable action is possible with the current
4455 /// sign extension.
4456 /// \p InsertedInsts keeps track of all the instructions inserted by the
4457 /// other CodeGenPrepare optimizations. This information is important
4458 /// because we do not want to promote these instructions as CodeGenPrepare
4459 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4460 /// \p PromotedInsts maps the instructions to their type before promotion.
4461 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4462 const TargetLowering &TLI,
4463 const InstrToOrigTy &PromotedInsts);
4464};
4465
4466} // end anonymous namespace
4467
4468bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4469 Type *ConsideredExtType,
4470 const InstrToOrigTy &PromotedInsts,
4471 bool IsSExt) {
4472 // The promotion helper does not know how to deal with vector types yet.
4473 // To be able to fix that, we would need to fix the places where we
4474 // statically extend, e.g., constants and such.
4475 if (Inst->getType()->isVectorTy())
4476 return false;
4477
4478 // We can always get through zext.
4479 if (isa<ZExtInst>(Inst))
4480 return true;
4481
4482 // sext(sext) is ok too.
4483 if (IsSExt && isa<SExtInst>(Inst))
4484 return true;
4485
4486 // We can get through binary operator, if it is legal. In other words, the
4487 // binary operator must have a nuw or nsw flag.
4488 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4489 if (isa<OverflowingBinaryOperator>(BinOp) &&
4490 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4491 (IsSExt && BinOp->hasNoSignedWrap())))
4492 return true;
4493
4494 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4495 if ((Inst->getOpcode() == Instruction::And ||
4496 Inst->getOpcode() == Instruction::Or))
4497 return true;
4498
4499 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4500 if (Inst->getOpcode() == Instruction::Xor) {
4501 // Make sure it is not a NOT.
4502 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4503 if (!Cst->getValue().isAllOnes())
4504 return true;
4505 }
4506
4507 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4508 // It may change a poisoned value into a regular value, like
4509 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4510 // (the left-hand side is a poisoned value, the right-hand side a regular one)
4511 // It should be OK since undef covers any valid value.
4512 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4513 return true;
4514
4515 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4516 // It may change a poisoned value into a regular value, like
4517 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4518 // (the left-hand side is a poisoned value, the right-hand side a regular one)
4519 // It should be OK since undef covers any valid value.
4520 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4521 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4522 if (ExtInst->hasOneUse()) {
4523 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4524 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4525 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4526 if (Cst &&
4527 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4528 return true;
4529 }
4530 }
4531 }
4532
4533 // Check if we can do the following simplification.
4534 // ext(trunc(opnd)) --> ext(opnd)
4535 if (!isa<TruncInst>(Inst))
4536 return false;
4537
4538 Value *OpndVal = Inst->getOperand(0);
4539 // Check if we can use this operand in the extension.
4540 // If the type is larger than the result type of the extension, we cannot.
4541 if (!OpndVal->getType()->isIntegerTy() ||
4542 OpndVal->getType()->getIntegerBitWidth() >
4543 ConsideredExtType->getIntegerBitWidth())
4544 return false;
4545
4546 // If the operand of the truncate is not an instruction, we will not have
4547 // any information on the dropped bits.
4548 // (Actually we could for constant but it is not worth the extra logic).
4549 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4550 if (!Opnd)
4551 return false;
4552
4553 // Check if the source of the type is narrow enough.
4554 // I.e., check that trunc just drops extended bits of the same kind of
4555 // the extension.
4556 // #1 get the type of the operand and check the kind of the extended bits.
4557 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4558 if (OpndType)
4559 ;
4560 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4561 OpndType = Opnd->getOperand(0)->getType();
4562 else
4563 return false;
4564
4565 // #2 check that the truncate just drops extended bits.
4566 return Inst->getType()->getIntegerBitWidth() >=
4567 OpndType->getIntegerBitWidth();
4568}
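// Illustrative IR (hypothetical) for the trunc case handled at the end of this
// function: the trunc only drops bits that a previous sext produced, so a
// later sext of the trunc can be moved through it.
//
//   %w = sext i8 %v to i64
//   %t = trunc i64 %w to i32
//   %e = sext i32 %t to i64   ; canGetThrough(%t, i64, ..., /*IsSExt=*/true)
//                             ; returns true because i32 >= i8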
4569
4570TypePromotionHelper::Action TypePromotionHelper::getAction(
4571 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4572 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4573 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4574 "Unexpected instruction type");
4575 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4576 Type *ExtTy = Ext->getType();
4577 bool IsSExt = isa<SExtInst>(Ext);
4578 // If the operand of the extension is not an instruction, we cannot
4579 // get through.
4580 // If it is, check whether we can get through it.
4581 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4582 return nullptr;
4583
4584 // Do not promote if the operand has been added by codegenprepare.
4585 // Otherwise, it means we are undoing an optimization that is likely to be
4586 // redone, thus causing a potential infinite loop.
4587 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4588 return nullptr;
4589
4590 // SExt or Trunc instructions.
4591 // Return the related handler.
4592 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4593 isa<ZExtInst>(ExtOpnd))
4594 return promoteOperandForTruncAndAnyExt;
4595
4596 // Regular instruction.
4597 // Abort early if we will have to insert non-free instructions.
4598 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4599 return nullptr;
4600 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4601}
4602
4603Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4604 Instruction *SExt, TypePromotionTransaction &TPT,
4605 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4606 SmallVectorImpl<Instruction *> *Exts,
4607 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4608 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4609 // get through it and this method should not be called.
4610 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4611 Value *ExtVal = SExt;
4612 bool HasMergedNonFreeExt = false;
4613 if (isa<ZExtInst>(SExtOpnd)) {
4614 // Replace s|zext(zext(opnd))
4615 // => zext(opnd).
4616 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4617 Value *ZExt =
4618 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4619 TPT.replaceAllUsesWith(SExt, ZExt);
4620 TPT.eraseInstruction(SExt);
4621 ExtVal = ZExt;
4622 } else {
4623 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4624 // => z|sext(opnd).
4625 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4626 }
4627 CreatedInstsCost = 0;
4628
4629 // Remove dead code.
4630 if (SExtOpnd->use_empty())
4631 TPT.eraseInstruction(SExtOpnd);
4632
4633 // Check if the extension is still needed.
4634 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4635 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4636 if (ExtInst) {
4637 if (Exts)
4638 Exts->push_back(ExtInst);
4639 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4640 }
4641 return ExtVal;
4642 }
4643
4644 // At this point we have: ext ty opnd to ty.
4645 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4646 Value *NextVal = ExtInst->getOperand(0);
4647 TPT.eraseInstruction(ExtInst, NextVal);
4648 return NextVal;
4649}
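// Illustrative before/after (hypothetical IR) for the zext-of-zext case above:
//
//   before:  %z1 = zext i8 %v to i16
//            %z2 = sext i16 %z1 to i32
//   after:   %promoted = zext i8 %v to i32   ; built via TPT.createZExt
//
// If the resulting extension ends up with matching source and destination
// types, it is erased and its operand is returned instead.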
4650
4651Value *TypePromotionHelper::promoteOperandForOther(
4652 Instruction *Ext, TypePromotionTransaction &TPT,
4653 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4654 SmallVectorImpl<Instruction *> *Exts,
4655 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4656 bool IsSExt) {
4657 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4658 // get through it and this method should not be called.
4659 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4660 CreatedInstsCost = 0;
4661 if (!ExtOpnd->hasOneUse()) {
4662 // ExtOpnd will be promoted.
4663 // All its uses, but Ext, will need to use a truncated value of the
4664 // promoted version.
4665 // Create the truncate now.
4666 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4667 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4668 // Insert it just after the definition.
4669 ITrunc->moveAfter(ExtOpnd);
4670 if (Truncs)
4671 Truncs->push_back(ITrunc);
4672 }
4673
4674 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4675 // Restore the operand of Ext (which has been replaced by the previous call
4676 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4677 TPT.setOperand(Ext, 0, ExtOpnd);
4678 }
4679
4680 // Get through the Instruction:
4681 // 1. Update its type.
4682 // 2. Replace the uses of Ext by Inst.
4683 // 3. Extend each operand that needs to be extended.
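  // Illustrative sketch (not part of the original source): promoting
  //   %a = add i32 %x, 1
  //   %e = sext i32 %a to i64
  // mutates %a to i64, rewires the users of %e to %a, and extends the
  // operands, roughly giving:
  //   %px = sext i32 %x to i64
  //   %a  = add i64 %px, 1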
4684
4685 // Remember the original type of the instruction before promotion.
4686 // This is useful to know whether the high bits are sign or zero extended.
4687 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4688 // Step #1.
4689 TPT.mutateType(ExtOpnd, Ext->getType());
4690 // Step #2.
4691 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4692 // Step #3.
4693 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4694 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4695 ++OpIdx) {
4696 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4697 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4698 !shouldExtOperand(ExtOpnd, OpIdx)) {
4699 LLVM_DEBUG(dbgs() << "No need to propagate\n");
4700 continue;
4701 }
4702 // Check if we can statically extend the operand.
4703 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4704 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4705 LLVM_DEBUG(dbgs() << "Statically extend\n");
4706 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4707 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4708 : Cst->getValue().zext(BitWidth);
4709 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4710 continue;
4711 }
4712 // UndefValues are typed, so we have to statically extend them.
4713 if (isa<UndefValue>(Opnd)) {
4714 LLVM_DEBUG(dbgs() << "Statically extend\n");
4715 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4716 continue;
4717 }
4718
4719 // Otherwise we have to explicitly sign or zero extend the operand.
4720 Value *ValForExtOpnd = IsSExt
4721 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
4722 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
4723 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4724 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
4725 if (!InstForExtOpnd)
4726 continue;
4727
4728 if (Exts)
4729 Exts->push_back(InstForExtOpnd);
4730
4731 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
4732 }
4733 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4734 TPT.eraseInstruction(Ext);
4735 return ExtOpnd;
4736}
4737
4738/// Check whether or not promoting an instruction to a wider type is profitable.
4739/// \p NewCost gives the cost of extension instructions created by the
4740/// promotion.
4741/// \p OldCost gives the cost of extension instructions before the promotion
4742/// plus the number of instructions that have been
4743/// matched in the addressing mode thanks to the promotion.
4744/// \p PromotedOperand is the value that has been promoted.
4745/// \return True if the promotion is profitable, false otherwise.
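/// For illustration only (not from the original source): if the promotion
/// creates one non-free zext (NewCost == 1) while the extension we started
/// from was non-free and one extra instruction was folded into the addressing
/// mode (OldCost == 1 + 1), then NewCost < OldCost and the promotion is kept.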
4746bool AddressingModeMatcher::isPromotionProfitable(
4747 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4748 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4749 << '\n');
4750 // The cost of the new extensions is greater than the cost of the
4751 // old extension plus what we folded.
4752 // This is not profitable.
4753 if (NewCost > OldCost)
4754 return false;
4755 if (NewCost < OldCost)
4756 return true;
4757 // The promotion is neutral but it may help fold the sign extension into
4758 // loads, for instance.
4759 // Check that we did not create an illegal instruction.
4760 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4761}
4762
4763/// Given an instruction or constant expr, see if we can fold the operation
4764/// into the addressing mode. If so, update the addressing mode and return
4765/// true, otherwise return false without modifying AddrMode.
4766/// If \p MovedAway is not NULL, it contains the information of whether or
4767/// not AddrInst has to be folded into the addressing mode on success.
4768/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
4769/// mode because it has been moved away.
4770/// Thus AddrInst must not be added to the matched instructions.
4771/// This state can happen when AddrInst is a sext, since it may be moved away.
4772/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4773/// not be referenced anymore.
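/// Illustrative example (not part of the original comment): for
///   %idx  = add i64 %i, 16
///   %addr = getelementptr i8, ptr %base, i64 %idx
///   %v    = load i32, ptr %addr
/// the add can be folded so the load is addressed as [%base + %i + 16]
/// (a base/scaled register plus BaseOffs 16), provided the target reports
/// that addressing mode as legal.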
4774bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4775 unsigned Depth,
4776 bool *MovedAway) {
4777 // Avoid exponential behavior on extremely deep expression trees.
4778 if (Depth >= 5)
4779 return false;
4780
4781 // By default, all matched instructions stay in place.
4782 if (MovedAway)
4783 *MovedAway = false;
4784
4785 switch (Opcode) {
4786 case Instruction::PtrToInt:
4787 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4788 return matchAddr(AddrInst->getOperand(0), Depth);
4789 case Instruction::IntToPtr: {
4790 auto AS = AddrInst->getType()->getPointerAddressSpace();
4791 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4792 // This inttoptr is a no-op if the integer type is pointer sized.
4793 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4794 return matchAddr(AddrInst->getOperand(0), Depth);
4795 return false;
4796 }
4797 case Instruction::BitCast:
4798 // BitCast is always a noop, and we can handle it as long as it is
4799 // int->int or pointer->pointer (we don't want int<->fp or something).
4800 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4801 // Don't touch identity bitcasts. These were probably put here by LSR,
4802 // and we don't want to mess around with them. Assume it knows what it
4803 // is doing.
4804 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4805 return matchAddr(AddrInst->getOperand(0), Depth);
4806 return false;
4807 case Instruction::AddrSpaceCast: {
4808 unsigned SrcAS =
4809 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4810 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4811 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
4812 return matchAddr(AddrInst->getOperand(0), Depth);
4813 return false;
4814 }
4815 case Instruction::Add: {
4816 // Check to see if we can merge in one operand, then the other. If so, we
4817 // win.
4818 ExtAddrMode BackupAddrMode = AddrMode;
4819 unsigned OldSize = AddrModeInsts.size();
4820 // Start a transaction at this point.
4821 // The LHS may match but not the RHS.
4822 // Therefore, we need a higher level restoration point to undo partially
4823 // matched operations.
4824 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4825 TPT.getRestorationPoint();
4826
4827 // Try to match an integer constant second to increase its chance of ending
4828 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
4829 int First = 0, Second = 1;
4830 if (isa<ConstantInt>(AddrInst->getOperand(First))
4831 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
4832 std::swap(First, Second);
4833 AddrMode.InBounds = false;
4834 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
4835 matchAddr(AddrInst->getOperand(Second), Depth + 1))
4836 return true;
4837
4838 // Restore the old addr mode info.
4839 AddrMode = BackupAddrMode;
4840 AddrModeInsts.resize(OldSize);
4841 TPT.rollback(LastKnownGood);
4842
4843 // Otherwise this was over-aggressive. Try merging operands in the opposite
4844 // order.
4845 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
4846 matchAddr(AddrInst->getOperand(First), Depth + 1))
4847 return true;
4848
4849 // Otherwise we definitely can't merge the ADD in.
4850 AddrMode = BackupAddrMode;
4851 AddrModeInsts.resize(OldSize);
4852 TPT.rollback(LastKnownGood);
4853 break;
4854 }
4855 // case Instruction::Or:
4856 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4857 // break;
4858 case Instruction::Mul:
4859 case Instruction::Shl: {
4860 // Can only handle X*C and X << C.
4861 AddrMode.InBounds = false;
4862 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4863 if (!RHS || RHS->getBitWidth() > 64)
4864 return false;
4865 int64_t Scale = Opcode == Instruction::Shl
4866 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
4867 : RHS->getSExtValue();
4868
4869 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4870 }
4871 case Instruction::GetElementPtr: {
4872 // Scan the GEP. We check whether it contains constant offsets and at most
4873 // one variable offset.
4874 int VariableOperand = -1;
4875 unsigned VariableScale = 0;
4876
4877 int64_t ConstantOffset = 0;
4878 gep_type_iterator GTI = gep_type_begin(AddrInst);
4879 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4880 if (StructType *STy = GTI.getStructTypeOrNull()) {
4881 const StructLayout *SL = DL.getStructLayout(STy);
4882 unsigned Idx =
4883 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4884 ConstantOffset += SL->getElementOffset(Idx);
4885 } else {
4886 TypeSize TS = GTI.getSequentialElementStride(DL);
4887 if (TS.isNonZero()) {
4888 // The optimisations below currently only work for fixed offsets.
4889 if (TS.isScalable())
4890 return false;
4891 int64_t TypeSize = TS.getFixedValue();
4892 if (ConstantInt *CI =
4893 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4894 const APInt &CVal = CI->getValue();
4895 if (CVal.getSignificantBits() <= 64) {
4896 ConstantOffset += CVal.getSExtValue() * TypeSize;
4897 continue;
4898 }
4899 }
4900 // We only allow one variable index at the moment.
4901 if (VariableOperand != -1)
4902 return false;
4903
4904 // Remember the variable index.
4905 VariableOperand = i;
4906 VariableScale = TypeSize;
4907 }
4908 }
4909 }
4910
4911 // A common case is for the GEP to only do a constant offset. In this case,
4912 // just add it to the disp field and check validity.
4913 if (VariableOperand == -1) {
4914 AddrMode.BaseOffs += ConstantOffset;
4915 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
4916 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4917 AddrMode.InBounds = false;
4918 return true;
4919 }
4920 AddrMode.BaseOffs -= ConstantOffset;
4921
4922 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4923 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4924 ConstantOffset > 0) {
4925 // Record GEPs with non-zero offsets as candidates for splitting in
4926 // the event that the offset cannot fit into the r+i addressing mode.
4927 // This handles the simple and common case in which only one GEP is used in
4928 // calculating the address for the memory access.
4929 Value *Base = AddrInst->getOperand(0);
4930 auto *BaseI = dyn_cast<Instruction>(Base);
4931 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4932 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4933 (BaseI && !isa<CastInst>(BaseI) &&
4934 !isa<GetElementPtrInst>(BaseI))) {
4935 // Make sure the parent block allows inserting non-PHI instructions
4936 // before the terminator.
4937 BasicBlock *Parent = BaseI ? BaseI->getParent()
4938 : &GEP->getFunction()->getEntryBlock();
4939 if (!Parent->getTerminator()->isEHPad())
4940 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4941 }
4942 }
4943
4944 return false;
4945 }
4946
4947 // Save the valid addressing mode in case we can't match.
4948 ExtAddrMode BackupAddrMode = AddrMode;
4949 unsigned OldSize = AddrModeInsts.size();
4950
4951 // See if the scale and offset amount is valid for this target.
4952 AddrMode.BaseOffs += ConstantOffset;
4953 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4954 AddrMode.InBounds = false;
4955
4956 // Match the base operand of the GEP.
4957 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
4958 // If it couldn't be matched, just stuff the value in a register.
4959 if (AddrMode.HasBaseReg) {
4960 AddrMode = BackupAddrMode;
4961 AddrModeInsts.resize(OldSize);
4962 return false;
4963 }
4964 AddrMode.HasBaseReg = true;
4965 AddrMode.BaseReg = AddrInst->getOperand(0);
4966 }
4967
4968 // Match the remaining variable portion of the GEP.
4969 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4970 Depth)) {
4971 // If it couldn't be matched, try stuffing the base into a register
4972 // instead of matching it, and retrying the match of the scale.
4973 AddrMode = BackupAddrMode;
4974 AddrModeInsts.resize(OldSize);
4975 if (AddrMode.HasBaseReg)
4976 return false;
4977 AddrMode.HasBaseReg = true;
4978 AddrMode.BaseReg = AddrInst->getOperand(0);
4979 AddrMode.BaseOffs += ConstantOffset;
4980 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4981 VariableScale, Depth)) {
4982 // If even that didn't work, bail.
4983 AddrMode = BackupAddrMode;
4984 AddrModeInsts.resize(OldSize);
4985 return false;
4986 }
4987 }
4988
4989 return true;
4990 }
4991 case Instruction::SExt:
4992 case Instruction::ZExt: {
4993 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4994 if (!Ext)
4995 return false;
4996
4997 // Try to move this ext out of the way of the addressing mode.
4998 // Ask for a method for doing so.
4999 TypePromotionHelper::Action TPH =
5000 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5001 if (!TPH)
5002 return false;
5003
5004 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5005 TPT.getRestorationPoint();
5006 unsigned CreatedInstsCost = 0;
5007 unsigned ExtCost = !TLI.isExtFree(Ext);
5008 Value *PromotedOperand =
5009 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5010 // SExt has been moved away.
5011 // Thus either it will be rematched later in the recursive calls or it is
5012 // gone. Anyway, we must not fold it into the addressing mode at this point.
5013 // E.g.,
5014 // op = add opnd, 1
5015 // idx = ext op
5016 // addr = gep base, idx
5017 // is now:
5018 // promotedOpnd = ext opnd <- no match here
5019 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5020 // addr = gep base, op <- match
5021 if (MovedAway)
5022 *MovedAway = true;
5023
5024 assert(PromotedOperand &&
5025 "TypePromotionHelper should have filtered out those cases");
5026
5027 ExtAddrMode BackupAddrMode = AddrMode;
5028 unsigned OldSize = AddrModeInsts.size();
5029
5030 if (!matchAddr(PromotedOperand, Depth) ||
5031 // The total of the new cost is equal to the cost of the created
5032 // instructions.
5033 // The total of the old cost is equal to the cost of the extension plus
5034 // what we have saved in the addressing mode.
5035 !isPromotionProfitable(CreatedInstsCost,
5036 ExtCost + (AddrModeInsts.size() - OldSize),
5037 PromotedOperand)) {
5038 AddrMode = BackupAddrMode;
5039 AddrModeInsts.resize(OldSize);
5040 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5041 TPT.rollback(LastKnownGood);
5042 return false;
5043 }
5044 return true;
5045 }
5046 }
5047 return false;
5048}
5049
5050/// If we can, try to add the value of 'Addr' into the current addressing mode.
5051/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5052/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5053/// for the target.
5054///
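/// Illustrative sketch (not from the original source): on a target where
/// [reg + imm] is legal, matchAddr of a ConstantInt folds it into BaseOffs,
/// a GlobalValue is folded into BaseGV, and anything else falls back to
/// occupying BaseReg (or ScaledReg with Scale 1 if the base register is
/// already taken).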
5055bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5056 // Start a transaction at this point that we will rollback if the matching
5057 // fails.
5058 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5059 TPT.getRestorationPoint();
5060 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5061 if (CI->getValue().isSignedIntN(64)) {
5062 // Fold in immediates if legal for the target.
5063 AddrMode.BaseOffs += CI->getSExtValue();
5064 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5065 return true;
5066 AddrMode.BaseOffs -= CI->getSExtValue();
5067 }
5068 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5069 // If this is a global variable, try to fold it into the addressing mode.
5070 if (!AddrMode.BaseGV) {
5071 AddrMode.BaseGV = GV;
5072 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5073 return true;
5074 AddrMode.BaseGV = nullptr;
5075 }
5076 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5077 ExtAddrMode BackupAddrMode = AddrMode;
5078 unsigned OldSize = AddrModeInsts.size();
5079
5080 // Check to see if it is possible to fold this operation.
5081 bool MovedAway = false;
5082 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5083 // This instruction may have been moved away. If so, there is nothing
5084 // to check here.
5085 if (MovedAway)
5086 return true;
5087 // Okay, it's possible to fold this. Check to see if it is actually
5088 // *profitable* to do so. We use a simple cost model to avoid increasing
5089 // register pressure too much.
5090 if (I->hasOneUse() ||
5091 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5092 AddrModeInsts.push_back(I);
5093 return true;
5094 }
5095
5096 // It isn't profitable to do this, roll back.
5097 AddrMode = BackupAddrMode;
5098 AddrModeInsts.resize(OldSize);
5099 TPT.rollback(LastKnownGood);
5100 }
5101 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5102 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5103 return true;
5104 TPT.rollback(LastKnownGood);
5105 } else if (isa<ConstantPointerNull>(Addr)) {
5106 // Null pointer gets folded without affecting the addressing mode.
5107 return true;
5108 }
5109
5110 // Worst case, the target should support [reg] addressing modes. :)
5111 if (!AddrMode.HasBaseReg) {
5112 AddrMode.HasBaseReg = true;
5113 AddrMode.BaseReg = Addr;
5114 // Still check for legality in case the target supports [imm] but not [i+r].
5115 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5116 return true;
5117 AddrMode.HasBaseReg = false;
5118 AddrMode.BaseReg = nullptr;
5119 }
5120
5121 // If the base register is already taken, see if we can do [r+r].
5122 if (AddrMode.Scale == 0) {
5123 AddrMode.Scale = 1;
5124 AddrMode.ScaledReg = Addr;
5125 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5126 return true;
5127 AddrMode.Scale = 0;
5128 AddrMode.ScaledReg = nullptr;
5129 }
5130 // Couldn't match.
5131 TPT.rollback(LastKnownGood);
5132 return false;
5133}
5134
5135/// Check to see if all uses of OpVal by the specified inline asm call are due
5136/// to memory operands. If so, return true, otherwise return false.
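/// Hypothetical example (not in the original source): in
///   call void asm sideeffect "prefetcht0 $0", "*m"(ptr elementtype(i8) %p)
/// %p is only used as an indirect memory operand ("*m"), so folding an
/// addressing mode into it is acceptable.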
5137static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5138 const TargetLowering &TLI,
5139 const TargetRegisterInfo &TRI) {
5140 const Function *F = CI->getFunction();
5141 TargetLowering::AsmOperandInfoVector TargetConstraints =
5142 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
5143
5144 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5145 // Compute the constraint code and ConstraintType to use.
5146 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5147
5148 // If this asm operand is our Value*, and if it isn't an indirect memory
5149 // operand, we can't fold it! TODO: Also handle C_Address?
5150 if (OpInfo.CallOperandVal == OpVal &&
5151 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5152 !OpInfo.isIndirect))
5153 return false;
5154 }
5155
5156 return true;
5157}
5158
5159/// Recursively walk all the uses of I until we find a memory use.
5160/// If we find an obviously non-foldable instruction, return true.
5161/// Add accessed addresses and types to MemoryUses.
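/// Illustrative example (not part of the original comment): for
///   %p = getelementptr i8, ptr %base, i64 %off
///   %a = load i32, ptr %p
///   store i64 %v, ptr %p
/// both users of %p are memory uses, so the pairs {use, i32} and {use, i64}
/// are recorded; a use of %p as an argument of an ordinary (non-cold,
/// non-asm) call would make this return true instead.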
5162static bool FindAllMemoryUses(
5163 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5164 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5165 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5166 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5167 // If we already considered this instruction, we're done.
5168 if (!ConsideredInsts.insert(I).second)
5169 return false;
5170
5171 // If this is an obviously unfoldable instruction, bail out.
5172 if (!MightBeFoldableInst(I))
5173 return true;
5174
5175 // Loop over all the uses, recursively processing them.
5176 for (Use &U : I->uses()) {
5177 // Conservatively return true if we're seeing a large number or a deep chain
5178 // of users. This avoids excessive compilation times in pathological cases.
5179 if (SeenInsts++ >= MaxAddressUsersToScan)
5180 return true;
5181
5182 Instruction *UserI = cast<Instruction>(U.getUser());
5183 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5184 MemoryUses.push_back({&U, LI->getType()});
5185 continue;
5186 }
5187
5188 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5189 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5190 return true; // Storing addr, not into addr.
5191 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5192 continue;
5193 }
5194
5195 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5196 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5197 return true; // Storing addr, not into addr.
5198 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5199 continue;
5200 }
5201
5202 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5203 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5204 return true; // Storing addr, not into addr.
5205 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5206 continue;
5207 }
5208
5209 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5210 if (CI->hasFnAttr(Attribute::Cold)) {
5211 // If this is a cold call, we can sink the addressing calculation into
5212 // the cold path. See optimizeCallInst
5213 bool OptForSize =
5214 OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
5215 if (!OptForSize)
5216 continue;
5217 }
5218
5219 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5220 if (!IA)
5221 return true;
5222
5223 // If this is a memory operand, we're cool, otherwise bail out.
5224 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5225 return true;
5226 continue;
5227 }
5228
5229 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5230 PSI, BFI, SeenInsts))
5231 return true;
5232 }
5233
5234 return false;
5235}
5236
5237static bool FindAllMemoryUses(
5238 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5239 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5240 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5241 unsigned SeenInsts = 0;
5242 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5243 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5244 PSI, BFI, SeenInsts);
5245}
5246
5247
5248/// Return true if Val is already known to be live at the use site that we're
5249/// folding it into. If so, there is no cost to include it in the addressing
5250/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5251/// instruction already.
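/// For illustration (not part of the original comment): constants and globals,
/// the two KnownLive values, static allocas in the entry block, and any value
/// already used in MemoryInst's block are all treated as already live, so
/// folding them into the addressing mode is considered free.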
5252bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5253 Value *KnownLive1,
5254 Value *KnownLive2) {
5255 // If Val is either of the known-live values, we know it is live!
5256 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5257 return true;
5258
5259 // All values other than instructions and arguments (e.g. constants) are live.
5260 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5261 return true;
5262
5263 // If Val is a constant sized alloca in the entry block, it is live, this is
5264 // true because it is just a reference to the stack/frame pointer, which is
5265 // live for the whole function.
5266 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5267 if (AI->isStaticAlloca())
5268 return true;
5269
5270 // Check to see if this value is already used in the memory instruction's
5271 // block. If so, it's already live into the block at the very least, so we
5272 // can reasonably fold it.
5273 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5274}
5275
5276/// It is possible for the addressing mode of the machine to fold the specified
5277/// instruction into a load or store that ultimately uses it.
5278/// However, the specified instruction has multiple uses.
5279/// Given this, it may actually increase register pressure to fold it
5280/// into the load. For example, consider this code:
5281///
5282/// X = ...
5283/// Y = X+1
5284/// use(Y) -> nonload/store
5285/// Z = Y+1
5286/// load Z
5287///
5288/// In this case, Y has multiple uses, and can be folded into the load of Z
5289/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5290/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5291/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5292/// number of computations either.
5293///
5294/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5295/// X was live across 'load Z' for other reasons, we actually *would* want to
5296/// fold the addressing mode in the Z case. This would make Y die earlier.
5297bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5298 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5299 if (IgnoreProfitability)
5300 return true;
5301
5302 // AMBefore is the addressing mode before this instruction was folded into it,
5303 // and AMAfter is the addressing mode after the instruction was folded. Get
5304 // the set of registers referenced by AMAfter and subtract out those
5305 // referenced by AMBefore: this is the set of values which folding in this
5306 // address extends the lifetime of.
5307 //
5308 // Note that there are only two potential values being referenced here,
5309 // BaseReg and ScaleReg (global addresses are always available, as are any
5310 // folded immediates).
5311 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5312
5313 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5314 // lifetime wasn't extended by adding this instruction.
5315 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5316 BaseReg = nullptr;
5317 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5318 ScaledReg = nullptr;
5319
5320 // If folding this instruction (and its subexprs) didn't extend any live
5321 // ranges, we're ok with it.
5322 if (!BaseReg && !ScaledReg)
5323 return true;
5324
5325 // If all uses of this instruction can have the address mode sunk into them,
5326 // we can remove the addressing mode and effectively trade one live register
5327 // for another (at worst.) In this context, folding an addressing mode into
5328 // the use is just a particularly nice way of sinking it.
5329 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5330 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5331 return false; // Has a non-memory, non-foldable use!
5332
5333 // Now that we know that all uses of this instruction are part of a chain of
5334 // computation involving only operations that could theoretically be folded
5335 // into a memory use, loop over each of these memory operation uses and see
5336 // if they could *actually* fold the instruction. The assumption is that
5337 // addressing modes are cheap and that duplicating the computation involved
5338 // many times is worthwhile, even on a fastpath. For sinking candidates
5339 // (i.e. cold call sites), this serves as a way to prevent excessive code
5340 // growth since most architectures have some reasonable small and fast way to
5341 // compute an effective address. (i.e LEA on x86)
5342 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5343 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5344 Value *Address = Pair.first->get();
5345 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5346 Type *AddressAccessTy = Pair.second;
5347 unsigned AS = Address->getType()->getPointerAddressSpace();
5348
5349 // Do a match against the root of this address, ignoring profitability. This
5350 // will tell us if the addressing mode for the memory operation will
5351 // *actually* cover the shared instruction.
5352 ExtAddrMode Result;
5353 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5354 0);
5355 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5356 TPT.getRestorationPoint();
5357 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5358 AddressAccessTy, AS, UserI, Result,
5359 InsertedInsts, PromotedInsts, TPT,
5360 LargeOffsetGEP, OptSize, PSI, BFI);
5361 Matcher.IgnoreProfitability = true;
5362 bool Success = Matcher.matchAddr(Address, 0);
5363 (void)Success;
5364 assert(Success && "Couldn't select *anything*?");
5365
5366 // The match was to check the profitability, the changes made are not
5367 // part of the original matcher. Therefore, they should be dropped
5368 // otherwise the original matcher will not present the right state.
5369 TPT.rollback(LastKnownGood);
5370
5371 // If the match didn't cover I, then it won't be shared by it.
5372 if (!is_contained(MatchedAddrModeInsts, I))
5373 return false;
5374
5375 MatchedAddrModeInsts.clear();
5376 }
5377
5378 return true;
5379}
5380
5381/// Return true if the specified values are defined in a
5382/// different basic block than BB.
5383static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5384 if (Instruction *I = dyn_cast<Instruction>(V))
5385 return I->getParent() != BB;
5386 return false;
5387}
5388
5389/// Sink addressing mode computation immediately before MemoryInst if doing so
5390/// can be done without increasing register pressure. The need for the
5391/// register pressure constraint means this can end up being an all or nothing
5392/// decision for all uses of the same addressing computation.
5393///
5394/// Load and Store Instructions often have addressing modes that can do
5395/// significant amounts of computation. As such, instruction selection will try
5396/// to get the load or store to do as much computation as possible for the
5397/// program. The problem is that isel can only see within a single block. As
5398/// such, we sink as much legal addressing mode work into the block as possible.
5399///
5400/// This method is used to optimize both load/store and inline asms with memory
5401/// operands. It's also used to sink addressing computations feeding into cold
5402/// call sites into their (cold) basic block.
5403///
5404/// The motivation for handling sinking into cold blocks is that doing so can
5405/// both enable other address mode sinking (by satisfying the register pressure
5406/// constraint above), and reduce register pressure globally (by removing the
5407/// addressing mode computation from the fast path entirely.).
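/// Illustrative sketch (not from the original comment):
///   bb1:
///     %a = getelementptr i8, ptr %p, i64 40
///     br label %bb2
///   bb2:
///     %v = load i32, ptr %a
/// becomes, after sinking the matched addressing computation:
///   bb2:
///     %sunkaddr = getelementptr i8, ptr %p, i64 40
///     %v = load i32, ptr %sunkaddr
/// so that instruction selection can fold [%p + 40] into the load.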
5408bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5409 Type *AccessTy, unsigned AddrSpace) {
5410 Value *Repl = Addr;
5411
5412 // Try to collapse single-value PHI nodes. This is necessary to undo
5413 // unprofitable PRE transformations.
5414 SmallVector<Value *, 8> worklist;
5415 SmallPtrSet<Value *, 16> Visited;
5416 worklist.push_back(Addr);
5417
5418 // Use a worklist to iteratively look through PHI and select nodes, and
5419 // ensure that the addressing mode obtained from the non-PHI/select roots of
5420 // the graph are compatible.
5421 bool PhiOrSelectSeen = false;
5422 SmallVector<Instruction *, 16> AddrModeInsts;
5423 const SimplifyQuery SQ(*DL, TLInfo);
5424 AddressingModeCombiner AddrModes(SQ, Addr);
5425 TypePromotionTransaction TPT(RemovedInsts);
5426 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5427 TPT.getRestorationPoint();
5428 while (!worklist.empty()) {
5429 Value *V = worklist.pop_back_val();
5430
5431 // We allow traversing cyclic Phi nodes.
5432 // In case of success after this loop we ensure that traversing through
5433 // Phi nodes ends up with all cases to compute address of the form
5434 // BaseGV + Base + Scale * Index + Offset
5435 // where Scale and Offset are constants and BaseGV, Base and Index
5436 // are exactly the same Values in all cases.
5437 // It means that BaseGV, Scale and Offset dominate our memory instruction
5438 // and have the same value as they had in address computation represented
5439 // as Phi. So we can safely sink address computation to memory instruction.
5440 if (!Visited.insert(V).second)
5441 continue;
5442
5443 // For a PHI node, push all of its incoming values.
5444 if (PHINode *P = dyn_cast<PHINode>(V)) {
5445 append_range(worklist, P->incoming_values());
5446 PhiOrSelectSeen = true;
5447 continue;
5448 }
5449 // Similar for select.
5450 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5451 worklist.push_back(SI->getFalseValue());
5452 worklist.push_back(SI->getTrueValue());
5453 PhiOrSelectSeen = true;
5454 continue;
5455 }
5456
5457 // For non-PHIs, determine the addressing mode being computed. Note that
5458 // the result may differ depending on what other uses our candidate
5459 // addressing instructions might have.
5460 AddrModeInsts.clear();
5461 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5462 0);
5463 // Defer the query (and possible computation of) the dom tree to point of
5464 // actual use. It's expected that most address matches don't actually need
5465 // the domtree.
5466 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5467 Function *F = MemoryInst->getParent()->getParent();
5468 return this->getDT(*F);
5469 };
5470 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5471 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5472 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5473 BFI.get());
5474
5475 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5476 if (GEP && !NewGEPBases.count(GEP)) {
5477 // If splitting the underlying data structure can reduce the offset of a
5478 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5479 // previously split data structures.
5480 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5481 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5482 }
5483
5484 NewAddrMode.OriginalValue = V;
5485 if (!AddrModes.addNewAddrMode(NewAddrMode))
5486 break;
5487 }
5488
5489 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5490 // or we have multiple but either couldn't combine them or combining them
5491 // wouldn't do anything useful, bail out now.
5492 if (!AddrModes.combineAddrModes()) {
5493 TPT.rollback(LastKnownGood);
5494 return false;
5495 }
5496 bool Modified = TPT.commit();
5497
5498 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5499 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5500
5501 // If all the instructions matched are already in this BB, don't do anything.
5502 // If we saw a Phi node then it is definitely not local, and if we saw a
5503 // select then we want to push the address calculation past it even if it's
5504 // already in this BB.
5505 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5506 return IsNonLocalValue(V, MemoryInst->getParent());
5507 })) {
5508 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5509 << "\n");
5510 return Modified;
5511 }
5512
5513 // Insert this computation right after this user. Since our caller is
5514 // scanning from the top of the BB to the bottom, reuse of the expr is
5515 // guaranteed to happen later.
5516 IRBuilder<> Builder(MemoryInst);
5517
5518 // Now that we have determined the addressing expression we want to use and
5519 // know that we have to sink it into this block, check to see if we have already
5520 // done this for some other load/store instr in this block. If so, reuse
5521 // the computation. Before attempting reuse, check if the address is valid
5522 // as it may have been erased.
5523
5524 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5525
5526 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5527 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5528 if (SunkAddr) {
5529 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5530 << " for " << *MemoryInst << "\n");
5531 if (SunkAddr->getType() != Addr->getType()) {
5532 if (SunkAddr->getType()->getPointerAddressSpace() !=
5533 Addr->getType()->getPointerAddressSpace() &&
5534 !DL->isNonIntegralPointerType(Addr->getType())) {
5535 // There are two reasons the address spaces might not match: a no-op
5536 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5537 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5538 // TODO: allow bitcast between different address space pointers with the
5539 // same size.
5540 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5541 SunkAddr =
5542 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5543 } else
5544 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5545 }
5546 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5547 SubtargetInfo->addrSinkUsingGEPs())) {
5548 // By default, we use the GEP-based method when AA is used later. This
5549 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5550 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5551 << " for " << *MemoryInst << "\n");
5552 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5553
5554 // First, find the pointer.
5555 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5556 ResultPtr = AddrMode.BaseReg;
5557 AddrMode.BaseReg = nullptr;
5558 }
5559
5560 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5561 // We can't add more than one pointer together, nor can we scale a
5562 // pointer (both of which seem meaningless).
5563 if (ResultPtr || AddrMode.Scale != 1)
5564 return Modified;
5565
5566 ResultPtr = AddrMode.ScaledReg;
5567 AddrMode.Scale = 0;
5568 }
5569
5570 // It is only safe to sign extend the BaseReg if we know that the math
5571 // required to create it did not overflow before we extend it. Since
5572 // the original IR value was tossed in favor of a constant back when
5573 // the AddrMode was created we need to bail out gracefully if widths
5574 // do not match instead of extending it.
5575 //
5576 // (See below for code to add the scale.)
5577 if (AddrMode.Scale) {
5578 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
5579 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5580 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5581 return Modified;
5582 }
5583
5584 if (AddrMode.BaseGV) {
5585 if (ResultPtr)
5586 return Modified;
5587
5588 ResultPtr = AddrMode.BaseGV;
5589 }
5590
5591 // If the real base value actually came from an inttoptr, then the matcher
5592 // will look through it and provide only the integer value. In that case,
5593 // use it here.
5594 if (!DL->isNonIntegralPointerType(Addr->getType())) {
5595 if (!ResultPtr && AddrMode.BaseReg) {
5596 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
5597 "sunkaddr");
5598 AddrMode.BaseReg = nullptr;
5599 } else if (!ResultPtr && AddrMode.Scale == 1) {
5600 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
5601 "sunkaddr");
5602 AddrMode.Scale = 0;
5603 }
5604 }
5605
5606 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
5607 !AddrMode.BaseOffs) {
5608 SunkAddr = Constant::getNullValue(Addr->getType());
5609 } else if (!ResultPtr) {
5610 return Modified;
5611 } else {
5612 Type *I8PtrTy =
5613 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
5614
5615 // Start with the base register. Do this first so that subsequent address
5616 // matching finds it last, which will prevent it from trying to match it
5617 // as the scaled value in case it happens to be a mul. That would be
5618 // problematic if we've sunk a different mul for the scale, because then
5619 // we'd end up sinking both muls.
5620 if (AddrMode.BaseReg) {
5621 Value *V = AddrMode.BaseReg;
5622 if (V->getType() != IntPtrTy)
5623 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5624
5625 ResultIndex = V;
5626 }
5627
5628 // Add the scale value.
5629 if (AddrMode.Scale) {
5630 Value *V = AddrMode.ScaledReg;
5631 if (V->getType() == IntPtrTy) {
5632 // done.
5633 } else {
5634 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5635 cast<IntegerType>(V->getType())->getBitWidth() &&
5636 "We can't transform if ScaledReg is too narrow");
5637 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5638 }
5639
5640 if (AddrMode.Scale != 1)
5641 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5642 "sunkaddr");
5643 if (ResultIndex)
5644 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5645 else
5646 ResultIndex = V;
5647 }
5648
5649 // Add in the Base Offset if present.
5650 if (AddrMode.BaseOffs) {
5651 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5652 if (ResultIndex) {
5653 // We need to add this separately from the scale above to help with
5654 // SDAG consecutive load/store merging.
5655 if (ResultPtr->getType() != I8PtrTy)
5656 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5657 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5658 AddrMode.InBounds);
5659 }
5660
5661 ResultIndex = V;
5662 }
5663
5664 if (!ResultIndex) {
5665 SunkAddr = ResultPtr;
5666 } else {
5667 if (ResultPtr->getType() != I8PtrTy)
5668 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5669 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5670 AddrMode.InBounds);
5671 }
5672
5673 if (SunkAddr->getType() != Addr->getType()) {
5674 if (SunkAddr->getType()->getPointerAddressSpace() !=
5675 Addr->getType()->getPointerAddressSpace() &&
5676 !DL->isNonIntegralPointerType(Addr->getType())) {
5677 // There are two reasons the address spaces might not match: a no-op
5678 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5679 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5680 // TODO: allow bitcast between different address space pointers with
5681 // the same size.
5682 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5683 SunkAddr =
5684 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5685 } else
5686 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5687 }
5688 }
5689 } else {
5690 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5691 // non-integral pointers, so in that case bail out now.
5692 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5693 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5694 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5695 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5696 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5697 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5698 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5699 (AddrMode.BaseGV &&
5700 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5701 return Modified;
5702
5703 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5704 << " for " << *MemoryInst << "\n");
5705 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5706 Value *Result = nullptr;
5707
5708 // Start with the base register. Do this first so that subsequent address
5709 // matching finds it last, which will prevent it from trying to match it
5710 // as the scaled value in case it happens to be a mul. That would be
5711 // problematic if we've sunk a different mul for the scale, because then
5712 // we'd end up sinking both muls.
5713 if (AddrMode.BaseReg) {
5714 Value *V = AddrMode.BaseReg;
5715 if (V->getType()->isPointerTy())
5716 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5717 if (V->getType() != IntPtrTy)
5718 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5719 Result = V;
5720 }
5721
5722 // Add the scale value.
5723 if (AddrMode.Scale) {
5724 Value *V = AddrMode.ScaledReg;
5725 if (V->getType() == IntPtrTy) {
5726 // done.
5727 } else if (V->getType()->isPointerTy()) {
5728 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5729 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5730 cast<IntegerType>(V->getType())->getBitWidth()) {
5731 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5732 } else {
5733 // It is only safe to sign extend the BaseReg if we know that the math
5734 // required to create it did not overflow before we extend it. Since
5735 // the original IR value was tossed in favor of a constant back when
5736 // the AddrMode was created we need to bail out gracefully if widths
5737 // do not match instead of extending it.
5738 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5739 if (I && (Result != AddrMode.BaseReg))
5740 I->eraseFromParent();
5741 return Modified;
5742 }
5743 if (AddrMode.Scale != 1)
5744 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5745 "sunkaddr");
5746 if (Result)
5747 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5748 else
5749 Result = V;
5750 }
5751
5752 // Add in the BaseGV if present.
5753 if (AddrMode.BaseGV) {
5754 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
5755 if (Result)
5756 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5757 else
5758 Result = V;
5759 }
5760
5761 // Add in the Base Offset if present.
5762 if (AddrMode.BaseOffs) {
5763 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5764 if (Result)
5765 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5766 else
5767 Result = V;
5768 }
5769
5770 if (!Result)
5771 SunkAddr = Constant::getNullValue(Addr->getType());
5772 else
5773 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5774 }
5775
5776 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5777 // Store the newly computed address into the cache. In the case we reused a
5778 // value, this should be idempotent.
5779 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5780
5781 // If we have no uses, recursively delete the value and all dead instructions
5782 // using it.
5783 if (Repl->use_empty()) {
5784 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
5785 RecursivelyDeleteTriviallyDeadInstructions(
5786 Repl, TLInfo, nullptr,
5787 [&](Value *V) { removeAllAssertingVHReferences(V); });
5788 });
5789 }
5790 ++NumMemoryInsts;
5791 return true;
5792}
5793
5794/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
5795/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
5796/// only handle a 2 operand GEP in the same basic block or a splat constant
5797/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
5798/// index.
5799///
5800/// If the existing GEP has a vector base pointer that is splat, we can look
5801/// through the splat to find the scalar pointer. If we can't find a scalar
5802/// pointer there's nothing we can do.
5803///
5804/// If we have a GEP with more than 2 indices where the middle indices are all
5805/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
5806///
5807/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
5808/// followed by a GEP with an all zeroes vector index. This will enable
5809/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
5810/// zero index.
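/// Illustrative example (not part of the original comment):
///   %ptrs = getelementptr [16 x i32], ptr %base, i64 0, <4 x i64> %idx
/// is rewritten into a scalar GEP covering the leading constant-zero indices
/// followed by a two operand GEP that adds the vector index, which is the
/// form SelectionDAGBuilder can use as a uniform base.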
5811bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
5812 Value *Ptr) {
5813 Value *NewAddr;
5814
5815 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
5816 // Don't optimize GEPs that don't have indices.
5817 if (!GEP->hasIndices())
5818 return false;
5819
5820 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
5821 // FIXME: We should support this by sinking the GEP.
5822 if (MemoryInst->getParent() != GEP->getParent())
5823 return false;
5824
5825 SmallVector<Value *, 2> Ops(GEP->operands());
5826
5827 bool RewriteGEP = false;
5828
5829 if (Ops[0]->getType()->isVectorTy()) {
5830 Ops[0] = getSplatValue(Ops[0]);
5831 if (!Ops[0])
5832 return false;
5833 RewriteGEP = true;
5834 }
5835
5836 unsigned FinalIndex = Ops.size() - 1;
5837
5838 // Ensure all but the last index is 0.
5839 // FIXME: This isn't strictly required. All that's required is that they are
5840 // all scalars or splats.
5841 for (unsigned i = 1; i < FinalIndex; ++i) {
5842 auto *C = dyn_cast<Constant>(Ops[i]);
5843 if (!C)
5844 return false;
5845 if (isa<VectorType>(C->getType()))
5846 C = C->getSplatValue();
5847 auto *CI = dyn_cast_or_null<ConstantInt>(C);
5848 if (!CI || !CI->isZero())
5849 return false;
5850 // Scalarize the index if needed.
5851 Ops[i] = CI;
5852 }
5853
5854 // Try to scalarize the final index.
5855 if (Ops[FinalIndex]->getType()->isVectorTy()) {
5856 if (Value *V = getSplatValue(Ops[FinalIndex])) {
5857 auto *C = dyn_cast<ConstantInt>(V);
5858 // Don't scalarize all zeros vector.
5859 if (!C || !C->isZero()) {
5860 Ops[FinalIndex] = V;
5861 RewriteGEP = true;
5862 }
5863 }
5864 }
5865
5866 // If we made any changes or we have extra operands, we need to generate
5867 // new instructions.
5868 if (!RewriteGEP && Ops.size() == 2)
5869 return false;
5870
5871 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5872
5873 IRBuilder<> Builder(MemoryInst);
5874
5875 Type *SourceTy = GEP->getSourceElementType();
5876 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
5877
5878 // If the final index isn't a vector, emit a scalar GEP containing all ops
5879 // and a vector GEP with all zeroes final index.
5880 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
5881 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
5882 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5883 auto *SecondTy = GetElementPtrInst::getIndexedType(
5884 SourceTy, ArrayRef(Ops).drop_front());
5885 NewAddr =
5886 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
5887 } else {
5888 Value *Base = Ops[0];
5889 Value *Index = Ops[FinalIndex];
5890
5891 // Create a scalar GEP if there are more than 2 operands.
5892 if (Ops.size() != 2) {
5893 // Replace the last index with 0.
5894 Ops[FinalIndex] =
5895 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
5896 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
5897 SourceTy = GetElementPtrInst::getIndexedType(
5898 SourceTy, ArrayRef(Ops).drop_front());
5899 }
5900
5901 // Now create the GEP with scalar pointer and vector index.
5902 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
5903 }
5904 } else if (!isa<Constant>(Ptr)) {
5905 // Not a GEP, but maybe it's a splat and we can create a GEP to enable
5906 // SelectionDAGBuilder to use it as a uniform base.
5907 Value *V = getSplatValue(Ptr);
5908 if (!V)
5909 return false;
5910
5911 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5912
5913 IRBuilder<> Builder(MemoryInst);
5914
5915 // Emit a vector GEP with a scalar pointer and all 0s vector index.
5916 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
5917 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5918 Type *ScalarTy;
5919 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
5920 Intrinsic::masked_gather) {
5921 ScalarTy = MemoryInst->getType()->getScalarType();
5922 } else {
5923 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
5924 Intrinsic::masked_scatter);
5925 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
5926 }
5927 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
5928 } else {
5929 // Constant; SelectionDAGBuilder knows to check if it's a splat.
5930 return false;
5931 }
5932
5933 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
5934
5935 // If we have no uses, recursively delete the value and all dead instructions
5936 // using it.
5937 if (Ptr->use_empty())
5938 RecursivelyDeleteTriviallyDeadInstructions(
5939 Ptr, TLInfo, nullptr,
5940 [&](Value *V) { removeAllAssertingVHReferences(V); });
5941
5942 return true;
5943}
5944
5945/// If there are any memory operands, use OptimizeMemoryInst to sink their
5946/// address computing into the block when possible / profitable.
5947bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
5948 bool MadeChange = false;
5949
5950 const TargetRegisterInfo *TRI =
5951 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
5952 TargetLowering::AsmOperandInfoVector TargetConstraints =
5953 TLI->ParseConstraints(*DL, TRI, *CS);
5954 unsigned ArgNo = 0;
5955 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5956 // Compute the constraint code and ConstraintType to use.
5957 TLI->ComputeConstraintToUse(OpInfo, SDValue());
5958
5959 // TODO: Also handle C_Address?
5960 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5961 OpInfo.isIndirect) {
5962 Value *OpVal = CS->getArgOperand(ArgNo++);
5963 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
5964 } else if (OpInfo.Type == InlineAsm::isInput)
5965 ArgNo++;
5966 }
5967
5968 return MadeChange;
5969}
5970
5971/// Check if all the uses of \p Val are equivalent (or free) zero or
5972/// sign extensions.
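/// Illustrative example (not from the original source): two uses
///   %b = zext i8 %a to i32
///   %c = zext i8 %a to i64
/// count as equivalent when the target reports zext i32 -> i64 as free,
/// whereas mixing a sext and a zext of %a does not.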
5973static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
5974 assert(!Val->use_empty() && "Input must have at least one use");
5975 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
5976 bool IsSExt = isa<SExtInst>(FirstUser);
5977 Type *ExtTy = FirstUser->getType();
5978 for (const User *U : Val->users()) {
5979 const Instruction *UI = cast<Instruction>(U);
5980 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
5981 return false;
5982 Type *CurTy = UI->getType();
5983 // Same input and output types: Same instruction after CSE.
5984 if (CurTy == ExtTy)
5985 continue;
5986
5987 // If IsSExt is true, we are in this situation:
5988 // a = Val
5989 // b = sext ty1 a to ty2
5990 // c = sext ty1 a to ty3
5991 // Assuming ty2 is shorter than ty3, this could be turned into:
5992 // a = Val
5993 // b = sext ty1 a to ty2
5994 // c = sext ty2 b to ty3
5995 // However, the last sext is not free.
5996 if (IsSExt)
5997 return false;
5998
5999 // This is a ZExt, maybe this is free to extend from one type to another.
6000 // In that case, we would not account for a different use.
6001 Type *NarrowTy;
6002 Type *LargeTy;
6003 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6004 CurTy->getScalarType()->getIntegerBitWidth()) {
6005 NarrowTy = CurTy;
6006 LargeTy = ExtTy;
6007 } else {
6008 NarrowTy = ExtTy;
6009 LargeTy = CurTy;
6010 }
6011
6012 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6013 return false;
6014 }
6015 // All uses are the same or can be derived from one another for free.
6016 return true;
6017}
6018
6019/// Try to speculatively promote extensions in \p Exts and continue
6020/// promoting through newly promoted operands recursively as far as doing so is
6021/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6022/// When some promotion happened, \p TPT contains the proper state to revert
6023/// them.
6024///
6025/// \return true if some promotion happened, false otherwise.
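/// Rough example (not part of the original comment): for
///   %s = sext i16 %t to i32
///   %z = sext i32 %s to i64
/// the outer ext can be combined with the inner one and moved further up;
/// a speculative promotion is kept only if the extensions it creates cost at
/// most one instruction more than the extension we started from (and the
/// promoted value stays legal for the target).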
6026bool CodeGenPrepare::tryToPromoteExts(
6027 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6028 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6029 unsigned CreatedInstsCost) {
6030 bool Promoted = false;
6031
6032 // Iterate over all the extensions to try to promote them.
6033 for (auto *I : Exts) {
6034 // Early check if we directly have ext(load).
6035 if (isa<LoadInst>(I->getOperand(0))) {
6036 ProfitablyMovedExts.push_back(I);
6037 continue;
6038 }
6039
6040 // Check whether or not we want to do any promotion. The reason we have
6041 // this check inside the for loop is to catch the case where an extension
6042 // is directly fed by a load because in such case the extension can be moved
6043 // up without any promotion on its operands.
6044 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6045 return false;
6046
6047 // Get the action to perform the promotion.
6048 TypePromotionHelper::Action TPH =
6049 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6050 // Check if we can promote.
6051 if (!TPH) {
6052 // Save the current extension as we cannot move up through its operand.
6053 ProfitablyMovedExts.push_back(I);
6054 continue;
6055 }
6056
6057 // Save the current state.
6058 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6059 TPT.getRestorationPoint();
6060 SmallVector<Instruction *, 4> NewExts;
6061 unsigned NewCreatedInstsCost = 0;
6062 unsigned ExtCost = !TLI->isExtFree(I);
6063 // Promote.
6064 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6065 &NewExts, nullptr, *TLI);
6066 assert(PromotedVal &&
6067 "TypePromotionHelper should have filtered out those cases");
6068
6069 // We would be able to merge only one extension into a load.
6070 // Therefore, if we have more than 1 new extension we heuristically
6071 // cut this search path, because it means we degrade the code quality.
6072 // With exactly 2, the transformation is neutral, because we will merge
6073 // one extension but leave one. However, we optimistically keep going,
6074 // because the new extension may be removed too. Also avoid replacing a
6075 // single free extension with multiple extensions, as this increases the
6076 // number of IR instructions while not providing any savings.
6077 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6078 // FIXME: It would be possible to propagate a negative value instead of
6079 // conservatively ceiling it to 0.
6080 TotalCreatedInstsCost =
6081 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6082 if (!StressExtLdPromotion &&
6083 (TotalCreatedInstsCost > 1 ||
6084 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6085 (ExtCost == 0 && NewExts.size() > 1))) {
6086 // This promotion is not profitable, rollback to the previous state, and
6087 // save the current extension in ProfitablyMovedExts as the latest
6088 // speculative promotion turned out to be unprofitable.
6089 TPT.rollback(LastKnownGood);
6090 ProfitablyMovedExts.push_back(I);
6091 continue;
6092 }
6093 // Continue promoting NewExts as far as doing so is profitable.
6094 SmallVector<Instruction *, 2> NewlyMovedExts;
6095 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6096 bool NewPromoted = false;
6097 for (auto *ExtInst : NewlyMovedExts) {
6098 Instruction *MovedExt = cast<Instruction>(ExtInst);
6099 Value *ExtOperand = MovedExt->getOperand(0);
6100 // If we have reached a load, we need this extra profitability check
6101 // as it could potentially be merged into an ext(load).
6102 if (isa<LoadInst>(ExtOperand) &&
6103 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6104 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6105 continue;
6106
6107 ProfitablyMovedExts.push_back(MovedExt);
6108 NewPromoted = true;
6109 }
6110
6111 // If none of speculative promotions for NewExts is profitable, rollback
6112 // and save the current extension (I) as the last profitable extension.
6113 if (!NewPromoted) {
6114 TPT.rollback(LastKnownGood);
6115 ProfitablyMovedExts.push_back(I);
6116 continue;
6117 }
6118 // The promotion is profitable.
6119 Promoted = true;
6120 }
6121 return Promoted;
6122}
6123
6124 /// Merge redundant sexts when one dominates the other.
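/// For illustration (a sketch; the value names are invented):
/// \code
///   %s1 = sext i32 %x to i64   ; dominates %s2
///   ...
///   %s2 = sext i32 %x to i64
/// \endcode
/// Uses of the dominated sext are rewritten to use the dominating one, and the
/// dominated instruction is removed.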
6125bool CodeGenPrepare::mergeSExts(Function &F) {
6126 bool Changed = false;
6127 for (auto &Entry : ValToSExtendedUses) {
6128 SExts &Insts = Entry.second;
6129 SExts CurPts;
6130 for (Instruction *Inst : Insts) {
6131 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6132 Inst->getOperand(0) != Entry.first)
6133 continue;
6134 bool inserted = false;
6135 for (auto &Pt : CurPts) {
6136 if (getDT(F).dominates(Inst, Pt)) {
6137 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6138 RemovedInsts.insert(Pt);
6139 Pt->removeFromParent();
6140 Pt = Inst;
6141 inserted = true;
6142 Changed = true;
6143 break;
6144 }
6145 if (!getDT(F).dominates(Pt, Inst))
6146 // Give up if we need to merge in a common dominator as the
6147 // experiments show it is not profitable.
6148 continue;
6149 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6150 RemovedInsts.insert(Inst);
6151 Inst->removeFromParent();
6152 inserted = true;
6153 Changed = true;
6154 break;
6155 }
6156 if (!inserted)
6157 CurPts.push_back(Inst);
6158 }
6159 }
6160 return Changed;
6161}
6162
6163// Splitting large data structures so that the GEPs accessing them can have
6164// smaller offsets so that they can be sunk to the same blocks as their users.
6165// For example, a large struct starting from %base is split into two parts
6166// where the second part starts from %new_base.
6167//
6168// Before:
6169// BB0:
6170// %base =
6171//
6172// BB1:
6173// %gep0 = gep %base, off0
6174// %gep1 = gep %base, off1
6175// %gep2 = gep %base, off2
6176//
6177// BB2:
6178// %load1 = load %gep0
6179// %load2 = load %gep1
6180// %load3 = load %gep2
6181//
6182// After:
6183// BB0:
6184// %base =
6185// %new_base = gep %base, off0
6186//
6187// BB1:
6188// %new_gep0 = %new_base
6189// %new_gep1 = gep %new_base, off1 - off0
6190// %new_gep2 = gep %new_base, off2 - off0
6191//
6192// BB2:
6193// %load1 = load i32, i32* %new_gep0
6194// %load2 = load i32, i32* %new_gep1
6195// %load3 = load i32, i32* %new_gep2
6196//
6197// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6198 // their offsets are small enough to fit into the addressing mode.
6199bool CodeGenPrepare::splitLargeGEPOffsets() {
6200 bool Changed = false;
6201 for (auto &Entry : LargeOffsetGEPMap) {
6202 Value *OldBase = Entry.first;
6203 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6204 &LargeOffsetGEPs = Entry.second;
6205 auto compareGEPOffset =
6206 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6207 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6208 if (LHS.first == RHS.first)
6209 return false;
6210 if (LHS.second != RHS.second)
6211 return LHS.second < RHS.second;
6212 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6213 };
6214 // Sorting all the GEPs of the same data structures based on the offsets.
6215 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6216 LargeOffsetGEPs.erase(
6217 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
6218 LargeOffsetGEPs.end());
6219 // Skip if all the GEPs have the same offsets.
6220 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6221 continue;
6222 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6223 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6224 Value *NewBaseGEP = nullptr;
6225
6226 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6227 GetElementPtrInst *GEP) {
6228 LLVMContext &Ctx = GEP->getContext();
6229 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6230 Type *I8PtrTy =
6231 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6232
6233 BasicBlock::iterator NewBaseInsertPt;
6234 BasicBlock *NewBaseInsertBB;
6235 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6236 // If the base of the struct is an instruction, the new base will be
6237 // inserted close to it.
6238 NewBaseInsertBB = BaseI->getParent();
6239 if (isa<PHINode>(BaseI))
6240 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6241 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6242 NewBaseInsertBB =
6243 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6244 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6245 } else
6246 NewBaseInsertPt = std::next(BaseI->getIterator());
6247 } else {
6248 // If the current base is an argument or global value, the new base
6249 // will be inserted to the entry block.
6250 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6251 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6252 }
6253 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6254 // Create a new base.
6255 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6256 NewBaseGEP = OldBase;
6257 if (NewBaseGEP->getType() != I8PtrTy)
6258 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6259 NewBaseGEP =
6260 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6261 NewGEPBases.insert(NewBaseGEP);
6262 return;
6263 };
6264
6265 // Check whether all the offsets can be encoded with the preferred common base.
6266 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6267 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6268 BaseOffset = PreferBase;
6269 // Create a new base if the offset of the BaseGEP can be decoded with one
6270 // instruction.
6271 createNewBase(BaseOffset, OldBase, BaseGEP);
6272 }
6273
6274 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6275 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6276 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6277 int64_t Offset = LargeOffsetGEP->second;
6278 if (Offset != BaseOffset) {
6279 TargetLowering::AddrMode AddrMode;
6280 AddrMode.HasBaseReg = true;
6281 AddrMode.BaseOffs = Offset - BaseOffset;
6282 // The result type of the GEP might not be the type of the memory
6283 // access.
6284 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6285 GEP->getResultElementType(),
6286 GEP->getAddressSpace())) {
6287 // We need to create a new base if the offset to the current base is
6288 // too large to fit into the addressing mode. So, a very large struct
6289 // may be split into several parts.
6290 BaseGEP = GEP;
6291 BaseOffset = Offset;
6292 NewBaseGEP = nullptr;
6293 }
6294 }
6295
6296 // Generate a new GEP to replace the current one.
6297 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6298
6299 if (!NewBaseGEP) {
6300 // Create a new base if we don't have one yet. Find the insertion
6301 // pointer for the new base first.
6302 createNewBase(BaseOffset, OldBase, GEP);
6303 }
6304
6305 IRBuilder<> Builder(GEP);
6306 Value *NewGEP = NewBaseGEP;
6307 if (Offset != BaseOffset) {
6308 // Calculate the new offset for the new GEP.
6309 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6310 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6311 }
6312 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6313 LargeOffsetGEPID.erase(GEP);
6314 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6315 GEP->eraseFromParent();
6316 Changed = true;
6317 }
6318 }
6319 return Changed;
6320}
6321
6322bool CodeGenPrepare::optimizePhiType(
6323 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6324 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6325 // We are looking for a collection of interconnected phi nodes that together
6326 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6327 // are of the same type. Convert the whole set of nodes to the type of the
6328 // bitcast.
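// Illustrative sketch (a made-up example, not taken from this file): a network
// such as
//   %l   = load i32, ptr %p
//   %phi = phi i32 [ %l, %bb0 ], [ %phi2, %bb1 ]
//   %bc  = bitcast i32 %phi to float
//   store float %bc, ptr %q
// may be rewritten as a phi of float, with a bitcast inserted after the load
// and the old bitcast removed, when the target prefers the converted type.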
6329 Type *PhiTy = I->getType();
6330 Type *ConvertTy = nullptr;
6331 if (Visited.count(I) ||
6332 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6333 return false;
6334
6335 SmallVector<Instruction *, 4> Worklist;
6336 Worklist.push_back(cast<Instruction>(I));
6337 SmallPtrSet<PHINode *, 4> PhiNodes;
6338 SmallPtrSet<ConstantData *, 4> Constants;
6339 PhiNodes.insert(I);
6340 Visited.insert(I);
6341 SmallPtrSet<Instruction *, 4> Defs;
6342 SmallPtrSet<Instruction *, 4> Uses;
6343 // This works by adding extra bitcasts between load/stores and removing
6344 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6345 // we can get into the situation where we remove a bitcast in one iteration
6346 // just to add it again in the next. We need to ensure that at least one
6347 // of the bitcasts we remove is anchored to something that will not change back.
6348 bool AnyAnchored = false;
6349
6350 while (!Worklist.empty()) {
6351 Instruction *II = Worklist.pop_back_val();
6352
6353 if (auto *Phi = dyn_cast<PHINode>(II)) {
6354 // Handle Defs, which might also be PHI's
6355 for (Value *V : Phi->incoming_values()) {
6356 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6357 if (!PhiNodes.count(OpPhi)) {
6358 if (!Visited.insert(OpPhi).second)
6359 return false;
6360 PhiNodes.insert(OpPhi);
6361 Worklist.push_back(OpPhi);
6362 }
6363 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6364 if (!OpLoad->isSimple())
6365 return false;
6366 if (Defs.insert(OpLoad).second)
6367 Worklist.push_back(OpLoad);
6368 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6369 if (Defs.insert(OpEx).second)
6370 Worklist.push_back(OpEx);
6371 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6372 if (!ConvertTy)
6373 ConvertTy = OpBC->getOperand(0)->getType();
6374 if (OpBC->getOperand(0)->getType() != ConvertTy)
6375 return false;
6376 if (Defs.insert(OpBC).second) {
6377 Worklist.push_back(OpBC);
6378 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6379 !isa<ExtractElementInst>(OpBC->getOperand(0));
6380 }
6381 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6382 Constants.insert(OpC);
6383 else
6384 return false;
6385 }
6386 }
6387
6388 // Handle uses which might also be phi's
6389 for (User *V : II->users()) {
6390 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6391 if (!PhiNodes.count(OpPhi)) {
6392 if (Visited.count(OpPhi))
6393 return false;
6394 PhiNodes.insert(OpPhi);
6395 Visited.insert(OpPhi);
6396 Worklist.push_back(OpPhi);
6397 }
6398 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6399 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6400 return false;
6401 Uses.insert(OpStore);
6402 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6403 if (!ConvertTy)
6404 ConvertTy = OpBC->getType();
6405 if (OpBC->getType() != ConvertTy)
6406 return false;
6407 Uses.insert(OpBC);
6408 AnyAnchored |=
6409 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6410 } else {
6411 return false;
6412 }
6413 }
6414 }
6415
6416 if (!ConvertTy || !AnyAnchored ||
6417 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6418 return false;
6419
6420 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6421 << *ConvertTy << "\n");
6422
6423 // Create all the new phi nodes of the new type, and bitcast any loads to the
6424 // correct type.
6425 ValueToValueMap ValMap;
6426 for (ConstantData *C : Constants)
6427 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6428 for (Instruction *D : Defs) {
6429 if (isa<BitCastInst>(D)) {
6430 ValMap[D] = D->getOperand(0);
6431 DeletedInstrs.insert(D);
6432 } else {
6433 ValMap[D] =
6434 new BitCastInst(D, ConvertTy, D->getName() + ".bc", D->getNextNode());
6435 }
6436 }
6437 for (PHINode *Phi : PhiNodes)
6438 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6439 Phi->getName() + ".tc", Phi->getIterator());
6440 // Pipe together all the PhiNodes.
6441 for (PHINode *Phi : PhiNodes) {
6442 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6443 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6444 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6445 Phi->getIncomingBlock(i));
6446 Visited.insert(NewPhi);
6447 }
6448 // And finally pipe up the stores and bitcasts
6449 for (Instruction *U : Uses) {
6450 if (isa<BitCastInst>(U)) {
6451 DeletedInstrs.insert(U);
6452 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6453 } else {
6454 U->setOperand(0,
6455 new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc", U));
6456 }
6457 }
6458
6459 // Save the removed phis to be deleted later.
6460 for (PHINode *Phi : PhiNodes)
6461 DeletedInstrs.insert(Phi);
6462 return true;
6463}
6464
6465bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6466 if (!OptimizePhiTypes)
6467 return false;
6468
6469 bool Changed = false;
6470 SmallPtrSet<PHINode *, 4> Visited;
6471 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6472
6473 // Attempt to optimize all the phis in the functions to the correct type.
6474 for (auto &BB : F)
6475 for (auto &Phi : BB.phis())
6476 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6477
6478 // Remove any old phi's that have been converted.
6479 for (auto *I : DeletedInstrs) {
6480 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6481 I->eraseFromParent();
6482 }
6483
6484 return Changed;
6485}
6486
6487/// Return true, if an ext(load) can be formed from an extension in
6488/// \p MovedExts.
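/// For illustration (a sketch with invented names): if \p MovedExts contains
/// \code
///   %z = zext i32 %ld to i64   ; with %ld = load i32, ptr %p
/// \endcode
/// and the target reports the corresponding extending load as legal, \p LI is
/// set to %ld, \p Inst to %z, and the function returns true.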
6489bool CodeGenPrepare::canFormExtLd(
6490 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6491 Instruction *&Inst, bool HasPromoted) {
6492 for (auto *MovedExtInst : MovedExts) {
6493 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6494 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6495 Inst = MovedExtInst;
6496 break;
6497 }
6498 }
6499 if (!LI)
6500 return false;
6501
6502 // If they're already in the same block, there's nothing to do.
6503 // Make the cheap checks first if we did not promote.
6504 // If we promoted, we need to check if it is indeed profitable.
6505 if (!HasPromoted && LI->getParent() == Inst->getParent())
6506 return false;
6507
6508 return TLI->isExtLoad(LI, Inst, *DL);
6509}
6510
6511/// Move a zext or sext fed by a load into the same basic block as the load,
6512/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6513/// extend into the load.
6514///
6515/// E.g.,
6516/// \code
6517/// %ld = load i32* %addr
6518/// %add = add nuw i32 %ld, 4
6519/// %zext = zext i32 %add to i64
6520 /// \endcode
6521/// =>
6522/// \code
6523/// %ld = load i32* %addr
6524/// %zext = zext i32 %ld to i64
6525/// %add = add nuw i64 %zext, 4
6526 /// \endcode
6527 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
6528 /// allows us to match zext(load i32*) to i64.
6529///
6530/// Also, try to promote the computations used to obtain a sign extended
6531/// value used into memory accesses.
6532/// E.g.,
6533/// \code
6534/// a = add nsw i32 b, 3
6535/// d = sext i32 a to i64
6536/// e = getelementptr ..., i64 d
6537/// \endcode
6538/// =>
6539/// \code
6540/// f = sext i32 b to i64
6541/// a = add nsw i64 f, 3
6542/// e = getelementptr ..., i64 a
6543/// \endcode
6544///
6545/// \p Inst[in/out] the extension may be modified during the process if some
6546/// promotions apply.
6547bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6548 bool AllowPromotionWithoutCommonHeader = false;
6549 /// See if it is an interesting sext operation for the address type
6550 /// promotion before trying to promote it, e.g., the ones with the right
6551 /// type and used in memory accesses.
6552 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6553 *Inst, AllowPromotionWithoutCommonHeader);
6554 TypePromotionTransaction TPT(RemovedInsts);
6555 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6556 TPT.getRestorationPoint();
6557 SmallVector<Instruction *, 1> Exts;
6558 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6559 Exts.push_back(Inst);
6560
6561 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6562
6563 // Look for a load being extended.
6564 LoadInst *LI = nullptr;
6565 Instruction *ExtFedByLoad;
6566
6567 // Try to promote a chain of computation if it allows to form an extended
6568 // load.
6569 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6570 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
6571 TPT.commit();
6572 // Move the extend into the same block as the load.
6573 ExtFedByLoad->moveAfter(LI);
6574 ++NumExtsMoved;
6575 Inst = ExtFedByLoad;
6576 return true;
6577 }
6578
6579 // Continue promoting SExts if known as considerable depending on targets.
6580 if (ATPConsiderable &&
6581 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6582 HasPromoted, TPT, SpeculativelyMovedExts))
6583 return true;
6584
6585 TPT.rollback(LastKnownGood);
6586 return false;
6587}
6588
6589// Perform address type promotion if doing so is profitable.
6590// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6591// instructions that sign extended the same initial value. However, if
6592// AllowPromotionWithoutCommonHeader == true, we expect promoting the
6593// extension is just profitable.
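// For illustration (a sketch; the names are invented): two chains
//   %a1 = add nsw i32 %b, 1   ;   %s1 = sext i32 %a1 to i64
//   %a2 = add nsw i32 %b, 2   ;   %s2 = sext i32 %a2 to i64
// share the header %b. Once the second chain is seen, both are promoted so
// that each ends in a sext of %b, which mergeSExts() can later deduplicate.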
6594bool CodeGenPrepare::performAddressTypePromotion(
6595 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
6596 bool HasPromoted, TypePromotionTransaction &TPT,
6597 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6598 bool Promoted = false;
6599 SmallPtrSet<Instruction *, 1> UnhandledExts;
6600 bool AllSeenFirst = true;
6601 for (auto *I : SpeculativelyMovedExts) {
6602 Value *HeadOfChain = I->getOperand(0);
6603 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
6604 SeenChainsForSExt.find(HeadOfChain);
6605 // If there is an unhandled SExt which has the same header, try to promote
6606 // it as well.
6607 if (AlreadySeen != SeenChainsForSExt.end()) {
6608 if (AlreadySeen->second != nullptr)
6609 UnhandledExts.insert(AlreadySeen->second);
6610 AllSeenFirst = false;
6611 }
6612 }
6613
6614 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6615 SpeculativelyMovedExts.size() == 1)) {
6616 TPT.commit();
6617 if (HasPromoted)
6618 Promoted = true;
6619 for (auto *I : SpeculativelyMovedExts) {
6620 Value *HeadOfChain = I->getOperand(0);
6621 SeenChainsForSExt[HeadOfChain] = nullptr;
6622 ValToSExtendedUses[HeadOfChain].push_back(I);
6623 }
6624 // Update Inst as promotion happened.
6625 Inst = SpeculativelyMovedExts.pop_back_val();
6626 } else {
6627 // This is the first chain visited from the header, keep the current chain
6628 // as unhandled. Defer to promote this until we encounter another SExt
6629 // chain derived from the same header.
6630 for (auto *I : SpeculativelyMovedExts) {
6631 Value *HeadOfChain = I->getOperand(0);
6632 SeenChainsForSExt[HeadOfChain] = Inst;
6633 }
6634 return false;
6635 }
6636
6637 if (!AllSeenFirst && !UnhandledExts.empty())
6638 for (auto *VisitedSExt : UnhandledExts) {
6639 if (RemovedInsts.count(VisitedSExt))
6640 continue;
6641 TypePromotionTransaction TPT(RemovedInsts);
6642 SmallVector<Instruction *, 1> Exts;
6643 SmallVector<Instruction *, 2> Chains;
6644 Exts.push_back(VisitedSExt);
6645 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6646 TPT.commit();
6647 if (HasPromoted)
6648 Promoted = true;
6649 for (auto *I : Chains) {
6650 Value *HeadOfChain = I->getOperand(0);
6651 // Mark this as handled.
6652 SeenChainsForSExt[HeadOfChain] = nullptr;
6653 ValToSExtendedUses[HeadOfChain].push_back(I);
6654 }
6655 }
6656 return Promoted;
6657}
6658
6659bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
6660 BasicBlock *DefBB = I->getParent();
6661
6662 // If the result of a {s|z}ext and its source are both live out, rewrite all
6663 // other uses of the source with result of extension.
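// For illustration (a sketch; the names are invented): with
//   DefBB:   %src = ... ; %ext = zext i32 %src to i64
//   OtherBB: use(%src)
// the out-of-block use of %src is rewritten to use a trunc of %ext, so only
// the extended value has to stay live across the block boundary.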
6664 Value *Src = I->getOperand(0);
6665 if (Src->hasOneUse())
6666 return false;
6667
6668 // Only do this xform if truncating is free.
6669 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
6670 return false;
6671
6672 // Only safe to perform the optimization if the source is also defined in
6673 // this block.
6674 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
6675 return false;
6676
6677 bool DefIsLiveOut = false;
6678 for (User *U : I->users()) {
6679 Instruction *UI = cast<Instruction>(U);
6680
6681 // Figure out which BB this ext is used in.
6682 BasicBlock *UserBB = UI->getParent();
6683 if (UserBB == DefBB)
6684 continue;
6685 DefIsLiveOut = true;
6686 break;
6687 }
6688 if (!DefIsLiveOut)
6689 return false;
6690
6691 // Make sure none of the uses are PHI nodes.
6692 for (User *U : Src->users()) {
6693 Instruction *UI = cast<Instruction>(U);
6694 BasicBlock *UserBB = UI->getParent();
6695 if (UserBB == DefBB)
6696 continue;
6697 // Be conservative. We don't want this xform to end up introducing
6698 // reloads just before load / store instructions.
6699 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
6700 return false;
6701 }
6702
6703 // InsertedTruncs - Only insert one trunc in each block once.
6704 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
6705
6706 bool MadeChange = false;
6707 for (Use &U : Src->uses()) {
6708 Instruction *User = cast<Instruction>(U.getUser());
6709
6710 // Figure out which BB this ext is used in.
6711 BasicBlock *UserBB = User->getParent();
6712 if (UserBB == DefBB)
6713 continue;
6714
6715 // Both src and def are live in this block. Rewrite the use.
6716 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
6717
6718 if (!InsertedTrunc) {
6719 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
6720 assert(InsertPt != UserBB->end());
6721 InsertedTrunc = new TruncInst(I, Src->getType(), "");
6722 InsertedTrunc->insertBefore(*UserBB, InsertPt);
6723 InsertedInsts.insert(InsertedTrunc);
6724 }
6725
6726 // Replace a use of the {s|z}ext source with a use of the result.
6727 U = InsertedTrunc;
6728 ++NumExtUses;
6729 MadeChange = true;
6730 }
6731
6732 return MadeChange;
6733}
6734
6735// Find loads whose uses only use some of the loaded value's bits. Add an "and"
6736// just after the load if the target can fold this into one extload instruction,
6737// with the hope of eliminating some of the other later "and" instructions using
6738// the loaded value. "and"s that are made trivially redundant by the insertion
6739// of the new "and" are removed by this function, while others (e.g. those whose
6740// path from the load goes through a phi) are left for isel to potentially
6741// remove.
6742//
6743// For example:
6744//
6745// b0:
6746// x = load i32
6747// ...
6748// b1:
6749// y = and x, 0xff
6750// z = use y
6751//
6752// becomes:
6753//
6754// b0:
6755// x = load i32
6756// x' = and x, 0xff
6757// ...
6758// b1:
6759// z = use x'
6760//
6761// whereas:
6762//
6763// b0:
6764// x1 = load i32
6765// ...
6766// b1:
6767// x2 = load i32
6768// ...
6769// b2:
6770// x = phi x1, x2
6771// y = and x, 0xff
6772//
6773// becomes (after a call to optimizeLoadExt for each load):
6774//
6775// b0:
6776// x1 = load i32
6777// x1' = and x1, 0xff
6778// ...
6779// b1:
6780// x2 = load i32
6781// x2' = and x2, 0xff
6782// ...
6783// b2:
6784// x = phi x1', x2'
6785// y = and x, 0xff
6786bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
6787 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
6788 return false;
6789
6790 // Skip loads we've already transformed.
6791 if (Load->hasOneUse() &&
6792 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
6793 return false;
6794
6795 // Look at all uses of Load, looking through phis, to determine how many bits
6796 // of the loaded value are needed.
6797 SmallVector<Instruction *, 8> WorkList;
6798 SmallPtrSet<Instruction *, 16> Visited;
6799 SmallVector<Instruction *, 8> AndsToMaybeRemove;
6800 for (auto *U : Load->users())
6801 WorkList.push_back(cast<Instruction>(U));
6802
6803 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
6804 unsigned BitWidth = LoadResultVT.getSizeInBits();
6805 // If the BitWidth is 0, do not try to optimize the type
6806 if (BitWidth == 0)
6807 return false;
6808
6809 APInt DemandBits(BitWidth, 0);
6810 APInt WidestAndBits(BitWidth, 0);
6811
6812 while (!WorkList.empty()) {
6813 Instruction *I = WorkList.pop_back_val();
6814
6815 // Break use-def graph loops.
6816 if (!Visited.insert(I).second)
6817 continue;
6818
6819 // For a PHI node, push all of its users.
6820 if (auto *Phi = dyn_cast<PHINode>(I)) {
6821 for (auto *U : Phi->users())
6822 WorkList.push_back(cast<Instruction>(U));
6823 continue;
6824 }
6825
6826 switch (I->getOpcode()) {
6827 case Instruction::And: {
6828 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
6829 if (!AndC)
6830 return false;
6831 APInt AndBits = AndC->getValue();
6832 DemandBits |= AndBits;
6833 // Keep track of the widest and mask we see.
6834 if (AndBits.ugt(WidestAndBits))
6835 WidestAndBits = AndBits;
6836 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
6837 AndsToMaybeRemove.push_back(I);
6838 break;
6839 }
6840
6841 case Instruction::Shl: {
6842 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
6843 if (!ShlC)
6844 return false;
6845 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
6846 DemandBits.setLowBits(BitWidth - ShiftAmt);
6847 break;
6848 }
6849
6850 case Instruction::Trunc: {
6851 EVT TruncVT = TLI->getValueType(*DL, I->getType());
6852 unsigned TruncBitWidth = TruncVT.getSizeInBits();
6853 DemandBits.setLowBits(TruncBitWidth);
6854 break;
6855 }
6856
6857 default:
6858 return false;
6859 }
6860 }
6861
6862 uint32_t ActiveBits = DemandBits.getActiveBits();
6863 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
6864 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
6865 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
6866 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
6867 // followed by an AND.
6868 // TODO: Look into removing this restriction by fixing backends to either
6869 // return false for isLoadExtLegal for i1 or have them select this pattern to
6870 // a single instruction.
6871 //
6872 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
6873 // mask, since these are the only ands that will be removed by isel.
6874 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
6875 WidestAndBits != DemandBits)
6876 return false;
6877
6878 LLVMContext &Ctx = Load->getType()->getContext();
6879 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
6880 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
6881
6882 // Reject cases that won't be matched as extloads.
6883 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
6884 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
6885 return false;
6886
6887 IRBuilder<> Builder(Load->getNextNonDebugInstruction());
6888 auto *NewAnd = cast<Instruction>(
6889 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
6890 // Mark this instruction as "inserted by CGP", so that other
6891 // optimizations don't touch it.
6892 InsertedInsts.insert(NewAnd);
6893
6894 // Replace all uses of load with new and (except for the use of load in the
6895 // new and itself).
6896 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
6897 NewAnd->setOperand(0, Load);
6898
6899 // Remove any and instructions that are now redundant.
6900 for (auto *And : AndsToMaybeRemove)
6901 // Check that the and mask is the same as the one we decided to put on the
6902 // new and.
6903 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
6904 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
6905 if (&*CurInstIterator == And)
6906 CurInstIterator = std::next(And->getIterator());
6907 And->eraseFromParent();
6908 ++NumAndUses;
6909 }
6910
6911 ++NumAndsAdded;
6912 return true;
6913}
6914
6915/// Check if V (an operand of a select instruction) is an expensive instruction
6916/// that is only used once.
6917 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
6918 auto *I = dyn_cast<Instruction>(V);
6919 // If it's safe to speculatively execute, then it should not have side
6920 // effects; therefore, it's safe to sink and possibly *not* execute.
6921 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
6923}
6924
6925/// Returns true if a SelectInst should be turned into an explicit branch.
6926 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
6927 const TargetLowering *TLI,
6928 SelectInst *SI) {
6929 // If even a predictable select is cheap, then a branch can't be cheaper.
6930 if (!TLI->isPredictableSelectExpensive())
6931 return false;
6932
6933 // FIXME: This should use the same heuristics as IfConversion to determine
6934 // whether a select is better represented as a branch.
6935
6936 // If metadata tells us that the select condition is obviously predictable,
6937 // then we want to replace the select with a branch.
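// For example (a rough sketch): branch weights of 2000:1 give a taken
// probability of roughly 0.9995, which would typically exceed the target's
// predictable-branch threshold and favour expanding the select into a branch.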
6938 uint64_t TrueWeight, FalseWeight;
6939 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
6940 uint64_t Max = std::max(TrueWeight, FalseWeight);
6941 uint64_t Sum = TrueWeight + FalseWeight;
6942 if (Sum != 0) {
6943 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
6944 if (Probability > TTI->getPredictableBranchThreshold())
6945 return true;
6946 }
6947 }
6948
6949 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
6950
6951 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
6952 // comparison condition. If the compare has more than one use, there's
6953 // probably another cmov or setcc around, so it's not worth emitting a branch.
6954 if (!Cmp || !Cmp->hasOneUse())
6955 return false;
6956
6957 // If either operand of the select is expensive and only needed on one side
6958 // of the select, we should form a branch.
6959 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
6960 sinkSelectOperand(TTI, SI->getFalseValue()))
6961 return true;
6962
6963 return false;
6964}
6965
6966/// If \p isTrue is true, return the true value of \p SI, otherwise return
6967/// false value of \p SI. If the true/false value of \p SI is defined by any
6968/// select instructions in \p Selects, look through the defining select
6969/// instruction until the true/false value is not defined in \p Selects.
6970static Value *
6971 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
6972 const SmallPtrSet<const Instruction *, 2> &Selects) {
6973 Value *V = nullptr;
6974
6975 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
6976 DefSI = dyn_cast<SelectInst>(V)) {
6977 assert(DefSI->getCondition() == SI->getCondition() &&
6978 "The condition of DefSI does not match with SI");
6979 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
6980 }
6981
6982 assert(V && "Failed to get select true/false value");
6983 return V;
6984}
6985
6986bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
6987 assert(Shift->isShift() && "Expected a shift");
6988
6989 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
6990 // general vector shifts, and (3) the shift amount is a select-of-splatted
6991 // values, hoist the shifts before the select:
6992 // shift Op0, (select Cond, TVal, FVal) -->
6993 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
6994 //
6995 // This is inverting a generic IR transform when we know that the cost of a
6996 // general vector shift is more than the cost of 2 shift-by-scalars.
6997 // We can't do this effectively in SDAG because we may not be able to
6998 // determine if the select operands are splats from within a basic block.
6999 Type *Ty = Shift->getType();
7000 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7001 return false;
7002 Value *Cond, *TVal, *FVal;
7003 if (!match(Shift->getOperand(1),
7004 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7005 return false;
7006 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7007 return false;
7008
7009 IRBuilder<> Builder(Shift);
7010 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7011 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7012 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7013 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7014 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7015 Shift->eraseFromParent();
7016 return true;
7017}
7018
7019bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7020 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7021 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7022 "Expected a funnel shift");
7023
7024 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7025 // than general vector shifts, and (3) the shift amount is select-of-splatted
7026 // values, hoist the funnel shifts before the select:
7027 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7028 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7029 //
7030 // This is inverting a generic IR transform when we know that the cost of a
7031 // general vector shift is more than the cost of 2 shift-by-scalars.
7032 // We can't do this effectively in SDAG because we may not be able to
7033 // determine if the select operands are splats from within a basic block.
7034 Type *Ty = Fsh->getType();
7035 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7036 return false;
7037 Value *Cond, *TVal, *FVal;
7038 if (!match(Fsh->getOperand(2),
7039 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7040 return false;
7041 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7042 return false;
7043
7044 IRBuilder<> Builder(Fsh);
7045 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7046 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7047 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7048 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7049 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7050 Fsh->eraseFromParent();
7051 return true;
7052}
7053
7054/// If we have a SelectInst that will likely profit from branch prediction,
7055/// turn it into a branch.
7056bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7057 if (DisableSelectToBranch)
7058 return false;
7059
7060 // If the SelectOptimize pass is enabled, selects have already been optimized.
7061 if (!getCGPassBuilderOption().DisableSelectOptimize)
7062 return false;
7063
7064 // Find all consecutive select instructions that share the same condition.
7065 SmallVector<SelectInst *, 2> ASI;
7066 ASI.push_back(SI);
7067 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7068 It != SI->getParent()->end(); ++It) {
7069 SelectInst *I = dyn_cast<SelectInst>(&*It);
7070 if (I && SI->getCondition() == I->getCondition()) {
7071 ASI.push_back(I);
7072 } else {
7073 break;
7074 }
7075 }
7076
7077 SelectInst *LastSI = ASI.back();
7078 // Increment the current iterator to skip all the rest of select instructions
7079 // because they will be either "not lowered" or "all lowered" to branch.
7080 CurInstIterator = std::next(LastSI->getIterator());
7081 // Examine debug-info attached to the consecutive select instructions. They
7082 // won't be individually optimised by optimizeInst, so we need to perform
7083 // DPValue maintenance here instead.
7084 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7085 fixupDPValuesOnInst(*SI);
7086
7087 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7088
7089 // Can we convert the 'select' to CF ?
7090 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7091 return false;
7092
7093 TargetLowering::SelectSupportKind SelectKind;
7094 if (SI->getType()->isVectorTy())
7095 SelectKind = TargetLowering::ScalarCondVectorVal;
7096 else
7097 SelectKind = TargetLowering::ScalarValSelect;
7098
7099 if (TLI->isSelectSupported(SelectKind) &&
7100 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
7101 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7102 return false;
7103
7104 // The DominatorTree needs to be rebuilt by any consumers after this
7105 // transformation. We simply reset here rather than setting the ModifiedDT
7106 // flag to avoid restarting the function walk in runOnFunction for each
7107 // select optimized.
7108 DT.reset();
7109
7110 // Transform a sequence like this:
7111 // start:
7112 // %cmp = cmp uge i32 %a, %b
7113 // %sel = select i1 %cmp, i32 %c, i32 %d
7114 //
7115 // Into:
7116 // start:
7117 // %cmp = cmp uge i32 %a, %b
7118 // %cmp.frozen = freeze %cmp
7119 // br i1 %cmp.frozen, label %select.true, label %select.false
7120 // select.true:
7121 // br label %select.end
7122 // select.false:
7123 // br label %select.end
7124 // select.end:
7125 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7126 //
7127 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7128 // In addition, we may sink instructions that produce %c or %d from
7129 // the entry block into the destination(s) of the new branch.
7130 // If the true or false blocks do not contain a sunken instruction, that
7131 // block and its branch may be optimized away. In that case, one side of the
7132 // first branch will point directly to select.end, and the corresponding PHI
7133 // predecessor block will be the start block.
7134
7135 // Collect values that go on the true side and the values that go on the false
7136 // side.
7137 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7138 for (SelectInst *SI : ASI) {
7139 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7140 TrueInstrs.push_back(cast<Instruction>(V));
7141 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7142 FalseInstrs.push_back(cast<Instruction>(V));
7143 }
7144
7145 // Split the select block, according to how many (if any) values go on each
7146 // side.
7147 BasicBlock *StartBlock = SI->getParent();
7148 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7149 // We should split before any debug-info.
7150 SplitPt.setHeadBit(true);
7151
7152 IRBuilder<> IB(SI);
7153 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7154
7155 BasicBlock *TrueBlock = nullptr;
7156 BasicBlock *FalseBlock = nullptr;
7157 BasicBlock *EndBlock = nullptr;
7158 BranchInst *TrueBranch = nullptr;
7159 BranchInst *FalseBranch = nullptr;
7160 if (TrueInstrs.size() == 0) {
7161 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7162 CondFr, SplitPt, false, nullptr, nullptr, LI));
7163 FalseBlock = FalseBranch->getParent();
7164 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7165 } else if (FalseInstrs.size() == 0) {
7166 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7167 CondFr, SplitPt, false, nullptr, nullptr, LI));
7168 TrueBlock = TrueBranch->getParent();
7169 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7170 } else {
7171 Instruction *ThenTerm = nullptr;
7172 Instruction *ElseTerm = nullptr;
7173 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7174 nullptr, nullptr, LI);
7175 TrueBranch = cast<BranchInst>(ThenTerm);
7176 FalseBranch = cast<BranchInst>(ElseTerm);
7177 TrueBlock = TrueBranch->getParent();
7178 FalseBlock = FalseBranch->getParent();
7179 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7180 }
7181
7182 EndBlock->setName("select.end");
7183 if (TrueBlock)
7184 TrueBlock->setName("select.true.sink");
7185 if (FalseBlock)
7186 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7187 : "select.false.sink");
7188
7189 if (IsHugeFunc) {
7190 if (TrueBlock)
7191 FreshBBs.insert(TrueBlock);
7192 if (FalseBlock)
7193 FreshBBs.insert(FalseBlock);
7194 FreshBBs.insert(EndBlock);
7195 }
7196
7197 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7198
7199 static const unsigned MD[] = {
7200 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7201 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7202 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7203
7204 // Sink expensive instructions into the conditional blocks to avoid executing
7205 // them speculatively.
7206 for (Instruction *I : TrueInstrs)
7207 I->moveBefore(TrueBranch);
7208 for (Instruction *I : FalseInstrs)
7209 I->moveBefore(FalseBranch);
7210
7211 // If we did not create a new block for one of the 'true' or 'false' paths
7212 // of the condition, it means that side of the branch goes to the end block
7213 // directly and the path originates from the start block from the point of
7214 // view of the new PHI.
7215 if (TrueBlock == nullptr)
7216 TrueBlock = StartBlock;
7217 else if (FalseBlock == nullptr)
7218 FalseBlock = StartBlock;
7219
7220 SmallPtrSet<const Instruction *, 2> INS;
7221 INS.insert(ASI.begin(), ASI.end());
7222 // Use reverse iterator because later select may use the value of the
7223 // earlier select, and we need to propagate value through earlier select
7224 // to get the PHI operand.
7225 for (SelectInst *SI : llvm::reverse(ASI)) {
7226 // The select itself is replaced with a PHI Node.
7227 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7228 PN->insertBefore(EndBlock->begin());
7229 PN->takeName(SI);
7230 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7231 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7232 PN->setDebugLoc(SI->getDebugLoc());
7233
7234 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7235 SI->eraseFromParent();
7236 INS.erase(SI);
7237 ++NumSelectsExpanded;
7238 }
7239
7240 // Instruct OptimizeBlock to skip to the next block.
7241 CurInstIterator = StartBlock->end();
7242 return true;
7243}
7244
7245/// Some targets only accept certain types for splat inputs. For example a VDUP
7246 /// in MVE takes a GPR (integer) register, and instructions that incorporate
7247 /// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
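/// For illustration (a sketch; the actual scalar type is chosen by the
/// shouldConvertSplatType() hook):
/// \code
///   %i = insertelement <4 x float> poison, float %val, i64 0
///   %s = shufflevector <4 x float> %i, <4 x float> poison, zeroinitializer
/// \endcode
/// may become a bitcast of %val to i32, an i32 splat, and a bitcast of the
/// splat back to <4 x float>.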
7248bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7249 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7250 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7251 m_Undef(), m_ZeroMask())))
7252 return false;
7253 Type *NewType = TLI->shouldConvertSplatType(SVI);
7254 if (!NewType)
7255 return false;
7256
7257 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7258 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7259 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7260 "Expected a type of the same size!");
7261 auto *NewVecType =
7262 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7263
7264 // Create a bitcast (shuffle (insert (bitcast(..))))
7265 IRBuilder<> Builder(SVI->getContext());
7266 Builder.SetInsertPoint(SVI);
7267 Value *BC1 = Builder.CreateBitCast(
7268 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7269 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7270 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7271
7272 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7273 RecursivelyDeleteTriviallyDeadInstructions(
7274 SVI, TLInfo, nullptr,
7275 [&](Value *V) { removeAllAssertingVHReferences(V); });
7276
7277 // Also hoist the bitcast up to its operand if they are not in the same
7278 // block.
7279 if (auto *BCI = dyn_cast<Instruction>(BC1))
7280 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7281 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7282 !Op->isTerminator() && !Op->isEHPad())
7283 BCI->moveAfter(Op);
7284
7285 return true;
7286}
7287
7288bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7289 // If the operands of I can be folded into a target instruction together with
7290 // I, duplicate and sink them.
7291 SmallVector<Use *, 4> OpsToSink;
7292 if (!TLI->shouldSinkOperands(I, OpsToSink))
7293 return false;
7294
7295 // OpsToSink can contain multiple uses in a use chain (e.g.
7296 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7297 // uses must come first, so we process the ops in reverse order so as to not
7298 // create invalid IR.
7299 BasicBlock *TargetBB = I->getParent();
7300 bool Changed = false;
7301 SmallVector<Use *, 4> ToReplace;
7302 Instruction *InsertPoint = I;
7303 DenseMap<const Instruction *, unsigned long> InstOrdering;
7304 unsigned long InstNumber = 0;
7305 for (const auto &I : *TargetBB)
7306 InstOrdering[&I] = InstNumber++;
7307
7308 for (Use *U : reverse(OpsToSink)) {
7309 auto *UI = cast<Instruction>(U->get());
7310 if (isa<PHINode>(UI))
7311 continue;
7312 if (UI->getParent() == TargetBB) {
7313 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7314 InsertPoint = UI;
7315 continue;
7316 }
7317 ToReplace.push_back(U);
7318 }
7319
7320 SetVector<Instruction *> MaybeDead;
7321 DenseMap<Instruction *, Instruction *> NewInstructions;
7322 for (Use *U : ToReplace) {
7323 auto *UI = cast<Instruction>(U->get());
7324 Instruction *NI = UI->clone();
7325
7326 if (IsHugeFunc) {
7327 // Now we clone an instruction, its operands' defs may sink to this BB
7328 // now. So we put the operands defs' BBs into FreshBBs to do optimization.
7329 for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
7330 auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
7331 if (!OpDef)
7332 continue;
7333 FreshBBs.insert(OpDef->getParent());
7334 }
7335 }
7336
7337 NewInstructions[UI] = NI;
7338 MaybeDead.insert(UI);
7339 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7340 NI->insertBefore(InsertPoint);
7341 InsertPoint = NI;
7342 InsertedInsts.insert(NI);
7343
7344 // Update the use for the new instruction, making sure that we update the
7345 // sunk instruction uses, if it is part of a chain that has already been
7346 // sunk.
7347 Instruction *OldI = cast<Instruction>(U->getUser());
7348 if (NewInstructions.count(OldI))
7349 NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
7350 else
7351 U->set(NI);
7352 Changed = true;
7353 }
7354
7355 // Remove instructions that are dead after sinking.
7356 for (auto *I : MaybeDead) {
7357 if (!I->hasNUsesOrMore(1)) {
7358 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7359 I->eraseFromParent();
7360 }
7361 }
7362
7363 return Changed;
7364}
7365
7366bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7367 Value *Cond = SI->getCondition();
7368 Type *OldType = Cond->getType();
7369 LLVMContext &Context = Cond->getContext();
7370 EVT OldVT = TLI->getValueType(*DL, OldType);
7371 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7372 unsigned RegWidth = RegType.getSizeInBits();
7373
7374 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7375 return false;
7376
7377 // If the register width is greater than the type width, expand the condition
7378 // of the switch instruction and each case constant to the width of the
7379 // register. By widening the type of the switch condition, subsequent
7380 // comparisons (for case comparisons) will not need to be extended to the
7381 // preferred register width, so we will potentially eliminate N-1 extends,
7382 // where N is the number of cases in the switch.
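// For example (a sketch), assuming a preferred 32-bit condition register:
//   switch i8 %c, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// becomes
//   %w = zext i8 %c to i32
//   switch i32 %w, label %def [ i32 1, label %a
//                               i32 2, label %b ]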
7383 auto *NewType = Type::getIntNTy(Context, RegWidth);
7384
7385 // Extend the switch condition and case constants using the target preferred
7386 // extend unless the switch condition is a function argument with an extend
7387 // attribute. In that case, we can avoid an unnecessary mask/extension by
7388 // matching the argument extension instead.
7389 Instruction::CastOps ExtType = Instruction::ZExt;
7390 // Some targets prefer SExt over ZExt.
7391 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7392 ExtType = Instruction::SExt;
7393
7394 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7395 if (Arg->hasSExtAttr())
7396 ExtType = Instruction::SExt;
7397 if (Arg->hasZExtAttr())
7398 ExtType = Instruction::ZExt;
7399 }
7400
7401 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7402 ExtInst->insertBefore(SI);
7403 ExtInst->setDebugLoc(SI->getDebugLoc());
7404 SI->setCondition(ExtInst);
7405 for (auto Case : SI->cases()) {
7406 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7407 APInt WideConst = (ExtType == Instruction::ZExt)
7408 ? NarrowConst.zext(RegWidth)
7409 : NarrowConst.sext(RegWidth);
7410 Case.setValue(ConstantInt::get(Context, WideConst));
7411 }
7412
7413 return true;
7414}
7415
7416bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7417 // The SCCP optimization tends to produce code like this:
7418 // switch(x) { case 42: phi(42, ...) }
7419 // Materializing the constant for the phi-argument needs instructions; So we
7420 // change the code to:
7421 // switch(x) { case 42: phi(x, ...) }
7422
7423 Value *Condition = SI->getCondition();
7424 // Avoid endless loop in degenerate case.
7425 if (isa<ConstantInt>(*Condition))
7426 return false;
7427
7428 bool Changed = false;
7429 BasicBlock *SwitchBB = SI->getParent();
7430 Type *ConditionType = Condition->getType();
7431
7432 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7433 ConstantInt *CaseValue = Case.getCaseValue();
7434 BasicBlock *CaseBB = Case.getCaseSuccessor();
7435 // Set to true if we previously checked that `CaseBB` is only reached by
7436 // a single case from this switch.
7437 bool CheckedForSinglePred = false;
7438 for (PHINode &PHI : CaseBB->phis()) {
7439 Type *PHIType = PHI.getType();
7440 // If ZExt is free then we can also catch patterns like this:
7441 // switch((i32)x) { case 42: phi((i64)42, ...); }
7442 // and replace `(i64)42` with `zext i32 %x to i64`.
7443 bool TryZExt =
7444 PHIType->isIntegerTy() &&
7445 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7446 TLI->isZExtFree(ConditionType, PHIType);
7447 if (PHIType == ConditionType || TryZExt) {
7448 // Set to true to skip this case because of multiple preds.
7449 bool SkipCase = false;
7450 Value *Replacement = nullptr;
7451 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7452 Value *PHIValue = PHI.getIncomingValue(I);
7453 if (PHIValue != CaseValue) {
7454 if (!TryZExt)
7455 continue;
7456 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7457 if (!PHIValueInt ||
7458 PHIValueInt->getValue() !=
7459 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7460 continue;
7461 }
7462 if (PHI.getIncomingBlock(I) != SwitchBB)
7463 continue;
7464 // We cannot optimize if there are multiple case labels jumping to
7465 // this block. This check may get expensive when there are many
7466 // case labels so we test for it last.
7467 if (!CheckedForSinglePred) {
7468 CheckedForSinglePred = true;
7469 if (SI->findCaseDest(CaseBB) == nullptr) {
7470 SkipCase = true;
7471 break;
7472 }
7473 }
7474
7475 if (Replacement == nullptr) {
7476 if (PHIValue == CaseValue) {
7477 Replacement = Condition;
7478 } else {
7479 IRBuilder<> Builder(SI);
7480 Replacement = Builder.CreateZExt(Condition, PHIType);
7481 }
7482 }
7483 PHI.setIncomingValue(I, Replacement);
7484 Changed = true;
7485 }
7486 if (SkipCase)
7487 break;
7488 }
7489 }
7490 }
7491 return Changed;
7492}
7493
7494bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7495 bool Changed = optimizeSwitchType(SI);
7496 Changed |= optimizeSwitchPhiConstants(SI);
7497 return Changed;
7498}
7499
7500namespace {
7501
7502/// Helper class to promote a scalar operation to a vector one.
7503/// This class is used to move downward extractelement transition.
7504/// E.g.,
7505/// a = vector_op <2 x i32>
7506/// b = extractelement <2 x i32> a, i32 0
7507/// c = scalar_op b
7508/// store c
7509///
7510/// =>
7511/// a = vector_op <2 x i32>
7512/// c = vector_op a (equivalent to scalar_op on the related lane)
7513/// * d = extractelement <2 x i32> c, i32 0
7514/// * store d
7515 /// Assuming both extractelement and store can be combined, we get rid of the
7516/// transition.
7517class VectorPromoteHelper {
7518 /// DataLayout associated with the current module.
7519 const DataLayout &DL;
7520
7521 /// Used to perform some checks on the legality of vector operations.
7522 const TargetLowering &TLI;
7523
7524 /// Used to estimate the cost of the promoted chain.
7525 const TargetTransformInfo &TTI;
7526
7527 /// The transition being moved downwards.
7528 Instruction *Transition;
7529
7530 /// The sequence of instructions to be promoted.
7531 SmallVector<Instruction *, 4> InstsToBePromoted;
7532
7533 /// Cost of combining a store and an extract.
7534 unsigned StoreExtractCombineCost;
7535
7536 /// Instruction that will be combined with the transition.
7537 Instruction *CombineInst = nullptr;
7538
7539 /// The instruction that represents the current end of the transition.
7540 /// Since we are faking the promotion until we reach the end of the chain
7541 /// of computation, we need a way to get the current end of the transition.
7542 Instruction *getEndOfTransition() const {
7543 if (InstsToBePromoted.empty())
7544 return Transition;
7545 return InstsToBePromoted.back();
7546 }
7547
7548 /// Return the index of the original value in the transition.
7549 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7550 /// c, is at index 0.
7551 unsigned getTransitionOriginalValueIdx() const {
7552 assert(isa<ExtractElementInst>(Transition) &&
7553 "Other kind of transitions are not supported yet");
7554 return 0;
7555 }
7556
7557 /// Return the index of the index in the transition.
7558 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7559 /// is at index 1.
7560 unsigned getTransitionIdx() const {
7561 assert(isa<ExtractElementInst>(Transition) &&
7562 "Other kind of transitions are not supported yet");
7563 return 1;
7564 }
7565
7566 /// Get the type of the transition.
7567 /// This is the type of the original value.
7568 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7569 /// transition is <2 x i32>.
7570 Type *getTransitionType() const {
7571 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7572 }
7573
7574 /// Promote \p ToBePromoted by moving \p Def downward through.
7575 /// I.e., we have the following sequence:
7576 /// Def = Transition <ty1> a to <ty2>
7577 /// b = ToBePromoted <ty2> Def, ...
7578 /// =>
7579 /// b = ToBePromoted <ty1> a, ...
7580 /// Def = Transition <ty1> ToBePromoted to <ty2>
7581 void promoteImpl(Instruction *ToBePromoted);
7582
7583 /// Check whether or not it is profitable to promote all the
7584 /// instructions enqueued to be promoted.
7585 bool isProfitableToPromote() {
7586 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7587 unsigned Index = isa<ConstantInt>(ValIdx)
7588 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7589 : -1;
7590 Type *PromotedType = getTransitionType();
7591
7592 StoreInst *ST = cast<StoreInst>(CombineInst);
7593 unsigned AS = ST->getPointerAddressSpace();
7594 // Check if this store is supported.
7595 if (!TLI.allowsMisalignedMemoryAccesses(
7596 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
7597 ST->getAlign())) {
7598 // If this is not supported, there is no way we can combine
7599 // the extract with the store.
7600 return false;
7601 }
7602
7603 // The scalar chain of computation has to pay for the transition
7604 // scalar to vector.
7605 // The vector chain has to account for the combining cost.
7606 enum TargetTransformInfo::TargetCostKind CostKind =
7607 TargetTransformInfo::TCK_RecipThroughput;
7608 InstructionCost ScalarCost =
7609 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
7610 InstructionCost VectorCost = StoreExtractCombineCost;
7611 for (const auto &Inst : InstsToBePromoted) {
7612 // Compute the cost.
7613 // By construction, all instructions being promoted are arithmetic ones.
7614 // Moreover, one argument is a constant that can be viewed as a splat
7615 // constant.
7616 Value *Arg0 = Inst->getOperand(0);
7617 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7618 isa<ConstantFP>(Arg0);
7619 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
7620 if (IsArg0Constant)
7621 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7622 else
7623 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7624
7625 ScalarCost += TTI.getArithmeticInstrCost(
7626 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
7627 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7628 CostKind, Arg0Info, Arg1Info);
7629 }
7630 LLVM_DEBUG(
7631 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
7632 << ScalarCost << "\nVector: " << VectorCost << '\n');
7633 return ScalarCost > VectorCost;
7634 }
7635
7636 /// Generate a constant vector with \p Val with the same
7637 /// number of elements as the transition.
7638 /// \p UseSplat defines whether or not \p Val should be replicated
7639 /// across the whole vector.
7640 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7641 /// otherwise we generate a vector with as many undef as possible:
7642 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
7643 /// used at the index of the extract.
7644 Value *getConstantVector(Constant *Val, bool UseSplat) const {
7645 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7646 if (!UseSplat) {
7647 // If we cannot determine where the constant must be, we have to
7648 // use a splat constant.
7649 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
7650 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
7651 ExtractIdx = CstVal->getSExtValue();
7652 else
7653 UseSplat = true;
7654 }
7655
7656 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
7657 if (UseSplat)
7658 return ConstantVector::getSplat(EC, Val);
7659
7660 if (!EC.isScalable()) {
7661 SmallVector<Constant *, 4> ConstVec;
7662 UndefValue *UndefVal = UndefValue::get(Val->getType());
7663 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
7664 if (Idx == ExtractIdx)
7665 ConstVec.push_back(Val);
7666 else
7667 ConstVec.push_back(UndefVal);
7668 }
7669 return ConstantVector::get(ConstVec);
7670 } else
7671 llvm_unreachable(
7672 "Generate scalable vector for non-splat is unimplemented");
7673 }
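// Example of the two modes above (values assumed): for the transition
// "extractelement <2 x i32> %c, i32 1" and Val == i32 7,
//   UseSplat == true  -> <i32 7, i32 7>
//   UseSplat == false -> <i32 undef, i32 7>   (Val only at the extract index)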
7674
7675 /// Check if promoting to a vector type an operand at \p OperandIdx
7676 /// in \p Use can trigger undefined behavior.
7677 static bool canCauseUndefinedBehavior(const Instruction *Use,
7678 unsigned OperandIdx) {
7679 // It is not safe to introduce undef when the operand is on
7680 // the right hand side of a division-like instruction.
7681 if (OperandIdx != 1)
7682 return false;
7683 switch (Use->getOpcode()) {
7684 default:
7685 return false;
7686 case Instruction::SDiv:
7687 case Instruction::UDiv:
7688 case Instruction::SRem:
7689 case Instruction::URem:
7690 return true;
7691 case Instruction::FDiv:
7692 case Instruction::FRem:
7693 return !Use->hasNoNaNs();
7694 }
7695 llvm_unreachable(nullptr);
7696 }
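// Example (assumed IR): for "%r = udiv i32 %x, 7" the divisor is operand
// index 1, so padding its promoted constant with undef lanes could introduce a
// divide-by-undef; getConstantVector is therefore asked for the splat
// <i32 7, i32 7> rather than <i32 undef, i32 7> in that case.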
7697
7698public:
7699 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
7700 const TargetTransformInfo &TTI, Instruction *Transition,
7701 unsigned CombineCost)
7702 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
7703 StoreExtractCombineCost(CombineCost) {
7704 assert(Transition && "Do not know how to promote null");
7705 }
7706
7707 /// Check if we can promote \p ToBePromoted to \p Type.
7708 bool canPromote(const Instruction *ToBePromoted) const {
7709 // We could support CastInst too.
7710 return isa<BinaryOperator>(ToBePromoted);
7711 }
7712
7713 /// Check if it is profitable to promote \p ToBePromoted
7714 /// by moving the transition downward through it.
7715 bool shouldPromote(const Instruction *ToBePromoted) const {
7716 // Promote only if all the operands can be statically expanded.
7717 // Indeed, we do not want to introduce any new kind of transitions.
7718 for (const Use &U : ToBePromoted->operands()) {
7719 const Value *Val = U.get();
7720 if (Val == getEndOfTransition()) {
7721 // If the use is a division and the transition is on the rhs,
7722 // we cannot promote the operation, otherwise we may create a
7723 // division by zero.
7724 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
7725 return false;
7726 continue;
7727 }
7728 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
7729 !isa<ConstantFP>(Val))
7730 return false;
7731 }
7732 // Check that the resulting operation is legal.
7733 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
7734 if (!ISDOpcode)
7735 return false;
7736 return StressStoreExtract ||
7737 TLI.isOperationLegalOrCustom(
7738 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
7739 }
7740
7741 /// Check whether or not \p Use can be combined
7742 /// with the transition.
7743 /// I.e., is it possible to do Use(Transition) => AnotherUse?
7744 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
7745
7746 /// Record \p ToBePromoted as part of the chain to be promoted.
7747 void enqueueForPromotion(Instruction *ToBePromoted) {
7748 InstsToBePromoted.push_back(ToBePromoted);
7749 }
7750
7751 /// Set the instruction that will be combined with the transition.
7752 void recordCombineInstruction(Instruction *ToBeCombined) {
7753 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
7754 CombineInst = ToBeCombined;
7755 }
7756
7757 /// Promote all the instructions enqueued for promotion if it is
7758 /// profitable.
7759 /// \return True if the promotion happened, false otherwise.
7760 bool promote() {
7761 // Check if there is something to promote.
7762 // Right now, if we do not have anything to combine with,
7763 // we assume the promotion is not profitable.
7764 if (InstsToBePromoted.empty() || !CombineInst)
7765 return false;
7766
7767 // Check cost.
7768 if (!StressStoreExtract && !isProfitableToPromote())
7769 return false;
7770
7771 // Promote.
7772 for (auto &ToBePromoted : InstsToBePromoted)
7773 promoteImpl(ToBePromoted);
7774 InstsToBePromoted.clear();
7775 return true;
7776 }
7777};
7778
7779} // end anonymous namespace
7780
7781void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
7782 // At this point, we know that all the operands of ToBePromoted but Def
7783 // can be statically promoted.
7784 // For Def, we need to use its parameter in ToBePromoted:
7785 // b = ToBePromoted ty1 a
7786 // Def = Transition ty1 b to ty2
7787 // Move the transition down.
7788 // 1. Replace all uses of the promoted operation by the transition.
7789 // = ... b => = ... Def.
7790 assert(ToBePromoted->getType() == Transition->getType() &&
7791 "The type of the result of the transition does not match "
7792 "the final type");
7793 ToBePromoted->replaceAllUsesWith(Transition);
7794 // 2. Update the type of the uses.
7795 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
7796 Type *TransitionTy = getTransitionType();
7797 ToBePromoted->mutateType(TransitionTy);
7798 // 3. Update all the operands of the promoted operation with promoted
7799 // operands.
7800 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
7801 for (Use &U : ToBePromoted->operands()) {
7802 Value *Val = U.get();
7803 Value *NewVal = nullptr;
7804 if (Val == Transition)
7805 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
7806 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
7807 isa<ConstantFP>(Val)) {
7808 // Use a splat constant if it is not safe to use undef.
7809 NewVal = getConstantVector(
7810 cast<Constant>(Val),
7811 isa<UndefValue>(Val) ||
7812 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
7813 } else
7814 llvm_unreachable("Did you modify shouldPromote and forget to update "
7815 "this?");
7816 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
7817 }
7818 Transition->moveAfter(ToBePromoted);
7819 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
7820}
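// Putting the pieces together, a minimal sketch of the transformation on
// assumed IR (names invented for illustration):
//   %ext = extractelement <2 x i32> %v, i32 1        ; the transition
//   %add = add i32 %ext, 1                           ; ToBePromoted
//   store i32 %add, ptr %p                           ; CombineInst
// becomes, after promoteImpl moves the transition below the add:
//   %add = add <2 x i32> %v, <i32 undef, i32 1>      ; splat if undef is unsafe
//   %ext = extractelement <2 x i32> %add, i32 1
//   store i32 %ext, ptr %p
// so a target that can do store(extractelement) in one instruction no longer
// pays for a separate scalar add.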
7821
7822/// Some targets can do store(extractelement) with one instruction.
7823/// Try to push the extractelement towards the stores when the target
7824/// has this feature and this is profitable.
7825bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
7826 unsigned CombineCost = std::numeric_limits<unsigned>::max();
7827 if (DisableStoreExtract ||
7828 (!StressStoreExtract &&
7829 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
7830 Inst->getOperand(1), CombineCost)))
7831 return false;
7832
7833 // At this point we know that Inst is a vector to scalar transition.
7834 // Try to move it down the def-use chain, until:
7835 // - We can combine the transition with its single use
7836 // => we got rid of the transition.
7837 // - We escape the current basic block
7838 // => we would need to check that we are moving it to a cheaper place and
7839 // we do not do that for now.
7840 BasicBlock *Parent = Inst->getParent();
7841 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
7842 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
7843 // If the transition has more than one use, assume this is not going to be
7844 // beneficial.
7845 while (Inst->hasOneUse()) {
7846 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
7847 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
7848
7849 if (ToBePromoted->getParent() != Parent) {
7850 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
7851 << ToBePromoted->getParent()->getName()
7852 << ") than the transition (" << Parent->getName()
7853 << ").\n");
7854 return false;
7855 }
7856
7857 if (VPH.canCombine(ToBePromoted)) {
7858 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
7859 << "will be combined with: " << *ToBePromoted << '\n');
7860 VPH.recordCombineInstruction(ToBePromoted);
7861 bool Changed = VPH.promote();
7862 NumStoreExtractExposed += Changed;
7863 return Changed;
7864 }
7865
7866 LLVM_DEBUG(dbgs() << "Try promoting.\n");
7867 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
7868 return false;
7869
7870 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
7871
7872 VPH.enqueueForPromotion(ToBePromoted);
7873 Inst = ToBePromoted;
7874 }
7875 return false;
7876}
7877
7878/// For the instruction sequence of store below, F and I values
7879/// are bundled together as an i64 value before being stored into memory.
7880/// Sometimes it is more efficient to generate separate stores for F and I,
7881/// which can remove the bitwise instructions or sink them to colder places.
7882///
7883/// (store (or (zext (bitcast F to i32) to i64),
7884/// (shl (zext I to i64), 32)), addr) -->
7885/// (store F, addr) and (store I, addr+4)
7886///
7887 /// Similarly, splitting for other merged stores can also be beneficial, like:
7888/// For pair of {i32, i32}, i64 store --> two i32 stores.
7889/// For pair of {i32, i16}, i64 store --> two i32 stores.
7890/// For pair of {i16, i16}, i32 store --> two i16 stores.
7891/// For pair of {i16, i8}, i32 store --> two i16 stores.
7892/// For pair of {i8, i8}, i16 store --> two i8 stores.
7893///
7894/// We allow each target to determine specifically which kind of splitting is
7895/// supported.
7896///
7897 /// The store patterns are commonly seen in the simple code snippet below
7898 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
7899/// void goo(const std::pair<int, float> &);
7900/// hoo() {
7901/// ...
7902/// goo(std::make_pair(tmp, ftmp));
7903/// ...
7904/// }
7905///
7906 /// Although we already have similar splitting in DAG Combine, we duplicate
7907 /// it in CodeGenPrepare to catch the case in which the pattern spans
7908 /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
7909 /// during code expansion.
7910 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
7911 const TargetLowering &TLI) {
7912 // Handle simple but common cases only.
7913 Type *StoreType = SI.getValueOperand()->getType();
7914
7915 // The code below assumes shifting a value by <number of bits>,
7916 // whereas scalable vectors would have to be shifted by
7917 // <2log(vscale) + number of bits> in order to store the
7918 // low/high parts. Bailing out for now.
7919 if (StoreType->isScalableTy())
7920 return false;
7921
7922 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
7923 DL.getTypeSizeInBits(StoreType) == 0)
7924 return false;
7925
7926 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
7927 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
7928 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
7929 return false;
7930
7931 // Don't split the store if it is volatile.
7932 if (SI.isVolatile())
7933 return false;
7934
7935 // Match the following patterns:
7936 // (store (or (zext LValue to i64),
7937 // (shl (zext HValue to i64), 32)), HalfValBitSize)
7938 // or
7939 // (store (or (shl (zext HValue to i64), 32),
7940 // (zext LValue to i64)), HalfValBitSize)
7941 // Expect both operands of OR and the first operand of SHL have only
7942 // one use.
7943 Value *LValue, *HValue;
7944 if (!match(SI.getValueOperand(),
7945 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
7946 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
7947 m_SpecificInt(HalfValBitSize))))))
7948 return false;
7949
7950 // Check that LValue and HValue are integers no wider than HalfValBitSize.
7951 if (!LValue->getType()->isIntegerTy() ||
7952 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
7953 !HValue->getType()->isIntegerTy() ||
7954 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
7955 return false;
7956
7957 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
7958 // as the input of target query.
7959 auto *LBC = dyn_cast<BitCastInst>(LValue);
7960 auto *HBC = dyn_cast<BitCastInst>(HValue);
7961 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
7962 : EVT::getEVT(LValue->getType());
7963 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
7964 : EVT::getEVT(HValue->getType());
7965 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
7966 return false;
7967
7968 // Start to split store.
7969 IRBuilder<> Builder(SI.getContext());
7970 Builder.SetInsertPoint(&SI);
7971
7972 // If LValue/HValue is a bitcast in another BB, create a new one in the current
7973 // BB so it may be merged with the split stores by the DAG combiner.
7974 if (LBC && LBC->getParent() != SI.getParent())
7975 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
7976 if (HBC && HBC->getParent() != SI.getParent())
7977 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
7978
7979 bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
7980 auto CreateSplitStore = [&](Value *V, bool Upper) {
7981 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
7982 Value *Addr = SI.getPointerOperand();
7983 Align Alignment = SI.getAlign();
7984 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
7985 if (IsOffsetStore) {
7986 Addr = Builder.CreateGEP(
7987 SplitStoreType, Addr,
7988 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
7989
7990 // When splitting the store in half, naturally one half will retain the
7991 // alignment of the original wider store, regardless of whether it was
7992 // over-aligned or not, while the other will require adjustment.
7993 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
7994 }
7995 Builder.CreateAlignedStore(V, Addr, Alignment);
7996 };
7997
7998 CreateSplitStore(LValue, false);
7999 CreateSplitStore(HValue, true);
8000
8001 // Delete the old store.
8002 SI.eraseFromParent();
8003 return true;
8004}
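// Worked example of the split above on an assumed little-endian target with
// HalfValBitSize == 32 (names invented for illustration):
//   %z = zext i32 %lo to i64
//   %h = zext i32 %hi to i64
//   %s = shl i64 %h, 32
//   %o = or i64 %z, %s
//   store i64 %o, ptr %p
// becomes
//   store i32 %lo, ptr %p
//   %p4 = getelementptr i32, ptr %p, i32 1
//   store i32 %hi, ptr %p4
// On a big-endian target the roles are swapped: the high half lands at %p and
// the low half at the offset address.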
8005
8006// Return true if the GEP has two operands, the first operand is of a sequential
8007// type, and the second operand is a constant.
8008 static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8009 gep_type_iterator I = gep_type_begin(*GEP);
8010 return GEP->getNumOperands() == 2 && I.isSequential() &&
8011 isa<ConstantInt>(GEP->getOperand(1));
8012}
8013
8014// Try unmerging GEPs to reduce liveness interference (register pressure) across
8015// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8016// reducing liveness interference across those edges benefits global register
8017// allocation. Currently handles only certain cases.
8018//
8019// For example, unmerge %GEPI and %UGEPI as below.
8020//
8021// ---------- BEFORE ----------
8022// SrcBlock:
8023// ...
8024// %GEPIOp = ...
8025// ...
8026// %GEPI = gep %GEPIOp, Idx
8027// ...
8028// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8029// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8030 // (* %GEPIOp is alive on the indirectbr edges only because it's used by
8031// %UGEPI)
8032//
8033// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8034// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8035// ...
8036//
8037// DstBi:
8038// ...
8039// %UGEPI = gep %GEPIOp, UIdx
8040// ...
8041// ---------------------------
8042//
8043// ---------- AFTER ----------
8044// SrcBlock:
8045// ... (same as above)
8046// (* %GEPI is still alive on the indirectbr edges)
8047// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8048// unmerging)
8049// ...
8050//
8051// DstBi:
8052// ...
8053// %UGEPI = gep %GEPI, (UIdx-Idx)
8054// ...
8055// ---------------------------
8056//
8057// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8058// no longer alive on them.
8059//
8060 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8061 // of GEPs in the first place in InstCombiner::visitGetElementPtrInst(), so as
8062 // not to disable further simplifications and optimizations as a result of GEP
8063// merging.
8064//
8065// Note this unmerging may increase the length of the data flow critical path
8066// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8067// between the register pressure and the length of data-flow critical
8068// path. Restricting this to the uncommon IndirectBr case would minimize the
8069// impact of potentially longer critical path, if any, and the impact on compile
8070// time.
8071 static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8072 const TargetTransformInfo *TTI) {
8073 BasicBlock *SrcBlock = GEPI->getParent();
8074 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8075 // (non-IndirectBr) cases exit early here.
8076 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8077 return false;
8078 // Check that GEPI is a simple gep with a single constant index.
8079 if (!GEPSequentialConstIndexed(GEPI))
8080 return false;
8081 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8082 // Check that GEPI is a cheap one.
8083 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8084 TargetTransformInfo::TCK_SizeAndLatency) >
8085 TargetTransformInfo::TCC_Basic)
8086 return false;
8087 Value *GEPIOp = GEPI->getOperand(0);
8088 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8089 if (!isa<Instruction>(GEPIOp))
8090 return false;
8091 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8092 if (GEPIOpI->getParent() != SrcBlock)
8093 return false;
8094 // Check that GEP is used outside the block, meaning it's alive on the
8095 // IndirectBr edge(s).
8096 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8097 if (auto *I = dyn_cast<Instruction>(Usr)) {
8098 if (I->getParent() != SrcBlock) {
8099 return true;
8100 }
8101 }
8102 return false;
8103 }))
8104 return false;
8105 // The second elements of the GEP chains to be unmerged.
8106 std::vector<GetElementPtrInst *> UGEPIs;
8107 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8108 // on IndirectBr edges.
8109 for (User *Usr : GEPIOp->users()) {
8110 if (Usr == GEPI)
8111 continue;
8112 // Check if Usr is an Instruction. If not, give up.
8113 if (!isa<Instruction>(Usr))
8114 return false;
8115 auto *UI = cast<Instruction>(Usr);
8116 // If Usr is in the same block as GEPIOp, that is fine; skip it.
8117 if (UI->getParent() == SrcBlock)
8118 continue;
8119 // Check if Usr is a GEP. If not, give up.
8120 if (!isa<GetElementPtrInst>(Usr))
8121 return false;
8122 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8123 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8124 // the pointer operand to it. If so, record it in the vector. If not, give
8125 // up.
8126 if (!GEPSequentialConstIndexed(UGEPI))
8127 return false;
8128 if (UGEPI->getOperand(0) != GEPIOp)
8129 return false;
8130 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8131 return false;
8132 if (GEPIIdx->getType() !=
8133 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8134 return false;
8135 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8136 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8137 TargetTransformInfo::TCK_SizeAndLatency) >
8138 TargetTransformInfo::TCC_Basic)
8139 return false;
8140 UGEPIs.push_back(UGEPI);
8141 }
8142 if (UGEPIs.size() == 0)
8143 return false;
8144 // Check the materializing cost of (Uidx-Idx).
8145 for (GetElementPtrInst *UGEPI : UGEPIs) {
8146 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8147 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8148 InstructionCost ImmCost = TTI->getIntImmCost(
8149 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8150 if (ImmCost > TargetTransformInfo::TCC_Basic)
8151 return false;
8152 }
8153 // Now unmerge between GEPI and UGEPIs.
8154 for (GetElementPtrInst *UGEPI : UGEPIs) {
8155 UGEPI->setOperand(0, GEPI);
8156 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8157 Constant *NewUGEPIIdx = ConstantInt::get(
8158 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8159 UGEPI->setOperand(1, NewUGEPIIdx);
8160 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8161 // inbounds to avoid UB.
8162 if (!GEPI->isInBounds()) {
8163 UGEPI->setIsInBounds(false);
8164 }
8165 }
8166 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8167 // alive on IndirectBr edges).
8168 assert(llvm::none_of(GEPIOp->users(),
8169 [&](User *Usr) {
8170 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8171 }) &&
8172 "GEPIOp is used outside SrcBlock");
8173 return true;
8174}
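// Numeric sketch of the rewrite above (indices assumed): with
//   %GEPI  = getelementptr i32, ptr %GEPIOp, i64 16
//   %UGEPI = getelementptr i32, ptr %GEPIOp, i64 20   ; in some DstBi
// the loop rewrites %UGEPI to
//   %UGEPI = getelementptr i32, ptr %GEPI, i64 4      ; 20 - 16
// so only %GEPI (and not %GEPIOp) has to stay live across the indirectbr edges.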
8175
8176 static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8177 SmallSet<BasicBlock *, 32> &FreshBBs,
8178 bool IsHugeFunc) {
8179 // Try and convert
8180 // %c = icmp ult %x, 8
8181 // br %c, bla, blb
8182 // %tc = lshr %x, 3
8183 // to
8184 // %tc = lshr %x, 3
8185 // %c = icmp eq %tc, 0
8186 // br %c, bla, blb
8187 // Creating the cmp to zero can be better for the backend, especially if the
8188 // lshr produces flags that can be used automatically.
8189 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8190 return false;
8191
8192 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8193 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8194 return false;
8195
8196 Value *X = Cmp->getOperand(0);
8197 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8198
8199 for (auto *U : X->users()) {
8200 Instruction *UI = dyn_cast<Instruction>(U);
8201 // A quick dominance check
8202 if (!UI ||
8203 (UI->getParent() != Branch->getParent() &&
8204 UI->getParent() != Branch->getSuccessor(0) &&
8205 UI->getParent() != Branch->getSuccessor(1)) ||
8206 (UI->getParent() != Branch->getParent() &&
8207 !UI->getParent()->getSinglePredecessor()))
8208 continue;
8209
8210 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8211 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8212 IRBuilder<> Builder(Branch);
8213 if (UI->getParent() != Branch->getParent())
8214 UI->moveBefore(Branch);
8215 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8216 ConstantInt::get(UI->getType(), 0));
8217 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8218 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8219 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8220 return true;
8221 }
8222 if (Cmp->isEquality() &&
8223 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8224 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
8225 IRBuilder<> Builder(Branch);
8226 if (UI->getParent() != Branch->getParent())
8227 UI->moveBefore(Branch);
8228 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8229 ConstantInt::get(UI->getType(), 0));
8230 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8231 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8232 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8233 return true;
8234 }
8235 }
8236 return false;
8237}
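// Example of the second (equality) pattern handled above, on assumed IR:
//   %c = icmp eq i32 %x, 5
//   ...
//   %s = sub i32 %x, 5
// is rewritten so the branch tests the subtraction result directly:
//   %s = sub i32 %x, 5
//   %c = icmp eq i32 %s, 0
// letting targets whose subtract already sets flags fold away the compare.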
8238
8239bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8240 bool AnyChange = false;
8241 AnyChange = fixupDPValuesOnInst(*I);
8242
8243 // Bail out if we inserted the instruction to prevent optimizations from
8244 // stepping on each other's toes.
8245 if (InsertedInsts.count(I))
8246 return AnyChange;
8247
8248 // TODO: Move into the switch on opcode below here.
8249 if (PHINode *P = dyn_cast<PHINode>(I)) {
8250 // It is possible for very late stage optimizations (such as SimplifyCFG)
8251 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8252 // trivial PHI, go ahead and zap it here.
8253 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8254 LargeOffsetGEPMap.erase(P);
8255 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8256 P->eraseFromParent();
8257 ++NumPHIsElim;
8258 return true;
8259 }
8260 return AnyChange;
8261 }
8262
8263 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8264 // If the source of the cast is a constant, then this should have
8265 // already been constant folded. The only reason NOT to constant fold
8266 // it is if something (e.g. LSR) was careful to place the constant
8267 // evaluation in a block other than the one that uses it (e.g. to hoist
8268 // the address of globals out of a loop). If this is the case, we don't
8269 // want to forward-subst the cast.
8270 if (isa<Constant>(CI->getOperand(0)))
8271 return AnyChange;
8272
8273 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8274 return true;
8275
8276 if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
8277 TLI->optimizeExtendOrTruncateConversion(
8278 I, LI->getLoopFor(I->getParent()), *TTI))
8279 return true;
8280
8281 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8282 /// Sink a zext or sext into its user blocks if the target type doesn't
8283 /// fit in one register
8284 if (TLI->getTypeAction(CI->getContext(),
8285 TLI->getValueType(*DL, CI->getType())) ==
8286 TargetLowering::TypeExpandInteger) {
8287 return SinkCast(CI);
8288 } else {
8289 if (TLI->optimizeExtendOrTruncateConversion(
8290 I, LI->getLoopFor(I->getParent()), *TTI))
8291 return true;
8292
8293 bool MadeChange = optimizeExt(I);
8294 return MadeChange | optimizeExtUses(I);
8295 }
8296 }
8297 return AnyChange;
8298 }
8299
8300 if (auto *Cmp = dyn_cast<CmpInst>(I))
8301 if (optimizeCmp(Cmp, ModifiedDT))
8302 return true;
8303
8304 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8305 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8306 bool Modified = optimizeLoadExt(LI);
8307 unsigned AS = LI->getPointerAddressSpace();
8308 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8309 return Modified;
8310 }
8311
8312 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8313 if (splitMergedValStore(*SI, *DL, *TLI))
8314 return true;
8315 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8316 unsigned AS = SI->getPointerAddressSpace();
8317 return optimizeMemoryInst(I, SI->getOperand(1),
8318 SI->getOperand(0)->getType(), AS);
8319 }
8320
8321 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8322 unsigned AS = RMW->getPointerAddressSpace();
8323 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8324 }
8325
8326 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8327 unsigned AS = CmpX->getPointerAddressSpace();
8328 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8329 CmpX->getCompareOperand()->getType(), AS);
8330 }
8331
8332 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8333
8334 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8335 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8336 return true;
8337
8338 // TODO: Move this into the switch on opcode - it handles shifts already.
8339 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8340 BinOp->getOpcode() == Instruction::LShr)) {
8341 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8342 if (CI && TLI->hasExtractBitsInsn())
8343 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8344 return true;
8345 }
8346
8347 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8348 if (GEPI->hasAllZeroIndices()) {
8349 /// The GEP operand must be a pointer, so must its result -> BitCast
8350 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8351 GEPI->getName(), GEPI);
8352 NC->setDebugLoc(GEPI->getDebugLoc());
8353 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8354 RecursivelyDeleteTriviallyDeadInstructions(
8355 GEPI, TLInfo, nullptr,
8356 [&](Value *V) { removeAllAssertingVHReferences(V); });
8357 ++NumGEPsElim;
8358 optimizeInst(NC, ModifiedDT);
8359 return true;
8360 }
8361 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI))
8362 return true;
8363 }
8364 }
8365
8366 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8367 // freeze(icmp a, const)) -> icmp (freeze a), const
8368 // This helps generate efficient conditional jumps.
8369 Instruction *CmpI = nullptr;
8370 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8371 CmpI = II;
8372 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8373 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8374
8375 if (CmpI && CmpI->hasOneUse()) {
8376 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8377 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8378 isa<ConstantPointerNull>(Op0);
8379 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8380 isa<ConstantPointerNull>(Op1);
8381 if (Const0 || Const1) {
8382 if (!Const0 || !Const1) {
8383 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI);
8384 F->takeName(FI);
8385 CmpI->setOperand(Const0 ? 1 : 0, F);
8386 }
8387 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8388 FI->eraseFromParent();
8389 return true;
8390 }
8391 }
8392 return AnyChange;
8393 }
8394
8395 if (tryToSinkFreeOperands(I))
8396 return true;
8397
8398 switch (I->getOpcode()) {
8399 case Instruction::Shl:
8400 case Instruction::LShr:
8401 case Instruction::AShr:
8402 return optimizeShiftInst(cast<BinaryOperator>(I));
8403 case Instruction::Call:
8404 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8405 case Instruction::Select:
8406 return optimizeSelectInst(cast<SelectInst>(I));
8407 case Instruction::ShuffleVector:
8408 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8409 case Instruction::Switch:
8410 return optimizeSwitchInst(cast<SwitchInst>(I));
8411 case Instruction::ExtractElement:
8412 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8413 case Instruction::Br:
8414 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8415 }
8416
8417 return AnyChange;
8418}
8419
8420/// Given an OR instruction, check to see if this is a bitreverse
8421/// idiom. If so, insert the new intrinsic and return true.
8422bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8423 if (!I.getType()->isIntegerTy() ||
8424 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8425 TLI->getValueType(*DL, I.getType(), true)))
8426 return false;
8427 
8428 SmallVector<Instruction *, 4> Insts;
8429 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8430 return false;
8431 Instruction *LastInst = Insts.back();
8432 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8433 RecursivelyDeleteTriviallyDeadInstructions(
8434 &I, TLInfo, nullptr,
8435 [&](Value *V) { removeAllAssertingVHReferences(V); });
8436 return true;
8437}
8438
8439// In this pass we look for GEP and cast instructions that are used
8440// across basic blocks and rewrite them to improve basic-block-at-a-time
8441// selection.
8442bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8443 SunkAddrs.clear();
8444 bool MadeChange = false;
8445
8446 do {
8447 CurInstIterator = BB.begin();
8448 ModifiedDT = ModifyDT::NotModifyDT;
8449 while (CurInstIterator != BB.end()) {
8450 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8451 if (ModifiedDT != ModifyDT::NotModifyDT) {
8452 // For huge functions we tend to quickly go through the inner optimization
8453 // opportunities in the BB. So we go back to the BB head to re-optimize
8454 // each instruction instead of going back to the function head.
8455 if (IsHugeFunc) {
8456 DT.reset();
8457 getDT(*BB.getParent());
8458 break;
8459 } else {
8460 return true;
8461 }
8462 }
8463 }
8464 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8465
8466 bool MadeBitReverse = true;
8467 while (MadeBitReverse) {
8468 MadeBitReverse = false;
8469 for (auto &I : reverse(BB)) {
8470 if (makeBitReverse(I)) {
8471 MadeBitReverse = MadeChange = true;
8472 break;
8473 }
8474 }
8475 }
8476 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8477
8478 return MadeChange;
8479}
8480
8481// Some CGP optimizations may move or alter what's computed in a block. Check
8482// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8483bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8484 assert(isa<DbgValueInst>(I));
8485 DbgValueInst &DVI = *cast<DbgValueInst>(I);
8486
8487 // Does this dbg.value refer to a sunk address calculation?
8488 bool AnyChange = false;
8489 SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8490 DVI.location_ops().end());
8491 for (Value *Location : LocationOps) {
8492 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8493 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8494 if (SunkAddr) {
8495 // Point dbg.value at locally computed address, which should give the best
8496 // opportunity to be accurately lowered. This update may change the type
8497 // of pointer being referred to; however this makes no difference to
8498 // debugging information, and we can't generate bitcasts that may affect
8499 // codegen.
8500 DVI.replaceVariableLocationOp(Location, SunkAddr);
8501 AnyChange = true;
8502 }
8503 }
8504 return AnyChange;
8505}
8506
8507bool CodeGenPrepare::fixupDPValuesOnInst(Instruction &I) {
8508 bool AnyChange = false;
8509 for (DPValue &DPV : filterDbgVars(I.getDbgRecordRange()))
8510 AnyChange |= fixupDPValue(DPV);
8511 return AnyChange;
8512}
8513
8514// FIXME: should updating debug-info really cause the "changed" flag to fire,
8515// which can cause a function to be reprocessed?
8516bool CodeGenPrepare::fixupDPValue(DPValue &DPV) {
8517 if (DPV.Type != DPValue::LocationType::Value &&
8518 DPV.Type != DPValue::LocationType::Assign)
8519 return false;
8520
8521 // Does this DPValue refer to a sunk address calculation?
8522 bool AnyChange = false;
8523 SmallDenseSet<Value *> LocationOps(DPV.location_ops().begin(),
8524 DPV.location_ops().end());
8525 for (Value *Location : LocationOps) {
8526 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8527 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8528 if (SunkAddr) {
8529 // Point dbg.value at locally computed address, which should give the best
8530 // opportunity to be accurately lowered. This update may change the type
8531 // of pointer being referred to; however this makes no difference to
8532 // debugging information, and we can't generate bitcasts that may affect
8533 // codegen.
8534 DPV.replaceVariableLocationOp(Location, SunkAddr);
8535 AnyChange = true;
8536 }
8537 }
8538 return AnyChange;
8539}
8540 
8541 static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
8542 DVI->removeFromParent();
8543 if (isa<PHINode>(VI))
8544 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
8545 else
8546 DVI->insertAfter(VI);
8547}
8548
8549static void DbgInserterHelper(DPValue *DPV, Instruction *VI) {
8550 DPV->removeFromParent();
8551 BasicBlock *VIBB = VI->getParent();
8552 if (isa<PHINode>(VI))
8553 VIBB->insertDbgRecordBefore(DPV, VIBB->getFirstInsertionPt());
8554 else
8555 VIBB->insertDbgRecordAfter(DPV, VI);
8556}
8557
8558// A llvm.dbg.value may be using a value before its definition, due to
8559// optimizations in this pass and others. Scan for such dbg.values, and rescue
8560// them by moving the dbg.value to immediately after the value definition.
8561// FIXME: Ideally this should never be necessary, and this has the potential
8562// to re-order dbg.value intrinsics.
8563bool CodeGenPrepare::placeDbgValues(Function &F) {
8564 bool MadeChange = false;
8565 DominatorTree DT(F);
8566
8567 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
8568 SmallVector<Instruction *, 4> VIs;
8569 for (Value *V : DbgItem->location_ops())
8570 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8571 VIs.push_back(VI);
8572
8573 // This item may depend on multiple instructions, complicating any
8574 // potential sink. This block takes the defensive approach, opting to
8575 // "undef" the item if it depends on more than one instruction and any of
8576 // them does not dominate it.
8577 for (Instruction *VI : VIs) {
8578 if (VI->isTerminator())
8579 continue;
8580
8581 // If VI is a phi in a block with an EHPad terminator, we can't insert
8582 // after it.
8583 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
8584 continue;
8585
8586 // If the defining instruction dominates the dbg.value, we do not need
8587 // to move the dbg.value.
8588 if (DT.dominates(VI, Position))
8589 continue;
8590
8591 // If we depend on multiple instructions and any of them doesn't
8592 // dominate this DVI, we probably can't salvage it: moving it to
8593 // after any of the instructions could cause us to lose the others.
8594 if (VIs.size() > 1) {
8595 LLVM_DEBUG(
8596 dbgs()
8597 << "Unable to find valid location for Debug Value, undefing:\n"
8598 << *DbgItem);
8599 DbgItem->setKillLocation();
8600 break;
8601 }
8602
8603 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
8604 << *DbgItem << ' ' << *VI);
8605 DbgInserterHelper(DbgItem, VI);
8606 MadeChange = true;
8607 ++NumDbgValueMoved;
8608 }
8609 };
8610
8611 for (BasicBlock &BB : F) {
8612 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
8613 // Process dbg.value intrinsics.
8614 DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
8615 if (DVI) {
8616 DbgProcessor(DVI, DVI);
8617 continue;
8618 }
8619
8620 // If this isn't a dbg.value, process any DPValue records attached to
8621 // this instruction.
8622 for (DPValue &DPV : llvm::make_early_inc_range(
8623 filterDbgVars(Insn.getDbgRecordRange()))) {
8624 if (DPV.Type != DPValue::LocationType::Value)
8625 continue;
8626 DbgProcessor(&DPV, &Insn);
8627 }
8628 }
8629 }
8630
8631 return MadeChange;
8632}
8633
8634// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
8635// probes can be chained dependencies of other regular DAG nodes and block DAG
8636// combine optimizations.
8637bool CodeGenPrepare::placePseudoProbes(Function &F) {
8638 bool MadeChange = false;
8639 for (auto &Block : F) {
8640 // Move the remaining probes to the beginning of the block.
8641 auto FirstInst = Block.getFirstInsertionPt();
8642 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
8643 ++FirstInst;
8644 BasicBlock::iterator I(FirstInst);
8645 I++;
8646 while (I != Block.end()) {
8647 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
8648 II->moveBefore(&*FirstInst);
8649 MadeChange = true;
8650 }
8651 }
8652 }
8653 return MadeChange;
8654}
8655
8656/// Scale down both weights to fit into uint32_t.
8657static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
8658 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
8659 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
8660 NewTrue = NewTrue / Scale;
8661 NewFalse = NewFalse / Scale;
8662}
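// Worked example (values assumed): NewTrue = 6,000,000,000 and
// NewFalse = 1,000,000,000 give NewMax = 6,000,000,000, so
// Scale = 6,000,000,000 / 4,294,967,295 + 1 = 2 and the weights become
// 3,000,000,000 and 500,000,000, both of which now fit in uint32_t while
// keeping roughly the same ratio.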
8663
8664/// Some targets prefer to split a conditional branch like:
8665/// \code
8666/// %0 = icmp ne i32 %a, 0
8667/// %1 = icmp ne i32 %b, 0
8668/// %or.cond = or i1 %0, %1
8669/// br i1 %or.cond, label %TrueBB, label %FalseBB
8670/// \endcode
8671/// into multiple branch instructions like:
8672/// \code
8673/// bb1:
8674/// %0 = icmp ne i32 %a, 0
8675/// br i1 %0, label %TrueBB, label %bb2
8676/// bb2:
8677/// %1 = icmp ne i32 %b, 0
8678/// br i1 %1, label %TrueBB, label %FalseBB
8679/// \endcode
8680/// This usually allows instruction selection to do even further optimizations
8681/// and combine the compare with the branch instruction. Currently this is
8682/// applied for targets which have "cheap" jump instructions.
8683///
8684/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
8685///
8686bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
8687 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
8688 return false;
8689
8690 bool MadeChange = false;
8691 for (auto &BB : F) {
8692 // Does this BB end with the following?
8693 // %cond1 = icmp|fcmp|binary instruction ...
8694 // %cond2 = icmp|fcmp|binary instruction ...
8695 // %cond.or = or|and i1 %cond1, %cond2
8696 // br i1 %cond.or, label %dest1, label %dest2
8697 Instruction *LogicOp;
8698 BasicBlock *TBB, *FBB;
8699 if (!match(BB.getTerminator(),
8700 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
8701 continue;
8702
8703 auto *Br1 = cast<BranchInst>(BB.getTerminator());
8704 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
8705 continue;
8706
8707 // The merging of mostly empty BB can cause a degenerate branch.
8708 if (TBB == FBB)
8709 continue;
8710
8711 unsigned Opc;
8712 Value *Cond1, *Cond2;
8713 if (match(LogicOp,
8714 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
8715 Opc = Instruction::And;
8716 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
8717 m_OneUse(m_Value(Cond2)))))
8718 Opc = Instruction::Or;
8719 else
8720 continue;
8721
8722 auto IsGoodCond = [](Value *Cond) {
8723 return match(
8724 Cond,
8725 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
8726 m_LogicalOr(m_Value(), m_Value()))));
8727 };
8728 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
8729 continue;
8730
8731 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
8732
8733 // Create a new BB.
8734 auto *TmpBB =
8735 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
8736 BB.getParent(), BB.getNextNode());
8737 if (IsHugeFunc)
8738 FreshBBs.insert(TmpBB);
8739
8740 // Update the original basic block: use the first condition directly in the
8741 // branch instruction and remove the no-longer-needed and/or instruction.
8742 Br1->setCondition(Cond1);
8743 LogicOp->eraseFromParent();
8744
8745 // Depending on the condition we have to either replace the true or the
8746 // false successor of the original branch instruction.
8747 if (Opc == Instruction::And)
8748 Br1->setSuccessor(0, TmpBB);
8749 else
8750 Br1->setSuccessor(1, TmpBB);
8751
8752 // Fill in the new basic block.
8753 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
8754 if (auto *I = dyn_cast<Instruction>(Cond2)) {
8755 I->removeFromParent();
8756 I->insertBefore(Br2);
8757 }
8758
8759 // Update PHI nodes in both successors. The original BB needs to be
8760 // replaced in one successor's PHI nodes, because the branch now comes from
8761 // the newly generated BB (TmpBB). In the other successor we need to add one
8762 // incoming edge to the PHI nodes, because both branch instructions target
8763 // now the same successor. Depending on the original branch condition
8764 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
8765 // we perform the correct update for the PHI nodes.
8766 // This doesn't change the successor order of the just created branch
8767 // instruction (or any other instruction).
8768 if (Opc == Instruction::Or)
8769 std::swap(TBB, FBB);
8770
8771 // Replace the old BB with the new BB.
8772 TBB->replacePhiUsesWith(&BB, TmpBB);
8773
8774 // Add another incoming edge from the new BB.
8775 for (PHINode &PN : FBB->phis()) {
8776 auto *Val = PN.getIncomingValueForBlock(&BB);
8777 PN.addIncoming(Val, TmpBB);
8778 }
8779
8780 // Update the branch weights (from SelectionDAGBuilder::
8781 // FindMergedConditions).
8782 if (Opc == Instruction::Or) {
8783 // Codegen X | Y as:
8784 // BB1:
8785 // jmp_if_X TBB
8786 // jmp TmpBB
8787 // TmpBB:
8788 // jmp_if_Y TBB
8789 // jmp FBB
8790 //
8791
8792 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
8793 // The requirement is that
8794 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
8795 // = TrueProb for original BB.
8796 // Assuming the original weights are A and B, one choice is to set BB1's
8797 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
8798 // assumes that
8799 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
8800 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
8801 // TmpBB, but the math is more complicated.
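      // Numeric check of the choice above (weights assumed): with A = 3 and
      // B = 1, BB1 gets 3:5 and TmpBB gets 3:2, so
      //   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
      //     = 3/8 + (5/8) * (3/5) = 6/8 = 3/4,
      // which matches the original TrueProb A/(A+B) = 3/4.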
8802 uint64_t TrueWeight, FalseWeight;
8803 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
8804 uint64_t NewTrueWeight = TrueWeight;
8805 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
8806 scaleWeights(NewTrueWeight, NewFalseWeight);
8807 Br1->setMetadata(LLVMContext::MD_prof,
8808 MDBuilder(Br1->getContext())
8809 .createBranchWeights(TrueWeight, FalseWeight));
8810
8811 NewTrueWeight = TrueWeight;
8812 NewFalseWeight = 2 * FalseWeight;
8813 scaleWeights(NewTrueWeight, NewFalseWeight);
8814 Br2->setMetadata(LLVMContext::MD_prof,
8815 MDBuilder(Br2->getContext())
8816 .createBranchWeights(TrueWeight, FalseWeight));
8817 }
8818 } else {
8819 // Codegen X & Y as:
8820 // BB1:
8821 // jmp_if_X TmpBB
8822 // jmp FBB
8823 // TmpBB:
8824 // jmp_if_Y TBB
8825 // jmp FBB
8826 //
8827 // This requires creation of TmpBB after CurBB.
8828
8829 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
8830 // The requirement is that
8831 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
8832 // = FalseProb for original BB.
8833 // Assuming the original weights are A and B, one choice is to set BB1's
8834 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
8835 // assumes that
8836 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
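      // Numeric check (weights assumed): with A = 3 and B = 1, BB1 gets 7:1
      // and TmpBB gets 6:1, so
      //   FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB)
      //     = 1/8 + (7/8) * (1/7) = 2/8 = 1/4,
      // which matches the original FalseProb B/(A+B) = 1/4.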
8837 uint64_t TrueWeight, FalseWeight;
8838 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
8839 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
8840 uint64_t NewFalseWeight = FalseWeight;
8841 scaleWeights(NewTrueWeight, NewFalseWeight);
8842 Br1->setMetadata(LLVMContext::MD_prof,
8843 MDBuilder(Br1->getContext())
8844 .createBranchWeights(TrueWeight, FalseWeight));
8845
8846 NewTrueWeight = 2 * TrueWeight;
8847 NewFalseWeight = FalseWeight;
8848 scaleWeights(NewTrueWeight, NewFalseWeight);
8849 Br2->setMetadata(LLVMContext::MD_prof,
8850 MDBuilder(Br2->getContext())
8851 .createBranchWeights(TrueWeight, FalseWeight));
8852 }
8853 }
8854
8855 ModifiedDT = ModifyDT::ModifyBBDT;
8856 MadeChange = true;
8857
8858 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
8859 TmpBB->dump());
8860 }
8861 return MadeChange;
8862}
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, DenseMap< GCRelocateInst *, SmallVector< GCRelocateInst *, 2 > > &RelocateInstMap)
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI)
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinkinig and/cmp into branches."))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
#define DEBUG_TYPE
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:148
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx; this should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
Hexagon Common GEP
IRTranslator LLVM IR MI
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1497
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned logBase2() const
Definition: APInt.h:1703
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
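A minimal sketch exercising a few of the APInt operations listed above (the function and constants are illustrative, not taken from CodeGenPrepare):
#include "llvm/ADT/APInt.h"
#include <cstdint>
using namespace llvm;

// Exercise a few of the APInt queries documented above.
static void apintSketch() {
  APInt A(/*numBits=*/16, /*val=*/64);     // 16-bit value 64
  bool Pow2 = A.isPowerOf2();              // true: 64 is a power of two
  unsigned Log = A.logBase2();             // 6
  APInt Wide = A.zext(32);                 // zero-extend to 32 bits
  bool Gt = Wide.ugt(APInt(32, 10));       // unsigned compare: true
  int64_t SV = A.sext(32).getSExtValue();  // sign-extend, then read back: 64
  (void)Pow2; (void)Log; (void)Gt; (void)SV;
}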
an instruction to allocate memory on the stack
Definition: Instructions.h:59
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:132
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:125
void setAlignment(Align Align)
Definition: Instructions.h:136
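A sketch of the alloca accessors above (the helper name and minimum-alignment parameter are illustrative):
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Alignment.h"
using namespace llvm;

// Raise an alloca's alignment to at least Min if it is currently lower.
static void ensureAllocaAlignment(AllocaInst *AI, Align Min) {
  if (AI->getAlign().value() < Min.value())
    AI->setAlignment(Min);
}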
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:519
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
static unsigned getPointerOperandIndex()
Definition: Instructions.h:675
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
static unsigned getPointerOperandIndex()
Definition: Instructions.h:912
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:498
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:396
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:639
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:347
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:198
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:557
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:439
void insertDbgRecordAfter(DbgRecord *DPV, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:447
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:469
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:263
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:366
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:155
void insertDbgRecordBefore(DbgRecord *DPV, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
void reinsertInstInDbgRecords(Instruction *I, std::optional< DbgRecord::self_iterator > Pos)
In rare circumstances instructions can be speculatively removed from blocks, and then be re-inserted ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
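A short sketch of the BasicBlock accessors above (the walk itself is purely illustrative):
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Inspect a block with the accessors documented above.
static void inspectBlock(BasicBlock &BB) {
  for (PHINode &PN : BB.phis())               // only the leading PHI nodes
    (void)PN.getNumIncomingValues();
  if (Instruction *Term = BB.getTerminator()) // null only for a malformed block
    (void)Term->getOpcode();
  if (BasicBlock *Pred = BB.getSinglePredecessor())
    (void)Pred->getName();                    // sole predecessor, if any
}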
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:491
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1770
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1703
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1789
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1648
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1653
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1639
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:579
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:908
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:955
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:994
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:988
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:992
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:990
@ ICMP_EQ
equal
Definition: InstrTypes.h:986
@ ICMP_NE
not equal
Definition: InstrTypes.h:987
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1128
static CmpInst * Create(OtherOps Op, Predicate predicate, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a compare instruction, given the opcode, the predicate and the two operands.
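For example (illustrative), swapping a predicate mirrors swapping the compare's operands:
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

// ICMP_SLT (x <s y) becomes ICMP_SGT (y >s x) when the operands are swapped.
static CmpInst::Predicate swappedExample() {
  return CmpInst::getSwappedPredicate(CmpInst::ICMP_SLT); // yields ICMP_SGT
}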
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
Definition: Constants.h:51
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1016
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2140
static Constant * getNeg(Constant *C, bool HasNUW=false, bool HasNSW=false)
Definition: Constants.cpp:2525
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:204
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:159
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:144
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DPValue represents.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
This class represents an Operation in the Expression.
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:110
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue)
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
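A minimal DenseMap usage sketch covering the members listed above (keys and values are arbitrary):
#include "llvm/ADT/DenseMap.h"
using namespace llvm;

static unsigned denseMapSketch() {
  DenseMap<int, unsigned> M;
  M.insert({1, 10});            // returns {iterator, inserted-flag}
  if (M.count(1))               // 1 if the key is present, 0 otherwise
    M.find(1)->second += 5;
  if (M.find(2) == M.end())     // a missing key compares equal to end()
    M.erase(1);
  return M.size();
}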
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
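A sketch of the dominance queries above (the helper is illustrative; DefBB is assumed to be the block defining the used value):
#include "llvm/IR/Dominators.h"
using namespace llvm;

// True if the use U is reachable and dominated by the end of DefBB.
static bool defReachesUse(const DominatorTree &DT, const BasicBlock *DefBB,
                          const Use &U) {
  return DT.isReachableFromEntry(U) && DT.dominates(DefBB, U);
}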
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
Definition: Function.h:782
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:128
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition: Globals.cpp:288
Type * getValueType() const
Definition: GlobalValue.h:296
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2120
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:460
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2518
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2349
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2380
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2224
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2110
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1114
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1825
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1865
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:496
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
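A sketch combining several of the IRBuilder methods above to emit a zero test and a conditional branch (block and value names are illustrative; BB is assumed to have no terminator yet, and Then/Else are existing blocks supplied by the caller):
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Append "%is.zero = icmp eq V, 0" and "br %is.zero, Then, Else" to BB.
static void emitZeroCheck(BasicBlock *BB, BasicBlock *Then, BasicBlock *Else,
                          Value *V) {
  IRBuilder<> Builder(BB->getContext());
  Builder.SetInsertPoint(BB);  // append created instructions at the end of BB
  Value *IsZero = Builder.CreateICmpEQ(
      V, Constant::getNullValue(V->getType()), "is.zero");
  Builder.CreateCondBr(IsZero, Then, Else);
}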
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:88
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:453
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:801
const BasicBlock * getParent() const
Definition: Instruction.h:151
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:148
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:84
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1633
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
bool isShift() const
Definition: Instruction.h:258
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:450
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
void clear()
Definition: MapVector.h:88
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:783
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
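A sketch of creating a PHI with the Create overload listed above (the helper and names are illustrative):
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Merge a value arriving from two predecessors at the top of MergeBB.
static PHINode *mergeValues(BasicBlock *MergeBB, Value *LeftV,
                            BasicBlock *Left, Value *RightV,
                            BasicBlock *Right) {
  PHINode *PN = PHINode::Create(LeftV->getType(), /*NumReservedValues=*/2,
                                "merged", MergeBB->begin());
  PN->addIncoming(LeftV, Left);
  PN->addIncoming(RightV, Right);
  return PN;
}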
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
PointerIntPair - This class implements a pair of a pointer and small integer.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:236
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
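A sketch using SmallPtrSet and SmallVector together, as this pass frequently does (the helper is illustrative):
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Collect the unique instruction users of I.
static void collectUsers(Instruction &I, SmallVectorImpl<Instruction *> &Out) {
  SmallPtrSet<Instruction *, 8> Seen;
  for (User *U : I.users())
    if (auto *UI = dyn_cast<Instruction>(U))
      if (Seen.insert(UI).second)   // .second is true only for new elements
        Out.push_back(UI);
}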
An instruction for storing to memory.
Definition: Instructions.h:317
static unsigned getPointerOperandIndex()
Definition: Instructions.h:419
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:651
Class to represent struct types.
Definition: DerivedTypes.h:216
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
virtual bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of the reciprocal throughput of a math/logic op.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:243
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
'undef' values are things that do not have specified contents.
Definition: Constants.h:1350
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
See the file comment.
Definition: ValueMap.h:84
void clear()
Definition: ValueMap.h:145
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:234
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:153
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
iterator_range< use_iterator > uses()
Definition: Value.h:376
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:815
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5203
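A sketch of the common replace-and-rename pattern built from the Value members above (the helper name is illustrative):
#include "llvm/IR/Value.h"
using namespace llvm;

// Point every use of From at To and carry the name over.
static void replaceValue(Value *From, Value *To) {
  if (From == To || From->use_empty())
    return;
  To->takeName(From);
  From->replaceAllUsesWith(To);
}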
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
This class represents zero extension of integer types.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isNonZero() const
Definition: TypeSize.h:158
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
TypeSize getSequentialElementStride(const DataLayout &DL) const
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:477
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:903
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:160
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:765
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:821
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:163
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:541
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:548
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:294
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:234
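A sketch of the PatternMatch combinators above; the matched shape "(X + C1) >> C2" is chosen purely for illustration:
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Bind X, C1 and C2 if V has the form "(X + C1) >> C2" (lshr or ashr).
static bool matchAddThenShift(Value *V, Value *&X, const APInt *&C1,
                              const APInt *&C2) {
  return match(V, m_Shr(m_Add(m_Value(X), m_APInt(C1)), m_APInt(C2)));
}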
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
ManagedStatic< cl::opt< FnT >, OptCreatorT > Action
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:456
bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return true if the given value is known to be non-zero when defined.
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:533
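A sketch of the usual rewrite-then-clean-up sequence (the helper is illustrative):
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// After redirecting all uses, Old becomes trivially dead and is deleted,
// along with any of its operands that die with it.
static void replaceAndClean(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New);
  RecursivelyDeleteTriviallyDeadInstructions(Old);
}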
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:129
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1581
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2082
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:665
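A sketch showing why make_early_inc_range matters when erasing during iteration (isInstructionTriviallyDead comes from Transforms/Utils/Local.h; the helper is illustrative):
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

// Erase dead instructions from BB; early-inc iteration keeps the loop valid
// even though the current element is removed from the list.
static void eraseDeadInstrs(BasicBlock &BB) {
  for (Instruction &I : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I))
      I.eraseFromParent();
}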
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Interval::pred_iterator pred_end(Interval *I)
Definition: Interval.h:112
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:241
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2068
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:3919
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1745
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:109
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:50
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test whether, given that the input instruction is in the tail call position, there is an attribute misma...
Definition: Analysis.cpp:581
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DPValue * > *DPValues=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:137
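A minimal sketch (collectDebugUsers is hypothetical) of gathering both debug-intrinsic and DPValue users of a value before rewriting it:
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/DebugInfo.h"
  using namespace llvm;

  // Illustrative only.
  static void collectDebugUsers(Value *V,
                                SmallVectorImpl<DbgValueInst *> &DbgValues,
                                SmallVectorImpl<DPValue *> &DPValues) {
    findDbgValues(DbgValues, V, &DPValues);
  }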
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition: STLExtras.h:1923
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
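A hedged sketch (canHoist is hypothetical) of the usual speculation legality check before hoisting an instruction above a branch:
  #include "llvm/Analysis/ValueTracking.h"
  using namespace llvm;

  // Illustrative only: the instruction must be side-effect free and must not trap.
  static bool canHoist(const Instruction *I, const DominatorTree *DT) {
    return isSafeToSpeculativelyExecute(I, /*CtxI=*/nullptr, /*AC=*/nullptr, DT);
  }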
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
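A minimal sketch (getWeights is hypothetical; the declaration is assumed to live in llvm/IR/ProfDataUtils.h):
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/ProfDataUtils.h"
  using namespace llvm;

  // Illustrative only: the order of Weights matches the branch's successors.
  static bool getWeights(const BranchInst *BI,
                         SmallVectorImpl<uint32_t> &Weights) {
    return extractBranchWeights(BI->getMetadata(LLVMContext::MD_prof), Weights);
  }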
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out with a shorter, faster divide.
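A hedged sketch (bypass64BitDiv is hypothetical, and the 64-to-32-bit mapping is an assumed target preference):
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/Transforms/Utils/BypassSlowDivision.h"
  using namespace llvm;

  // Illustrative only: insert a runtime fast path that performs a 64-bit
  // div/rem as a 32-bit operation when both operands happen to fit.
  static bool bypass64BitDiv(BasicBlock *BB) {
    DenseMap<unsigned int, unsigned int> BypassWidths;
    BypassWidths[64] = 32;
    return bypassSlowDivision(BB, BypassWidths);
  }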
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which erases every element for which the predicate returns true.
Definition: STLExtras.h:2060
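A minimal sketch (pruneWorklist is hypothetical) of erasing elements in place with llvm::erase_if:
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Transforms/Utils/Local.h"
  using namespace llvm;

  // Illustrative only: drop worklist entries that have become trivially dead.
  static void pruneWorklist(SmallVectorImpl<Instruction *> &Worklist) {
    erase_if(Worklist,
             [](Instruction *I) { return isInstructionTriviallyDead(I); });
  }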
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
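A small worked example (alignAtOffset is hypothetical): a base pointer aligned to 16 bytes, accessed at offset 4, is only guaranteed to be 4-byte aligned.
  #include "llvm/Support/Alignment.h"
  using namespace llvm;

  // Illustrative only: commonAlignment(Align(16), 4) yields Align(4).
  static Align alignAtOffset(Align Base, uint64_t Offset) {
    return commonAlignment(Base, Offset);
  }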
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old block, and the remaining instructions are moved to a newly created block.
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
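A hedged usage sketch (splitForEdgeCode is hypothetical):
  #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  using namespace llvm;

  // Illustrative only: place a new block on the From -> To edge so that code
  // inserted there runs only when control flows along that edge.
  static BasicBlock *splitForEdgeCode(BasicBlock *From, BasicBlock *To) {
    return SplitEdge(From, To);
  }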
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DPValue types only and downcast.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:290
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
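A minimal sketch (isWideningIntCast is hypothetical) combining the EVT helpers listed above:
  #include "llvm/CodeGen/ValueTypes.h"
  #include "llvm/IR/Type.h"
  using namespace llvm;

  // Illustrative only: true if DstTy is an integer type strictly wider than
  // the integer type SrcTy.
  static bool isWideningIntCast(Type *SrcTy, Type *DstTy) {
    EVT SrcVT = EVT::getEVT(SrcTy);
    EVT DstVT = EVT::getEVT(DstTy);
    return SrcVT.isInteger() && DstVT.isInteger() && DstVT.bitsGT(SrcVT);
  }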
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
ExtAddrMode()=default
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
const DataLayout & DL
Definition: SimplifyQuery.h:61
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg. If BaseGV is null, there is no BaseGV.
This contains information for each constraint that we are lowering.