33#define GET_GICOMBINER_DEPS
34#include "AArch64GenPreLegalizeGICombiner.inc"
35#undef GET_GICOMBINER_DEPS
37#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
40using namespace MIPatternMatch;
44#define GET_GICOMBINER_TYPES
45#include "AArch64GenPreLegalizeGICombiner.inc"
46#undef GET_GICOMBINER_TYPES
51 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT);
53 const unsigned DstSize =
MRI.getType(DstReg).getSizeInBits();
54 if (DstSize != 32 && DstSize != 64)
60 return all_of(
MRI.use_nodbg_instructions(DstReg),
66 assert(
MI.getOpcode() == TargetOpcode::G_FCONSTANT);
68 const APFloat &ImmValAPF =
MI.getOperand(1).getFPImm()->getValueAPF();
78 assert(
MI.getOpcode() == TargetOpcode::G_ICMP && KB);
85 LLT LHSTy =
MRI.getType(LHS);
96 LLT WideTy =
MRI.getType(WideReg);
108 assert(
MI.getOpcode() == TargetOpcode::G_ICMP);
110 LLT WideTy =
MRI.getType(WideReg);
116 MI.getOperand(2).setReg(WideReg);
117 MI.getOperand(3).setReg(WideZero.getReg(0));
127 std::pair<uint64_t, uint64_t> &MatchInfo) {
128 assert(
MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
130 auto &GlobalOp =
MI.getOperand(1);
131 auto *GV = GlobalOp.getGlobal();
132 if (GV->isThreadLocal())
156 for (
auto &UseInstr :
MRI.use_nodbg_instructions(Dst)) {
157 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
160 UseInstr.getOperand(2).getReg(),
MRI);
163 MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
168 uint64_t CurrOffset = GlobalOp.getOffset();
169 uint64_t NewOffset = MinOffset + CurrOffset;
170 if (NewOffset <= CurrOffset)
182 if (NewOffset >= (1 << 20))
185 Type *
T = GV->getValueType();
187 NewOffset > GV->getDataLayout().getTypeAllocSize(
T))
189 MatchInfo = std::make_pair(NewOffset, MinOffset);
195 std::pair<uint64_t, uint64_t> &MatchInfo) {
217 std::tie(
Offset, MinOffset) = MatchInfo;
218 B.setInstrAndDebugLoc(*std::next(
MI.getIterator()));
220 auto &GlobalOp =
MI.getOperand(1);
221 auto *GV = GlobalOp.getGlobal();
222 GlobalOp.ChangeToGA(GV,
Offset, GlobalOp.getTargetFlags());
225 MI.getOperand(0).setReg(NewGVDst);
229 B.buildConstant(
LLT::scalar(64), -
static_cast<int64_t
>(MinOffset)));
237 std::tuple<Register, Register, bool> &MatchInfo) {
238 assert(
MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
239 "Expected a G_VECREDUCE_ADD instruction");
240 assert(STI.hasDotProd() &&
"Target should have Dot Product feature");
245 LLT DstTy =
MRI.getType(DstReg);
246 LLT MidTy =
MRI.getType(MidReg);
251 auto I1Opc =
I1->getOpcode();
252 if (I1Opc == TargetOpcode::G_MUL) {
255 if (!
MRI.hasOneNonDBGUse(MidReg))
272 SrcTy =
MRI.getType(
I1->getOperand(1).getReg());
273 std::get<0>(MatchInfo) =
I1->getOperand(1).getReg();
274 std::get<1>(MatchInfo) = 0;
277 if (I1Opc == TargetOpcode::G_ZEXT)
278 std::get<2>(MatchInfo) = 0;
279 else if (I1Opc == TargetOpcode::G_SEXT)
280 std::get<2>(MatchInfo) = 1;
294 std::tuple<Register, Register, bool> &MatchInfo) {
295 assert(
MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
296 "Expected a G_VECREDUCE_ADD instruction");
297 assert(STI.hasDotProd() &&
"Target should have Dot Product feature");
301 std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
302 Register Ext1SrcReg = std::get<0>(MatchInfo);
307 if (std::get<1>(MatchInfo) == 0)
312 Ext2SrcReg = std::get<1>(MatchInfo);
315 LLT SrcTy =
MRI.getType(Ext1SrcReg);
329 if (NumOfDotMI == 1) {
331 auto Dot = Builder.
buildInstr(DotOpcode, {MidTy},
332 {Zeroes, Ext1SrcReg, Ext2SrcReg});
344 LLT LeftoverTy1, LeftoverTy2;
346 LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
349 LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
362 {Leftover1[0], v8Zeroes})
367 {Leftover2[0], v8Zeroes})
374 Ext1UnmergeReg, Builder,
MRI);
376 Ext2UnmergeReg, Builder,
MRI);
381 unsigned NumElements = 0;
382 for (
unsigned i = 0; i < Ext1UnmergeReg.
size(); i++) {
385 if (
MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
395 .buildInstr(DotOpcode, {
MRI.getType(Zeroes)},
396 {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
406 ConcatMI->getOperand(0).getReg());
410 MI.eraseFromParent();
416 std::pair<Register, bool> &MatchInfo) {
417 assert(
MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
418 "Expected G_VECREDUCE_ADD Opcode");
424 if (ExtOpc == TargetOpcode::G_ZEXT)
425 std::get<1>(MatchInfo) = 0;
426 else if (ExtOpc == TargetOpcode::G_SEXT)
427 std::get<1>(MatchInfo) = 1;
433 LLT ExtSrcTy =
MRI.getType(ExtSrcReg);
434 LLT DstTy =
MRI.getType(
MI.getOperand(0).getReg());
441 std::get<0>(MatchInfo) = ExtSrcReg;
449 std::pair<Register, bool> &MatchInfo) {
450 assert(
MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
451 "Expected G_VECREDUCE_ADD Opcode");
453 unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
454 Register SrcReg = std::get<0>(MatchInfo);
456 LLT SrcTy =
MRI.getType(SrcReg);
457 LLT DstTy =
MRI.getType(DstReg);
465 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
466 (SrcScalSize == 16 && SrcNumElem > 8) ||
467 (SrcScalSize == 32 && SrcNumElem > 4)) {
471 if (SrcScalSize == 8)
473 else if (SrcScalSize == 16)
475 else if (SrcScalSize == 32)
482 extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
483 LeftoverRegs,
B,
MRI);
484 for (
unsigned I = 0;
I < LeftoverRegs.
size();
I++) {
495 for (
unsigned I = 0;
I < WorkingRegisters.
size();
I++) {
498 LLT WorkingRegTy =
MRI.getType(WorkingRegisters[
I]);
501 WorkingRegisters[
I] =
502 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
503 : TargetOpcode::G_ZEXT,
511 :
LLT::fixed_vector(2, 64);
513 B.buildInstr(Opc, {addlvTy}, {WorkingRegisters[
I]}).
getReg(0);
520 if (MidScalarSize == 32 || MidScalarSize == 64) {
521 WorkingRegisters[
I] =
B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
522 {MidScalarLLT}, {addlvReg, zeroReg})
525 Register extractReg =
B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
528 WorkingRegisters[
I] =
529 B.buildTrunc({MidScalarLLT}, {extractReg}).
getReg(0);
534 if (WorkingRegisters.
size() > 1) {
535 outReg =
B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
537 for (
unsigned I = 2;
I < WorkingRegisters.
size();
I++) {
538 outReg =
B.buildAdd(MidScalarLLT, outReg, WorkingRegisters[
I]).getReg(0);
541 outReg = WorkingRegisters[0];
547 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
548 : TargetOpcode::G_ZEXT,
551 B.buildCopy(DstReg, outReg);
554 MI.eraseFromParent();
563 assert((
MI.getOpcode() == TargetOpcode::G_ADD ||
564 MI.getOpcode() == TargetOpcode::G_SUB) &&
565 "Expected a G_ADD or G_SUB instruction\n");
568 LLT DstTy =
MRI.getType(DstReg);
573 Register ExtDstReg =
MI.getOperand(1).getReg();
574 LLT Ext1SrcTy =
MRI.getType(SrcReg1);
575 LLT Ext2SrcTy =
MRI.getType(SrcReg2);
576 unsigned ExtDstScal =
MRI.getType(ExtDstReg).getScalarSizeInBits();
578 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
579 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
580 Ext1SrcTy == Ext2SrcTy)
589 LLT SrcTy =
MRI.getType(SrcReg1);
591 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
595 B.buildInstr(
MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);
600 if (
MI.getOpcode() == TargetOpcode::G_ADD)
601 B.buildInstr(Opc, {DstReg}, {AddReg});
603 B.buildSExt(DstReg, AddReg);
605 MI.eraseFromParent();
638 auto &
MRI = *
B.getMRI();
647 LLT WideTy0 =
MRI.getType(Op0Wide);
648 LLT WideTy1 =
MRI.getType(Op1Wide);
650 LLT OpTy =
MRI.getType(ResVal);
657 if (Op0WideDef->
getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
658 Op1WideDef->
getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
666 (OpTySize != 8 && OpTySize != 16))
670 Register ResStatus =
MI.getOperand(1).getReg();
671 if (!
MRI.hasOneNonDBGUse(ResStatus))
674 if (CondUser->
getOpcode() != TargetOpcode::G_BRCOND)
682 if (
any_of(
MRI.use_nodbg_instructions(ResVal),
685 (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
690 B.setInstrAndDebugLoc(*
MI.getNextNode());
691 MI.eraseFromParent();
694 Register AddDst =
MRI.cloneVirtualRegister(Op0Wide);
695 B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});
699 Register CondBit =
MRI.cloneVirtualRegister(Op0Wide);
702 B.buildConstant(
LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
708 B.buildZExtOrTrunc(ResVal, AddDst);
712 auto OldR =
U.getParent()->getOperand(0).getReg();
714 U.getParent()->eraseFromParent();
722class AArch64PreLegalizerCombinerImpl :
public Combiner {
726 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
730 AArch64PreLegalizerCombinerImpl(
733 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
/// Returns the human-readable name of this combiner implementation.
///
/// The literal is spelled "AArch64PreLegalizerCombiner", the same string this
/// file uses elsewhere for the pre-legalizer combiner, rather than the earlier
/// "AArch6400..." spelling, which carried a stray "00".
static const char *getName() { return "AArch64PreLegalizerCombiner"; }
744#define GET_GICOMBINER_CLASS_MEMBERS
745#include "AArch64GenPreLegalizeGICombiner.inc"
746#undef GET_GICOMBINER_CLASS_MEMBERS
749#define GET_GICOMBINER_IMPL
750#include "AArch64GenPreLegalizeGICombiner.inc"
751#undef GET_GICOMBINER_IMPL
753AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
756 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
759 :
Combiner(MF, CInfo, TPC, &KB, CSEInfo),
760 Helper(Observer,
B,
true, &KB, MDT, LI),
761 RuleConfig(RuleConfig), STI(STI),
763#include
"AArch64GenPreLegalizeGICombiner.inc"
768bool AArch64PreLegalizerCombinerImpl::tryCombineAll(
MachineInstr &
MI)
const {
769 if (tryCombineAllImpl(
MI))
772 unsigned Opc =
MI.getOpcode();
774 case TargetOpcode::G_SHUFFLE_VECTOR:
776 case TargetOpcode::G_UADDO:
777 return tryToSimplifyUADDO(
MI,
B, Helper, Observer);
778 case TargetOpcode::G_MEMCPY_INLINE:
780 case TargetOpcode::G_MEMCPY:
781 case TargetOpcode::G_MEMMOVE:
782 case TargetOpcode::G_MEMSET: {
785 unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
789 if (Opc == TargetOpcode::G_MEMSET)
805 AArch64PreLegalizerCombiner();
808 return "AArch64PreLegalizerCombiner";
816 AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
820void AArch64PreLegalizerCombiner::getAnalysisUsage(
AnalysisUsage &AU)
const {
833AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner()
837 if (!RuleConfig.parseCommandLineOption())
841bool AArch64PreLegalizerCombiner::runOnMachineFunction(
MachineFunction &MF) {
843 MachineFunctionProperties::Property::FailedISel))
845 auto &TPC = getAnalysis<TargetPassConfig>();
849 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
850 auto *CSEInfo = &
Wrapper.get(TPC.getCSEConfig());
853 const auto *LI =
ST.getLegalizerInfo();
858 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
860 &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
862 nullptr, EnableOpt,
F.hasOptSize(),
864 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, &TPC, *KB, CSEInfo,
865 RuleConfig, ST, MDT, LI);
866 return Impl.combineMachineInstrs();
869char AArch64PreLegalizerCombiner::ID = 0;
871 "Combine AArch64 machine instrs before legalization",
882 return new AArch64PreLegalizerCombiner();
unsigned const MachineRegisterInfo * MRI
#define GET_GICOMBINER_CONSTRUCTOR_INITS
Combine AArch64 machine instrs before legalization
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass, or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
Provides analysis for querying information about KnownBits during GISel passes.
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static StringRef getName(Value *V)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
APInt bitcastToAPInt() const
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by a pass iff it does not:
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Optimize memcpy intrinsics et al, e.g.
bool tryEmitMemcpyInline(MachineInstr &MI)
Emit loads and stores that perform the given memcpy.
bool tryCombineShuffleVector(MachineInstr &MI)
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
The actual analysis pass wrapper.
Simple wrapper that does the following.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
bool isEquality() const
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildVecReduceAdd(const DstOp &Dst, const SrcOp &Src)
Build and insert Res = G_VECREDUCE_ADD Src.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
MachineBasicBlock * getMBB() const
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
A Use represents the edge between a Value definition and its users.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, bool MinSize)
Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is supported and beneficial to ...
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64PreLegalizerCombiner()
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void initializeAArch64PreLegalizerCombinerPass(PassRegistry &)
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
auto instrs(const MachineBasicBlock &BB)