71using namespace PatternMatch;
73#define DEBUG_TYPE "complex-deinterleaving"
75STATISTIC(NumComplexTransformations,
"Amount of complex patterns transformed");
78 "enable-complex-deinterleaving",
99class ComplexDeinterleavingLegacyPass :
public FunctionPass {
103 ComplexDeinterleavingLegacyPass(
const TargetMachine *TM =
nullptr)
110 return "Complex Deinterleaving Pass";
123class ComplexDeinterleavingGraph;
124struct ComplexDeinterleavingCompositeNode {
131 friend class ComplexDeinterleavingGraph;
132 using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>;
133 using RawNodePtr = ComplexDeinterleavingCompositeNode *;
146 Value *ReplacementNode =
nullptr;
155 auto PrintValue = [&](
Value *V) {
163 auto PrintNodeRef = [&](RawNodePtr
Ptr) {
170 OS <<
"- CompositeNode: " <<
this <<
"\n";
175 OS <<
" ReplacementNode: ";
176 PrintValue(ReplacementNode);
177 OS <<
" Operation: " << (int)
Operation <<
"\n";
178 OS <<
" Rotation: " << ((int)Rotation * 90) <<
"\n";
179 OS <<
" Operands: \n";
184 OS <<
" InternalInstructions:\n";
185 for (
const auto &
I : InternalInstructions) {
193class ComplexDeinterleavingGraph {
195 using NodePtr = ComplexDeinterleavingCompositeNode::NodePtr;
196 using RawNodePtr = ComplexDeinterleavingCompositeNode::RawNodePtr;
197 explicit ComplexDeinterleavingGraph(
const TargetLowering *tl) : TL(tl) {}
208 return std::make_shared<ComplexDeinterleavingCompositeNode>(
Operation, R,
212 NodePtr submitCompositeNode(NodePtr
Node) {
216 for (
auto *
I :
Node->InternalInstructions)
221 NodePtr getContainingComposite(
Value *R,
Value *
I) {
222 for (
const auto &CN : CompositeNodes) {
223 if (CN->Real == R && CN->Imag ==
I)
245 NodePtr identifyNodeWithImplicitAdd(
247 std::pair<Instruction *, Instruction *> &CommonOperandI);
265 for (
const auto &
Node : CompositeNodes)
279class ComplexDeinterleaving {
282 : TL(tl), TLI(tli) {}
294char ComplexDeinterleavingLegacyPass::ID = 0;
297 "Complex Deinterleaving",
false,
false)
305 if (!ComplexDeinterleaving(TL, &TLI).runOnFunction(
F))
314 return new ComplexDeinterleavingLegacyPass(
TM);
317bool ComplexDeinterleavingLegacyPass::runOnFunction(
Function &
F) {
318 const auto *TL =
TM->getSubtargetImpl(
F)->getTargetLowering();
319 auto TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
F);
320 return ComplexDeinterleaving(TL, &TLI).runOnFunction(
F);
323bool ComplexDeinterleaving::runOnFunction(
Function &
F) {
326 dbgs() <<
"Complex deinterleaving has been explicitly disabled.\n");
332 dbgs() <<
"Complex deinterleaving has been disabled, target does "
333 "not support lowering of complex number operations.\n");
337 bool Changed =
false;
339 Changed |= evaluateBasicBlock(&
B);
346 if ((Mask.size() & 1))
349 int HalfNumElements = Mask.size() / 2;
350 for (
int Idx = 0;
Idx < HalfNumElements; ++
Idx) {
351 int MaskIdx =
Idx * 2;
352 if (Mask[MaskIdx] !=
Idx || Mask[MaskIdx + 1] != (
Idx + HalfNumElements))
361 int HalfNumElements = Mask.size() / 2;
363 for (
int Idx = 1;
Idx < HalfNumElements; ++
Idx) {
371bool ComplexDeinterleaving::evaluateBasicBlock(
BasicBlock *
B) {
372 bool Changed =
false;
377 auto *SVI = dyn_cast<ShuffleVectorInst>(&
I);
386 ComplexDeinterleavingGraph Graph(TL);
387 if (!Graph.identifyNodes(SVI))
390 Graph.replaceNodes();
395 for (
const auto &
I : DeadInstrRoots) {
396 if (!
I ||
I->getParent() ==
nullptr)
404ComplexDeinterleavingGraph::NodePtr
405ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd(
407 std::pair<Instruction *, Instruction *> &PartialMatch) {
408 LLVM_DEBUG(
dbgs() <<
"identifyNodeWithImplicitAdd " << *Real <<
" / " << *Imag
416 if (Real->
getOpcode() != Instruction::FMul ||
417 Imag->
getOpcode() != Instruction::FMul) {
418 LLVM_DEBUG(
dbgs() <<
" - Real or imaginary instruction is not fmul\n");
426 if (!R0 || !R1 || !I0 || !I1) {
435 if (R0->
getOpcode() == Instruction::FNeg ||
438 if (R0->
getOpcode() == Instruction::FNeg) {
440 R0 = dyn_cast<Instruction>(R0->
getOperand(0));
443 R1 = dyn_cast<Instruction>(R1->
getOperand(0));
448 if (I0->
getOpcode() == Instruction::FNeg ||
449 I1->getOpcode() == Instruction::FNeg) {
452 if (I0->
getOpcode() == Instruction::FNeg) {
454 I0 = dyn_cast<Instruction>(I0->
getOperand(0));
457 I1 = dyn_cast<Instruction>(
I1->getOperand(0));
469 if (R0 == I0 || R0 == I1) {
472 }
else if (R1 == I0 || R1 == I1) {
480 UncommonImagOp = (CommonOperand == I0) ? I1 : I0;
481 if (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
482 Rotation == ComplexDeinterleavingRotation::Rotation_270)
483 std::swap(UncommonRealOp, UncommonImagOp);
487 if (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
488 Rotation == ComplexDeinterleavingRotation::Rotation_180)
489 PartialMatch.first = CommonOperand;
491 PartialMatch.second = CommonOperand;
493 if (!PartialMatch.first || !PartialMatch.second) {
498 NodePtr CommonNode = identifyNode(PartialMatch.first, PartialMatch.second);
504 NodePtr UncommonNode = identifyNode(UncommonRealOp, UncommonImagOp);
510 NodePtr
Node = prepareCompositeNode(
511 ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
512 Node->Rotation = Rotation;
513 Node->addOperand(CommonNode);
514 Node->addOperand(UncommonNode);
515 Node->InternalInstructions.append(FNegs);
516 return submitCompositeNode(
Node);
519ComplexDeinterleavingGraph::NodePtr
520ComplexDeinterleavingGraph::identifyPartialMul(
Instruction *Real,
522 LLVM_DEBUG(
dbgs() <<
"identifyPartialMul " << *Real <<
" / " << *Imag
526 if (Real->
getOpcode() == Instruction::FAdd &&
528 Rotation = ComplexDeinterleavingRotation::Rotation_0;
529 else if (Real->
getOpcode() == Instruction::FSub &&
531 Rotation = ComplexDeinterleavingRotation::Rotation_90;
532 else if (Real->
getOpcode() == Instruction::FSub &&
534 Rotation = ComplexDeinterleavingRotation::Rotation_180;
535 else if (Real->
getOpcode() == Instruction::FAdd &&
537 Rotation = ComplexDeinterleavingRotation::Rotation_270;
545 LLVM_DEBUG(
dbgs() <<
" - Contract is missing from the FastMath flags.\n");
567 if (!R0 || !R1 || !I0 || !I1) {
576 if (R0 == I0 || R0 == I1) {
579 }
else if (R1 == I0 || R1 == I1) {
587 UncommonImagOp = (CommonOperand == I0) ? I1 : I0;
588 if (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
589 Rotation == ComplexDeinterleavingRotation::Rotation_270)
590 std::swap(UncommonRealOp, UncommonImagOp);
592 std::pair<Instruction *, Instruction *> PartialMatch(
593 (Rotation == ComplexDeinterleavingRotation::Rotation_0 ||
594 Rotation == ComplexDeinterleavingRotation::Rotation_180)
597 (Rotation == ComplexDeinterleavingRotation::Rotation_90 ||
598 Rotation == ComplexDeinterleavingRotation::Rotation_270)
601 NodePtr CNode = identifyNodeWithImplicitAdd(
602 cast<Instruction>(CR), cast<Instruction>(CI), PartialMatch);
608 NodePtr UncommonRes = identifyNode(UncommonRealOp, UncommonImagOp);
614 assert(PartialMatch.first && PartialMatch.second);
615 NodePtr CommonRes = identifyNode(PartialMatch.first, PartialMatch.second);
621 NodePtr
Node = prepareCompositeNode(
622 ComplexDeinterleavingOperation::CMulPartial, Real, Imag);
623 Node->addInstruction(RealMulI);
624 Node->addInstruction(ImagMulI);
625 Node->Rotation = Rotation;
626 Node->addOperand(CommonRes);
627 Node->addOperand(UncommonRes);
628 Node->addOperand(CNode);
629 return submitCompositeNode(
Node);
632ComplexDeinterleavingGraph::NodePtr
634 LLVM_DEBUG(
dbgs() <<
"identifyAdd " << *Real <<
" / " << *Imag <<
"\n");
638 if ((Real->
getOpcode() == Instruction::FSub &&
639 Imag->
getOpcode() == Instruction::FAdd) ||
640 (Real->
getOpcode() == Instruction::Sub &&
642 Rotation = ComplexDeinterleavingRotation::Rotation_90;
643 else if ((Real->
getOpcode() == Instruction::FAdd &&
644 Imag->
getOpcode() == Instruction::FSub) ||
645 (Real->
getOpcode() == Instruction::Add &&
647 Rotation = ComplexDeinterleavingRotation::Rotation_270;
649 LLVM_DEBUG(
dbgs() <<
" - Unhandled case, rotation is not assigned.\n");
653 auto *AR = dyn_cast<Instruction>(Real->
getOperand(0));
654 auto *BI = dyn_cast<Instruction>(Real->
getOperand(1));
655 auto *AI = dyn_cast<Instruction>(Imag->
getOperand(0));
658 if (!AR || !AI || !BR || !BI) {
663 NodePtr ResA = identifyNode(AR, AI);
665 LLVM_DEBUG(
dbgs() <<
" - AR/AI is not identified as a composite node.\n");
668 NodePtr ResB = identifyNode(BR, BI);
670 LLVM_DEBUG(
dbgs() <<
" - BR/BI is not identified as a composite node.\n");
675 prepareCompositeNode(ComplexDeinterleavingOperation::CAdd, Real, Imag);
676 Node->Rotation = Rotation;
677 Node->addOperand(ResA);
678 Node->addOperand(ResB);
679 return submitCompositeNode(
Node);
683 unsigned OpcA =
A->getOpcode();
684 unsigned OpcB =
B->getOpcode();
686 return (OpcA == Instruction::FSub && OpcB == Instruction::FAdd) ||
687 (OpcA == Instruction::FAdd && OpcB == Instruction::FSub) ||
688 (OpcA == Instruction::Sub && OpcB == Instruction::Add) ||
689 (OpcA == Instruction::Add && OpcB == Instruction::Sub);
699ComplexDeinterleavingGraph::NodePtr
701 LLVM_DEBUG(
dbgs() <<
"identifyNode on " << *Real <<
" / " << *Imag <<
"\n");
702 if (NodePtr CN = getContainingComposite(Real, Imag)) {
707 auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real);
708 auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag);
709 if (RealShuffle && ImagShuffle) {
710 Value *RealOp1 = RealShuffle->getOperand(1);
711 if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) {
715 Value *ImagOp1 = ImagShuffle->getOperand(1);
716 if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) {
721 Value *RealOp0 = RealShuffle->getOperand(0);
722 Value *ImagOp0 = ImagShuffle->getOperand(0);
724 if (RealOp0 != ImagOp0) {
736 if (RealMask[0] != 0 || ImagMask[0] != 1) {
737 LLVM_DEBUG(
dbgs() <<
" - Masks do not have the correct initial value.\n");
745 auto *ShuffleTy = cast<FixedVectorType>(
Shuffle->getType());
746 auto *OpTy = cast<FixedVectorType>(
Op->getType());
748 if (OpTy->getScalarType() != ShuffleTy->getScalarType())
750 if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements())
764 auto *OpTy = cast<FixedVectorType>(
Op->getType());
765 int NumElements = OpTy->getNumElements();
769 return Last < NumElements;
772 if (RealShuffle->getType() != ImagShuffle->getType()) {
776 if (!CheckDeinterleavingShuffle(RealShuffle)) {
780 if (!CheckDeinterleavingShuffle(ImagShuffle)) {
785 NodePtr PlaceholderNode =
787 RealShuffle, ImagShuffle);
788 PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0);
789 return submitCompositeNode(PlaceholderNode);
791 if (RealShuffle || ImagShuffle)
794 auto *VTy = cast<FixedVectorType>(Real->
getType());
799 ComplexDeinterleavingOperation::CMulPartial, NewVTy) &&
801 return identifyPartialMul(Real, Imag);
805 ComplexDeinterleavingOperation::CAdd, NewVTy) &&
807 return identifyAdd(Real, Imag);
813bool ComplexDeinterleavingGraph::identifyNodes(
Instruction *RootI) {
820 AllInstructions.
insert(RootI);
821 RootNode = identifyNode(Real, Imag);
826 dbgs() <<
"Complex deinterleaving graph for " <<
F->getName()
827 <<
"::" <<
B->getName() <<
".\n";
833 for (
const auto &
Node : CompositeNodes) {
834 if (!
Node->hasAllInternalUses(AllInstructions)) {
839 return RootNode !=
nullptr;
842Value *ComplexDeinterleavingGraph::replaceNode(
843 ComplexDeinterleavingGraph::RawNodePtr
Node) {
844 if (
Node->ReplacementNode)
845 return Node->ReplacementNode;
847 Value *Input0 = replaceNode(
Node->Operands[0]);
848 Value *Input1 = replaceNode(
Node->Operands[1]);
850 Node->Operands.size() > 2 ? replaceNode(
Node->Operands[2]) : nullptr;
853 "Node inputs need to be of the same type");
858 assert(
Node->ReplacementNode &&
"Target failed to create Intrinsic call.");
859 NumComplexTransformations += 1;
860 return Node->ReplacementNode;
863void ComplexDeinterleavingGraph::replaceNodes() {
864 Value *
R = replaceNode(RootNode.get());
865 assert(R &&
"Unable to find replacement for RootValue");
869bool ComplexDeinterleavingCompositeNode::hasAllInternalUses(
871 if (
Operation == ComplexDeinterleavingOperation::Shuffle)
882 for (
auto *
I : InternalInstructions) {
883 for (
auto *
User :
I->users()) {
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static cl::opt< bool > ComplexDeinterleavingEnabled("enable-complex-deinterleaving", cl::desc("Enable generation of complex instructions"), cl::init(true), cl::Hidden)
static bool isInstructionPairAdd(Instruction *A, Instruction *B)
static bool isInterleavingMask(ArrayRef< int > Mask)
Checks the given mask, and determines whether said mask is interleaving.
static bool isDeinterleavingMask(ArrayRef< int > Mask)
Checks the given mask, and determines whether said mask is deinterleaving.
static bool isInstructionPairMul(Instruction *A, Instruction *B)
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool runOnFunction(Function &F, bool PostInlining)
mir Rename Register Operands
PowerPC Reduce CR logical Operation
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
DEMANGLE_DUMP_METHOD void dump() const
A container for analyses that lazily runs them and caches their results.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
bool allowContract() const
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
An analysis over an "outer" IR unit that provides access to an analysis manager over an "inner" IR un...
const BasicBlock * getParent() const
const Function * getFunction() const
Return the function this instruction belongs to.
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
This instruction constructs a fixed permutation of two input vectors.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
virtual bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const
Does this target support complex deinterleaving with the given operation and type.
virtual Value * createComplexDeinterleavingIR(Instruction *I, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const
Create the IR node for the given complex deinterleaving operation.
virtual bool isComplexDeinterleavingSupported() const
Does this target support complex deinterleaving.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
iterator_range< user_iterator > users()
This class implements an extremely fast bulk output stream that can only output to a stream.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ BR
Control flow instructions. These all have token chains.
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
BinaryOp_match< LHS, RHS, Instruction::FMul > m_FMul(const LHS &L, const RHS &R)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
void initializeComplexDeinterleavingLegacyPassPass(PassRegistry &)
ComplexDeinterleavingOperation
FunctionPass * createComplexDeinterleavingPass(const TargetMachine *TM)
This pass implements generation of target-specific intrinsics to support handling of complex number a...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
ComplexDeinterleavingRotation
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.