LLVM  16.0.0git
X86PartialReduction.cpp
Go to the documentation of this file.
1 //===-- X86PartialReduction.cpp -------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass looks for add instructions used by a horizontal reduction to see
10 // if we might be able to use pmaddwd or psadbw. Some cases of this require
11 // cross basic block knowledge and can't be done in SelectionDAG.
12 //
13 //===----------------------------------------------------------------------===//
14 
#include "X86.h"
#include "X86TargetMachine.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "x86-partial-reduction"
32 
namespace {

// Legacy-PM function pass that rewrites the leaves of horizontal add
// reductions (vector multiplies, absolute differences) into forms that
// SelectionDAG can match to pmaddwd or psadbw.
class X86PartialReduction : public FunctionPass {
  const DataLayout *DL;   // Module data layout; set in runOnFunction.
  const X86Subtarget *ST; // Subtarget for the current function; set in runOnFunction.

public:
  static char ID; // Pass identification, replacement for typeid.

  X86PartialReduction() : FunctionPass(ID) { }

  bool runOnFunction(Function &Fn) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // The pass only rewrites instructions inside blocks; it never changes
    // control flow.
    AU.setPreservesCFG();
  }

  StringRef getPassName() const override {
    return "X86 Partial Reduction";
  }

private:
  // Try to rewrite the multiply leaf 'Op' into an even/odd shuffle + add that
  // ISel can match to pmaddwd. ReduceInOneBB indicates whether the whole
  // reduction pyramid lives in one basic block.
  bool tryMAddReplacement(Instruction *Op, bool ReduceInOneBB);
  // Try to rewrite the abs-difference leaf 'Op' into psadbw intrinsic calls.
  bool trySADReplacement(Instruction *Op);
};
}
59 
61  return new X86PartialReduction();
62 }
63 
65 
66 INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE,
67  "X86 Partial Reduction", false, false)
68 
69 // This function should be aligned with detectExtMul() in X86ISelLowering.cpp.
70 static bool matchVPDPBUSDPattern(const X86Subtarget *ST, BinaryOperator *Mul,
72  if (!ST->hasVNNI() && !ST->hasAVXVNNI())
73  return false;
74 
77 
78  if (isa<SExtInst>(LHS))
79  std::swap(LHS, RHS);
80 
81  auto IsFreeTruncation = [&](Value *Op) {
82  if (auto *Cast = dyn_cast<CastInst>(Op)) {
83  if (Cast->getParent() == Mul->getParent() &&
84  (Cast->getOpcode() == Instruction::SExt ||
85  Cast->getOpcode() == Instruction::ZExt) &&
86  Cast->getOperand(0)->getType()->getScalarSizeInBits() <= 8)
87  return true;
88  }
89 
90  return isa<Constant>(Op);
91  };
92 
93  // (dpbusd (zext a), (sext, b)). Since the first operand should be unsigned
94  // value, we need to check LHS is zero extended value. RHS should be signed
95  // value, so we just check the signed bits.
96  if ((IsFreeTruncation(LHS) &&
97  computeKnownBits(LHS, *DL).countMaxActiveBits() <= 8) &&
99  return true;
100 
101  return false;
102 }
103 
104 bool X86PartialReduction::tryMAddReplacement(Instruction *Op,
105  bool ReduceInOneBB) {
106  if (!ST->hasSSE2())
107  return false;
108 
109  // Need at least 8 elements.
110  if (cast<FixedVectorType>(Op->getType())->getNumElements() < 8)
111  return false;
112 
113  // Element type should be i32.
114  if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
115  return false;
116 
117  auto *Mul = dyn_cast<BinaryOperator>(Op);
118  if (!Mul || Mul->getOpcode() != Instruction::Mul)
119  return false;
120 
121  Value *LHS = Mul->getOperand(0);
122  Value *RHS = Mul->getOperand(1);
123 
124  // If the target support VNNI, leave it to ISel to combine reduce operation
125  // to VNNI instruction.
126  // TODO: we can support transforming reduce to VNNI intrinsic for across block
127  // in this pass.
128  if (ReduceInOneBB && matchVPDPBUSDPattern(ST, Mul, DL))
129  return false;
130 
131  // LHS and RHS should be only used once or if they are the same then only
132  // used twice. Only check this when SSE4.1 is enabled and we have zext/sext
133  // instructions, otherwise we use punpck to emulate zero extend in stages. The
134  // trunc/ we need to do likely won't introduce new instructions in that case.
135  if (ST->hasSSE41()) {
136  if (LHS == RHS) {
137  if (!isa<Constant>(LHS) && !LHS->hasNUses(2))
138  return false;
139  } else {
140  if (!isa<Constant>(LHS) && !LHS->hasOneUse())
141  return false;
142  if (!isa<Constant>(RHS) && !RHS->hasOneUse())
143  return false;
144  }
145  }
146 
147  auto CanShrinkOp = [&](Value *Op) {
148  auto IsFreeTruncation = [&](Value *Op) {
149  if (auto *Cast = dyn_cast<CastInst>(Op)) {
150  if (Cast->getParent() == Mul->getParent() &&
151  (Cast->getOpcode() == Instruction::SExt ||
152  Cast->getOpcode() == Instruction::ZExt) &&
153  Cast->getOperand(0)->getType()->getScalarSizeInBits() <= 16)
154  return true;
155  }
156 
157  return isa<Constant>(Op);
158  };
159 
160  // If the operation can be freely truncated and has enough sign bits we
161  // can shrink.
162  if (IsFreeTruncation(Op) &&
163  ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
164  return true;
165 
166  // SelectionDAG has limited support for truncating through an add or sub if
167  // the inputs are freely truncatable.
168  if (auto *BO = dyn_cast<BinaryOperator>(Op)) {
169  if (BO->getParent() == Mul->getParent() &&
170  IsFreeTruncation(BO->getOperand(0)) &&
171  IsFreeTruncation(BO->getOperand(1)) &&
172  ComputeNumSignBits(Op, *DL, 0, nullptr, Mul) > 16)
173  return true;
174  }
175 
176  return false;
177  };
178 
179  // Both Ops need to be shrinkable.
180  if (!CanShrinkOp(LHS) && !CanShrinkOp(RHS))
181  return false;
182 
183  IRBuilder<> Builder(Mul);
184 
185  auto *MulTy = cast<FixedVectorType>(Op->getType());
186  unsigned NumElts = MulTy->getNumElements();
187 
188  // Extract even elements and odd elements and add them together. This will
189  // be pattern matched by SelectionDAG to pmaddwd. This instruction will be
190  // half the original width.
191  SmallVector<int, 16> EvenMask(NumElts / 2);
192  SmallVector<int, 16> OddMask(NumElts / 2);
193  for (int i = 0, e = NumElts / 2; i != e; ++i) {
194  EvenMask[i] = i * 2;
195  OddMask[i] = i * 2 + 1;
196  }
197  // Creating a new mul so the replaceAllUsesWith below doesn't replace the
198  // uses in the shuffles we're creating.
199  Value *NewMul = Builder.CreateMul(Mul->getOperand(0), Mul->getOperand(1));
200  Value *EvenElts = Builder.CreateShuffleVector(NewMul, NewMul, EvenMask);
201  Value *OddElts = Builder.CreateShuffleVector(NewMul, NewMul, OddMask);
202  Value *MAdd = Builder.CreateAdd(EvenElts, OddElts);
203 
204  // Concatenate zeroes to extend back to the original type.
205  SmallVector<int, 32> ConcatMask(NumElts);
206  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
207  Value *Zero = Constant::getNullValue(MAdd->getType());
208  Value *Concat = Builder.CreateShuffleVector(MAdd, Zero, ConcatMask);
209 
211  Mul->eraseFromParent();
212 
213  return true;
214 }
215 
216 bool X86PartialReduction::trySADReplacement(Instruction *Op) {
217  if (!ST->hasSSE2())
218  return false;
219 
220  // TODO: There's nothing special about i32, any integer type above i16 should
221  // work just as well.
222  if (!cast<VectorType>(Op->getType())->getElementType()->isIntegerTy(32))
223  return false;
224 
225  Value *LHS;
226  if (match(Op, PatternMatch::m_Intrinsic<Intrinsic::abs>())) {
227  LHS = Op->getOperand(0);
228  } else {
229  // Operand should be a select.
230  auto *SI = dyn_cast<SelectInst>(Op);
231  if (!SI)
232  return false;
233 
234  Value *RHS;
235  // Select needs to implement absolute value.
236  auto SPR = matchSelectPattern(SI, LHS, RHS);
237  if (SPR.Flavor != SPF_ABS)
238  return false;
239  }
240 
241  // Need a subtract of two values.
242  auto *Sub = dyn_cast<BinaryOperator>(LHS);
243  if (!Sub || Sub->getOpcode() != Instruction::Sub)
244  return false;
245 
246  // Look for zero extend from i8.
247  auto getZeroExtendedVal = [](Value *Op) -> Value * {
248  if (auto *ZExt = dyn_cast<ZExtInst>(Op))
249  if (cast<VectorType>(ZExt->getOperand(0)->getType())
250  ->getElementType()
251  ->isIntegerTy(8))
252  return ZExt->getOperand(0);
253 
254  return nullptr;
255  };
256 
257  // Both operands of the subtract should be extends from vXi8.
258  Value *Op0 = getZeroExtendedVal(Sub->getOperand(0));
259  Value *Op1 = getZeroExtendedVal(Sub->getOperand(1));
260  if (!Op0 || !Op1)
261  return false;
262 
264 
265  auto *OpTy = cast<FixedVectorType>(Op->getType());
266  unsigned NumElts = OpTy->getNumElements();
267 
268  unsigned IntrinsicNumElts;
269  Intrinsic::ID IID;
270  if (ST->hasBWI() && NumElts >= 64) {
271  IID = Intrinsic::x86_avx512_psad_bw_512;
272  IntrinsicNumElts = 64;
273  } else if (ST->hasAVX2() && NumElts >= 32) {
274  IID = Intrinsic::x86_avx2_psad_bw;
275  IntrinsicNumElts = 32;
276  } else {
277  IID = Intrinsic::x86_sse2_psad_bw;
278  IntrinsicNumElts = 16;
279  }
280 
281  Function *PSADBWFn = Intrinsic::getDeclaration(Op->getModule(), IID);
282 
283  if (NumElts < 16) {
284  // Pad input with zeroes.
285  SmallVector<int, 32> ConcatMask(16);
286  for (unsigned i = 0; i != NumElts; ++i)
287  ConcatMask[i] = i;
288  for (unsigned i = NumElts; i != 16; ++i)
289  ConcatMask[i] = (i % NumElts) + NumElts;
290 
291  Value *Zero = Constant::getNullValue(Op0->getType());
292  Op0 = Builder.CreateShuffleVector(Op0, Zero, ConcatMask);
293  Op1 = Builder.CreateShuffleVector(Op1, Zero, ConcatMask);
294  NumElts = 16;
295  }
296 
297  // Intrinsics produce vXi64 and need to be casted to vXi32.
298  auto *I32Ty =
299  FixedVectorType::get(Builder.getInt32Ty(), IntrinsicNumElts / 4);
300 
301  assert(NumElts % IntrinsicNumElts == 0 && "Unexpected number of elements!");
302  unsigned NumSplits = NumElts / IntrinsicNumElts;
303 
304  // First collect the pieces we need.
305  SmallVector<Value *, 4> Ops(NumSplits);
306  for (unsigned i = 0; i != NumSplits; ++i) {
307  SmallVector<int, 64> ExtractMask(IntrinsicNumElts);
308  std::iota(ExtractMask.begin(), ExtractMask.end(), i * IntrinsicNumElts);
309  Value *ExtractOp0 = Builder.CreateShuffleVector(Op0, Op0, ExtractMask);
310  Value *ExtractOp1 = Builder.CreateShuffleVector(Op1, Op0, ExtractMask);
311  Ops[i] = Builder.CreateCall(PSADBWFn, {ExtractOp0, ExtractOp1});
312  Ops[i] = Builder.CreateBitCast(Ops[i], I32Ty);
313  }
314 
315  assert(isPowerOf2_32(NumSplits) && "Expected power of 2 splits");
316  unsigned Stages = Log2_32(NumSplits);
317  for (unsigned s = Stages; s > 0; --s) {
318  unsigned NumConcatElts =
319  cast<FixedVectorType>(Ops[0]->getType())->getNumElements() * 2;
320  for (unsigned i = 0; i != 1U << (s - 1); ++i) {
321  SmallVector<int, 64> ConcatMask(NumConcatElts);
322  std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
323  Ops[i] = Builder.CreateShuffleVector(Ops[i*2], Ops[i*2+1], ConcatMask);
324  }
325  }
326 
327  // At this point the final value should be in Ops[0]. Now we need to adjust
328  // it to the final original type.
329  NumElts = cast<FixedVectorType>(OpTy)->getNumElements();
330  if (NumElts == 2) {
331  // Extract down to 2 elements.
332  Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef<int>{0, 1});
333  } else if (NumElts >= 8) {
334  SmallVector<int, 32> ConcatMask(NumElts);
335  unsigned SubElts =
336  cast<FixedVectorType>(Ops[0]->getType())->getNumElements();
337  for (unsigned i = 0; i != SubElts; ++i)
338  ConcatMask[i] = i;
339  for (unsigned i = SubElts; i != NumElts; ++i)
340  ConcatMask[i] = (i % SubElts) + SubElts;
341 
342  Value *Zero = Constant::getNullValue(Ops[0]->getType());
343  Ops[0] = Builder.CreateShuffleVector(Ops[0], Zero, ConcatMask);
344  }
345 
346  Op->replaceAllUsesWith(Ops[0]);
347  Op->eraseFromParent();
348 
349  return true;
350 }
351 
352 // Walk backwards from the ExtractElementInst and determine if it is the end of
353 // a horizontal reduction. Return the input to the reduction if we find one.
355  bool &ReduceInOneBB) {
356  ReduceInOneBB = true;
357  // Make sure we're extracting index 0.
358  auto *Index = dyn_cast<ConstantInt>(EE.getIndexOperand());
359  if (!Index || !Index->isNullValue())
360  return nullptr;
361 
362  const auto *BO = dyn_cast<BinaryOperator>(EE.getVectorOperand());
363  if (!BO || BO->getOpcode() != Instruction::Add || !BO->hasOneUse())
364  return nullptr;
365  if (EE.getParent() != BO->getParent())
366  ReduceInOneBB = false;
367 
368  unsigned NumElems = cast<FixedVectorType>(BO->getType())->getNumElements();
369  // Ensure the reduction size is a power of 2.
370  if (!isPowerOf2_32(NumElems))
371  return nullptr;
372 
373  const Value *Op = BO;
374  unsigned Stages = Log2_32(NumElems);
375  for (unsigned i = 0; i != Stages; ++i) {
376  const auto *BO = dyn_cast<BinaryOperator>(Op);
377  if (!BO || BO->getOpcode() != Instruction::Add)
378  return nullptr;
379  if (EE.getParent() != BO->getParent())
380  ReduceInOneBB = false;
381 
382  // If this isn't the first add, then it should only have 2 users, the
383  // shuffle and another add which we checked in the previous iteration.
384  if (i != 0 && !BO->hasNUses(2))
385  return nullptr;
386 
387  Value *LHS = BO->getOperand(0);
388  Value *RHS = BO->getOperand(1);
389 
390  auto *Shuffle = dyn_cast<ShuffleVectorInst>(LHS);
391  if (Shuffle) {
392  Op = RHS;
393  } else {
394  Shuffle = dyn_cast<ShuffleVectorInst>(RHS);
395  Op = LHS;
396  }
397 
398  // The first operand of the shuffle should be the same as the other operand
399  // of the bin op.
400  if (!Shuffle || Shuffle->getOperand(0) != Op)
401  return nullptr;
402 
403  // Verify the shuffle has the expected (at this stage of the pyramid) mask.
404  unsigned MaskEnd = 1 << i;
405  for (unsigned Index = 0; Index < MaskEnd; ++Index)
406  if (Shuffle->getMaskValue(Index) != (int)(MaskEnd + Index))
407  return nullptr;
408  }
409 
410  return const_cast<Value *>(Op);
411 }
412 
413 // See if this BO is reachable from this Phi by walking forward through single
414 // use BinaryOperators with the same opcode. If we get back then we know we've
415 // found a loop and it is safe to step through this Add to find more leaves.
416 static bool isReachableFromPHI(PHINode *Phi, BinaryOperator *BO) {
417  // The PHI itself should only have one use.
418  if (!Phi->hasOneUse())
419  return false;
420 
421  Instruction *U = cast<Instruction>(*Phi->user_begin());
422  if (U == BO)
423  return true;
424 
425  while (U->hasOneUse() && U->getOpcode() == BO->getOpcode())
426  U = cast<Instruction>(*U->user_begin());
427 
428  return U == BO;
429 }
430 
431 // Collect all the leaves of the tree of adds that feeds into the horizontal
432 // reduction. Root is the Value that is used by the horizontal reduction.
433 // We look through single use phis, single use adds, or adds that are used by
434 // a phi that forms a loop with the add.
436  SmallPtrSet<Value *, 8> Visited;
437  SmallVector<Value *, 8> Worklist;
438  Worklist.push_back(Root);
439 
440  while (!Worklist.empty()) {
441  Value *V = Worklist.pop_back_val();
442  if (!Visited.insert(V).second)
443  continue;
444 
445  if (auto *PN = dyn_cast<PHINode>(V)) {
446  // PHI node should have single use unless it is the root node, then it
447  // has 2 uses.
448  if (!PN->hasNUses(PN == Root ? 2 : 1))
449  break;
450 
451  // Push incoming values to the worklist.
452  append_range(Worklist, PN->incoming_values());
453 
454  continue;
455  }
456 
457  if (auto *BO = dyn_cast<BinaryOperator>(V)) {
458  if (BO->getOpcode() == Instruction::Add) {
459  // Simple case. Single use, just push its operands to the worklist.
460  if (BO->hasNUses(BO == Root ? 2 : 1)) {
461  append_range(Worklist, BO->operands());
462  continue;
463  }
464 
465  // If there is additional use, make sure it is an unvisited phi that
466  // gets us back to this node.
467  if (BO->hasNUses(BO == Root ? 3 : 2)) {
468  PHINode *PN = nullptr;
469  for (auto *U : BO->users())
470  if (auto *P = dyn_cast<PHINode>(U))
471  if (!Visited.count(P))
472  PN = P;
473 
474  // If we didn't find a 2-input PHI then this isn't a case we can
475  // handle.
476  if (!PN || PN->getNumIncomingValues() != 2)
477  continue;
478 
479  // Walk forward from this phi to see if it reaches back to this add.
480  if (!isReachableFromPHI(PN, BO))
481  continue;
482 
483  // The phi forms a loop with this Add, push its operands.
484  append_range(Worklist, BO->operands());
485  }
486  }
487  }
488 
489  // Not an add or phi, make it a leaf.
490  if (auto *I = dyn_cast<Instruction>(V)) {
491  if (!V->hasNUses(I == Root ? 2 : 1))
492  continue;
493 
494  // Add this as a leaf.
495  Leaves.push_back(I);
496  }
497  }
498 }
499 
501  if (skipFunction(F))
502  return false;
503 
504  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
505  if (!TPC)
506  return false;
507 
508  auto &TM = TPC->getTM<X86TargetMachine>();
509  ST = TM.getSubtargetImpl(F);
510 
511  DL = &F.getParent()->getDataLayout();
512 
513  bool MadeChange = false;
514  for (auto &BB : F) {
515  for (auto &I : BB) {
516  auto *EE = dyn_cast<ExtractElementInst>(&I);
517  if (!EE)
518  continue;
519 
520  bool ReduceInOneBB;
521  // First find a reduction tree.
522  // FIXME: Do we need to handle other opcodes than Add?
523  Value *Root = matchAddReduction(*EE, ReduceInOneBB);
524  if (!Root)
525  continue;
526 
528  collectLeaves(Root, Leaves);
529 
530  for (Instruction *I : Leaves) {
531  if (tryMAddReplacement(I, ReduceInOneBB)) {
532  MadeChange = true;
533  continue;
534  }
535 
536  // Don't do SAD matching on the root node. SelectionDAG already
537  // has support for that and currently generates better code.
538  if (I != Root && trySADReplacement(I))
539  MadeChange = true;
540  }
541  }
542  }
543 
544  return MadeChange;
545 }
i
i
Definition: README.txt:29
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
INITIALIZE_PASS
INITIALIZE_PASS(X86PartialReduction, DEBUG_TYPE, "X86 Partial Reduction", false, false) static bool matchVPDPBUSDPattern(const X86Subtarget *ST
collectLeaves
static void collectLeaves(Value *Root, SmallVectorImpl< Instruction * > &Leaves)
Definition: X86PartialReduction.cpp:435
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Value::hasOneUse
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1421
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:104
IntrinsicInst.h
DL
BinaryOperator const DataLayout * DL
Definition: X86PartialReduction.cpp:71
llvm::ExtractElementInst
This instruction extracts a single (scalar) element from a VectorType value.
Definition: Instructions.h:1872
llvm::Function
Definition: Function.h:60
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
Pass.h
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1181
llvm::X86Subtarget
Definition: X86Subtarget.h:52
llvm::IRBuilder<>
matchAddReduction
static Value * matchAddReduction(const ExtractElementInst &EE, bool &ReduceInOneBB)
Definition: X86PartialReduction.cpp:354
ValueTracking.h
llvm::ComputeMaxSignificantBits
unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr)
Get the upper bound on bit size for this Value Op as a signed integer.
Definition: ValueTracking.cpp:423
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
llvm::ComputeNumSignBits
unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Return the number of times the sign bit of the register is replicated into the other bits.
Definition: ValueTracking.cpp:415
Operator.h
llvm::Value::user_begin
user_iterator user_begin()
Definition: Value.h:397
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::matchSelectPattern
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS, Instruction::CastOps *CastOp=nullptr, unsigned Depth=0)
Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind and providing the out param...
Definition: ValueTracking.cpp:6392
IsFreeTruncation
if(isa< SExtInst >(LHS)) std auto IsFreeTruncation
Definition: X86PartialReduction.cpp:81
F
#define F(x, y, z)
Definition: MD5.cpp:55
KnownBits.h
llvm::MipsISD::MAdd
@ MAdd
Definition: MipsISelLowering.h:139
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:164
X86.h
Constants.h
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::createX86PartialReductionPass
FunctionPass * createX86PartialReductionPass()
This pass optimizes arithmetic based on knowledge that is only used by a reduction sequence and is th...
Definition: X86PartialReduction.cpp:60
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:547
llvm::BinaryOperator::getOpcode
BinaryOps getOpcode() const
Definition: InstrTypes.h:392
llvm::Instruction
Definition: Instruction.h:42
Concat
static constexpr int Concat[]
Definition: X86InterleavedAccess.cpp:239
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
isReachableFromPHI
static bool isReachableFromPHI(PHINode *Phi, BinaryOperator *BO)
Definition: X86PartialReduction.cpp:416
PatternMatch.h
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:684
llvm::PHINode::getNumIncomingValues
unsigned getNumIncomingValues() const
Return the number of incoming edges.
Definition: Instructions.h:2791
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:77
Index
uint32_t Index
Definition: ELFObjHandler.cpp:82
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
llvm::ARM_MB::ST
@ ST
Definition: ARMBaseInfo.h:73
s
multiplies can be turned into SHL s
Definition: README.txt:370
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
DEBUG_TYPE
#define DEBUG_TYPE
Definition: X86PartialReduction.cpp:31
I
#define I(x, y, z)
Definition: MD5.cpp:58
TargetPassConfig.h
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:222
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::SPF_ABS
@ SPF_ABS
Floating point maxnum.
Definition: ValueTracking.h:695
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::X86TargetMachine
Definition: X86TargetMachine.h:28
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
getType
static M68kRelType getType(unsigned Kind, MCSymbolRefExpr::VariantKind &Modifier, bool &IsPCRel)
Definition: M68kELFObjectWriter.cpp:48
llvm::ArrayRef< int >
llvm::BinaryOperator
Definition: InstrTypes.h:188
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::ExtractElementInst::getVectorOperand
Value * getVectorOperand()
Definition: Instructions.h:1901
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1818
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::Constant::getNullValue
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:350
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
X86TargetMachine.h
llvm::Value::hasNUses
bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
Definition: Value.cpp:148
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
Instructions.h
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::SmallVectorImpl::pop_back_val
T pop_back_val()
Definition: SmallVector.h:659
llvm::PHINode
Definition: Instructions.h:2699
llvm::ExtractElementInst::getIndexOperand
Value * getIndexOperand()
Definition: Instructions.h:1902
llvm::SmallVectorImpl< Instruction * >
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38