LLVM  14.0.0git
SystemZTDC.cpp
Go to the documentation of this file.
1 //===-- SystemZTDC.cpp - Utilize Test Data Class instruction --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass looks for instructions that can be replaced by a Test Data Class
10 // instruction, and replaces them when profitable.
11 //
12 // Roughly, the following rules are recognized:
13 //
14 // 1: fcmp pred X, 0 -> tdc X, mask
15 // 2: fcmp pred X, +-inf -> tdc X, mask
16 // 3: fcmp pred X, +-minnorm -> tdc X, mask
17 // 4: tdc (fabs X), mask -> tdc X, newmask
18 // 5: icmp slt (bitcast float X to int), 0 -> tdc X, mask [i.e. signbit]
19 // 6: icmp sgt (bitcast float X to int), -1 -> tdc X, mask
20 // 7: icmp ne/eq (call @llvm.s390.tdc.*(X, mask)) -> tdc X, mask/~mask
21 // 8: and i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 & M2)
22 // 9: or i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 | M2)
23 // 10: xor i1 (tdc X, M1), (tdc X, M2) -> tdc X, (M1 ^ M2)
24 //
25 // The pass works in 4 steps:
26 //
27 // 1. All fcmp and icmp instructions in a function are checked for a match
28 // with rules 1-3 and 5-7. Their TDC equivalents are stored in
29 // the ConvertedInsts mapping. If the operand of a fcmp instruction is
30 // a fabs, it's also folded according to rule 4.
31 // 2. All and/or/xor i1 instructions both of whose operands have already
32 // been mapped are themselves mapped according to rules 8-10.
33 // LogicOpsWorklist holds the instructions that still have to be checked.
34 // 3. All mapped instructions that are considered worthy of conversion (i.e.
35 // replacing them will actually simplify the final code) are replaced
36 // with a call to the s390.tdc intrinsic.
37 // 4. All intermediate results of replaced instructions are removed if unused.
38 //
39 // Instructions that match rules 1-3 are considered unworthy of conversion
40 // on their own (since a comparison instruction is superior), but are mapped
41 // in the hopes of folding the result using rules 4 and 8-10 (likely removing
42 // the original comparison in the process).
43 //
44 //===----------------------------------------------------------------------===//
45 
46 #include "SystemZ.h"
47 #include "SystemZSubtarget.h"
48 #include "llvm/ADT/MapVector.h"
50 #include "llvm/IR/Constants.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/IR/InstIterator.h"
53 #include "llvm/IR/Instructions.h"
54 #include "llvm/IR/IntrinsicInst.h"
55 #include "llvm/IR/IntrinsicsS390.h"
57 #include "llvm/IR/Module.h"
59 #include <deque>
60 #include <set>
61 
62 using namespace llvm;
63 
64 namespace llvm {
66 }
67 
68 namespace {
69 
// FunctionPass that records, for fcmp/icmp and i1 and/or/xor instructions,
// an equivalent (TDC operand, TDC mask, worthy flag) triple, and later
// rewrites the worthy ones as calls to the s390.tdc intrinsic.
//
// All three containers below are per-function scratch state: runOnFunction
// clears them on entry.
70 class SystemZTDCPass : public FunctionPass {
71 public:
// Static pass identifier handed to the FunctionPass base constructor.
72  static char ID;
// NOTE(review): source line 74 is not shown in this listing, so the
// constructor body is incomplete here. It presumably calls
// initializeSystemZTDCPassPass on the global PassRegistry (both names appear
// in the cross-reference index) - confirm against the full source.
73  SystemZTDCPass() : FunctionPass(ID) {
75  }
76 
// Entry point of the pass; returns whether the function was modified.
77  bool runOnFunction(Function &F) override;
78 
// NOTE(review): source line 80 is not shown in this listing. Since
// runOnFunction calls getAnalysis<TargetPassConfig>(), this presumably
// registers TargetPassConfig as a required analysis - confirm.
79  void getAnalysisUsage(AnalysisUsage &AU) const override {
81  }
82 
83 private:
84  // Maps seen instructions that can be mapped to a TDC, values are
85  // (TDC operand, TDC mask, worthy flag) triples.
// NOTE(review): the declaration of ConvertedInsts (source line 86) is not
// shown in this listing. It is used below and in runOnFunction through
// operator[], count(), empty(), clear() and reverse iteration; presumably a
// MapVector keyed by Instruction * - confirm against the full source.
87  // The queue of and/or/xor i1 instructions to be potentially folded.
// Note that runOnFunction takes entries from the back of this vector, so the
// most recently pushed instruction is examined first.
88  std::vector<BinaryOperator *> LogicOpsWorklist;
89  // Instructions matched while folding, to be removed at the end if unused.
90  std::set<Instruction *> PossibleJunk;
91 
92  // Tries to convert a fcmp instruction.
93  void convertFCmp(CmpInst &I);
94 
95  // Tries to convert an icmp instruction.
96  void convertICmp(CmpInst &I);
97 
98  // Tries to convert an i1 and/or/xor instruction, whose both operands
99  // have been already converted.
100  void convertLogicOp(BinaryOperator &I);
101 
102  // Marks an instruction as converted - adds it to ConvertedInsts and adds
103  // any and/or/xor i1 users to the queue.
//
// I      - the instruction that has a TDC equivalent.
// V      - the value the TDC would test.
// Mask   - the TDC mask equivalent to I.
// Worthy - whether replacing I by a TDC call is considered a win by itself.
//
// A user is pushed once per use, so the same BinaryOperator can land in the
// worklist several times; runOnFunction skips entries that are already mapped.
104  void converted(Instruction *I, Value *V, int Mask, bool Worthy) {
105  ConvertedInsts[I] = std::make_tuple(V, Mask, Worthy);
106  auto &M = *I->getFunction()->getParent();
107  auto &Ctx = M.getContext();
108  for (auto *U : I->users()) {
// Only i1-typed and/or/xor binary operators are candidates for rules 8-10;
// every other kind of user is ignored here.
109  auto *LI = dyn_cast<BinaryOperator>(U);
110  if (LI && LI->getType() == Type::getInt1Ty(Ctx) &&
111  (LI->getOpcode() == Instruction::And ||
112  LI->getOpcode() == Instruction::Or ||
113  LI->getOpcode() == Instruction::Xor)) {
114  LogicOpsWorklist.push_back(LI);
115  }
116  }
117  }
118 };
119 
120 } // end anonymous namespace
121 
// Definition of the static pass identifier declared in the class.
122 char SystemZTDCPass::ID = 0;
// Registers the pass via the INITIALIZE_PASS macro. The string arguments are
// presumably the pass argument name and its human-readable description, and
// the two false flags presumably mean not-CFG-only and not-an-analysis -
// confirm against the macro definition.
123 INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc",
124  "SystemZ Test Data Class optimization", false, false)
125 
// NOTE(review): source line 126, the function header, is not shown in this
// listing. According to the cross-reference index further down the page the
// function is FunctionPass *llvm::createSystemZTDCPass(). It returns a newly
// allocated SystemZTDCPass.
127  return new SystemZTDCPass();
128 }
129 
// Tries to express an fcmp as a TDC mask test (rules 1-4 of the file header).
//
// Only comparisons whose operand 1 is a ConstantFP equal to zero, +-infinity
// or +-(smallest normalized value of the operand type) are handled; anything
// else returns without recording the instruction. On success the result is
// stored through converted(). If operand 0 is a call to the fabs intrinsic,
// the fabs is folded away and its argument becomes the TDC operand.
//
// I - the fcmp instruction to examine. It is not modified here.
130 void SystemZTDCPass::convertFCmp(CmpInst &I) {
131  Value *Op0 = I.getOperand(0);
132  auto *Const = dyn_cast<ConstantFP>(I.getOperand(1));
133  auto Pred = I.getPredicate();
134  // Only comparisons with consts are interesting.
135  if (!Const)
136  return;
137  // Compute the smallest normal number (and its negation).
138  auto &Sem = Op0->getType()->getFltSemantics();
139  APFloat Smallest = APFloat::getSmallestNormalized(Sem);
140  APFloat NegSmallest = Smallest;
141  NegSmallest.changeSign();
142  // Check if Const is one of our recognized consts.
// WhichConst selects a row of the Masks table below:
// 0 = zero, 1 = +inf, 2 = -inf, 3 = +minnorm, 4 = -minnorm.
143  int WhichConst;
144  if (Const->isZero()) {
145  // All comparisons with 0 can be converted.
146  WhichConst = 0;
147  } else if (Const->isInfinity()) {
148  // Likewise for infinities.
149  WhichConst = Const->isNegative() ? 2 : 1;
150  } else if (Const->isExactlyValue(Smallest)) {
151  // For Smallest, we cannot do EQ separately from GT.
// FCMP_OGE is the EQ and GT predicate bits together. The test below bails
// out when exactly one of the two bits is set, i.e. it only accepts
// predicates that contain both or neither.
152  if ((Pred & CmpInst::FCMP_OGE) != CmpInst::FCMP_OGE &&
153  (Pred & CmpInst::FCMP_OGE) != 0)
154  return;
155  WhichConst = 3;
156  } else if (Const->isExactlyValue(NegSmallest)) {
157  // Likewise for NegSmallest, we cannot do EQ separately from LT.
// Same both-or-neither test, this time on the EQ and LT bits (FCMP_OLE).
158  if ((Pred & CmpInst::FCMP_OLE) != CmpInst::FCMP_OLE &&
159  (Pred & CmpInst::FCMP_OLE) != 0)
160  return;
161  WhichConst = 4;
162  } else {
163  // Not one of our special constants.
164  return;
165  }
166  // Partial masks to use for EQ, GT, LT, UN comparisons, respectively.
// NOTE(review): this listing skips several source lines inside the
// initializer below (the embedded line numbers jump, for example from 169 to
// 172), so most rows appear with fewer than four entries and some entries
// show only the tail of a parenthesised expression. Consult the full source
// for the complete table.
167  static const int Masks[][4] = {
168  { // 0
169  SystemZ::TDCMASK_ZERO, // eq
172  SystemZ::TDCMASK_NAN, // un
173  },
174  { // inf
176  0, // gt
181  SystemZ::TDCMASK_NAN, // un
182  },
183  { // -inf
189  0, // lt
190  SystemZ::TDCMASK_NAN, // un
191  },
192  { // minnorm
193  0, // eq (unsupported)
195  SystemZ::TDCMASK_INFINITY_PLUS), // gt (actually ge)
199  SystemZ::TDCMASK_NAN, // un
200  },
201  { // -minnorm
202  0, // eq (unsupported)
207  SystemZ::TDCMASK_INFINITY_MINUS), // lt (actually le)
208  SystemZ::TDCMASK_NAN, // un
209  }
210  };
211  // Construct the mask as a combination of the partial masks.
// The predicate is treated as a bit set: each of the OEQ, OGT, OLT and UNO
// bits that is set ORs in the matching column of the selected row.
212  int Mask = 0;
213  if (Pred & CmpInst::FCMP_OEQ)
214  Mask |= Masks[WhichConst][0];
215  if (Pred & CmpInst::FCMP_OGT)
216  Mask |= Masks[WhichConst][1];
217  if (Pred & CmpInst::FCMP_OLT)
218  Mask |= Masks[WhichConst][2];
219  if (Pred & CmpInst::FCMP_UNO)
220  Mask |= Masks[WhichConst][3];
221  // A lone fcmp is unworthy of tdc conversion on its own, but may become
222  // worthy if combined with fabs.
223  bool Worthy = false;
224  if (CallInst *CI = dyn_cast<CallInst>(Op0)) {
225  Function *F = CI->getCalledFunction();
226  if (F && F->getIntrinsicID() == Intrinsic::fabs) {
227  // Fold with fabs - adjust the mask appropriately.
// NOTE(review): source line 228 is not shown in this listing; only the
// second adjustment step is visible. Presumably each minus-class bit sits
// one position below its plus-class bit, so the shift mirrors the plus bits
// onto the minus bits - confirm against SystemZ.h and the full source.
229  Mask |= Mask >> 1;
// From here on the TDC tests the argument of fabs directly.
230  Op0 = CI->getArgOperand(0);
231  // A combination of fcmp with fabs is a win, unless the constant
232  // involved is 0 (which is handled by later passes).
233  Worthy = WhichConst != 0;
// The fabs call may end up without users; remember it for the cleanup at
// the end of runOnFunction.
234  PossibleJunk.insert(CI);
235  }
236  }
237  converted(&I, Op0, Mask, Worthy);
238 }
239 
// Tries to express an icmp as a TDC mask test (rules 5-7 of the file header).
//
// Operand 1 must be a ConstantInt. Two shapes of operand 0 are recognised:
//  - a bitcast from float, double or fp128, compared slt 0 or sgt -1, which
//    is a sign bit test and is recorded as worthy;
//  - a call to the s390.tdc intrinsic with a constant mask, compared eq or
//    ne with 0, which is recorded as not worthy.
// Any other icmp returns without recording anything.
//
// I - the icmp instruction to examine. It is not modified here.
240 void SystemZTDCPass::convertICmp(CmpInst &I) {
241  Value *Op0 = I.getOperand(0);
242  auto *Const = dyn_cast<ConstantInt>(I.getOperand(1));
243  auto Pred = I.getPredicate();
244  // All our icmp rules involve comparisons with consts.
245  if (!Const)
246  return;
247  if (auto *Cast = dyn_cast<BitCastInst>(Op0)) {
248  // Check for icmp+bitcast used for signbit.
// Bail out unless the bitcast source is one of float, double or fp128.
249  if (!Cast->getSrcTy()->isFloatTy() &&
250  !Cast->getSrcTy()->isDoubleTy() &&
251  !Cast->getSrcTy()->isFP128Ty())
252  return;
253  Value *V = Cast->getOperand(0);
// NOTE(review): the statements that assign Mask in the two branches below
// (source lines 257 and 260) are not shown in this listing, so Mask appears
// to be read without being set. Consult the full source for the values.
254  int Mask;
255  if (Pred == CmpInst::ICMP_SLT && Const->isZero()) {
256  // icmp slt (bitcast X), 0 - set if sign bit true
258  } else if (Pred == CmpInst::ICMP_SGT && Const->isMinusOne()) {
259  // icmp sgt (bitcast X), -1 - set if sign bit false
261  } else {
262  // Not a sign bit check.
263  return;
264  }
// The bitcast may end up without users; remember it for the final cleanup.
265  PossibleJunk.insert(Cast);
266  converted(&I, V, Mask, true);
267  } else if (auto *CI = dyn_cast<CallInst>(Op0)) {
268  // Check if this is a pre-existing call of our tdc intrinsic.
269  Function *F = CI->getCalledFunction();
270  if (!F || F->getIntrinsicID() != Intrinsic::s390_tdc)
271  return;
// Only comparisons of the tdc result against 0 are handled.
272  if (!Const->isZero())
273  return;
274  Value *V = CI->getArgOperand(0);
275  auto *MaskC = dyn_cast<ConstantInt>(CI->getArgOperand(1));
276  // Bail if the mask is not a constant.
277  if (!MaskC)
278  return;
279  int Mask = MaskC->getZExtValue();
// NOTE(review): source lines 280 and 285 are not shown in this listing. Per
// the comments that remain, the ne case keeps the mask and the eq case uses
// the inverted mask; the statement performing the inversion is not visible
// here.
281  if (Pred == CmpInst::ICMP_NE) {
282  // icmp ne (call llvm.s390.tdc(...)), 0 -> simple TDC
283  } else if (Pred == CmpInst::ICMP_EQ) {
284  // icmp eq (call llvm.s390.tdc(...)), 0 -> TDC with inverted mask
286  } else {
287  // An unknown comparison - ignore.
288  return;
289  }
// The original tdc call may end up without users once I is rewritten.
290  PossibleJunk.insert(CI);
// Recorded as not worthy: on its own this is already a tdc plus a compare.
291  converted(&I, V, Mask, false);
292  }
293 }
294 
// Folds an i1 and/or/xor of two already-mapped instructions into a single
// TDC entry (rules 8-10 of the file header).
//
// Precondition: both operands of I are Instructions that already have an
// entry in ConvertedInsts. The only caller, runOnFunction, checks this before
// calling.
//
// If the two operands test different values nothing is recorded. Otherwise I
// is recorded as a TDC on the shared value, with the two masks combined by
// the same bitwise operation as I, and is always marked worthy.
//
// I - the and/or/xor instruction to fold. It is not modified here.
295 void SystemZTDCPass::convertLogicOp(BinaryOperator &I) {
296  Value *Op0, *Op1;
297  int Mask0, Mask1;
// The worthy flags of the operands are unpacked but not consulted below.
298  bool Worthy0, Worthy1;
299  std::tie(Op0, Mask0, Worthy0) = ConvertedInsts[cast<Instruction>(I.getOperand(0))];
300  std::tie(Op1, Mask1, Worthy1) = ConvertedInsts[cast<Instruction>(I.getOperand(1))];
// Masks can only be merged when both sides test the very same value.
301  if (Op0 != Op1)
302  return;
303  int Mask;
304  switch (I.getOpcode()) {
305  case Instruction::And:
306  Mask = Mask0 & Mask1;
307  break;
308  case Instruction::Or:
309  Mask = Mask0 | Mask1;
310  break;
311  case Instruction::Xor:
312  Mask = Mask0 ^ Mask1;
313  break;
314  default:
// converted() only queues And, Or and Xor, so no other opcode is expected.
315  llvm_unreachable("Unknown op in convertLogicOp");
316  }
317  converted(&I, Op0, Mask, true);
318 }
319 
// Body of SystemZTDCPass::runOnFunction.
// NOTE(review): source line 320, the function header, is not shown in this
// listing; the class declares it as bool runOnFunction(Function &F) override.
//
// Runs the four steps described in the file header on F and returns true if
// any instruction was erased or replaced, false otherwise.
321  auto &TPC = getAnalysis<TargetPassConfig>();
// NOTE(review): source line 323 is not shown in this listing. The
// cross-reference index mentions TargetMachine::getSubtarget and
// SystemZSubtarget::hasSoftFloat, so this presumably queries the subtarget of
// F - confirm. With soft-float the pass does nothing.
322  if (TPC.getTM<TargetMachine>()
324  .hasSoftFloat())
325  return false;
326 
// Reset the per-function scratch state left over from a previous run.
327  ConvertedInsts.clear();
328  LogicOpsWorklist.clear();
329  PossibleJunk.clear();
330 
331  // Look for icmp+fcmp instructions.
332  for (auto &I : instructions(F)) {
333  if (I.getOpcode() == Instruction::FCmp)
334  convertFCmp(cast<CmpInst>(I));
335  else if (I.getOpcode() == Instruction::ICmp)
336  convertICmp(cast<CmpInst>(I));
337  }
338 
339  // If none found, bail already.
340  if (ConvertedInsts.empty())
341  return false;
342 
343  // Process the queue of logic instructions.
// Entries are taken from the back of the vector. Converting one logic op can
// push its own i1 and/or/xor users (through converted()), so chains of logic
// ops are followed until the worklist drains.
344  while (!LogicOpsWorklist.empty()) {
345  BinaryOperator *Op = LogicOpsWorklist.back();
346  LogicOpsWorklist.pop_back();
347  // If both operands mapped, and the instruction itself not yet mapped,
348  // convert it.
// dyn_cast yields null for a non-instruction operand, and a null key is
// never present in the map, so such operands simply fail the check.
349  if (ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(0))) &&
350  ConvertedInsts.count(dyn_cast<Instruction>(Op->getOperand(1))) &&
351  !ConvertedInsts.count(Op))
352  convertLogicOp(*Op);
353  }
354 
355  // Time to actually replace the instructions. Do it in the reverse order
356  // of finding them, since there's a good chance the earlier ones will be
357  // unused (due to being folded into later ones).
358  Module &M = *F.getParent();
359  auto &Ctx = M.getContext();
// i32 zero that every emitted tdc call result is compared against.
360  Value *Zero32 = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
361  bool MadeChange = false;
362  for (auto &It : reverse(ConvertedInsts)) {
363  Instruction *I = It.first;
364  Value *V;
365  int Mask;
366  bool Worthy;
367  std::tie(V, Mask, Worthy) = It.second;
368  if (!I->user_empty()) {
369  // If used and unworthy of conversion, skip it.
370  if (!Worthy)
371  continue;
372  // Call the intrinsic, compare result with 0.
// The intrinsic is overloaded on the type of the tested value. The new call
// and compare are inserted immediately before I, with the mask passed as an
// i64 constant.
373  Function *TDCFunc =
374  Intrinsic::getDeclaration(&M, Intrinsic::s390_tdc, V->getType());
375  IRBuilder<> IRB(I);
376  Value *MaskVal = ConstantInt::get(Type::getInt64Ty(Ctx), Mask);
377  Instruction *TDC = IRB.CreateCall(TDCFunc, {V, MaskVal});
378  Value *ICmp = IRB.CreateICmp(CmpInst::ICMP_NE, TDC, Zero32);
379  I->replaceAllUsesWith(ICmp);
380  }
381  // If unused, or used and converted, remove it.
// An instruction without users is erased here regardless of its worthy flag.
382  I->eraseFromParent();
383  MadeChange = true;
384  }
385 
386  if (!MadeChange)
387  return false;
388 
389  // We've actually done something - now clear misc accumulated junk (fabs,
390  // bitcast).
// Only entries that ended up without users are erased; the rest stay.
391  for (auto *I : PossibleJunk)
392  if (I->user_empty())
393  I->eraseFromParent();
394 
395  return true;
396 }
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:200
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::SystemZ::TDCMASK_NAN
const unsigned TDCMASK_NAN
Definition: SystemZ.h:138
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:741
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1379
IntrinsicInst.h
InstIterator.h
llvm::Function
Definition: Function.h:61
llvm::MapVector::clear
void clear()
Definition: MapVector.h:88
llvm::IRBuilder<>
llvm::SystemZ::TDCMASK_NORMAL_PLUS
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:120
MapVector.h
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
INITIALIZE_PASS
INITIALIZE_PASS(SystemZTDCPass, "systemz-tdc", "SystemZ Test Data Class optimization", false, false) FunctionPass *llvm
Definition: SystemZTDC.cpp:123
llvm::CmpInst::ICMP_SGT
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:747
Module.h
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
LegacyPassManager.h
llvm::CmpInst::FCMP_OGT
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:724
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::Type::getInt32Ty
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:203
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::SystemZ::TDCMASK_ZERO
const unsigned TDCMASK_ZERO
Definition: SystemZ.h:131
TargetMachine.h
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::SystemZSubtarget::hasSoftFloat
bool hasSoftFloat() const
Definition: SystemZSubtarget.h:283
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::Type::getFltSemantics
const fltSemantics & getFltSemantics() const
Definition: Type.h:169
llvm::SystemZ::TDCMASK_PLUS
const unsigned TDCMASK_PLUS
Definition: SystemZ.h:142
llvm::CmpInst::FCMP_UNO
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition: InstrTypes.h:730
llvm::SystemZ::TDCMASK_INFINITY_MINUS
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:125
llvm::Instruction
Definition: Instruction.h:45
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::SystemZ::TDCMASK_POSITIVE
const unsigned TDCMASK_POSITIVE
Definition: SystemZ.h:132
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
llvm::CmpInst::FCMP_OEQ
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition: InstrTypes.h:723
llvm::CmpInst::FCMP_OLT
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:726
SystemZ.h
llvm::CmpInst
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:710
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::APFloat
Definition: APFloat.h:701
llvm::APFloat::changeSign
void changeSign()
Definition: APFloat.h:1088
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::SystemZISD::TDC
@ TDC
Definition: SystemZISelLowering.h:292
llvm::SystemZ::TDCMASK_INFINITY_PLUS
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:124
I
#define I(x, y, z)
Definition: MD5.cpp:59
TargetPassConfig.h
llvm::SystemZ::TDCMASK_NORMAL_MINUS
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:121
IRBuilder.h
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::CmpInst::FCMP_OGE
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:725
llvm::SystemZSubtarget
Definition: SystemZSubtarget.h:33
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::CmpInst::ICMP_SLT
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:749
SystemZSubtarget.h
llvm::SystemZ::TDCMASK_MINUS
const unsigned TDCMASK_MINUS
Definition: SystemZ.h:146
llvm::BinaryOperator
Definition: InstrTypes.h:189
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::createSystemZTDCPass
FunctionPass * createSystemZTDCPass()
llvm::SystemZ::TDCMASK_NEGATIVE
const unsigned TDCMASK_NEGATIVE
Definition: SystemZ.h:135
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::MapVector::empty
bool empty() const
Definition: MapVector.h:79
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:204
llvm::MapVector::count
size_type count(const KeyT &Key) const
Definition: MapVector.h:142
llvm::initializeSystemZTDCPassPass
void initializeSystemZTDCPassPass(PassRegistry &)
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::SystemZ::TDCMASK_ALL
const unsigned TDCMASK_ALL
Definition: SystemZ.h:150
llvm::codeview::ModifierOptions::Const
@ Const
Instructions.h
llvm::TargetMachine::getSubtarget
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
Definition: TargetMachine.h:166
llvm::CmpInst::FCMP_OLE
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition: InstrTypes.h:727
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1475
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::SystemZ::TDCMASK_SUBNORMAL_PLUS
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:122
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::APFloat::getSmallestNormalized
static APFloat getSmallestNormalized(const fltSemantics &Sem, bool Negative=false)
Returns the smallest (by magnitude) normalized finite number in the given semantics.
Definition: APFloat.h:954
llvm::SystemZ::TDCMASK_SUBNORMAL_MINUS
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:123
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37