// Extracted from the doxygen view of LLVM 13.0.0git, file PPCISelDAGToDAG.cpp.
// NOTE(review): this is a web-scrape of the file; some original lines were
// lost in extraction and are flagged with NOTE(review) comments below.
1 //===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines a pattern matching instruction selector for PowerPC,
10 // converting from a legalized dag to a PPC dag.
11 //
12 //===----------------------------------------------------------------------===//
13 
16 #include "PPC.h"
17 #include "PPCISelLowering.h"
18 #include "PPCMachineFunctionInfo.h"
19 #include "PPCSubtarget.h"
20 #include "PPCTargetMachine.h"
21 #include "llvm/ADT/APInt.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/Statistic.h"
40 #include "llvm/IR/BasicBlock.h"
41 #include "llvm/IR/DebugLoc.h"
42 #include "llvm/IR/Function.h"
43 #include "llvm/IR/GlobalValue.h"
44 #include "llvm/IR/InlineAsm.h"
45 #include "llvm/IR/InstrTypes.h"
46 #include "llvm/IR/IntrinsicsPowerPC.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/Support/Casting.h"
49 #include "llvm/Support/CodeGen.h"
51 #include "llvm/Support/Compiler.h"
52 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/KnownBits.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 #include <iterator>
62 #include <limits>
63 #include <memory>
64 #include <new>
65 #include <tuple>
66 #include <utility>
67 
using namespace llvm;

// Tag used by LLVM_DEBUG / -debug-only to scope debug output to this pass.
#define DEBUG_TYPE "ppc-codegen"

// Pass statistics (visible with -stats).
STATISTIC(NumSextSetcc,
          "Number of (sext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(NumZextSetcc,
          "Number of (zext(setcc)) nodes expanded into GPR sequence.");
STATISTIC(SignExtensionsAdded,
          "Number of sign extensions for compare inputs added.");
STATISTIC(ZeroExtensionsAdded,
          "Number of zero extensions for compare inputs added.");
STATISTIC(NumLogicOpsOnComparison,
          "Number of logical ops on i1 values calculated in GPR.");
STATISTIC(OmittedForNonExtendUses,
          "Number of compares not eliminated as they have non-extending uses.");
STATISTIC(NumP9Setb,
          "Number of compares lowered to setb.");

// FIXME: Remove this once the bug has been fixed!
cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);

static cl::opt<bool>
    UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
                       cl::desc("use aggressive ppc isel for bit permutations"),
                       cl::Hidden);
// NOTE(review): the declaration opening this option (likely
// "static cl::opt<bool> BPermRewriterNoMasking(") was lost in extraction —
// confirm against upstream LLVM.
    "ppc-bit-perm-rewriter-stress-rotates",
    cl::desc("stress rotate selection in aggressive ppc isel for "
             "bit permutations"),
    cl::Hidden);

// NOTE(review): the declaration opening this option (likely
// "static cl::opt<bool> UseBranchHint(") was lost in extraction.
    "ppc-use-branch-hint", cl::init(true),
    cl::desc("Enable static hinting of branches on ppc"),
    cl::Hidden);

// NOTE(review): the declaration opening this option (likely
// "static cl::opt<bool> EnableTLSOpt(") was lost in extraction.
    "ppc-tls-opt", cl::init(true),
    cl::desc("Enable tls optimization peephole"),
    cl::Hidden);

// NOTE(review): the enum ICmpInGPRType definition and the declaration
// opening this option (likely "static cl::opt<ICmpInGPRType> CmpInGPR(")
// were lost in extraction.

    "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
    cl::desc("Specify the types of comparisons to emit GPR-only code for."),
    cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
               clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
               clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
               clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
               clEnumValN(ICGPR_NonExtIn, "nonextin",
                          "Only comparisons where inputs don't need [sz]ext."),
               clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
               clEnumValN(ICGPR_ZextI32, "zexti32",
                          "Only i32 comparisons with zext result."),
               clEnumValN(ICGPR_ZextI64, "zexti64",
                          "Only i64 comparisons with zext result."),
               clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
               clEnumValN(ICGPR_SextI32, "sexti32",
                          "Only i32 comparisons with sext result."),
               clEnumValN(ICGPR_SextI64, "sexti64",
                          "Only i64 comparisons with sext result.")));
namespace {

  //===--------------------------------------------------------------------===//
  /// PPCDAGToDAGISel - PPC specific code to select PPC machine
  /// instructions for SelectionDAG operations.
  ///
  class PPCDAGToDAGISel : public SelectionDAGISel {
    /// Target machine we are selecting for; fixed for the pass's lifetime.
    const PPCTargetMachine &TM;
    /// Per-function subtarget; refreshed in runOnMachineFunction.
    const PPCSubtarget *Subtarget = nullptr;
    /// Per-function lowering info; refreshed in runOnMachineFunction.
    const PPCTargetLowering *PPCLowering = nullptr;
    /// Register holding the PIC/GOT base. 0 until lazily materialized by
    /// getGlobalBaseReg().
    unsigned GlobalBaseReg = 0;

  public:
    explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), TM(tm) {}

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Make sure we re-emit a set of the global base reg if necessary
      GlobalBaseReg = 0;
      Subtarget = &MF.getSubtarget<PPCSubtarget>();
      PPCLowering = Subtarget->getTargetLowering();
      // NOTE(review): one line was lost in extraction here (presumably the
      // delegating call SelectionDAGISel::runOnMachineFunction(MF)) —
      // confirm against upstream.

      return true;
    }

    void PreprocessISelDAG() override;
    void PostprocessISelDAG() override;

    /// getI16Imm - Return a target constant with the specified value, of type
    /// i16.
    inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
    }

    /// getI64Imm - Return a target constant with the specified value, of type
    /// i64.
    inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    }

    /// getSmallIPtrImm - Return a target constant of pointer type.
    inline SDValue getSmallIPtrImm(unsigned Imm, const SDLoc &dl) {
      return CurDAG->getTargetConstant(
          Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
    }

    /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
    /// rotate and mask opcode and mask operation.
    static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
                                unsigned &SH, unsigned &MB, unsigned &ME);

    /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
    /// base register. Return the virtual register that holds this value.
    SDNode *getGlobalBaseReg();

    // Lower a FrameIndex node (plus optional byte offset) to an ADDI/ADDI8
    // off the target frame index.
    void selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset = 0);

    // Select - Convert the specified operand from a target-independent to a
    // target-specific node if it hasn't already been changed.
    void Select(SDNode *N) override;

    // Entry points into the bit-permutation / GPR-compare selection helpers
    // defined later in this file; each returns true if it replaced N.
    bool tryBitfieldInsert(SDNode *N);
    bool tryBitPermutation(SDNode *N);
    bool tryIntCompareInGPR(SDNode *N);

    // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
    // an X-Form load instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormLoad(LoadSDNode *N);
    // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
    // an X-Form store instruction with the offset being a relocation coming from
    // the PPCISD::ADD_TLS.
    bool tryTLSXFormStore(StoreSDNode *N);
    /// SelectCC - Select a comparison of the specified values with the
    /// specified condition code, returning the CR# of the expression.
    SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                     const SDLoc &dl, SDValue Chain = SDValue());

    /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
    /// immediate field. Note that the operand at this point is already the
    /// result of a prior SelectAddressRegImm call.
    bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
      if (N.getOpcode() == ISD::TargetConstant ||
          N.getOpcode() == ISD::TargetGlobalAddress) {
        Out = N;
        return true;
      }

      return false;
    }

    /// SelectDSForm - Returns true if address N can be represented by the
    /// addressing mode of DSForm instructions (a base register, plus a signed
    /// 16-bit displacement that is a multiple of 4.
    bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                Align(4)) == PPC::AM_DSForm;
    }

    /// SelectDQForm - Returns true if address N can be represented by the
    /// addressing mode of DQForm instructions (a base register, plus a signed
    /// 16-bit displacement that is a multiple of 16.
    bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                Align(16)) == PPC::AM_DQForm;
    }

    /// SelectDForm - Returns true if address N can be represented by
    /// the addressing mode of DForm instructions (a base register, plus a
    /// signed 16-bit immediate.
    bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                None) == PPC::AM_DForm;
    }

    /// SelectXForm - Returns true if address N can be represented by the
    /// addressing mode of XForm instructions (an indexed [r+r] operation).
    bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
                                                None) == PPC::AM_XForm;
    }

    /// SelectForceXForm - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation (an XForm instruction).
    bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
                          SDValue &Base) {
      return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
      // NOTE(review): the right-hand side of this comparison (likely
      // PPC::AM_XFormI) was lost in extraction — confirm upstream.
    }

    /// SelectAddrIdx - Given the specified address, check to see if it can be
    /// represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is D.
    /// The last parameter \p 0 means associated D form has no requirment for 16
    /// bit signed displacement.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG, None);
    }

    /// SelectAddrIdx4 - Given the specified address, check to see if it can be
    /// represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DS.
    /// The last parameter \p 4 means associated DS form 16 bit signed
    /// displacement must be a multiple of 4.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                              Align(4));
    }

    /// SelectAddrIdx16 - Given the specified address, check to see if it can be
    /// represented as an indexed [r+r] operation.
    /// This is for xform instructions whose associated displacement form is DQ.
    /// The last parameter \p 16 means associated DQ form 16 bit signed
    /// displacement must be a multiple of 16.
    /// Returns false if it can be represented by [r+imm], which are preferred.
    bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
                                              Align(16));
    }

    /// SelectAddrIdxOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
      return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
    }

    /// SelectAddrImm - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement [r+imm].
    /// The last parameter \p 0 means D form has no requirment for 16 bit signed
    /// displacement.
    bool SelectAddrImm(SDValue N, SDValue &Disp,
                       SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, None);
    }

    /// SelectAddrImmX4 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 4 (last parameter). Suitable for use by STD and friends.
    bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
    }

    /// SelectAddrImmX16 - Returns true if the address N can be represented by
    /// a base register plus a signed 16-bit displacement that is a multiple of
    /// 16(last parameter). Suitable for use by STXV and friends.
    bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
                                              Align(16));
    }

    /// SelectAddrImmX34 - Returns true if the address N can be represented by
    /// a base register plus a signed 34-bit displacement. Suitable for use by
    /// PSTXVP and friends.
    bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
      return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
    }

    // Select an address into a single register.
    bool SelectAddr(SDValue N, SDValue &Base) {
      Base = N;
      return true;
    }

    bool SelectAddrPCRel(SDValue N, SDValue &Base) {
      return PPCLowering->SelectAddressPCRel(N, Base);
    }

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions. It is always correct to compute the value into
    /// a register. The case of adding a (possibly relocatable) constant to a
    /// register can be improved, but it is wrong to substitute Reg+Reg for
    /// Reg in an asm, because the load or store opcode would have to change.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override {
      switch(ConstraintID) {
      default:
        errs() << "ConstraintID: " << ConstraintID << "\n";
        llvm_unreachable("Unexpected asm memory constraint");
      // NOTE(review): the case labels for the supported memory constraints
      // were lost in extraction here — confirm against upstream.
        // We need to make sure that this one operand does not end up in r0
        // (because we might end up lowering this as 0(%op)).
        const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
        const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
        SDLoc dl(Op);
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
        SDValue NewOp =
          SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                         dl, Op.getValueType(),
                                         Op, RC), 0);

        OutOps.push_back(NewOp);
        return false;
      }
      return true;
    }

    StringRef getPassName() const override {
      return "PowerPC DAG->DAG Pattern Instruction Selection";
    }

// Include the pieces autogenerated from the target description.
#include "PPCGenDAGISel.inc"

private:
    // AND/OR rewriting helpers; each returns true if it selected N as the
    // named single (or pair of) rotate instruction(s).
    bool trySETCC(SDNode *N);
    bool tryFoldSWTestBRCC(SDNode *N);
    bool tryAsSingleRLDICL(SDNode *N);
    bool tryAsSingleRLDICR(SDNode *N);
    bool tryAsSingleRLWINM(SDNode *N);
    bool tryAsSingleRLWINM8(SDNode *N);
    bool tryAsSingleRLWIMI(SDNode *N);
    bool tryAsPairOfRLDICL(SDNode *N);
    bool tryAsSingleRLDIMI(SDNode *N);

    // Post-selection peephole passes run from PostprocessISelDAG.
    void PeepholePPC64();
    void PeepholePPC64ZExt();
    void PeepholeCROps();

    SDValue combineToCMPB(SDNode *N);
    void foldBoolExts(SDValue &Res, SDNode *&N);

    bool AllUsersSelectZero(SDNode *N);
    void SwapAllSelectUsers(SDNode *N);

    bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
    // Copy the MachineMemOperands from N onto the newly created Result.
    void transferMemOperands(SDNode *N, SDNode *Result);
  };

} // end anonymous namespace
418 
/// getGlobalBaseReg - Output the instructions required to put the
/// base address to use for accessing globals into a register.
///
/// The materialization is lazy: the instructions are inserted into the entry
/// MBB only the first time this is called for a function (GlobalBaseReg is
/// reset to 0 in runOnMachineFunction); later calls just return the register.
SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
  if (!GlobalBaseReg) {
    const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
    // Insert the set of GlobalBaseReg into the first MBB of the function
    MachineBasicBlock &FirstMBB = MF->front();
    // NOTE(review): a line was lost in extraction here — presumably the
    // declarations of MBBI (FirstMBB.begin()) and/or RegInfo, both of which
    // are used below. Confirm against upstream.
    const Module *M = MF->getFunction().getParent();
    DebugLoc dl;

    if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
      // 32-bit: pick the ELF small-PIC, ELF large/secure-PLT, or Darwin-style
      // sequence depending on subtarget settings.
      if (Subtarget->isTargetELF()) {
        GlobalBaseReg = PPC::R30;
        if (!Subtarget->isSecurePlt() &&
            M->getPICLevel() == PICLevel::SmallPIC) {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        } else {
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
          BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
          Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
          BuildMI(FirstMBB, MBBI, dl,
                  TII.get(PPC::UpdateGBR), GlobalBaseReg)
          // NOTE(review): the operand list (likely
          // .addReg(TempReg).addReg(GlobalBaseReg);) was lost in extraction.
          MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
        }
      } else {
        GlobalBaseReg =
          RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
        BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
      }
    } else {
      // We must ensure that this sequence is dominated by the prologue.
      // FIXME: This is a bit of a big hammer since we don't get the benefits
      // of shrink-wrapping whenever we emit this instruction. Considering
      // this is used in any function where we emit a jump table, this may be
      // a significant limitation. We should consider inserting this in the
      // block where it is used and then commoning this sequence up if it
      // appears in multiple places.
      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
      // MovePCtoLR8.
      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
      GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
      BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
    }
  }
  return CurDAG->getRegister(GlobalBaseReg,
                             PPCLowering->getPointerTy(CurDAG->getDataLayout()))
      .getNode();
}
474 
475 // Check if a SDValue has the toc-data attribute.
476 static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
477  GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
478  if (!GA)
479  return false;
480 
481  const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
482  if (!GV)
483  return false;
484 
485  if (!GV->hasAttribute("toc-data"))
486  return false;
487 
488  // TODO: These asserts should be updated as more support for the toc data
489  // transformation is added (64 bit, struct support, etc.).
490 
491  assert(PointerSize == 4 && "Only 32 Bit Codegen is currently supported by "
492  "the toc data transformation.");
493 
494  assert(PointerSize >= GV->getAlign().valueOrOne().value() &&
495  "GlobalVariables with an alignment requirement stricter then 4-bytes "
496  "not supported by the toc data transformation.");
497 
498  Type *PtrType = GV->getType();
499  assert(PtrType->isPointerTy() &&
500  "GlobalVariables always have pointer type!.");
501 
502  Type *GVType = dyn_cast<PointerType>(PtrType)->getElementType();
503 
504  assert(GVType->isSized() && "A GlobalVariable's size must be known to be "
505  "supported by the toc data transformation.");
506 
507  if (GVType->isVectorTy())
508  report_fatal_error("A GlobalVariable of Vector type is not currently "
509  "supported by the toc data transformation.");
510 
511  if (GVType->isArrayTy())
512  report_fatal_error("A GlobalVariable of Array type is not currently "
513  "supported by the toc data transformation.");
514 
515  if (GVType->isStructTy())
516  report_fatal_error("A GlobalVariable of Struct type is not currently "
517  "supported by the toc data transformation.");
518 
519  assert(GVType->getPrimitiveSizeInBits() <= PointerSize * 8 &&
520  "A GlobalVariable with size larger than 32 bits is not currently "
521  "supported by the toc data transformation.");
522 
523  if (GV->hasLocalLinkage() || GV->hasPrivateLinkage())
524  report_fatal_error("A GlobalVariable with private or local linkage is not "
525  "currently supported by the toc data transformation.");
526 
527  assert(!GV->hasCommonLinkage() &&
528  "Tentative definitions cannot have the mapping class XMC_TD.");
529 
530  return true;
531 }
532 
533 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
534 /// operand. If so Imm will receive the 32-bit value.
535 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
536  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
537  Imm = cast<ConstantSDNode>(N)->getZExtValue();
538  return true;
539  }
540  return false;
541 }
542 
543 /// isInt64Immediate - This method tests to see if the node is a 64-bit constant
544 /// operand. If so Imm will receive the 64-bit value.
545 static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
546  if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
547  Imm = cast<ConstantSDNode>(N)->getZExtValue();
548  return true;
549  }
550  return false;
551 }
552 
// isInt32Immediate - This method tests to see if the value is a 32-bit
// constant operand. If so Imm will receive the 32 bit value.
// (Convenience overload forwarding to the SDNode* version above.)
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
558 
/// isInt64Immediate - This method tests to see if the value is a 64-bit
/// constant operand. If so Imm will receive the 64-bit value.
/// (Convenience overload forwarding to the SDNode* version above.)
static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
  return isInt64Immediate(N.getNode(), Imm);
}
564 
/// Compute a static branch hint for a conditional branch to \p DestMBB.
/// Returns PPC::BR_TAKEN_HINT / PPC::BR_NONTAKEN_HINT when the edge
/// probabilities are extremely lopsided, otherwise PPC::BR_NO_HINT.
/// \p PCC is currently unused by this implementation.
static unsigned getBranchHint(unsigned PCC,
                              const FunctionLoweringInfo &FuncInfo,
                              const SDValue &DestMBB) {
  assert(isa<BasicBlockSDNode>(DestMBB));

  // Without branch-probability info there is nothing to base a hint on.
  if (!FuncInfo.BPI) return PPC::BR_NO_HINT;

  const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
  const Instruction *BBTerm = BB->getTerminator();

  // Only plain two-way conditional terminators are handled.
  if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;

  const BasicBlock *TBB = BBTerm->getSuccessor(0);
  const BasicBlock *FBB = BBTerm->getSuccessor(1);

  auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
  auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);

  // We only want to handle cases which are easy to predict at static time, e.g.
  // C++ throw statement, that is very likely not taken, or calling never
  // returned function, e.g. stdlib exit(). So we set Threshold to filter
  // unwanted cases.
  //
  // Below is LLVM branch weight table, we only want to handle case 1, 2
  //
  // Case Taken:Nontaken Example
  // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
  // 2. Invoke-terminating 1:1048575
  // 3. Coldblock 4:64 __builtin_expect
  // 4. Loop Branch 124:4 For loop
  // 5. PH/ZH/FPH 20:12
  const uint32_t Threshold = 10000;

  // Hint only when one edge is at least Threshold times likelier than the
  // other; otherwise stay neutral.
  if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
    return PPC::BR_NO_HINT;

  LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
                    << "::" << BB->getName() << "'\n"
                    << " -> " << TBB->getName() << ": " << TProb << "\n"
                    << " -> " << FBB->getName() << ": " << FProb << "\n");

  const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);

  // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
  // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
  if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
    std::swap(TProb, FProb);

  return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
}
615 
616 // isOpcWithIntImmediate - This method tests to see if the node is a specific
617 // opcode and that it has a immediate integer right operand.
618 // If so Imm will receive the 32 bit value.
619 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
620  return N->getOpcode() == Opc
621  && isInt32Immediate(N->getOperand(1).getNode(), Imm);
622 }
623 
624 void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, unsigned Offset) {
625  SDLoc dl(SN);
626  int FI = cast<FrameIndexSDNode>(N)->getIndex();
627  SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
628  unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
629  if (SN->hasOneUse())
630  CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
631  getSmallIPtrImm(Offset, dl));
632  else
633  ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
634  getSmallIPtrImm(Offset, dl)));
635 }
636 
637 bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
638  bool isShiftMask, unsigned &SH,
639  unsigned &MB, unsigned &ME) {
640  // Don't even go down this path for i64, since different logic will be
641  // necessary for rldicl/rldicr/rldimi.
642  if (N->getValueType(0) != MVT::i32)
643  return false;
644 
645  unsigned Shift = 32;
646  unsigned Indeterminant = ~0; // bit mask marking indeterminant results
647  unsigned Opcode = N->getOpcode();
648  if (N->getNumOperands() != 2 ||
649  !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
650  return false;
651 
652  if (Opcode == ISD::SHL) {
653  // apply shift left to mask if it comes first
654  if (isShiftMask) Mask = Mask << Shift;
655  // determine which bits are made indeterminant by shift
656  Indeterminant = ~(0xFFFFFFFFu << Shift);
657  } else if (Opcode == ISD::SRL) {
658  // apply shift right to mask if it comes first
659  if (isShiftMask) Mask = Mask >> Shift;
660  // determine which bits are made indeterminant by shift
661  Indeterminant = ~(0xFFFFFFFFu >> Shift);
662  // adjust for the left rotate
663  Shift = 32 - Shift;
664  } else if (Opcode == ISD::ROTL) {
665  Indeterminant = 0;
666  } else {
667  return false;
668  }
669 
670  // if the mask doesn't intersect any Indeterminant bits
671  if (Mask && !(Mask & Indeterminant)) {
672  SH = Shift & 31;
673  // make sure the mask is still a mask (wrap arounds may not be)
674  return isRunOfOnes(Mask, MB, ME);
675  }
676  return false;
677 }
678 
/// tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into an
/// X-Form store (ST[BHWD]XTLS[_32]) whose index operand carries the TLS
/// relocation. Returns true if the store was replaced.
bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
  SDValue Base = ST->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = ST->getOffset();
  if (!Offset.isUndef())
    return false;
  // NOTE(review): the condition guarding this early return was lost in
  // extraction (original line 686) — confirm against upstream.
    return false;

  SDLoc dl(ST);
  EVT MemVT = ST->getMemoryVT();
  EVT RegVT = ST->getValue().getValueType();

  // Pick the TLS X-form store opcode from the memory width, with a _32
  // variant when the value register is 32-bit.
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::STDXTLS;
    break;
  }
  }
  SDValue Chain = ST->getChain();
  SDVTList VTs = ST->getVTList();
  // Operands: value, ADD_TLS base, ADD_TLS relocation offset, chain.
  SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
                   Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(ST, MN);
  ReplaceNode(ST, MN);
  return true;
}
723 
/// tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into an
/// X-Form load (L[BHW]ZXTLS[_32] / LDXTLS) whose index operand carries the
/// TLS relocation. Returns true if the load was replaced.
bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
  SDValue Base = LD->getBasePtr();
  if (Base.getOpcode() != PPCISD::ADD_TLS)
    return false;
  SDValue Offset = LD->getOffset();
  if (!Offset.isUndef())
    return false;
  // NOTE(review): the condition guarding this early return was lost in
  // extraction (original line 731) — confirm against upstream.
    return false;

  SDLoc dl(LD);
  EVT MemVT = LD->getMemoryVT();
  EVT RegVT = LD->getValueType(0);
  // Pick the TLS X-form load opcode from the memory width, with a _32
  // variant when the result register is 32-bit.
  unsigned Opcode;
  switch (MemVT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8: {
    Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
    break;
  }
  case MVT::i16: {
    Opcode = (RegVT == MVT::i32) ? PPC::LHZXTLS_32 : PPC::LHZXTLS;
    break;
  }
  case MVT::i32: {
    Opcode = (RegVT == MVT::i32) ? PPC::LWZXTLS_32 : PPC::LWZXTLS;
    break;
  }
  case MVT::i64: {
    Opcode = PPC::LDXTLS;
    break;
  }
  }
  SDValue Chain = LD->getChain();
  SDVTList VTs = LD->getVTList();
  // Operands: ADD_TLS base, ADD_TLS relocation offset, chain.
  SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
  SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
  transferMemOperands(LD, MN);
  ReplaceNode(LD, MN);
  return true;
}
766 
/// Turn an or of two masked values into the rotate left word immediate then
/// mask insert (rlwimi) instruction.
/// Returns true if N (an OR) was replaced by a single RLWIMI.
bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc dl(N);

  KnownBits LKnown = CurDAG->computeKnownBits(Op0);
  KnownBits RKnown = CurDAG->computeKnownBits(Op1);

  // Bits known zero on each side; the OR is an insert only if together they
  // cover the full 32 bits (the operands don't overlap).
  unsigned TargetMask = LKnown.Zero.getZExtValue();
  unsigned InsertMask = RKnown.Zero.getZExtValue();

  if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
    unsigned Op0Opc = Op0.getOpcode();
    unsigned Op1Opc = Op1.getOpcode();
    unsigned Value, SH = 0;
    // Flip from known-zero masks to the masks of bits each side contributes.
    TargetMask = ~TargetMask;
    InsertMask = ~InsertMask;

    // If the LHS has a foldable shift and the RHS does not, then swap it to the
    // RHS so that we can fold the shift into the insert.
    if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
      if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
          Op0.getOperand(0).getOpcode() == ISD::SRL) {
        if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
            Op1.getOperand(0).getOpcode() != ISD::SRL) {
          std::swap(Op0, Op1);
          std::swap(Op0Opc, Op1Opc);
          std::swap(TargetMask, InsertMask);
        }
      }
    } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
      if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
          Op1.getOperand(0).getOpcode() != ISD::SRL) {
        std::swap(Op0, Op1);
        std::swap(Op0Opc, Op1Opc);
        std::swap(TargetMask, InsertMask);
      }
    }

    unsigned MB, ME;
    // RLWIMI requires the inserted field to be a contiguous run of ones.
    if (isRunOfOnes(InsertMask, MB, ME)) {
      // Fold a shift feeding the inserted value into the rotate amount.
      if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
          isInt32Immediate(Op1.getOperand(1), Value)) {
        Op1 = Op1.getOperand(0);
        SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
      }
      if (Op1Opc == ISD::AND) {
        // The AND mask might not be a constant, and we need to make sure that
        // if we're going to fold the masking with the insert, all bits not
        // know to be zero in the mask are known to be one.
        KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
        bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();

        unsigned SHOpc = Op1.getOperand(0).getOpcode();
        if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
        // NOTE(review): the continuation of this condition (likely
        // isInt32Immediate(Op1.getOperand(0).getOperand(1), Value)) was lost
        // in extraction — confirm against upstream.
          // Note that Value must be in range here (less than 32) because
          // otherwise there would not be any bits set in InsertMask.
          Op1 = Op1.getOperand(0).getOperand(0);
          SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
        }
      }

      SH &= 31;
      SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
                        getI32Imm(ME, dl) };
      ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
      return true;
    }
  }
  return false;
}
841 
842 static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
843  unsigned MaxTruncation = 0;
844  // Cannot use range-based for loop here as we need the actual use (i.e. we
845  // need the operand number corresponding to the use). A range-based for
846  // will unbox the use and provide an SDNode*.
847  for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
848  Use != UseEnd; ++Use) {
849  unsigned Opc =
850  Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
851  switch (Opc) {
852  default: return 0;
853  case ISD::TRUNCATE:
854  if (Use->isMachineOpcode())
855  return 0;
856  MaxTruncation =
857  std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
858  continue;
859  case ISD::STORE: {
860  if (Use->isMachineOpcode())
861  return 0;
862  StoreSDNode *STN = cast<StoreSDNode>(*Use);
863  unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
864  if (MemVTSize == 64 || Use.getOperandNo() != 0)
865  return 0;
866  MaxTruncation = std::max(MaxTruncation, MemVTSize);
867  continue;
868  }
869  case PPC::STW8:
870  case PPC::STWX8:
871  case PPC::STWU8:
872  case PPC::STWUX8:
873  if (Use.getOperandNo() != 0)
874  return 0;
875  MaxTruncation = std::max(MaxTruncation, 32u);
876  continue;
877  case PPC::STH8:
878  case PPC::STHX8:
879  case PPC::STHU8:
880  case PPC::STHUX8:
881  if (Use.getOperandNo() != 0)
882  return 0;
883  MaxTruncation = std::max(MaxTruncation, 16u);
884  continue;
885  case PPC::STB8:
886  case PPC::STBX8:
887  case PPC::STBU8:
888  case PPC::STBUX8:
889  if (Use.getOperandNo() != 0)
890  return 0;
891  MaxTruncation = std::max(MaxTruncation, 8u);
892  continue;
893  }
894  }
895  return MaxTruncation;
896 }
897 
898 // For any 32 < Num < 64, check if the Imm contains at least Num consecutive
899 // zeros and return the number of bits by the left of these consecutive zeros.
900 static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
901  unsigned HiTZ = countTrailingZeros<uint32_t>(Hi_32(Imm));
902  unsigned LoLZ = countLeadingZeros<uint32_t>(Lo_32(Imm));
903  if ((HiTZ + LoLZ) >= Num)
904  return (32 + HiTZ);
905  return 0;
906 }
907 
// Direct materialization of 64-bit constants by enumerated patterns.
// Tries to build Imm in at most 3 instructions by matching its bit layout
// against a fixed catalog of patterns. On success, returns the final machine
// node and sets InstCnt to the number of instructions used; otherwise
// returns nullptr with InstCnt == 0.
static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
                                  uint64_t Imm, unsigned &InstCnt) {
  // Bit-layout statistics used to classify Imm against the patterns below.
  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  unsigned TO = countTrailingOnes<uint64_t>(Imm);
  unsigned LO = countLeadingOnes<uint64_t>(Imm);
  unsigned Hi32 = Hi_32(Imm);
  unsigned Lo32 = Lo_32(Imm);
  SDNode *Result = nullptr;
  unsigned Shift = 0;

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  // Following patterns use 1 instruction to materialize the Imm.
  InstCnt = 1;
  // 1-1) Patterns : {zeros}{15-bit value}
  //                 {ones}{15-bit value}
  // LI sign-extends its 16-bit operand, covering both cases.
  if (isInt<16>(Imm)) {
    SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
    return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
  //                 {ones}{15-bit value}{16 zeros}
  if (TZ > 15 && (LZ > 32 || LO > 32))
    return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                  getI32Imm((Imm >> 16) & 0xffff));

  // Following patterns use 2 instructions to materialize the Imm.
  InstCnt = 2;
  assert(LZ < 64 && "Unexpected leading zeros here.");
  // Count of ones following the leading zeros.
  unsigned FO = countLeadingOnes<uint64_t>(Imm << LZ);
  // 2-1) Patterns : {zeros}{31-bit value}
  //                 {ones}{31-bit value}
  // LIS materializes the (sign-extended) high half; ORI fills in the low 16.
  if (isInt<32>(Imm)) {
    uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Imm & 0xffff));
  }
  // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
  //                 {zeros}{15-bit value}{zeros}
  //                 {zeros}{ones}{15-bit value}
  //                 {ones}{15-bit value}{zeros}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDIC to mask off the ones in both sides after rotation.
  if ((LZ + FO + TZ) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 2-3) Pattern : {zeros}{15-bit value}{ones}
  // Shift right the Imm by (48 - LZ) bits to construct a negative 16 bit value,
  // therefore we can take advantage of LI's sign-extension semantics, and then
  // mask them off after rotation.
  //
  // +--LZ--||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //          Imm                   (Imm >> (48 - LZ) & 0xffff)
  // +----sext-----|--16-bit--+     +clear-|-----------------+
  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left (48 - LZ), clear left LZ
  if ((LZ + TO) > 48) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ) & 0xffff)));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(48 - LZ), getI32Imm(LZ));
  }
  // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
  //                 {ones}{15-bit value}{ones}
  // We can take advantage of LI's sign-extension semantics to generate leading
  // ones, and then use RLDICL to mask off the ones in left sides (if required)
  // after rotation.
  //
  // +-LZ-FO||-15-bit-||--TO--+     +-------------|--16-bit--+
  // |00011110bbbbbbbbb1111111| ->  |000000000011110bbbbbbbbb|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //            Imm                    (Imm >> TO) & 0xffff
  // +----sext-----|--16-bit--+     +LZ|---------------------+
  // |111111111111110bbbbbbbbb| ->  |00011110bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  // LI8: sext many leading zeros   RLDICL: rotate left TO, clear left LZ
  if ((LZ + FO + TO) > 48) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
  // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
  // value, we can use LI for Lo16 without generating leading ones then add the
  // Hi16(in Lo32).
  if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(Lo32 & 0xffff));
    return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Lo32 >> 16));
  }
  // 2-6) Patterns : {******}{49 zeros}{******}
  //                 {******}{49 ones}{******}
  // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
  // bits remain on both sides. Rotate right the Imm to construct an int<16>
  // value, use LI for int<16> value and then use RLDICL without mask to rotate
  // it back.
  //
  // 1) findContiguousZerosAtLeast(Imm, 49)
  // +------|--zeros-|------+     +---ones--||---15 bit--+
  // |bbbbbb0000000000aaaaaa| ->  |0000000000aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  //
  // 2) findContiguousZerosAtLeast(~Imm, 49)
  // +------|--ones--|------+     +---ones--||---15 bit--+
  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // Following patterns use 3 instructions to materialize the Imm.
  InstCnt = 3;
  // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
  //                 {zeros}{31-bit value}{zeros}
  //                 {zeros}{ones}{31-bit value}
  //                 {ones}{31-bit value}{zeros}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
  // ones in both sides after rotation.
  if ((LZ + FO + TZ) > 32) {
    uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TZ) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // 3-2) Pattern : {zeros}{31-bit value}{ones}
  // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits value,
  // therefore we can take advantage of LIS's sign-extension semantics, add
  // the remaining bits with ORI, and then mask them off after rotation.
  // This is similar to Pattern 2-3, please refer to the diagram there.
  if ((LZ + TO) > 32) {
    // Since the immediates with (LZ > 32) have been handled by previous
    // patterns, here we have (LZ <= 32) to make sure we will not shift right
    // the Imm by a negative value.
    assert(LZ <= 32 && "Unexpected shift value.");
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (48 - LZ)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> (32 - LZ)) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(32 - LZ), getI32Imm(LZ));
  }
  // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
  //                 {ones}{31-bit value}{ones}
  // We can take advantage of LIS's sign-extension semantics to generate leading
  // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
  // ones in left sides (if required) after rotation.
  // This is similar to Pattern 2-4, please refer to the diagram there.
  if ((LZ + FO + TO) > 32) {
    Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
                                    getI32Imm((Imm >> (TO + 16)) & 0xffff));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm((Imm >> TO) & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // 3-4) Patterns : High word == Low word
  if (Hi32 == Lo32) {
    // Handle the first 32 bits.
    uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(Lo32 & 0xffff));
    // Use rldimi to insert the Low word into High word.
    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
                     getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }
  // 3-5) Patterns : {******}{33 zeros}{******}
  //                 {******}{33 ones}{******}
  // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
  // bits remain on both sides. Rotate right the Imm to construct an int<32>
  // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
  // rotate it back.
  // This is similar to Pattern 2-6, please refer to the diagram there.
  if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
      (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
    unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
    Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(RotImm & 0xffff));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(Shift), getI32Imm(0));
  }

  // No pattern matched; signal failure to the caller.
  InstCnt = 0;
  return nullptr;
}
1132 
1133 // Try to select instructions to generate a 64 bit immediate using prefix as
1134 // well as non prefix instructions. The function will return the SDNode
1135 // to materialize that constant or it will return nullptr if it does not
1136 // find one. The variable InstCnt is set to the number of instructions that
1137 // were selected.
                                        uint64_t Imm, unsigned &InstCnt) {
  // Bit-layout statistics used to classify Imm against the patterns below.
  unsigned TZ = countTrailingZeros<uint64_t>(Imm);
  unsigned LZ = countLeadingZeros<uint64_t>(Imm);
  unsigned TO = countTrailingOnes<uint64_t>(Imm);
  // Guard the shift: (Imm << 64) would be undefined behavior.
  unsigned FO = countLeadingOnes<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
  unsigned Hi32 = Hi_32(Imm);
  unsigned Lo32 = Lo_32(Imm);

  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };

  auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
  };

  // Following patterns use 1 instruction to materialize Imm.
  InstCnt = 1;

  // The pli instruction can materialize up to 34 bits directly.
  // If a constant fits within 34-bits, emit the pli instruction here directly.
  if (isInt<34>(Imm))
    return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                  CurDAG->getTargetConstant(Imm, dl, MVT::i64));

  // Require at least two instructions.
  InstCnt = 2;
  SDNode *Result = nullptr;
  // Patterns : {zeros}{ones}{33-bit value}{zeros}
  //            {zeros}{33-bit value}{zeros}
  //            {zeros}{ones}{33-bit value}
  //            {ones}{33-bit value}{zeros}
  // We can take advantage of PLI's sign-extension semantics to generate leading
  // ones, and then use RLDIC to mask off the ones on both sides after rotation.
  if ((LZ + FO + TZ) > 30) {
    APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
    APInt Extended = SignedInt34.sext(64);
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                    getI64Imm(*Extended.getRawData()));
    return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TZ), getI32Imm(LZ));
  }
  // Pattern : {zeros}{33-bit value}{ones}
  // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
  // therefore we can take advantage of PLI's sign-extension semantics, and then
  // mask them off after rotation.
  //
  // +--LZ--||-33-bit-||--TO--+     +-------------|--34-bit--+
  // |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  //
  // +----sext-----|--34-bit--+     +clear-|-----------------+
  // |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
  // +------------------------+     +------------------------+
  // 63                      0      63                      0
  if ((LZ + TO) > 30) {
    APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
    APInt Extended = SignedInt34.sext(64);
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                    getI64Imm(*Extended.getRawData()));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(30 - LZ), getI32Imm(LZ));
  }
  // Patterns : {zeros}{ones}{33-bit value}{ones}
  //            {ones}{33-bit value}{ones}
  // Similar to LI we can take advantage of PLI's sign-extension semantics to
  // generate leading ones, and then use RLDICL to mask off the ones in left
  // sides (if required) after rotation.
  if ((LZ + FO + TO) > 30) {
    APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
    APInt Extended = SignedInt34.sext(64);
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
                                    getI64Imm(*Extended.getRawData()));
    return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
                                  getI32Imm(TO), getI32Imm(LZ));
  }
  // Patterns : {******}{31 zeros}{******}
  //          : {******}{31 ones}{******}
  // If Imm contains 31 consecutive zeros/ones then the remaining bit count
  // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
  // for the int<33> value and then use RLDICL without a mask to rotate it back.
  //
  // +------|--ones--|------+     +---ones--||---33 bit--+
  // |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
  // +----------------------+     +----------------------+
  // 63                    0      63                    0
  // Try every rotation amount; take the first that exposes a 34-bit value.
  for (unsigned Shift = 0; Shift < 63; ++Shift) {
    uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
    if (isInt<34>(RotImm)) {
      Result =
          CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
      return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Shift),
                                    getI32Imm(0));
    }
  }

  // Patterns : High word == Low word
  // This is basically a splat of a 32 bit immediate.
  if (Hi32 == Lo32) {
    Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
    SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
                     getI32Imm(0)};
    return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
  }

  InstCnt = 3;
  // Catch-all
  // This pattern can form any 64 bit immediate in 3 instructions: PLI each
  // half, then RLDIMI the high word into the low word.
  SDNode *ResultHi =
      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
  SDNode *ResultLo =
      CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
  SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
                   getI32Imm(0)};
  return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
}
1257 
// Materialize a 64-bit immediate, preferring whichever of the non-prefixed
// and prefixed (pli-based) sequences needs fewer instructions; falls back to
// building the high word directly and OR-ing in the low halfword pieces.
// If InstCnt is non-null it receives the instruction count of the selection.
static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
                            unsigned *InstCnt = nullptr) {
  unsigned InstCntDirect = 0;
  // No more than 3 instructions is used if we can select the i64 immediate
  // directly.
  SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);

  // NOTE(review): the initializer expression is not visible in this excerpt;
  // presumably it fetches the subtarget from CurDAG's machine function --
  // confirm against the full source.
  const PPCSubtarget &Subtarget =

  // If we have prefixed instructions and there is a chance we can
  // materialize the constant with fewer prefixed instructions than
  // non-prefixed, try that.
  if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
    unsigned InstCntDirectP = 0;
    SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
    // Use the prefix case in either of two cases:
    // 1) We have no result from the non-prefix case to use.
    // 2) The non-prefix case uses more instructions than the prefix case.
    // If the prefix and non-prefix cases use the same number of instructions
    // we will prefer the non-prefix case.
    if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
      if (InstCnt)
        *InstCnt = InstCntDirectP;
      return ResultP;
    }
  }

  if (Result) {
    if (InstCnt)
      *InstCnt = InstCntDirect;
    return Result;
  }
  auto getI32Imm = [CurDAG, dl](unsigned Imm) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  };
  // Handle the upper 32 bit value.
  Result =
      selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
  // Add in the last bits as required.
  if (uint32_t Hi16 = (Lo_32(Imm) >> 16) & 0xffff) {
    Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
                                    SDValue(Result, 0), getI32Imm(Hi16));
    ++InstCntDirect;
  }
  if (uint32_t Lo16 = Lo_32(Imm) & 0xffff) {
    Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
                                    getI32Imm(Lo16));
    ++InstCntDirect;
  }
  if (InstCnt)
    *InstCnt = InstCntDirect;
  return Result;
}
1312 
// Select a 64-bit constant.
// NOTE(review): the function signature line is not visible in this excerpt;
// the body selects machine nodes for the constant node N.
  SDLoc dl(N);

  // Get the 64-bit value of the constant.
  int64_t Imm = cast<ConstantSDNode>(N)->getZExtValue();
  // If every user only consumes the low MinSize bits, sign-extending the
  // truncated value is equivalent; when that fits in 16 bits a single LI8
  // suffices.
  if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
    uint64_t SextImm = SignExtend64(Imm, MinSize);
    SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
    if (isInt<16>(SextImm))
      return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
  }
  return selectI64Imm(CurDAG, dl, Imm);
}
1327 
1328 namespace {
1329 
1330 class BitPermutationSelector {
  // Tracks the provenance of a single bit of the value being matched: either
  // a constant zero, or bit Idx of some SDValue V.
  struct ValueBit {
    SDValue V;

    // The bit number in the value, using a convention where bit 0 is the
    // lowest-order bit.
    unsigned Idx;

    // ConstZero means a bit we need to mask off.
    // Variable is a bit that comes from an input variable.
    // VariableKnownToBeZero is also a bit that comes from an input variable,
    // but it is known to be already zero, so we do not need to mask it.
    enum Kind {
      ConstZero,
      Variable,
      VariableKnownToBeZero
    } K;

    // Construct a bit sourced from position I of value V.
    ValueBit(SDValue V, unsigned I, Kind K = Variable)
      : V(V), Idx(I), K(K) {}
    // Construct a valueless bit; the index is a sentinel.
    ValueBit(Kind K = Variable)
      : V(SDValue(nullptr, 0)), Idx(UINT32_MAX), K(K) {}

    // True if this bit is known to be zero (masked, or known-zero variable).
    bool isZero() const {
      return K == ConstZero || K == VariableKnownToBeZero;
    }

    // True if this bit carries a source value (so V and Idx are meaningful).
    bool hasValue() const {
      return K == Variable || K == VariableKnownToBeZero;
    }

    SDValue getValue() const {
      assert(hasValue() && "Cannot get the value of a constant bit");
      return V;
    }

    unsigned getValueBitIndex() const {
      assert(hasValue() && "Cannot get the value bit index of a constant bit");
      return Idx;
    }
  };
1371 
1372  // A bit group has the same underlying value and the same rotate factor.
1373  struct BitGroup {
1374  SDValue V;
1375  unsigned RLAmt;
1376  unsigned StartIdx, EndIdx;
1377 
1378  // This rotation amount assumes that the lower 32 bits of the quantity are
1379  // replicated in the high 32 bits by the rotation operator (which is done
1380  // by rlwinm and friends in 64-bit mode).
1381  bool Repl32;
1382  // Did converting to Repl32 == true change the rotation factor? If it did,
1383  // it decreased it by 32.
1384  bool Repl32CR;
1385  // Was this group coalesced after setting Repl32 to true?
1386  bool Repl32Coalesced;
1387 
1388  BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1389  : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1390  Repl32Coalesced(false) {
1391  LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1392  << " [" << S << ", " << E << "]\n");
1393  }
1394  };
1395 
1396  // Information on each (Value, RLAmt) pair (like the number of groups
1397  // associated with each) used to choose the lowering method.
1398  struct ValueRotInfo {
1399  SDValue V;
1400  unsigned RLAmt = std::numeric_limits<unsigned>::max();
1401  unsigned NumGroups = 0;
1402  unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1403  bool Repl32 = false;
1404 
1405  ValueRotInfo() = default;
1406 
1407  // For sorting (in reverse order) by NumGroups, and then by
1408  // FirstGroupStartIdx.
1409  bool operator < (const ValueRotInfo &Other) const {
1410  // We need to sort so that the non-Repl32 come first because, when we're
1411  // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1412  // masking operation.
1413  if (Repl32 < Other.Repl32)
1414  return true;
1415  else if (Repl32 > Other.Repl32)
1416  return false;
1417  else if (NumGroups > Other.NumGroups)
1418  return true;
1419  else if (NumGroups < Other.NumGroups)
1420  return false;
1421  else if (RLAmt == 0 && Other.RLAmt != 0)
1422  return true;
1423  else if (RLAmt != 0 && Other.RLAmt == 0)
1424  return false;
1425  else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1426  return true;
1427  return false;
1428  }
1429  };
1430 
1431  using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1432  using ValueBitsMemoizer =
1434  ValueBitsMemoizer Memoizer;
1435 
1436  // Return a pair of bool and a SmallVector pointer to a memoization entry.
1437  // The bool is true if something interesting was deduced, otherwise if we're
1438  // providing only a generic representation of V (or something else likewise
1439  // uninteresting for instruction selection) through the SmallVector.
  // Return a pair of bool and a SmallVector pointer to a memoization entry.
  // The bool is true if something interesting was deduced, otherwise if we're
  // providing only a generic representation of V (or something else likewise
  // uninteresting for instruction selection) through the SmallVector.
  // Recursively decomposes V into per-bit provenance (ValueBit) records,
  // handling rotates, shifts, masks, disjoint ORs, extensions, truncations,
  // and zero-extending loads.
  std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
                                                            unsigned NumBits) {
    // Memoize per-SDValue: a repeat query returns the cached entry directly.
    auto &ValueEntry = Memoizer[V];
    if (ValueEntry)
      return std::make_pair(ValueEntry->first, &ValueEntry->second);
    ValueEntry.reset(new ValueBitsMemoizedValue());
    bool &Interesting = ValueEntry->first;
    SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
    Bits.resize(NumBits);

    switch (V.getOpcode()) {
    default: break;
    case ISD::ROTL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned RotAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // Destination bit i takes source bit (i - RotAmt) mod NumBits.
        for (unsigned i = 0; i < NumBits; ++i)
          Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SHL:
    case PPCISD::SHL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // Upper bits move up by ShiftAmt; vacated low bits become zero.
        for (unsigned i = ShiftAmt; i < NumBits; ++i)
          Bits[i] = LHSBits[i - ShiftAmt];

        for (unsigned i = 0; i < ShiftAmt; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::SRL:
    case PPCISD::SRL:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        unsigned ShiftAmt = V.getConstantOperandVal(1);

        const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;

        // Lower bits move down by ShiftAmt; vacated high bits become zero.
        for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
          Bits[i] = LHSBits[i + ShiftAmt];

        for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
          Bits[i] = ValueBit(ValueBit::ConstZero);

        return std::make_pair(Interesting = true, &Bits);
      }
      break;
    case ISD::AND:
      if (isa<ConstantSDNode>(V.getOperand(1))) {
        uint64_t Mask = V.getConstantOperandVal(1);

        const SmallVector<ValueBit, 64> *LHSBits;
        // Mark this as interesting, only if the LHS was also interesting. This
        // prevents the overall procedure from matching a single immediate 'and'
        // (which is non-optimal because such an and might be folded with other
        // things if we don't select it here).
        std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);

        for (unsigned i = 0; i < NumBits; ++i)
          if (((Mask >> i) & 1) == 1)
            Bits[i] = (*LHSBits)[i];
          else {
            // AND instruction masks this bit. If the input is already zero,
            // we have nothing to do here. Otherwise, make the bit ConstZero.
            if ((*LHSBits)[i].isZero())
              Bits[i] = (*LHSBits)[i];
            else
              Bits[i] = ValueBit(ValueBit::ConstZero);
          }

        return std::make_pair(Interesting, &Bits);
      }
      break;
    case ISD::OR: {
      // An OR is only decomposable when the operands' set bits are disjoint:
      // each destination bit then comes from exactly one side.
      const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
      const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;

      bool AllDisjoint = true;
      SDValue LastVal = SDValue();
      unsigned LastIdx = 0;
      for (unsigned i = 0; i < NumBits; ++i) {
        if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
          // If both inputs are known to be zero and one is ConstZero and
          // another is VariableKnownToBeZero, we can select whichever
          // we like. To minimize the number of bit groups, we select
          // VariableKnownToBeZero if this bit is the next bit of the same
          // input variable from the previous bit. Otherwise, we select
          // ConstZero.
          if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
              LHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = LHSBits[i];
          else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
                   RHSBits[i].getValueBitIndex() == LastIdx + 1)
            Bits[i] = RHSBits[i];
          else
            Bits[i] = ValueBit(ValueBit::ConstZero);
        }
        else if (LHSBits[i].isZero())
          Bits[i] = RHSBits[i];
        else if (RHSBits[i].isZero())
          Bits[i] = LHSBits[i];
        else {
          // Both sides may contribute to this bit: not disjoint, give up.
          AllDisjoint = false;
          break;
        }
        // We remember the value and bit index of this bit.
        if (Bits[i].hasValue()) {
          LastVal = Bits[i].getValue();
          LastIdx = Bits[i].getValueBitIndex();
        }
        else {
          if (LastVal) LastVal = SDValue();
          LastIdx = 0;
        }
      }

      if (!AllDisjoint)
        break;

      return std::make_pair(Interesting = true, &Bits);
    }
    case ISD::ZERO_EXTEND: {
      // We support only the case with zero extension from i32 to i64 so far.
      if (V.getValueType() != MVT::i64 ||
          V.getOperand(0).getValueType() != MVT::i32)
        break;

      const SmallVector<ValueBit, 64> *LHSBits;
      const unsigned NumOperandBits = 32;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumOperandBits);

      for (unsigned i = 0; i < NumOperandBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // The extended high bits are constant zero.
      for (unsigned i = NumOperandBits; i < NumBits; ++i)
        Bits[i] = ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::TRUNCATE: {
      // NOTE(review): the declaration of FromType (presumably the operand's
      // value type) is not visible in this excerpt -- confirm against the
      // full source.
      EVT ToType = V.getValueType();
      // We support only the case with truncate from i64 to i32.
      if (FromType != MVT::i64 || ToType != MVT::i32)
        break;
      const unsigned NumAllBits = FromType.getSizeInBits();
      SmallVector<ValueBit, 64> *InBits;
      std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
                                                    NumAllBits);
      const unsigned NumValidBits = ToType.getSizeInBits();

      // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
      // So, we cannot include this truncate.
      bool UseUpper32bit = false;
      for (unsigned i = 0; i < NumValidBits; ++i)
        if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
          UseUpper32bit = true;
          break;
        }
      if (UseUpper32bit)
        break;

      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*InBits)[i];

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::AssertZext: {
      // For AssertZext, we look through the operand and
      // mark the bits known to be zero.
      const SmallVector<ValueBit, 64> *LHSBits;
      std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
                                                    NumBits);

      EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
      const unsigned NumValidBits = FromType.getSizeInBits();
      for (unsigned i = 0; i < NumValidBits; ++i)
        Bits[i] = (*LHSBits)[i];

      // These bits are known to be zero but the AssertZext may be from a value
      // that already has some constant zero bits (i.e. from a masking and).
      for (unsigned i = NumValidBits; i < NumBits; ++i)
        Bits[i] = (*LHSBits)[i].hasValue()
                      ? ValueBit((*LHSBits)[i].getValue(),
                                 (*LHSBits)[i].getValueBitIndex(),
                                 ValueBit::VariableKnownToBeZero)
                      : ValueBit(ValueBit::ConstZero);

      return std::make_pair(Interesting, &Bits);
    }
    case ISD::LOAD:
      LoadSDNode *LD = cast<LoadSDNode>(V);
      if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
        EVT VT = LD->getMemoryVT();
        const unsigned NumValidBits = VT.getSizeInBits();

        for (unsigned i = 0; i < NumValidBits; ++i)
          Bits[i] = ValueBit(V, i);

        // These bits are known to be zero.
        for (unsigned i = NumValidBits; i < NumBits; ++i)
          Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);

        // Zero-extending load itself cannot be optimized. So, it is not
        // interesting by itself though it gives useful information.
        return std::make_pair(Interesting = false, &Bits);
      }
      break;
    }

    // Fallback: treat V as an opaque value whose bit i is simply bit i of V.
    for (unsigned i = 0; i < NumBits; ++i)
      Bits[i] = ValueBit(V, i);

    return std::make_pair(Interesting = false, &Bits);
  }
1665 
1666  // For each value (except the constant ones), compute the left-rotate amount
1667  // to get it from its original to final position.
1668  void computeRotationAmounts() {
1669  NeedMask = false;
1670  RLAmt.resize(Bits.size());
1671  for (unsigned i = 0; i < Bits.size(); ++i)
1672  if (Bits[i].hasValue()) {
1673  unsigned VBI = Bits[i].getValueBitIndex();
1674  if (i >= VBI)
1675  RLAmt[i] = i - VBI;
1676  else
1677  RLAmt[i] = Bits.size() - (VBI - i);
1678  } else if (Bits[i].isZero()) {
1679  NeedMask = true;
1680  RLAmt[i] = UINT32_MAX;
1681  } else {
1682  llvm_unreachable("Unknown value bit type");
1683  }
1684  }
1685 
  // Collect groups of consecutive bits with the same underlying value and
  // rotation factor. If we're doing late masking, we ignore zeros, otherwise
  // they break up groups.
  //
  // The result is written to the BitGroups member; a final group that runs to
  // the last bit may later be merged with an initial group that starts at bit
  // zero (producing a wrap-around group with StartIdx > EndIdx).
  void collectBitGroups(bool LateMask) {
    BitGroups.clear();

    // Seed the "current group" state from bit 0.
    unsigned LastRLAmt = RLAmt[0];
    SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
    unsigned LastGroupStartIdx = 0;
    bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    for (unsigned i = 1; i < Bits.size(); ++i) {
      unsigned ThisRLAmt = RLAmt[i];
      SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
      if (LateMask && !ThisValue) {
        // When masking late, a zero bit can be absorbed into the neighboring
        // group; pretend it carries the previous value/rotation.
        ThisValue = LastValue;
        ThisRLAmt = LastRLAmt;
        // If we're doing late masking, then the first bit group always starts
        // at zero (even if the first bits were zero).
        if (BitGroups.empty())
          LastGroupStartIdx = 0;
      }

      // If this bit is known to be zero and the current group is a bit group
      // of zeros, we do not need to terminate the current bit group even the
      // Value or RLAmt does not match here. Instead, we terminate this group
      // when the first non-zero bit appears later.
      if (IsGroupOfZeros && Bits[i].isZero())
        continue;

      // If this bit has the same underlying value and the same rotate factor as
      // the last one, then they're part of the same group.
      if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
        // We cannot continue the current group if this bit is not known to
        // be zero in a bit group of zeros.
        if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
          continue;

      // The current group ends at bit i-1; record it (zero-only groups, which
      // have no value node, are not recorded).
      if (LastValue.getNode())
        BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                     i-1));
      LastRLAmt = ThisRLAmt;
      LastValue = ThisValue;
      LastGroupStartIdx = i;
      IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
    }
    // Record the group that was still open when we ran out of bits.
    if (LastValue.getNode())
      BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
                                   Bits.size()-1));

    if (BitGroups.empty())
      return;

    // We might be able to combine the first and last groups.
    if (BitGroups.size() > 1) {
      // If the first and last groups are the same, then remove the first group
      // in favor of the last group, making the ending index of the last group
      // equal to the ending index of the to-be-removed first group.
      if (BitGroups[0].StartIdx == 0 &&
          BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
          BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
          BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
        LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
        BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
        BitGroups.erase(BitGroups.begin());
      }
    }
  }
1753 
1754  // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1755  // associated with each. If the number of groups are same, we prefer a group
1756  // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1757  // instruction. If there is a degeneracy, pick the one that occurs
1758  // first (in the final value).
1759  void collectValueRotInfo() {
1760  ValueRots.clear();
1761 
1762  for (auto &BG : BitGroups) {
1763  unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1764  ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1765  VRI.V = BG.V;
1766  VRI.RLAmt = BG.RLAmt;
1767  VRI.Repl32 = BG.Repl32;
1768  VRI.NumGroups += 1;
1769  VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1770  }
1771 
1772  // Now that we've collected the various ValueRotInfo instances, we need to
1773  // sort them.
1774  ValueRotsVec.clear();
1775  for (auto &I : ValueRots) {
1776  ValueRotsVec.push_back(I.second);
1777  }
1778  llvm::sort(ValueRotsVec);
1779  }
1780 
  // In 64-bit mode, rlwinm and friends have a rotation operator that
  // replicates the low-order 32 bits into the high-order 32-bits. The mask
  // indices of these instructions can only be in the lower 32 bits, so they
  // can only represent some 64-bit bit groups. However, when they can be used,
  // the 32-bit replication can be used to represent, as a single bit group,
  // otherwise separate bit groups. We'll convert to replicated-32-bit bit
  // groups when possible. Returns true if any of the bit groups were
  // converted.
  void assignRepl32BitGroups() {
    // If we have bits like this:
    //
    // Indices:    15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
    // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
    // Groups:    |      RLAmt = 8      |      RLAmt = 40       |
    //
    // But, making use of a 32-bit operation that replicates the low-order 32
    // bits into the high-order 32 bits, this can be one bit group with a RLAmt
    // of 8.

    // A group is eligible for Repl32 only if every value bit it contains
    // comes from the low 32 bits of the source (the replicated half). Handles
    // both normal and wrap-around (StartIdx > EndIdx) groups.
    auto IsAllLow32 = [this](BitGroup & BG) {
      if (BG.StartIdx <= BG.EndIdx) {
        for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      } else {
        for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
        for (unsigned i = 0; i <= BG.EndIdx; ++i) {
          if (!Bits[i].hasValue())
            continue;
          if (Bits[i].getValueBitIndex() >= 32)
            return false;
        }
      }

      return true;
    };

    for (auto &BG : BitGroups) {
      // If this bit group has RLAmt of 0 and will not be merged with
      // another bit group, we don't benefit from Repl32. We don't mark
      // such group to give more freedom for later instruction selection.
      if (BG.RLAmt == 0) {
        auto PotentiallyMerged = [this](BitGroup & BG) {
          for (auto &BG2 : BitGroups)
            if (&BG != &BG2 && BG.V == BG2.V &&
                (BG2.RLAmt == 0 || BG2.RLAmt == 32))
              return true;
          return false;
        };
        if (!PotentiallyMerged(BG))
          continue;
      }
      if (BG.StartIdx < 32 && BG.EndIdx < 32) {
        if (IsAllLow32(BG)) {
          // A rotation of 32+ is equivalent (for a Repl32 form) to the same
          // rotation mod 32, because the two halves are identical; remember
          // that we folded the 32 so the original form can be recovered.
          if (BG.RLAmt >= 32) {
            BG.RLAmt -= 32;
            BG.Repl32CR = true;
          }

          BG.Repl32 = true;

          LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
                            << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
                            << BG.StartIdx << ", " << BG.EndIdx << "]\n");
        }
      }
    }

    // Now walk through the bit groups, consolidating where possible.
    for (auto I = BitGroups.begin(); I != BitGroups.end();) {
      // We might want to remove this bit group by merging it with the previous
      // group (which might be the ending group).
      auto IP = (I == BitGroups.begin()) ?
                std::prev(BitGroups.end()) : std::prev(I);
      // Two adjacent (mod 64) Repl32 groups with the same value and rotation
      // can be fused into one (possibly wrap-around) group.
      if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
          I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {

        LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
                          << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
                          << I->StartIdx << ", " << I->EndIdx
                          << "] with group with range [" << IP->StartIdx << ", "
                          << IP->EndIdx << "]\n");

        IP->EndIdx = I->EndIdx;
        IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
        IP->Repl32Coalesced = true;
        I = BitGroups.erase(I);
        continue;
      } else {
        // There is a special case worth handling: If there is a single group
        // covering the entire upper 32 bits, and it can be merged with both
        // the next and previous groups (which might be the same group), then
        // do so. If it is the same group (so there will be only one group in
        // total), then we need to reverse the order of the range so that it
        // covers the entire 64 bits.
        if (I->StartIdx == 32 && I->EndIdx == 63) {
          assert(std::next(I) == BitGroups.end() &&
                 "bit group ends at index 63 but there is another?");
          auto IN = BitGroups.begin();

          if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
              (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
              IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
              IsAllLow32(*I)) {

            LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
                              << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
                              << ", " << I->EndIdx
                              << "] with 32-bit replicated groups with ranges ["
                              << IP->StartIdx << ", " << IP->EndIdx << "] and ["
                              << IN->StartIdx << ", " << IN->EndIdx << "]\n");

            if (IP == IN) {
              // There is only one other group; change it to cover the whole
              // range (backward, so that it can still be Repl32 but cover the
              // whole 64-bit range).
              IP->StartIdx = 31;
              IP->EndIdx = 30;
              IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
            } else {
              // There are two separate groups, one before this group and one
              // after us (at the beginning). We're going to remove this group,
              // but also the group at the very beginning.
              IP->EndIdx = IN->EndIdx;
              IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
              IP->Repl32Coalesced = true;
              I = BitGroups.erase(I);
              BitGroups.erase(BitGroups.begin());
            }

            // This must be the last group in the vector (and we might have
            // just invalidated the iterator above), so break here.
            break;
          }
        }
      }

      ++I;
    }
  }
1931 
  // Convenience wrapper: materialize Imm as an i32 target constant at dl.
  SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }
1935 
1936  uint64_t getZerosMask() {
1937  uint64_t Mask = 0;
1938  for (unsigned i = 0; i < Bits.size(); ++i) {
1939  if (Bits[i].hasValue())
1940  continue;
1941  Mask |= (UINT64_C(1) << i);
1942  }
1943 
1944  return ~Mask;
1945  }
1946 
1947  // This method extends an input value to 64 bit if input is 32-bit integer.
1948  // While selecting instructions in BitPermutationSelector in 64-bit mode,
1949  // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
1950  // In such case, we extend it to 64 bit to be consistent with other values.
1951  SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
1952  if (V.getValueSizeInBits() == 64)
1953  return V;
1954 
1955  assert(V.getValueSizeInBits() == 32);
1956  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1957  SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
1958  MVT::i64), 0);
1959  SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
1960  MVT::i64, ImDef, V,
1961  SubRegIdx), 0);
1962  return ExtVal;
1963  }
1964 
1965  SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
1966  if (V.getValueSizeInBits() == 32)
1967  return V;
1968 
1969  assert(V.getValueSizeInBits() == 64);
1970  SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
1971  SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
1972  MVT::i32, V, SubRegIdx), 0);
1973  return SubVal;
1974  }
1975 
1976  // Depending on the number of groups for a particular value, it might be
1977  // better to rotate, mask explicitly (using andi/andis), and then or the
1978  // result. Select this part of the result first.
1979  void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
1981  return;
1982 
1983  for (ValueRotInfo &VRI : ValueRotsVec) {
1984  unsigned Mask = 0;
1985  for (unsigned i = 0; i < Bits.size(); ++i) {
1986  if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
1987  continue;
1988  if (RLAmt[i] != VRI.RLAmt)
1989  continue;
1990  Mask |= (1u << i);
1991  }
1992 
1993  // Compute the masks for andi/andis that would be necessary.
1994  unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
1995  assert((ANDIMask != 0 || ANDISMask != 0) &&
1996  "No set bits in mask for value bit groups");
1997  bool NeedsRotate = VRI.RLAmt != 0;
1998 
1999  // We're trying to minimize the number of instructions. If we have one
2000  // group, using one of andi/andis can break even. If we have three
2001  // groups, we can use both andi and andis and break even (to use both
2002  // andi and andis we also need to or the results together). We need four
2003  // groups if we also need to rotate. To use andi/andis we need to do more
2004  // than break even because rotate-and-mask instructions tend to be easier
2005  // to schedule.
2006 
2007  // FIXME: We've biased here against using andi/andis, which is right for
2008  // POWER cores, but not optimal everywhere. For example, on the A2,
2009  // andi/andis have single-cycle latency whereas the rotate-and-mask
2010  // instructions take two cycles, and it would be better to bias toward
2011  // andi/andis in break-even cases.
2012 
2013  unsigned NumAndInsts = (unsigned) NeedsRotate +
2014  (unsigned) (ANDIMask != 0) +
2015  (unsigned) (ANDISMask != 0) +
2016  (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2017  (unsigned) (bool) Res;
2018 
2019  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2020  << " RL: " << VRI.RLAmt << ":"
2021  << "\n\t\t\tisel using masking: " << NumAndInsts
2022  << " using rotates: " << VRI.NumGroups << "\n");
2023 
2024  if (NumAndInsts >= VRI.NumGroups)
2025  continue;
2026 
2027  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2028 
2029  if (InstCnt) *InstCnt += NumAndInsts;
2030 
2031  SDValue VRot;
2032  if (VRI.RLAmt) {
2033  SDValue Ops[] =
2034  { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2035  getI32Imm(0, dl), getI32Imm(31, dl) };
2036  VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2037  Ops), 0);
2038  } else {
2039  VRot = TruncateToInt32(VRI.V, dl);
2040  }
2041 
2042  SDValue ANDIVal, ANDISVal;
2043  if (ANDIMask != 0)
2044  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2045  VRot, getI32Imm(ANDIMask, dl)),
2046  0);
2047  if (ANDISMask != 0)
2048  ANDISVal =
2049  SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2050  getI32Imm(ANDISMask, dl)),
2051  0);
2052 
2053  SDValue TotalVal;
2054  if (!ANDIVal)
2055  TotalVal = ANDISVal;
2056  else if (!ANDISVal)
2057  TotalVal = ANDIVal;
2058  else
2059  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2060  ANDIVal, ANDISVal), 0);
2061 
2062  if (!Res)
2063  Res = TotalVal;
2064  else
2065  Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2066  Res, TotalVal), 0);
2067 
2068  // Now, remove all groups with this underlying value and rotation
2069  // factor.
2070  eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2071  return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2072  });
2073  }
2074  }
2075 
  // Instruction selection for the 32-bit case.
  //
  // Builds the permuted value with a combination of andi/andis masking
  // (SelectAndParts32), an initial rlwinm, rlwimi insertions for the
  // remaining bit groups, and (if LateMask) a final zero mask. If InstCnt is
  // non-null, the number of machine instructions emitted is accumulated
  // into it.
  SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts32(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      ValueRotInfo &VRI = ValueRotsVec[0];
      if (VRI.RLAmt) {
        if (InstCnt) *InstCnt += 1;
        // Plain rotate: rlwinm with the full 0..31 mask.
        SDValue Ops[] =
          { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
            getI32Imm(0, dl), getI32Imm(31, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
                      0);
      } else {
        Res = TruncateToInt32(VRI.V, dl);
      }

      // Now, remove all groups with this underlying value and rotation factor.
      eraseMatchingBitGroups([VRI](const BitGroup &BG) {
        return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
      });
    }

    if (InstCnt) *InstCnt += BitGroups.size();

    // Insert the other groups (one at a time).
    // The mask bounds are flipped because the instructions number bits from
    // the high-order end.
    for (auto &BG : BitGroups) {
      if (!Res) {
        // First group: rotate-and-mask (rlwinm).
        SDValue Ops[] =
          { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
      } else {
        // Subsequent groups: rotate-and-insert into the accumulated result
        // (rlwimi).
        SDValue Ops[] =
          { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
            getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
            getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
        Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
      }
    }

    if (LateMask) {
      // Clear the positions that must be constant zero, using andi for the
      // low half of the mask and andis for the high half.
      unsigned Mask = (unsigned) getZerosMask();

      unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
      assert((ANDIMask != 0 || ANDISMask != 0) &&
             "No set bits in zeros mask?");

      if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                               (unsigned) (ANDISMask != 0) +
                               (unsigned) (ANDIMask != 0 && ANDISMask != 0);

      SDValue ANDIVal, ANDISVal;
      if (ANDIMask != 0)
        ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
                                                 Res, getI32Imm(ANDIMask, dl)),
                          0);
      if (ANDISMask != 0)
        ANDISVal =
            SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
                                           getI32Imm(ANDISMask, dl)),
                    0);

      if (!ANDIVal)
        Res = ANDISVal;
      else if (!ANDISVal)
        Res = ANDIVal;
      else
        Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
                                             ANDIVal, ANDISVal), 0);
    }

    return Res.getNode();
  }
2160 
2161  unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
2162  unsigned MaskStart, unsigned MaskEnd,
2163  bool IsIns) {
2164  // In the notation used by the instructions, 'start' and 'end' are reversed
2165  // because bits are counted from high to low order.
2166  unsigned InstMaskStart = 64 - MaskEnd - 1,
2167  InstMaskEnd = 64 - MaskStart - 1;
2168 
2169  if (Repl32)
2170  return 1;
2171 
2172  if ((!IsIns && (InstMaskEnd == 63 || InstMaskStart == 0)) ||
2173  InstMaskEnd == 63 - RLAmt)
2174  return 1;
2175 
2176  return 2;
2177  }
2178 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce one if it is available.
  //
  // Emits one instruction when the (rotate, mask) pair matches rlwinm8
  // (Repl32), rldicl (mask runs to bit 63), rldicr (mask starts at bit 0) or
  // rldic (InstMaskEnd == 63 - RLAmt); otherwise falls back to two chained
  // rotate-and-mask operations. InstCnt, if non-null, is incremented by the
  // number of instructions emitted.
  SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
                          bool Repl32, unsigned MaskStart, unsigned MaskEnd,
                          unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      // rlwinm8 takes 32-bit mask indices, hence the -32 adjustment.
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldicl: rotate left, clear left — mask extends to the last bit.
    if (InstMaskEnd == 63) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
    }

    // rldicr: rotate left, clear right — mask starts at the first bit.
    if (InstMaskStart == 0) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskEnd, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
    }

    // rldic: rotate left, clear — the mask end is tied to the rotation.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
2242 
  // For 64-bit values, not all combinations of rotates and masks are
  // available. Produce a rotate-mask-and-insert if one is available.
  //
  // Inserts the rotated-and-masked V into Base. Uses rlwimi8 for Repl32 and
  // rldimi when InstMaskEnd == 63 - RLAmt; any other combination takes a
  // preparatory rotate of V first. InstCnt, if non-null, is incremented by
  // the number of instructions emitted.
  SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
                             unsigned RLAmt, bool Repl32, unsigned MaskStart,
                             unsigned MaskEnd, unsigned *InstCnt = nullptr) {
    // In the notation used by the instructions, 'start' and 'end' are reversed
    // because bits are counted from high to low order.
    unsigned InstMaskStart = 64 - MaskEnd - 1,
             InstMaskEnd = 64 - MaskStart - 1;

    if (InstCnt) *InstCnt += 1;

    if (Repl32) {
      // This rotation amount assumes that the lower 32 bits of the quantity
      // are replicated in the high 32 bits by the rotation operator (which is
      // done by rlwinm and friends).
      assert(InstMaskStart >= 32 && "Mask cannot start out of range");
      assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
      // rlwimi8 takes 32-bit mask indices, hence the -32 adjustment.
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
                                            Ops), 0);
    }

    // rldimi: rotate left and insert — the mask end is tied to the rotation.
    if (InstMaskEnd == 63 - RLAmt) {
      SDValue Ops[] =
        { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
          getI32Imm(InstMaskStart, dl) };
      return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
    }

    // We cannot do this with a single instruction, so we'll use two. The
    // problem is that we're not free to choose both a rotation amount and mask
    // start and end independently. We can choose an arbitrary mask start and
    // end, but then the rotation amount is fixed. Rotation, however, can be
    // inverted, and so by applying an "inverse" rotation first, we can get the
    // desired result.
    if (InstCnt) *InstCnt += 1;

    // The rotation mask for the second instruction must be MaskStart.
    unsigned RLAmt2 = MaskStart;
    // The first instruction must rotate V so that the overall rotation amount
    // is RLAmt.
    unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
    if (RLAmt1)
      V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
    return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
  }
2292 
2293  void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2295  return;
2296 
2297  // The idea here is the same as in the 32-bit version, but with additional
2298  // complications from the fact that Repl32 might be true. Because we
2299  // aggressively convert bit groups to Repl32 form (which, for small
2300  // rotation factors, involves no other change), and then coalesce, it might
2301  // be the case that a single 64-bit masking operation could handle both
2302  // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2303  // form allowed coalescing, then we must use a 32-bit rotaton in order to
2304  // completely capture the new combined bit group.
2305 
2306  for (ValueRotInfo &VRI : ValueRotsVec) {
2307  uint64_t Mask = 0;
2308 
2309  // We need to add to the mask all bits from the associated bit groups.
2310  // If Repl32 is false, we need to add bits from bit groups that have
2311  // Repl32 true, but are trivially convertable to Repl32 false. Such a
2312  // group is trivially convertable if it overlaps only with the lower 32
2313  // bits, and the group has not been coalesced.
2314  auto MatchingBG = [VRI](const BitGroup &BG) {
2315  if (VRI.V != BG.V)
2316  return false;
2317 
2318  unsigned EffRLAmt = BG.RLAmt;
2319  if (!VRI.Repl32 && BG.Repl32) {
2320  if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2321  !BG.Repl32Coalesced) {
2322  if (BG.Repl32CR)
2323  EffRLAmt += 32;
2324  } else {
2325  return false;
2326  }
2327  } else if (VRI.Repl32 != BG.Repl32) {
2328  return false;
2329  }
2330 
2331  return VRI.RLAmt == EffRLAmt;
2332  };
2333 
2334  for (auto &BG : BitGroups) {
2335  if (!MatchingBG(BG))
2336  continue;
2337 
2338  if (BG.StartIdx <= BG.EndIdx) {
2339  for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2340  Mask |= (UINT64_C(1) << i);
2341  } else {
2342  for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2343  Mask |= (UINT64_C(1) << i);
2344  for (unsigned i = 0; i <= BG.EndIdx; ++i)
2345  Mask |= (UINT64_C(1) << i);
2346  }
2347  }
2348 
2349  // We can use the 32-bit andi/andis technique if the mask does not
2350  // require any higher-order bits. This can save an instruction compared
2351  // to always using the general 64-bit technique.
2352  bool Use32BitInsts = isUInt<32>(Mask);
2353  // Compute the masks for andi/andis that would be necessary.
2354  unsigned ANDIMask = (Mask & UINT16_MAX),
2355  ANDISMask = (Mask >> 16) & UINT16_MAX;
2356 
2357  bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2358 
2359  unsigned NumAndInsts = (unsigned) NeedsRotate +
2360  (unsigned) (bool) Res;
2361  unsigned NumOfSelectInsts = 0;
2362  selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2363  assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2364  if (Use32BitInsts)
2365  NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2366  (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2367  else
2368  NumAndInsts += NumOfSelectInsts + /* and */ 1;
2369 
2370  unsigned NumRLInsts = 0;
2371  bool FirstBG = true;
2372  bool MoreBG = false;
2373  for (auto &BG : BitGroups) {
2374  if (!MatchingBG(BG)) {
2375  MoreBG = true;
2376  continue;
2377  }
2378  NumRLInsts +=
2379  SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2380  !FirstBG);
2381  FirstBG = false;
2382  }
2383 
2384  LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2385  << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2386  << "\n\t\t\tisel using masking: " << NumAndInsts
2387  << " using rotates: " << NumRLInsts << "\n");
2388 
2389  // When we'd use andi/andis, we bias toward using the rotates (andi only
2390  // has a record form, and is cracked on POWER cores). However, when using
2391  // general 64-bit constant formation, bias toward the constant form,
2392  // because that exposes more opportunities for CSE.
2393  if (NumAndInsts > NumRLInsts)
2394  continue;
2395  // When merging multiple bit groups, instruction or is used.
2396  // But when rotate is used, rldimi can inert the rotated value into any
2397  // register, so instruction or can be avoided.
2398  if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2399  continue;
2400 
2401  LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2402 
2403  if (InstCnt) *InstCnt += NumAndInsts;
2404 
2405  SDValue VRot;
2406  // We actually need to generate a rotation if we have a non-zero rotation
2407  // factor or, in the Repl32 case, if we care about any of the
2408  // higher-order replicated bits. In the latter case, we generate a mask
2409  // backward so that it actually includes the entire 64 bits.
2410  if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2411  VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2412  VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2413  else
2414  VRot = VRI.V;
2415 
2416  SDValue TotalVal;
2417  if (Use32BitInsts) {
2418  assert((ANDIMask != 0 || ANDISMask != 0) &&
2419  "No set bits in mask when using 32-bit ands for 64-bit value");
2420 
2421  SDValue ANDIVal, ANDISVal;
2422  if (ANDIMask != 0)
2423  ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2424  ExtendToInt64(VRot, dl),
2425  getI32Imm(ANDIMask, dl)),
2426  0);
2427  if (ANDISMask != 0)
2428  ANDISVal =
2429  SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2430  ExtendToInt64(VRot, dl),
2431  getI32Imm(ANDISMask, dl)),
2432  0);
2433 
2434  if (!ANDIVal)
2435  TotalVal = ANDISVal;
2436  else if (!ANDISVal)
2437  TotalVal = ANDIVal;
2438  else
2439  TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2440  ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2441  } else {
2442  TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2443  TotalVal =
2444  SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2445  ExtendToInt64(VRot, dl), TotalVal),
2446  0);
2447  }
2448 
2449  if (!Res)
2450  Res = TotalVal;
2451  else
2452  Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2453  ExtendToInt64(Res, dl), TotalVal),
2454  0);
2455 
2456  // Now, remove all groups with this underlying value and rotation
2457  // factor.
2458  eraseMatchingBitGroups(MatchingBG);
2459  }
2460  }
2461 
  // Instruction selection for the 64-bit case.
  //
  // Emits a chain of 64-bit rotate-and-mask / rotate-and-insert machine nodes
  // realizing the bit permutation described by BitGroups/ValueRotsVec. If
  // InstCnt is non-null it is reset and accumulates the number of machine
  // instructions this selection would use.
  SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
    SDLoc dl(N);
    SDValue Res;

    if (InstCnt) *InstCnt = 0;

    // Take care of cases that should use andi/andis first.
    SelectAndParts64(dl, Res, InstCnt);

    // If we've not yet selected a 'starting' instruction, and we have no zeros
    // to fill in, select the (Value, RLAmt) with the highest priority (largest
    // number of groups), and start with this rotated value.
    if ((!NeedMask || LateMask) && !Res) {
      // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
      // groups will come first, and so the VRI representing the largest number
      // of groups might not be first (it might be the first Repl32 groups).
      unsigned MaxGroupsIdx = 0;
      if (!ValueRotsVec[0].Repl32) {
        for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
          if (ValueRotsVec[i].Repl32) {
            if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
              MaxGroupsIdx = i;
            // Only the first Repl32 entry needs to be considered: entries are
            // grouped, so later Repl32 entries cannot have more groups.
            break;
          }
      }

      ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
      bool NeedsRotate = false;
      if (VRI.RLAmt) {
        NeedsRotate = true;
      } else if (VRI.Repl32) {
        // Zero rotation factor, but 32-bit replicated groups may still need a
        // rotate if any of them touches the high 32 bits.
        for (auto &BG : BitGroups) {
          if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
              BG.Repl32 != VRI.Repl32)
            continue;

          // We don't need a rotate if the bit group is confined to the lower
          // 32 bits.
          if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
            continue;

          NeedsRotate = true;
          break;
        }
      }

      if (NeedsRotate)
        Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
                              VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
                              InstCnt);
      else
        Res = VRI.V;

      // Now, remove all groups with this underlying value and rotation factor.
      if (Res)
        eraseMatchingBitGroups([VRI](const BitGroup &BG) {
          return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
                 BG.Repl32 == VRI.Repl32;
        });
    }

    // Because 64-bit rotates are more flexible than inserts, we might have a
    // preference regarding which one we do first (to save one instruction).
    if (!Res)
      for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
        if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 false) <
            SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
                                 true)) {
          // Move this cheaper-as-a-rotate group to the front so it becomes
          // the starting value instead of an insert.
          if (I != BitGroups.begin()) {
            BitGroup BG = *I;
            BitGroups.erase(I);
            BitGroups.insert(BitGroups.begin(), BG);
          }

          break;
        }
      }

    // Insert the other groups (one at a time).
    for (auto &BG : BitGroups) {
      if (!Res)
        Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
                              BG.EndIdx, InstCnt);
      else
        Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
                                 BG.StartIdx, BG.EndIdx, InstCnt);
    }

    if (LateMask) {
      uint64_t Mask = getZerosMask();

      // We can use the 32-bit andi/andis technique if the mask does not
      // require any higher-order bits. This can save an instruction compared
      // to always using the general 64-bit technique.
      bool Use32BitInsts = isUInt<32>(Mask);
      // Compute the masks for andi/andis that would be necessary.
      unsigned ANDIMask = (Mask & UINT16_MAX),
               ANDISMask = (Mask >> 16) & UINT16_MAX;

      if (Use32BitInsts) {
        assert((ANDIMask != 0 || ANDISMask != 0) &&
               "No set bits in mask when using 32-bit ands for 64-bit value");

        // One instruction per non-zero half-mask, plus an OR8 if both halves
        // are needed.
        if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
                                 (unsigned) (ANDISMask != 0) +
                                 (unsigned) (ANDIMask != 0 && ANDISMask != 0);

        SDValue ANDIVal, ANDISVal;
        if (ANDIMask != 0)
          ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
                                                   ExtendToInt64(Res, dl),
                                                   getI32Imm(ANDIMask, dl)),
                            0);
        if (ANDISMask != 0)
          ANDISVal =
              SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
                                             ExtendToInt64(Res, dl),
                                             getI32Imm(ANDISMask, dl)),
                      0);

        if (!ANDIVal)
          Res = ANDISVal;
        else if (!ANDISVal)
          Res = ANDIVal;
        else
          Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                          ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
      } else {
        // General case: materialize the 64-bit mask and AND it in.
        unsigned NumOfSelectInsts = 0;
        SDValue MaskVal =
            SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
        Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
                                             ExtendToInt64(Res, dl), MaskVal),
                      0);
        if (InstCnt)
          *InstCnt += NumOfSelectInsts + /* and */ 1;
      }
    }

    return Res.getNode();
  }
2605 
2606  SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2607  // Fill in BitGroups.
2608  collectBitGroups(LateMask);
2609  if (BitGroups.empty())
2610  return nullptr;
2611 
2612  // For 64-bit values, figure out when we can use 32-bit instructions.
2613  if (Bits.size() == 64)
2614  assignRepl32BitGroups();
2615 
2616  // Fill in ValueRotsVec.
2617  collectValueRotInfo();
2618 
2619  if (Bits.size() == 32) {
2620  return Select32(N, LateMask, InstCnt);
2621  } else {
2622  assert(Bits.size() == 64 && "Not 64 bits here?");
2623  return Select64(N, LateMask, InstCnt);
2624  }
2625 
2626  return nullptr;
2627  }
2628 
2629  void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2630  erase_if(BitGroups, F);
2631  }
2632 
2634 
2635  bool NeedMask = false;
2637 
2638  SmallVector<BitGroup, 16> BitGroups;
2639 
2640  DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2641  SmallVector<ValueRotInfo, 16> ValueRotsVec;
2642 
2643  SelectionDAG *CurDAG = nullptr;
2644 
public:
  // Construct a selector over the given DAG. Per-node state (Bits, bit
  // groups, rotation info) is populated by each call to Select().
  BitPermutationSelector(SelectionDAG *DAG)
    : CurDAG(DAG) {}
2648 
  // Here we try to match complex bit permutations into a set of
  // rotate-and-shift/shift/and/or instructions, using a set of heuristics
  // known to produce optimal code for common cases (like i32 byte swapping).
  //
  // Returns the selected node, or nullptr if the value's bits could not be
  // traced back through the DAG.
  SDNode *Select(SDNode *N) {
    Memoizer.clear();
    auto Result =
        getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
    // Result.first is false when the bits could not be determined.
    if (!Result.first)
      return nullptr;
    Bits = std::move(*Result.second);

    LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
                         " selection for: ");
    LLVM_DEBUG(N->dump(CurDAG));

    // Fill it RLAmt and set NeedMask.
    computeRotationAmounts();

    // With no zeros to materialize, early masking is the only strategy.
    if (!NeedMask)
      return Select(N, false);

    // We currently have two techniques for handling results with zeros: early
    // masking (the default) and late masking. Late masking is sometimes more
    // efficient, but because the structure of the bit groups is different, it
    // is hard to tell without generating both and comparing the results. With
    // late masking, we ignore zeros in the resulting value when inserting each
    // set of bit groups, and then mask in the zeros at the end. With early
    // masking, we only insert the non-zero parts of the result at every step.

    unsigned InstCnt = 0, InstCntLateMask = 0;
    LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
    SDNode *RN = Select(N, false, &InstCnt);
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");

    LLVM_DEBUG(dbgs() << "\tLate masking:\n");
    SDNode *RNLM = Select(N, true, &InstCntLateMask);
    LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
                      << " instructions\n");

    // Ties go to early masking (both selections were already emitted into the
    // DAG; the unused one is left for DAG cleanup).
    if (InstCnt <= InstCntLateMask) {
      LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
      return RN;
    }

    LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
    return RNLM;
  }
2696 };
2697 
2698 class IntegerCompareEliminator {
2699  SelectionDAG *CurDAG;
2700  PPCDAGToDAGISel *S;
2701  // Conversion type for interpreting results of a 32-bit instruction as
2702  // a 64-bit value or vice versa.
2703  enum ExtOrTruncConversion { Ext, Trunc };
2704 
2705  // Modifiers to guide how an ISD::SETCC node's result is to be computed
2706  // in a GPR.
2707  // ZExtOrig - use the original condition code, zero-extend value
2708  // ZExtInvert - invert the condition code, zero-extend value
2709  // SExtOrig - use the original condition code, sign-extend value
2710  // SExtInvert - invert the condition code, sign-extend value
2711  enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2712 
2713  // Comparisons against zero to emit GPR code sequences for. Each of these
2714  // sequences may need to be emitted for two or more equivalent patterns.
2715  // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2716  // matters as well as the extension type: sext (-1/0), zext (1/0).
2717  // GEZExt - (zext (LHS >= 0))
2718  // GESExt - (sext (LHS >= 0))
2719  // LEZExt - (zext (LHS <= 0))
2720  // LESExt - (sext (LHS <= 0))
2721  enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2722 
2723  SDNode *tryEXTEND(SDNode *N);
2724  SDNode *tryLogicOpOfCompares(SDNode *N);
2725  SDValue computeLogicOpInGPR(SDValue LogicOp);
2726  SDValue signExtendInputIfNeeded(SDValue Input);
2727  SDValue zeroExtendInputIfNeeded(SDValue Input);
2728  SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2729  SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2730  ZeroCompare CmpTy);
2731  SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2732  int64_t RHSValue, SDLoc dl);
2733  SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2734  int64_t RHSValue, SDLoc dl);
2735  SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2736  int64_t RHSValue, SDLoc dl);
2737  SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2738  int64_t RHSValue, SDLoc dl);
2739  SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2740 
2741 public:
2742  IntegerCompareEliminator(SelectionDAG *DAG,
2743  PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2744  assert(CurDAG->getTargetLoweringInfo()
2745  .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2746  "Only expecting to use this on 64 bit targets.");
2747  }
2748  SDNode *Select(SDNode *N) {
2749  if (CmpInGPR == ICGPR_None)
2750  return nullptr;
2751  switch (N->getOpcode()) {
2752  default: break;
2753  case ISD::ZERO_EXTEND:
2754  if (CmpInGPR == ICGPR_Sext || CmpInGPR == ICGPR_SextI32 ||
2756  return nullptr;
2758  case ISD::SIGN_EXTEND:
2759  if (CmpInGPR == ICGPR_Zext || CmpInGPR == ICGPR_ZextI32 ||
2761  return nullptr;
2762  return tryEXTEND(N);
2763  case ISD::AND:
2764  case ISD::OR:
2765  case ISD::XOR:
2766  return tryLogicOpOfCompares(N);
2767  }
2768  return nullptr;
2769  }
2770 };
2771 
2772 static bool isLogicOp(unsigned Opc) {
2773  return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR;
2774 }
2775 // The obvious case for wanting to keep the value in a GPR. Namely, the
2776 // result of the comparison is actually needed in a GPR.
2777 SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2778  assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2779  N->getOpcode() == ISD::SIGN_EXTEND) &&
2780  "Expecting a zero/sign extend node!");
2781  SDValue WideRes;
2782  // If we are zero-extending the result of a logical operation on i1
2783  // values, we can keep the values in GPRs.
2784  if (isLogicOp(N->getOperand(0).getOpcode()) &&
2785  N->getOperand(0).getValueType() == MVT::i1 &&
2786  N->getOpcode() == ISD::ZERO_EXTEND)
2787  WideRes = computeLogicOpInGPR(N->getOperand(0));
2788  else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2789  return nullptr;
2790  else
2791  WideRes =
2792  getSETCCInGPR(N->getOperand(0),
2793  N->getOpcode() == ISD::SIGN_EXTEND ?
2794  SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2795 
2796  if (!WideRes)
2797  return nullptr;
2798 
2799  SDLoc dl(N);
2800  bool Input32Bit = WideRes.getValueType() == MVT::i32;
2801  bool Output32Bit = N->getValueType(0) == MVT::i32;
2802 
2803  NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2804  NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2805 
2806  SDValue ConvOp = WideRes;
2807  if (Input32Bit != Output32Bit)
2808  ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2809  ExtOrTruncConversion::Trunc);
2810  return ConvOp.getNode();
2811 }
2812 
// Attempt to perform logical operations on the results of comparisons while
// keeping the values in GPRs. Without doing so, these would end up being
// lowered to CR-logical operations which suffer from significant latency and
// low ILP.
//
// The lowered logical op is re-emitted in record form and the i1 result is
// extracted as a single CR0 bit. Returns nullptr if the operands cannot be
// kept in GPRs.
SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
  if (N->getValueType(0) != MVT::i1)
    return nullptr;
  assert(isLogicOp(N->getOpcode()) &&
         "Expected a logic operation on setcc results.");
  SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
  if (!LoweredLogical)
    return nullptr;

  SDLoc dl(N);
  // computeLogicOpInGPR emits PPC::XORI8 only for bitwise negation
  // (xor with 1), so the opcode identifies that case.
  bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
  // For a negation we want the EQ bit (set when the un-negated result is
  // zero); otherwise the GT bit (set when the result is non-zero, since the
  // value is 0/1).
  unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
  SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
  SDValue LHS = LoweredLogical.getOperand(0);
  SDValue RHS = LoweredLogical.getOperand(1);
  SDValue WideOp;
  SDValue OpToConvToRecForm;

  // Look through any 32-bit to 64-bit implicit extend nodes to find the
  // opcode that is input to the XORI.
  if (IsBitwiseNegate &&
      LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
    OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
  else if (IsBitwiseNegate)
    // If the input to the XORI isn't an extension, that's what we're after.
    OpToConvToRecForm = LoweredLogical.getOperand(0);
  else
    // If this is not an XORI, it is a reg-reg logical op and we can convert
    // it to record-form.
    OpToConvToRecForm = LoweredLogical;

  // Get the record-form version of the node we're looking to use to get the
  // CR result from.
  uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
  int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);

  // Convert the right node to record-form. This is either the logical we're
  // looking at or it is the input node to the negation (if we're looking at
  // a bitwise negation).
  if (NewOpc != -1 && IsBitwiseNegate) {
    // The input to the XORI has a record-form. Use it.
    assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
           "Expected a PPC::XORI8 only for bitwise negation.");
    // Emit the record-form instruction.
    std::vector<SDValue> Ops;
    for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
      Ops.push_back(OpToConvToRecForm.getOperand(i));

    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc, dl,
                                     OpToConvToRecForm.getValueType(),
                                     MVT::Glue, Ops), 0);
  } else {
    assert((NewOpc != -1 || !IsBitwiseNegate) &&
           "No record form available for AND8/OR8/XOR8?");
    // Fall back to ANDI8_rec when no record form exists (only possible for
    // the non-negate path, per the assertion above).
    WideOp =
      SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
                                     dl, MVT::i64, MVT::Glue, LHS, RHS),
              0);
  }

  // Select this node to a single bit from CR0 set by the record-form node
  // just created. For bitwise negation, use the EQ bit which is the equivalent
  // of negating the result (i.e. it is a bit set when the result of the
  // operation is zero).
  SDValue SRIdxVal =
    CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
  SDValue CRBit =
    SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
                                   MVT::i1, CR0Reg, SRIdxVal,
                                   WideOp.getValue(1)), 0);
  return CRBit.getNode();
}
2890 
// Lower a logical operation on i1 values into a GPR sequence if possible.
// The result can be kept in a GPR if requested.
// Three types of inputs can be handled:
// - SETCC
// - TRUNCATE
// - Logical operation (AND/OR/XOR)
// There is also a special case that is handled (namely a complement operation
// achieved with xor %a, -1).
//
// Returns an empty SDValue if any operand cannot be produced in a GPR.
SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
  assert(isLogicOp(LogicOp.getOpcode()) &&
         "Can only handle logic operations here.");
  assert(LogicOp.getValueType() == MVT::i1 &&
         "Can only handle logic operations on i1 values here.");
  SDLoc dl(LogicOp);
  SDValue LHS, RHS;

  // Special case: xor %a, -1
  bool IsBitwiseNegation = isBitwiseNot(LogicOp);

  // Produces a GPR sequence for each operand of the binary logic operation.
  // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
  // the value in a GPR and for logic operations, it will recursively produce
  // a GPR sequence for the operation.
  auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
    unsigned OperandOpcode = Operand.getOpcode();
    if (OperandOpcode == ISD::SETCC)
      return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
    else if (OperandOpcode == ISD::TRUNCATE) {
      SDValue InputOp = Operand.getOperand(0);
      EVT InVT = InputOp.getValueType();
      // Clear all but the low bit (rotate 0, mask bit 63) so the truncated
      // i1 is a clean 0/1 value.
      return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
                                            PPC::RLDICL, dl, InVT, InputOp,
                                            S->getI64Imm(0, dl),
                                            S->getI64Imm(63, dl)), 0);
    } else if (isLogicOp(OperandOpcode))
      return computeLogicOpInGPR(Operand);
    // Unsupported operand kind.
    return SDValue();
  };
  LHS = getLogicOperand(LogicOp.getOperand(0));
  RHS = getLogicOperand(LogicOp.getOperand(1));

  // If a GPR sequence can't be produced for the LHS we can't proceed.
  // Not producing a GPR sequence for the RHS is only a problem if this isn't
  // a bitwise negation operation.
  if (!LHS || (!RHS && !IsBitwiseNegation))
    return SDValue();

  NumLogicOpsOnComparison++;

  // We will use the inputs as 64-bit values.
  if (LHS.getValueType() == MVT::i32)
    LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
  if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
    RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);

  unsigned NewOpc;
  switch (LogicOp.getOpcode()) {
  default: llvm_unreachable("Unknown logic operation.");
  case ISD::AND:  NewOpc = PPC::AND8;  break;
  case ISD::OR:   NewOpc = PPC::OR8;   break;
  case ISD::XOR:  NewOpc = PPC::XOR8;  break;
  }

  if (IsBitwiseNegation) {
    // Complement of an i1 is xor with 1 (only the low bit matters).
    RHS = S->getI64Imm(1, dl);
    NewOpc = PPC::XORI8;
  }

  return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);

}
2962 
2963 /// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
2964 /// Otherwise just reinterpret it as a 64-bit value.
2965 /// Useful when emitting comparison code for 32-bit values without using
2966 /// the compare instruction (which only considers the lower 32-bits).
2967 SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
2968  assert(Input.getValueType() == MVT::i32 &&
2969  "Can only sign-extend 32-bit values here.");
2970  unsigned Opc = Input.getOpcode();
2971 
2972  // The value was sign extended and then truncated to 32-bits. No need to
2973  // sign extend it again.
2974  if (Opc == ISD::TRUNCATE &&
2975  (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
2976  Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
2977  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2978 
2979  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
2980  // The input is a sign-extending load. All ppc sign-extending loads
2981  // sign-extend to the full 64-bits.
2982  if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
2983  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2984 
2985  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
2986  // We don't sign-extend constants.
2987  if (InputConst)
2988  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
2989 
2990  SDLoc dl(Input);
2991  SignExtensionsAdded++;
2992  return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
2993  MVT::i64, Input), 0);
2994 }
2995 
2996 /// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
2997 /// Otherwise just reinterpret it as a 64-bit value.
2998 /// Useful when emitting comparison code for 32-bit values without using
2999 /// the compare instruction (which only considers the lower 32-bits).
3000 SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3001  assert(Input.getValueType() == MVT::i32 &&
3002  "Can only zero-extend 32-bit values here.");
3003  unsigned Opc = Input.getOpcode();
3004 
3005  // The only condition under which we can omit the actual extend instruction:
3006  // - The value is a positive constant
3007  // - The value comes from a load that isn't a sign-extending load
3008  // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3009  bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3010  (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3011  Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3012  if (IsTruncateOfZExt)
3013  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3014 
3015  ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3016  if (InputConst && InputConst->getSExtValue() >= 0)
3017  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3018 
3019  LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3020  // The input is a load that doesn't sign-extend (it will be zero-extended).
3021  if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3022  return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3023 
3024  // None of the above, need to zero-extend.
3025  SDLoc dl(Input);
3026  ZeroExtensionsAdded++;
3027  return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3028  S->getI64Imm(0, dl),
3029  S->getI64Imm(32, dl)), 0);
3030 }
3031 
3032 // Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3033 // course not actual zero/sign extensions that will generate machine code,
3034 // they're just a way to reinterpret a 32 bit value in a register as a
3035 // 64 bit value and vice-versa.
3036 SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3037  ExtOrTruncConversion Conv) {
3038  SDLoc dl(NatWidthRes);
3039 
3040  // For reinterpreting 32-bit values as 64 bit values, we generate
3041  // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3042  if (Conv == ExtOrTruncConversion::Ext) {
3043  SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3044  SDValue SubRegIdx =
3045  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3046  return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3047  ImDef, NatWidthRes, SubRegIdx), 0);
3048  }
3049 
3050  assert(Conv == ExtOrTruncConversion::Trunc &&
3051  "Unknown convertion between 32 and 64 bit values.");
3052  // For reinterpreting 64-bit values as 32-bit values, we just need to
3053  // EXTRACT_SUBREG (i.e. extract the low word).
3054  SDValue SubRegIdx =
3055  CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3056  return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3057  NatWidthRes, SubRegIdx), 0);
3058 }
3059 
// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
// Handle both zero-extensions and sign-extensions.
//
// The sequence first computes a value whose sign bit encodes the comparison
// result (ToExtend), then shifts/extends that bit into the requested 0/1
// (zext) or 0/-1 (sext) form.
SDValue
IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
                                                         ZeroCompare CmpTy) {
  EVT InVT = LHS.getValueType();
  bool Is32Bit = InVT == MVT::i32;
  SDValue ToExtend;

  // Produce the value that needs to be either zero or sign extended.
  switch (CmpTy) {
  case ZeroCompare::GEZExt:
  case ZeroCompare::GESExt:
    // nor %a, %a: the sign bit of ~%a is set exactly when %a >= 0.
    ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
                                              dl, InVT, LHS, LHS), 0);
    break;
  case ZeroCompare::LEZExt:
  case ZeroCompare::LESExt: {
    if (Is32Bit) {
      // Upper 32 bits cannot be undefined for this sequence.
      LHS = signExtendInputIfNeeded(LHS);
      SDValue Neg =
        SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
      // Shift the sign bit of (- %a) into bit 0.
      ToExtend =
        SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                       Neg, S->getI64Imm(1, dl),
                                       S->getI64Imm(63, dl)), 0);
    } else {
      // (%a - 1) | %a has its sign bit set exactly when %a <= 0.
      SDValue Addi =
        SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
                                       S->getI64Imm(~0ULL, dl)), 0);
      ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
                                                Addi, LHS), 0);
    }
    break;
  }
  }

  // For 64-bit sequences, the extensions are the same for the GE/LE cases.
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
    // zext: move the sign bit down to bit 0.
    return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
                                          ToExtend, S->getI64Imm(1, dl),
                                          S->getI64Imm(63, dl)), 0);
  if (!Is32Bit &&
      (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
    // sext: replicate the sign bit across all 64 bits.
    return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
                                          S->getI64Imm(63, dl)), 0);

  assert(Is32Bit && "Should have handled the 32-bit sequences above.");
  // For 32-bit sequences, the extensions differ between GE/LE cases.
  switch (CmpTy) {
  case ZeroCompare::GEZExt: {
    // Extract the sign bit of ~%a as a 0/1 value.
    SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
                           S->getI32Imm(31, dl) };
    return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
                                          ShiftOps), 0);
  }
  case ZeroCompare::GESExt:
    // Replicate the sign bit of ~%a across the 32-bit register.
    return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
                                          S->getI32Imm(31, dl)), 0);
  case ZeroCompare::LEZExt:
    // ToExtend holds (zext (%a > 0)) here; xor with 1 gives (zext (%a <= 0)).
    return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(1, dl)), 0);
  case ZeroCompare::LESExt:
    // Subtracting 1 from the 0/1 value yields 0/-1 for the inverse test.
    return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
                                          S->getI32Imm(-1, dl)), 0);
  }

  // The above case covers all the enumerators so it can't have a default clause
  // to avoid compiler warnings.
  llvm_unreachable("Unknown zero-comparison type.");
}
3133 
3134 /// Produces a zero-extended result of comparing two 32-bit values according to
3135 /// the passed condition code.
3136 SDValue
3137 IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3138  ISD::CondCode CC,
3139  int64_t RHSValue, SDLoc dl) {
3140  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3142  return SDValue();
3143  bool IsRHSZero = RHSValue == 0;
3144  bool IsRHSOne = RHSValue == 1;
3145  bool IsRHSNegOne = RHSValue == -1LL;
3146  switch (CC) {
3147  default: return SDValue();
3148  case ISD::SETEQ: {
3149  // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3150  // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3151  SDValue Xor = IsRHSZero ? LHS :
3152  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3153  SDValue Clz =
3154  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3155  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3156  S->getI32Imm(31, dl) };
3157  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3158  ShiftOps), 0);
3159  }
3160  case ISD::SETNE: {
3161  // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3162  // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3163  SDValue Xor = IsRHSZero ? LHS :
3164  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3165  SDValue Clz =
3166  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3167  SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3168  S->getI32Imm(31, dl) };
3169  SDValue Shift =
3170  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3171  return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3172  S->getI32Imm(1, dl)), 0);
3173  }
3174  case ISD::SETGE: {
3175  // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3176  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3177  if(IsRHSZero)
3178  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3179 
3180  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3181  // by swapping inputs and falling through.
3182  std::swap(LHS, RHS);
3183  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3184  IsRHSZero = RHSConst && RHSConst->isNullValue();
3186  }
3187  case ISD::SETLE: {
3188  if (CmpInGPR == ICGPR_NonExtIn)
3189  return SDValue();
3190  // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3191  // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3192  if(IsRHSZero) {
3193  if (CmpInGPR == ICGPR_NonExtIn)
3194  return SDValue();
3195  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3196  }
3197 
3198  // The upper 32-bits of the register can't be undefined for this sequence.
3199  LHS = signExtendInputIfNeeded(LHS);
3200  RHS = signExtendInputIfNeeded(RHS);
3201  SDValue Sub =
3202  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3203  SDValue Shift =
3204  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3205  S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3206  0);
3207  return
3208  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3209  MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3210  }
3211  case ISD::SETGT: {
3212  // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3213  // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3214  // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3215  // Handle SETLT -1 (which is equivalent to SETGE 0).
3216  if (IsRHSNegOne)
3217  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3218 
3219  if (IsRHSZero) {
3220  if (CmpInGPR == ICGPR_NonExtIn)
3221  return SDValue();
3222  // The upper 32-bits of the register can't be undefined for this sequence.
3223  LHS = signExtendInputIfNeeded(LHS);
3224  RHS = signExtendInputIfNeeded(RHS);
3225  SDValue Neg =
3226  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3227  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3228  Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3229  }
3230  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3231  // (%b < %a) by swapping inputs and falling through.
3232  std::swap(LHS, RHS);
3233  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3234  IsRHSZero = RHSConst && RHSConst->isNullValue();
3235  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3237  }
3238  case ISD::SETLT: {
3239  // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3240  // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3241  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3242  // Handle SETLT 1 (which is equivalent to SETLE 0).
3243  if (IsRHSOne) {
3244  if (CmpInGPR == ICGPR_NonExtIn)
3245  return SDValue();
3246  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3247  }
3248 
3249  if (IsRHSZero) {
3250  SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3251  S->getI32Imm(31, dl) };
3252  return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3253  ShiftOps), 0);
3254  }
3255 
3256  if (CmpInGPR == ICGPR_NonExtIn)
3257  return SDValue();
3258  // The upper 32-bits of the register can't be undefined for this sequence.
3259  LHS = signExtendInputIfNeeded(LHS);
3260  RHS = signExtendInputIfNeeded(RHS);
3261  SDValue SUBFNode =
3262  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3263  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3264  SUBFNode, S->getI64Imm(1, dl),
3265  S->getI64Imm(63, dl)), 0);
3266  }
3267  case ISD::SETUGE:
3268  // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3269  // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3270  std::swap(LHS, RHS);
3272  case ISD::SETULE: {
3273  if (CmpInGPR == ICGPR_NonExtIn)
3274  return SDValue();
3275  // The upper 32-bits of the register can't be undefined for this sequence.
3276  LHS = zeroExtendInputIfNeeded(LHS);
3277  RHS = zeroExtendInputIfNeeded(RHS);
3278  SDValue Subtract =
3279  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3280  SDValue SrdiNode =
3281  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3282  Subtract, S->getI64Imm(1, dl),
3283  S->getI64Imm(63, dl)), 0);
3284  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3285  S->getI32Imm(1, dl)), 0);
3286  }
3287  case ISD::SETUGT:
3288  // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3289  // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3290  std::swap(LHS, RHS);
3292  case ISD::SETULT: {
3293  if (CmpInGPR == ICGPR_NonExtIn)
3294  return SDValue();
3295  // The upper 32-bits of the register can't be undefined for this sequence.
3296  LHS = zeroExtendInputIfNeeded(LHS);
3297  RHS = zeroExtendInputIfNeeded(RHS);
3298  SDValue Subtract =
3299  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3300  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3301  Subtract, S->getI64Imm(1, dl),
3302  S->getI64Imm(63, dl)), 0);
3303  }
3304  }
3305 }
3306 
3307 /// Produces a sign-extended result of comparing two 32-bit values according to
3308 /// the passed condition code.
3309 SDValue
3310 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3311  ISD::CondCode CC,
3312  int64_t RHSValue, SDLoc dl) {
3313  if (CmpInGPR == ICGPR_I64 || CmpInGPR == ICGPR_SextI64 ||
3315  return SDValue();
3316  bool IsRHSZero = RHSValue == 0;
3317  bool IsRHSOne = RHSValue == 1;
3318  bool IsRHSNegOne = RHSValue == -1LL;
3319 
3320  switch (CC) {
3321  default: return SDValue();
3322  case ISD::SETEQ: {
3323  // (sext (setcc %a, %b, seteq)) ->
3324  // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3325  // (sext (setcc %a, 0, seteq)) ->
3326  // (ashr (shl (ctlz %a), 58), 63)
3327  SDValue CountInput = IsRHSZero ? LHS :
3328  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3329  SDValue Cntlzw =
3330  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3331  SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3332  S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3333  SDValue Slwi =
3334  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3335  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3336  }
3337  case ISD::SETNE: {
3338  // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3339  // flip the bit, finally take 2's complement.
3340  // (sext (setcc %a, %b, setne)) ->
3341  // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3342  // Same as above, but the first xor is not needed.
3343  // (sext (setcc %a, 0, setne)) ->
3344  // (neg (xor (lshr (ctlz %a), 5), 1))
3345  SDValue Xor = IsRHSZero ? LHS :
3346  SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3347  SDValue Clz =
3348  SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3349  SDValue ShiftOps[] =
3350  { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3351  SDValue Shift =
3352  SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3353  SDValue Xori =
3354  SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3355  S->getI32Imm(1, dl)), 0);
3356  return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3357  }
3358  case ISD::SETGE: {
3359  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3360  // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3361  if (IsRHSZero)
3362  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3363 
3364  // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3365  // by swapping inputs and falling through.
3366  std::swap(LHS, RHS);
3367  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3368  IsRHSZero = RHSConst && RHSConst->isNullValue();
3370  }
3371  case ISD::SETLE: {
3372  if (CmpInGPR == ICGPR_NonExtIn)
3373  return SDValue();
3374  // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3375  // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3376  if (IsRHSZero)
3377  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3378 
3379  // The upper 32-bits of the register can't be undefined for this sequence.
3380  LHS = signExtendInputIfNeeded(LHS);
3381  RHS = signExtendInputIfNeeded(RHS);
3382  SDValue SUBFNode =
3383  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3384  LHS, RHS), 0);
3385  SDValue Srdi =
3386  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3387  SUBFNode, S->getI64Imm(1, dl),
3388  S->getI64Imm(63, dl)), 0);
3389  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3390  S->getI32Imm(-1, dl)), 0);
3391  }
3392  case ISD::SETGT: {
3393  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3394  // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3395  // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3396  if (IsRHSNegOne)
3397  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3398  if (IsRHSZero) {
3399  if (CmpInGPR == ICGPR_NonExtIn)
3400  return SDValue();
3401  // The upper 32-bits of the register can't be undefined for this sequence.
3402  LHS = signExtendInputIfNeeded(LHS);
3403  RHS = signExtendInputIfNeeded(RHS);
3404  SDValue Neg =
3405  SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3406  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3407  S->getI64Imm(63, dl)), 0);
3408  }
3409  // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3410  // (%b < %a) by swapping inputs and falling through.
3411  std::swap(LHS, RHS);
3412  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3413  IsRHSZero = RHSConst && RHSConst->isNullValue();
3414  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3416  }
3417  case ISD::SETLT: {
3418  // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3419  // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3420  // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3421  if (IsRHSOne) {
3422  if (CmpInGPR == ICGPR_NonExtIn)
3423  return SDValue();
3424  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3425  }
3426  if (IsRHSZero)
3427  return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3428  S->getI32Imm(31, dl)), 0);
3429 
3430  if (CmpInGPR == ICGPR_NonExtIn)
3431  return SDValue();
3432  // The upper 32-bits of the register can't be undefined for this sequence.
3433  LHS = signExtendInputIfNeeded(LHS);
3434  RHS = signExtendInputIfNeeded(RHS);
3435  SDValue SUBFNode =
3436  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3437  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3438  SUBFNode, S->getI64Imm(63, dl)), 0);
3439  }
3440  case ISD::SETUGE:
3441  // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3442  // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3443  std::swap(LHS, RHS);
3445  case ISD::SETULE: {
3446  if (CmpInGPR == ICGPR_NonExtIn)
3447  return SDValue();
3448  // The upper 32-bits of the register can't be undefined for this sequence.
3449  LHS = zeroExtendInputIfNeeded(LHS);
3450  RHS = zeroExtendInputIfNeeded(RHS);
3451  SDValue Subtract =
3452  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3453  SDValue Shift =
3454  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3455  S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3456  0);
3457  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3458  S->getI32Imm(-1, dl)), 0);
3459  }
3460  case ISD::SETUGT:
3461  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3462  // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3463  std::swap(LHS, RHS);
3465  case ISD::SETULT: {
3466  if (CmpInGPR == ICGPR_NonExtIn)
3467  return SDValue();
3468  // The upper 32-bits of the register can't be undefined for this sequence.
3469  LHS = zeroExtendInputIfNeeded(LHS);
3470  RHS = zeroExtendInputIfNeeded(RHS);
3471  SDValue Subtract =
3472  SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3473  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3474  Subtract, S->getI64Imm(63, dl)), 0);
3475  }
3476  }
3477 }
3478 
3479 /// Produces a zero-extended result of comparing two 64-bit values according to
3480 /// the passed condition code.
3481 SDValue
3482 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3483  ISD::CondCode CC,
3484  int64_t RHSValue, SDLoc dl) {
3485  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3487  return SDValue();
3488  bool IsRHSZero = RHSValue == 0;
3489  bool IsRHSOne = RHSValue == 1;
3490  bool IsRHSNegOne = RHSValue == -1LL;
3491  switch (CC) {
3492  default: return SDValue();
3493  case ISD::SETEQ: {
3494  // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3495  // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3496  SDValue Xor = IsRHSZero ? LHS :
3497  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3498  SDValue Clz =
3499  SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3500  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3501  S->getI64Imm(58, dl),
3502  S->getI64Imm(63, dl)), 0);
3503  }
3504  case ISD::SETNE: {
3505  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3506  // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3507  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3508  // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3509  SDValue Xor = IsRHSZero ? LHS :
3510  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3511  SDValue AC =
3512  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3513  Xor, S->getI32Imm(~0U, dl)), 0);
3514  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3515  Xor, AC.getValue(1)), 0);
3516  }
3517  case ISD::SETGE: {
3518  // {subc.reg, subc.CA} = (subcarry %a, %b)
3519  // (zext (setcc %a, %b, setge)) ->
3520  // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3521  // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3522  if (IsRHSZero)
3523  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3524  std::swap(LHS, RHS);
3525  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3526  IsRHSZero = RHSConst && RHSConst->isNullValue();
3528  }
3529  case ISD::SETLE: {
3530  // {subc.reg, subc.CA} = (subcarry %b, %a)
3531  // (zext (setcc %a, %b, setge)) ->
3532  // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3533  // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3534  if (IsRHSZero)
3535  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3536  SDValue ShiftL =
3537  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3538  S->getI64Imm(1, dl),
3539  S->getI64Imm(63, dl)), 0);
3540  SDValue ShiftR =
3541  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3542  S->getI64Imm(63, dl)), 0);
3543  SDValue SubtractCarry =
3544  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3545  LHS, RHS), 1);
3546  return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3547  ShiftR, ShiftL, SubtractCarry), 0);
3548  }
3549  case ISD::SETGT: {
3550  // {subc.reg, subc.CA} = (subcarry %b, %a)
3551  // (zext (setcc %a, %b, setgt)) ->
3552  // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3553  // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3554  if (IsRHSNegOne)
3555  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3556  if (IsRHSZero) {
3557  SDValue Addi =
3558  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3559  S->getI64Imm(~0ULL, dl)), 0);
3560  SDValue Nor =
3561  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3562  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3563  S->getI64Imm(1, dl),
3564  S->getI64Imm(63, dl)), 0);
3565  }
3566  std::swap(LHS, RHS);
3567  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3568  IsRHSZero = RHSConst && RHSConst->isNullValue();
3569  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3571  }
3572  case ISD::SETLT: {
3573  // {subc.reg, subc.CA} = (subcarry %a, %b)
3574  // (zext (setcc %a, %b, setlt)) ->
3575  // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3576  // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3577  if (IsRHSOne)
3578  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3579  if (IsRHSZero)
3580  return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3581  S->getI64Imm(1, dl),
3582  S->getI64Imm(63, dl)), 0);
3583  SDValue SRADINode =
3584  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3585  LHS, S->getI64Imm(63, dl)), 0);
3586  SDValue SRDINode =
3587  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3588  RHS, S->getI64Imm(1, dl),
3589  S->getI64Imm(63, dl)), 0);
3590  SDValue SUBFC8Carry =
3591  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3592  RHS, LHS), 1);
3593  SDValue ADDE8Node =
3594  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3595  SRDINode, SRADINode, SUBFC8Carry), 0);
3596  return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3597  ADDE8Node, S->getI64Imm(1, dl)), 0);
3598  }
3599  case ISD::SETUGE:
3600  // {subc.reg, subc.CA} = (subcarry %a, %b)
3601  // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3602  std::swap(LHS, RHS);
3604  case ISD::SETULE: {
3605  // {subc.reg, subc.CA} = (subcarry %b, %a)
3606  // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3607  SDValue SUBFC8Carry =
3608  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3609  LHS, RHS), 1);
3610  SDValue SUBFE8Node =
3611  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3612  LHS, LHS, SUBFC8Carry), 0);
3613  return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3614  SUBFE8Node, S->getI64Imm(1, dl)), 0);
3615  }
3616  case ISD::SETUGT:
3617  // {subc.reg, subc.CA} = (subcarry %b, %a)
3618  // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3619  std::swap(LHS, RHS);
3621  case ISD::SETULT: {
3622  // {subc.reg, subc.CA} = (subcarry %a, %b)
3623  // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3624  SDValue SubtractCarry =
3625  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3626  RHS, LHS), 1);
3627  SDValue ExtSub =
3628  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3629  LHS, LHS, SubtractCarry), 0);
3630  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3631  ExtSub), 0);
3632  }
3633  }
3634 }
3635 
3636 /// Produces a sign-extended result of comparing two 64-bit values according to
3637 /// the passed condition code.
3638 SDValue
3639 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3640  ISD::CondCode CC,
3641  int64_t RHSValue, SDLoc dl) {
3642  if (CmpInGPR == ICGPR_I32 || CmpInGPR == ICGPR_SextI32 ||
3644  return SDValue();
3645  bool IsRHSZero = RHSValue == 0;
3646  bool IsRHSOne = RHSValue == 1;
3647  bool IsRHSNegOne = RHSValue == -1LL;
3648  switch (CC) {
3649  default: return SDValue();
3650  case ISD::SETEQ: {
3651  // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3652  // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3653  // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3654  // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3655  SDValue AddInput = IsRHSZero ? LHS :
3656  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3657  SDValue Addic =
3658  SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3659  AddInput, S->getI32Imm(~0U, dl)), 0);
3660  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3661  Addic, Addic.getValue(1)), 0);
3662  }
3663  case ISD::SETNE: {
3664  // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3665  // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3666  // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3667  // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3668  SDValue Xor = IsRHSZero ? LHS :
3669  SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3670  SDValue SC =
3671  SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3672  Xor, S->getI32Imm(0, dl)), 0);
3673  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3674  SC, SC.getValue(1)), 0);
3675  }
3676  case ISD::SETGE: {
3677  // {subc.reg, subc.CA} = (subcarry %a, %b)
3678  // (zext (setcc %a, %b, setge)) ->
3679  // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3680  // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3681  if (IsRHSZero)
3682  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3683  std::swap(LHS, RHS);
3684  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3685  IsRHSZero = RHSConst && RHSConst->isNullValue();
3687  }
3688  case ISD::SETLE: {
3689  // {subc.reg, subc.CA} = (subcarry %b, %a)
3690  // (zext (setcc %a, %b, setge)) ->
3691  // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3692  // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3693  if (IsRHSZero)
3694  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3695  SDValue ShiftR =
3696  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3697  S->getI64Imm(63, dl)), 0);
3698  SDValue ShiftL =
3699  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3700  S->getI64Imm(1, dl),
3701  S->getI64Imm(63, dl)), 0);
3702  SDValue SubtractCarry =
3703  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3704  LHS, RHS), 1);
3705  SDValue Adde =
3706  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3707  ShiftR, ShiftL, SubtractCarry), 0);
3708  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3709  }
3710  case ISD::SETGT: {
3711  // {subc.reg, subc.CA} = (subcarry %b, %a)
3712  // (zext (setcc %a, %b, setgt)) ->
3713  // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3714  // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3715  if (IsRHSNegOne)
3716  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3717  if (IsRHSZero) {
3718  SDValue Add =
3719  SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3720  S->getI64Imm(-1, dl)), 0);
3721  SDValue Nor =
3722  SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3723  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3724  S->getI64Imm(63, dl)), 0);
3725  }
3726  std::swap(LHS, RHS);
3727  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3728  IsRHSZero = RHSConst && RHSConst->isNullValue();
3729  IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3731  }
3732  case ISD::SETLT: {
3733  // {subc.reg, subc.CA} = (subcarry %a, %b)
3734  // (zext (setcc %a, %b, setlt)) ->
3735  // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3736  // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3737  if (IsRHSOne)
3738  return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3739  if (IsRHSZero) {
3740  return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3741  S->getI64Imm(63, dl)), 0);
3742  }
3743  SDValue SRADINode =
3744  SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3745  LHS, S->getI64Imm(63, dl)), 0);
3746  SDValue SRDINode =
3747  SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3748  RHS, S->getI64Imm(1, dl),
3749  S->getI64Imm(63, dl)), 0);
3750  SDValue SUBFC8Carry =
3751  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3752  RHS, LHS), 1);
3753  SDValue ADDE8Node =
3754  SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3755  SRDINode, SRADINode, SUBFC8Carry), 0);
3756  SDValue XORI8Node =
3757  SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3758  ADDE8Node, S->getI64Imm(1, dl)), 0);
3759  return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3760  XORI8Node), 0);
3761  }
3762  case ISD::SETUGE:
3763  // {subc.reg, subc.CA} = (subcarry %a, %b)
3764  // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3765  std::swap(LHS, RHS);
3767  case ISD::SETULE: {
3768  // {subc.reg, subc.CA} = (subcarry %b, %a)
3769  // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3770  SDValue SubtractCarry =
3771  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3772  LHS, RHS), 1);
3773  SDValue ExtSub =
3774  SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3775  LHS, SubtractCarry), 0);
3776  return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3777  ExtSub, ExtSub), 0);
3778  }
3779  case ISD::SETUGT:
3780  // {subc.reg, subc.CA} = (subcarry %b, %a)
3781  // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3782  std::swap(LHS, RHS);
3784  case ISD::SETULT: {
3785  // {subc.reg, subc.CA} = (subcarry %a, %b)
3786  // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3787  SDValue SubCarry =
3788  SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3789  RHS, LHS), 1);
3790  return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3791  LHS, LHS, SubCarry), 0);
3792  }
3793  }
3794 }
3795 
3796 /// Do all uses of this SDValue need the result in a GPR?
3797 /// This is meant to be used on values that have type i1 since
3798 /// it is somewhat meaningless to ask if values of other types
3799 /// should be kept in GPR's.
3800 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3801  assert(Compare.getOpcode() == ISD::SETCC &&
3802  "An ISD::SETCC node required here.");
3803 
3804  // For values that have a single use, the caller should obviously already have
3805  // checked if that use is an extending use. We check the other uses here.
3806  if (Compare.hasOneUse())
3807  return true;
3808  // We want the value in a GPR if it is being extended, used for a select, or
3809  // used in logical operations.
3810  for (auto CompareUse : Compare.getNode()->uses())
3811  if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3812  CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3813  CompareUse->getOpcode() != ISD::SELECT &&
3814  !isLogicOp(CompareUse->getOpcode())) {
3815  OmittedForNonExtendUses++;
3816  return false;
3817  }
3818  return true;
3819 }
3820 
3821 /// Returns an equivalent of a SETCC node but with the result the same width as
3822 /// the inputs. This can also be used for SELECT_CC if either the true or false
3823 /// values is a power of two while the other is zero.
3824 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3825  SetccInGPROpts ConvOpts) {
3826  assert((Compare.getOpcode() == ISD::SETCC ||
3827  Compare.getOpcode() == ISD::SELECT_CC) &&
3828  "An ISD::SETCC node required here.");
3829 
3830  // Don't convert this comparison to a GPR sequence because there are uses
3831  // of the i1 result (i.e. uses that require the result in the CR).
3832  if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
3833  return SDValue();
3834 
3835  SDValue LHS = Compare.getOperand(0);
3836  SDValue RHS = Compare.getOperand(1);
3837 
3838  // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
3839  int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
3840  ISD::CondCode CC =
3841  cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
3842  EVT InputVT = LHS.getValueType();
3843  if (InputVT != MVT::i32 && InputVT != MVT::i64)
3844  return SDValue();
3845 
3846  if (ConvOpts == SetccInGPROpts::ZExtInvert ||
3847  ConvOpts == SetccInGPROpts::SExtInvert)
3848  CC = ISD::getSetCCInverse(CC, InputVT);
3849 
3850  bool Inputs32Bit = InputVT == MVT::i32;
3851 
3852  SDLoc dl(Compare);
3853  ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3854  int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
3855  bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
3856  ConvOpts == SetccInGPROpts::SExtInvert;
3857 
3858  if (IsSext && Inputs32Bit)
3859  return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3860  else if (Inputs32Bit)
3861  return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3862  else if (IsSext)
3863  return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
3864  return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
3865 }
3866 
3867 } // end anonymous namespace
3868 
3869 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
3870  if (N->getValueType(0) != MVT::i32 &&
3871  N->getValueType(0) != MVT::i64)
3872  return false;
3873 
3874  // This optimization will emit code that assumes 64-bit registers
3875  // so we don't want to run it in 32-bit mode. Also don't run it
3876  // on functions that are not to be optimized.
3877  if (TM.getOptLevel() == CodeGenOpt::None || !TM.isPPC64())
3878  return false;
3879 
3880  // For POWER10, it is more profitable to use the set boolean extension
3881  // instructions rather than the integer compare elimination codegen.
3882  // Users can override this via the command line option, `--ppc-gpr-icmps`.
3883  if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
3884  return false;
3885 
3886  switch (N->getOpcode()) {
3887  default: break;
3888  case ISD::ZERO_EXTEND:
3889  case ISD::SIGN_EXTEND:
3890  case ISD::AND:
3891  case ISD::OR:
3892  case ISD::XOR: {
3893  IntegerCompareEliminator ICmpElim(CurDAG, this);
3894  if (SDNode *New = ICmpElim.Select(N)) {
3895  ReplaceNode(N, New);
3896  return true;
3897  }
3898  }
3899  }
3900  return false;
3901 }
3902 
3903 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
3904  if (N->getValueType(0) != MVT::i32 &&
3905  N->getValueType(0) != MVT::i64)
3906  return false;
3907 
3908  if (!UseBitPermRewriter)
3909  return false;
3910 
3911  switch (N->getOpcode()) {
3912  default: break;
3913  case ISD::ROTL:
3914  case ISD::SHL:
3915  case ISD::SRL:
3916  case ISD::AND:
3917  case ISD::OR: {
3918  BitPermutationSelector BPS(CurDAG);
3919  if (SDNode *New = BPS.Select(N)) {
3920  ReplaceNode(N, New);
3921  return true;
3922  }
3923  return false;
3924  }
3925  }
3926 
3927  return false;
3928 }
3929 
/// SelectCC - Select a comparison of the specified values with the specified
/// condition code, returning the CR# of the expression.
/// \p LHS/\p RHS may be i32, i64, f32, f64 or f128. When \p Chain is non-null
/// the emitted compare node also carries a chain result.
SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                                  const SDLoc &dl, SDValue Chain) {
  // Always select the LHS.
  unsigned Opc;

  if (LHS.getValueType() == MVT::i32) {
    unsigned Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt32Immediate(RHS, Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>((int)Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpw cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmplwi cr0,r0,0x5678
        //   beq cr0,L6
        SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
                                           getI32Imm(Imm >> 16, dl)), 0);
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      }
      Opc = PPC::CMPLW;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: fold a 16-bit unsigned immediate into cmplwi.
      if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
                                              getI32Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLW;
    } else {
      // Signed compare: fold a 16-bit signed immediate into cmpwi.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
                                              getI32Imm((int)SImm & 0xFFFF,
                                                        dl)),
                       0);
      Opc = PPC::CMPW;
    }
  } else if (LHS.getValueType() == MVT::i64) {
    uint64_t Imm;
    if (CC == ISD::SETEQ || CC == ISD::SETNE) {
      if (isInt64Immediate(RHS.getNode(), Imm)) {
        // SETEQ/SETNE comparison with 16-bit immediate, fold it.
        if (isUInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);
        // If this is a 16-bit signed immediate, fold it.
        if (isInt<16>(Imm))
          return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                                getI32Imm(Imm & 0xFFFF, dl)),
                         0);

        // For non-equality comparisons, the default code would materialize the
        // constant, then compare against it, like this:
        //   lis r2, 4660
        //   ori r2, r2, 22136
        //   cmpd cr0, r3, r2
        // Since we are just comparing for equality, we can emit this instead:
        //   xoris r0,r3,0x1234
        //   cmpldi cr0,r0,0x5678
        //   beq cr0,L6
        // The xoris/cmpldi pair only covers immediates that fit in 32 bits.
        if (isUInt<32>(Imm)) {
          SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
                                             getI64Imm(Imm >> 16, dl)), 0);
          return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
                                                getI64Imm(Imm & 0xFFFF, dl)),
                         0);
        }
      }
      Opc = PPC::CMPLD;
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: fold a 16-bit unsigned immediate into cmpldi.
      if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
                                              getI64Imm(Imm & 0xFFFF, dl)), 0);
      Opc = PPC::CMPLD;
    } else {
      // Signed compare: fold a 16-bit signed immediate into cmpdi.
      int16_t SImm;
      if (isIntS16Immediate(RHS, SImm))
        return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
                                              getI64Imm(SImm & 0xFFFF, dl)),
                       0);
      Opc = PPC::CMPD;
    }
  } else if (LHS.getValueType() == MVT::f32) {
    if (Subtarget->hasSPE()) {
      // SPE float compares set the GT bit of the CR; choose the compare
      // instruction whose GT bit encodes the requested relation.
      switch (CC) {
        default:
        case ISD::SETEQ:
        case ISD::SETNE:
          Opc = PPC::EFSCMPEQ;
          break;
        case ISD::SETLT:
        case ISD::SETGE:
        case ISD::SETOLT:
        case ISD::SETOGE:
        case ISD::SETULT:
        case ISD::SETUGE:
          Opc = PPC::EFSCMPLT;
          break;
        case ISD::SETGT:
        case ISD::SETLE:
        case ISD::SETOGT:
        case ISD::SETOLE:
        case ISD::SETUGT:
        case ISD::SETULE:
          Opc = PPC::EFSCMPGT;
          break;
      }
    } else
      Opc = PPC::FCMPUS;
  } else if (LHS.getValueType() == MVT::f64) {
    if (Subtarget->hasSPE()) {
      // Same GT-bit convention as the f32 SPE compares above.
      switch (CC) {
        default:
        case ISD::SETEQ:
        case ISD::SETNE:
          Opc = PPC::EFDCMPEQ;
          break;
        case ISD::SETLT:
        case ISD::SETGE:
        case ISD::SETOLT:
        case ISD::SETOGE:
        case ISD::SETULT:
        case ISD::SETUGE:
          Opc = PPC::EFDCMPLT;
          break;
        case ISD::SETGT:
        case ISD::SETLE:
        case ISD::SETOGT:
        case ISD::SETOLE:
        case ISD::SETUGT:
        case ISD::SETULE:
          Opc = PPC::EFDCMPGT;
          break;
      }
    } else
      Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
  } else {
    assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
    assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
    Opc = PPC::XSCMPUQP;
  }
  // Thread the chain through the compare when one was supplied.
  if (Chain)
    return SDValue(
        CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
        0);
  else
    return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
}
4093 
4095  const PPCSubtarget *Subtarget) {
4096  // For SPE instructions, the result is in GT bit of the CR
4097  bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4098 
4099  switch (CC) {
4100  case ISD::SETUEQ:
4101  case ISD::SETONE:
4102  case ISD::SETOLE:
4103  case ISD::SETOGE:
4104  llvm_unreachable("Should be lowered by legalize!");
4105  default: llvm_unreachable("Unknown condition!");
4106  case ISD::SETOEQ:
4107  case ISD::SETEQ:
4108  return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4109  case ISD::SETUNE:
4110  case ISD::SETNE:
4111  return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4112  case ISD::SETOLT:
4113  case ISD::SETLT:
4114  return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4115  case ISD::SETULE:
4116  case ISD::SETLE:
4117  return PPC::PRED_LE;
4118  case ISD::SETOGT:
4119  case ISD::SETGT:
4120  return PPC::PRED_GT;
4121  case ISD::SETUGE:
4122  case ISD::SETGE:
4123  return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4124  case ISD::SETO: return PPC::PRED_NU;
4125  case ISD::SETUO: return PPC::PRED_UN;
4126  // These two are invalid for floating point. Assume we have int.
4127  case ISD::SETULT: return PPC::PRED_LT;
4128  case ISD::SETUGT: return PPC::PRED_GT;
4129  }
4130 }
4131 
4132 /// getCRIdxForSetCC - Return the index of the condition register field
4133 /// associated with the SetCC condition, and whether or not the field is
4134 /// treated as inverted. That is, lt = 0; ge = 0 inverted.
4135 static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4136  Invert = false;
4137  switch (CC) {
4138  default: llvm_unreachable("Unknown condition!");
4139  case ISD::SETOLT:
4140  case ISD::SETLT: return 0; // Bit #0 = SETOLT
4141  case ISD::SETOGT:
4142  case ISD::SETGT: return 1; // Bit #1 = SETOGT
4143  case ISD::SETOEQ:
4144  case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4145  case ISD::SETUO: return 3; // Bit #3 = SETUO
4146  case ISD::SETUGE:
4147  case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4148  case ISD::SETULE:
4149  case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4150  case ISD::SETUNE:
4151  case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4152  case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4153  case ISD::SETUEQ:
4154  case ISD::SETOGE:
4155  case ISD::SETOLE:
4156  case ISD::SETONE:
4157  llvm_unreachable("Invalid branch code: should be expanded by legalize");
4158  // These are invalid for floating point. Assume integer.
4159  case ISD::SETULT: return 0;
4160  case ISD::SETUGT: return 1;
4161  }
4162 }
4163 
// getVCmpInst: return the vector compare instruction for the specified
// vector type and condition code. Since this is for altivec specific code,
// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
// and v4f32).
// On return, Swap indicates the caller must exchange the compare operands,
// and Negate that the caller must invert the instruction's result, in order
// to realize the original condition code.
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
                                bool HasVSX, bool &Swap, bool &Negate) {
  Swap = false;
  Negate = false;

  if (VecVT.isFloatingPoint()) {
    /* Handle some cases by swapping input operands. */
    switch (CC) {
    case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
    case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
    case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
    case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
    case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
    case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
    default: break;
    }
    /* Handle some cases by negating the result. Note this runs after the
       swap canonicalization above, so e.g. SETUGE has already become
       SETULE before being mapped here. */
    switch (CC) {
    case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
    case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
    case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
    case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
    default: break;
    }
    /* We have instructions implementing the remaining cases. */
    switch (CC) {
    case ISD::SETEQ:
    case ISD::SETOEQ:
      if (VecVT == MVT::v4f32)
        return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
      else if (VecVT == MVT::v2f64)
        return PPC::XVCMPEQDP;
      break;
    case ISD::SETGT:
    case ISD::SETOGT:
      if (VecVT == MVT::v4f32)
        return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
      else if (VecVT == MVT::v2f64)
        return PPC::XVCMPGTDP;
      break;
    case ISD::SETGE:
    case ISD::SETOGE:
      if (VecVT == MVT::v4f32)
        return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
      else if (VecVT == MVT::v2f64)
        return PPC::XVCMPGEDP;
      break;
    default:
      break;
    }
    llvm_unreachable("Invalid floating-point vector compare condition");
  } else {
    /* Handle some cases by swapping input operands. */
    switch (CC) {
    case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
    case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
    case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
    case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
    default: break;
    }
    /* Handle some cases by negating the result. */
    switch (CC) {
    case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
    case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
    case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
    case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
    default: break;
    }
    /* We have instructions implementing the remaining cases: equality,
       signed greater-than, and unsigned greater-than, one per element
       width. */
    switch (CC) {
    case ISD::SETEQ:
    case ISD::SETUEQ:
      if (VecVT == MVT::v16i8)
        return PPC::VCMPEQUB;
      else if (VecVT == MVT::v8i16)
        return PPC::VCMPEQUH;
      else if (VecVT == MVT::v4i32)
        return PPC::VCMPEQUW;
      else if (VecVT == MVT::v2i64)
        return PPC::VCMPEQUD;
      else if (VecVT == MVT::v1i128)
        return PPC::VCMPEQUQ;
      break;
    case ISD::SETGT:
      if (VecVT == MVT::v16i8)
        return PPC::VCMPGTSB;
      else if (VecVT == MVT::v8i16)
        return PPC::VCMPGTSH;
      else if (VecVT == MVT::v4i32)
        return PPC::VCMPGTSW;
      else if (VecVT == MVT::v2i64)
        return PPC::VCMPGTSD;
      else if (VecVT == MVT::v1i128)
        return PPC::VCMPGTSQ;
      break;
    case ISD::SETUGT:
      if (VecVT == MVT::v16i8)
        return PPC::VCMPGTUB;
      else if (VecVT == MVT::v8i16)
        return PPC::VCMPGTUH;
      else if (VecVT == MVT::v4i32)
        return PPC::VCMPGTUW;
      else if (VecVT == MVT::v2i64)
        return PPC::VCMPGTUD;
      else if (VecVT == MVT::v1i128)
        return PPC::VCMPGTUQ;
      break;
    default:
      break;
    }
    llvm_unreachable("Invalid integer vector compare condition");
  }
}
4281 
// Try to select a SETCC (or STRICT_FSETCC[S]) node. Either expands
// compare-against-immediate into branchless GPR bit tricks (when CR bits are
// not used for i1), selects a vector compare, or materializes the CR field
// produced by SelectCC into a GPR and extracts the relevant bit.
// Returns true if the node was selected here.
bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
  SDLoc dl(N);
  unsigned Imm;
  bool IsStrict = N->isStrictFPOpcode();
  // Strict FP setcc nodes carry the chain as operand 0, shifting the
  // LHS/RHS/CC operands up by one.
  ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
  EVT PtrVT =
      CurDAG->getTargetLoweringInfo().getPointerTy(CurDAG->getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();

  SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
  SDValue RHS = N->getOperand(IsStrict ? 2 : 1);

  if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
    // We can codegen setcc op, imm very efficiently compared to a brcond.
    // Check for those cases here.
    // setcc op, 0
    if (Imm == 0) {
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ: {
        // (x == 0) -> cntlzw(x) >> 5: the count is 32 iff x is zero, so
        // bit 5 of the count is exactly the result.
        Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
        SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETNE: {
        // 32-bit only: carry trick. addic x, -1 produces a carry iff
        // x != 0; subfe then turns the carry into 0/1.
        if (isPPC64) break;
        SDValue AD =
          SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                         Op, getI32Imm(~0U, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
        return true;
      }
      case ISD::SETLT: {
        // (x < 0) -> extract the sign bit with a rotate-and-mask.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (x > 0) -> sign bit of (-x & ~x), which is set iff x is
        // strictly positive; extract it with rlwinm.
        SDValue T =
          SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
        T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
        SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      }
    } else if (Imm == ~0U) { // setcc op, -1
      SDValue Op = LHS;
      switch (CC) {
      default: break;
      case ISD::SETEQ:
        // 32-bit only: addic x, 1 carries iff x == -1; addze adds that
        // carry to a fresh zero, yielding the 0/1 result.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(1, dl)), 0);
        CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
                             SDValue(CurDAG->getMachineNode(PPC::LI, dl,
                                                            MVT::i32,
                                                            getI32Imm(0, dl)),
                                     0), Op.getValue(1));
        return true;
      case ISD::SETNE: {
        // 32-bit only: x != -1 iff ~x != 0; complement then apply the
        // same addic/subfe carry trick as the (x != 0) case above.
        if (isPPC64) break;
        Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
        SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
                                            Op, getI32Imm(~0U, dl));
        CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
                             SDValue(AD, 1));
        return true;
      }
      case ISD::SETLT: {
        // (x < -1) -> sign bit of ((x + 1) & x).
        SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
                                                    getI32Imm(1, dl)), 0);
        SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
                                                    Op), 0);
        SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
        return true;
      }
      case ISD::SETGT: {
        // (x > -1) == !(x < 0): extract the sign bit, then flip it.
        SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
                          getI32Imm(31, dl) };
        Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
        CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
        return true;
      }
      }
    }
  }

  // Altivec Vector compare instructions do not set any CR register by default and
  // vector compare operations return the same type as the operands.
  if (!IsStrict && LHS.getValueType().isVector()) {
    if (Subtarget->hasSPE())
      return false;

    EVT VecVT = LHS.getValueType();
    bool Swap, Negate;
    unsigned int VCmpInst =
      getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
    if (Swap)
      std::swap(LHS, RHS);

    EVT ResVT = VecVT.changeVectorElementTypeToInteger();
    if (Negate) {
      // Invert the compare result with a vector NOR (nor(x, x) == ~x).
      SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
      CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
                           ResVT, VCmp, VCmp);
      return true;
    }

    CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
    return true;
  }

  // With CR bits, i1 setcc results are handled elsewhere.
  if (Subtarget->useCRBits())
    return false;

  bool Inv;
  unsigned Idx = getCRIdxForSetCC(CC, Inv);
  SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
  if (IsStrict)
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
  SDValue IntCR;

  // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
  // The correct compare instruction is already set by SelectCC()
  if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
    Idx = 1;
  }

  // Force the ccreg into CR7.
  SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);

  SDValue InFlag(nullptr, 0);  // Null incoming flag value.
  CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
                               InFlag).getValue(1);

  // Move the CR field into a GPR (MFOCRF), then rotate the wanted bit of
  // CR7 down into the LSB position and mask it.
  IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
                                         CCReg), 0);

  SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
                      getI32Imm(31, dl), getI32Imm(31, dl) };
  if (!Inv) {
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // Get the specified bit.
  SDValue Tmp =
    SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
  // Inverted condition: flip the extracted bit.
  CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
  return true;
}
4443 
4444 /// Does this node represent a load/store node whose address can be represented
4445 /// with a register plus an immediate that's a multiple of \p Val:
4446 bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4447  LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4448  StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4449  SDValue AddrOp;
4450  if (LDN)
4451  AddrOp = LDN->getOperand(1);
4452  else if (STN)
4453  AddrOp = STN->getOperand(2);
4454 
4455  // If the address points a frame object or a frame object with an offset,
4456  // we need to check the object alignment.
4457  short Imm = 0;
4458  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4459  AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4460  AddrOp)) {
4461  // If op0 is a frame index that is under aligned, we can't do it either,
4462  // because it is translated to r31 or r1 + slot + offset. We won't know the
4463  // slot number until the stack frame is finalized.
4464  const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4465  unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4466  if ((SlotAlign % Val) != 0)
4467  return false;
4468 
4469  // If we have an offset, we need further check on the offset.
4470  if (AddrOp.getOpcode() != ISD::ADD)
4471  return true;
4472  }
4473 
4474  if (AddrOp.getOpcode() == ISD::ADD)
4475  return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4476 
4477  // If the address comes from the outside, the offset will be zero.
4478  return AddrOp.getOpcode() == ISD::CopyFromReg;
4479 }
4480 
4481 void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4482  // Transfer memoperands.
4483  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4484  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4485 }
4486 
4487 static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4488  bool &NeedSwapOps, bool &IsUnCmp) {
4489 
4490  assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4491 
4492  SDValue LHS = N->getOperand(0);
4493  SDValue RHS = N->getOperand(1);
4494  SDValue TrueRes = N->getOperand(2);
4495  SDValue FalseRes = N->getOperand(3);
4496  ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4497  if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4498  N->getSimpleValueType(0) != MVT::i32))
4499  return false;
4500 
4501  // We are looking for any of:
4502  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4503  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4504  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4505  // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4506  int64_t TrueResVal = TrueConst->getSExtValue();
4507  if ((TrueResVal < -1 || TrueResVal > 1) ||
4508  (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4509  (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4510  (TrueResVal == 0 &&
4511  (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4512  return false;
4513 
4514  SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4515  ? FalseRes
4516  : FalseRes.getOperand(0);
4517  bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4518  if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4519  SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4520  return false;
4521 
4522  // Without this setb optimization, the outer SELECT_CC will be manually
4523  // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4524  // transforms pseudo instruction to isel instruction. When there are more than
4525  // one use for result like zext/sext, with current optimization we only see
4526  // isel is replaced by setb but can't see any significant gain. Since
4527  // setb has longer latency than original isel, we should avoid this. Another
4528  // point is that setb requires comparison always kept, it can break the
4529  // opportunity to get the comparison away if we have in future.
4530  if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4531  return false;
4532 
4533  SDValue InnerLHS = SetOrSelCC.getOperand(0);
4534  SDValue InnerRHS = SetOrSelCC.getOperand(1);
4535  ISD::CondCode InnerCC =
4536  cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4537  // If the inner comparison is a select_cc, make sure the true/false values are
4538  // 1/-1 and canonicalize it if needed.
4539  if (InnerIsSel) {
4540  ConstantSDNode *SelCCTrueConst =
4541  dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4542  ConstantSDNode *SelCCFalseConst =
4543  dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4544  if (!SelCCTrueConst || !SelCCFalseConst)
4545  return false;
4546  int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4547  int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4548  // The values must be -1/1 (requiring a swap) or 1/-1.
4549  if (SelCCTVal == -1 && SelCCFVal == 1) {
4550  std::swap(InnerLHS, InnerRHS);
4551  } else if (SelCCTVal != 1 || SelCCFVal != -1)
4552  return false;
4553  }
4554 
4555  // Canonicalize unsigned case
4556  if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4557  IsUnCmp = true;
4558  InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4559  }
4560 
4561  bool InnerSwapped = false;
4562  if (LHS == InnerRHS && RHS == InnerLHS)
4563  InnerSwapped = true;
4564  else if (LHS != InnerLHS || RHS != InnerRHS)
4565  return false;
4566 
4567  switch (CC) {
4568  // (select_cc lhs, rhs, 0, \
4569  // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4570  case ISD::SETEQ:
4571  if (!InnerIsSel)
4572  return false;
4573  if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4574  return false;
4575  NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4576  break;
4577 
4578  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4579  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4580  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4581  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4582  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4583  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4584  case ISD::SETULT:
4585  if (!IsUnCmp && InnerCC != ISD::SETNE)
4586  return false;
4587  IsUnCmp = true;
4589  case ISD::SETLT:
4590  if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4591  (InnerCC == ISD::SETLT && InnerSwapped))
4592  NeedSwapOps = (TrueResVal == 1);
4593  else
4594  return false;
4595  break;
4596 
4597  // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4598  // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4599  // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4600  // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4601  // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4602  // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4603  case ISD::SETUGT:
4604  if (!IsUnCmp && InnerCC != ISD::SETNE)
4605  return false;
4606  IsUnCmp = true;
4608  case ISD::SETGT:
4609  if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4610  (InnerCC == ISD::SETGT && InnerSwapped))
4611  NeedSwapOps = (TrueResVal == -1);
4612  else
4613  return false;
4614  break;
4615 
4616  default:
4617  return false;
4618  }
4619 
4620  LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4621  LLVM_DEBUG(N->dump());
4622 
4623  return true;
4624 }
4625 
4626 // Return true if it's a software square-root/divide operand.
4627 static bool isSWTestOp(SDValue N) {
4628  if (N.getOpcode() == PPCISD::FTSQRT)
4629  return true;
4630  if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)))
4631  return false;
4632  switch (N.getConstantOperandVal(0)) {
4633  case Intrinsic::ppc_vsx_xvtdivdp:
4634  case Intrinsic::ppc_vsx_xvtdivsp:
4635  case Intrinsic::ppc_vsx_xvtsqrtdp:
4636  case Intrinsic::ppc_vsx_xvtsqrtsp:
4637  return true;
4638  }
4639  return false;
4640 }
4641 
// Fold a BR_CC that tests a bit of a software sqrt/divide test result into a
// direct conditional branch on the test instruction's CR output.
// Returns true if the node was selected here.
bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
  assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
  // We are looking for following patterns, where `truncate to i1` actually has
  // the same semantic with `and 1`.
  // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
  // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
  // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
  // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
  // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
  // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return false;

  // The comparison must be against zero.
  SDValue CmpRHS = N->getOperand(3);
  if (!isa<ConstantSDNode>(CmpRHS) ||
      cast<ConstantSDNode>(CmpRHS)->getSExtValue() != 0)
    return false;

  // The compared value must wrap (and/truncate) a software-test operand.
  SDValue CmpLHS = N->getOperand(2);
  if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
    return false;

  // Map the tested mask bit (and compare sense) to a branch predicate.
  unsigned PCC = 0;
  bool IsCCNE = CC == ISD::SETNE;
  if (CmpLHS.getOpcode() == ISD::AND &&
      isa<ConstantSDNode>(CmpLHS.getOperand(1)))
    switch (CmpLHS.getConstantOperandVal(1)) {
    case 1:
      PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
      break;
    case 2:
      PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
      break;
    case 4:
      PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
      break;
    case 8:
      PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
      break;
    default:
      return false;
    }
  else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
           CmpLHS.getValueType() == MVT::i1)
    PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;

  if (PCC) {
    // Emit the conditional branch directly on the test operand's CR result.
    SDLoc dl(N);
    SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
                     N->getOperand(0)};
    CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
    return true;
  }
  return false;
}
4700 
// Try to select a 32-bit AND-with-immediate as a single rlwinm.
// Returns true if the node was selected here.
bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  unsigned Imm;
  if (!isInt32Immediate(N->getOperand(1), Imm))
    return false;

  SDLoc dl(N);
  SDValue Val = N->getOperand(0);
  unsigned SH, MB, ME;
  // If this is an and of a value rotated between 0 and 31 bits and then and'd
  // with a mask, emit rlwinm
  if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
    // Fold the inner rotate/shift into the rlwinm's shift amount.
    Val = Val.getOperand(0);
    SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // If this is just a masked value where the input is not handled, and
  // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
  if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
    // Zero shift: pure mask.
    SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
                     getI32Imm(ME, dl)};
    CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
    return true;
  }

  // AND X, 0 -> 0, not "rlwinm 32".
  if (Imm == 0) {
    // Replace the AND with its zero operand directly.
    ReplaceUses(SDValue(N, 0), N->getOperand(1));
    return true;
  }

  return false;
}
4737 
4738 bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4739  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4740  uint64_t Imm64;
4741  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4742  return false;
4743 
4744  unsigned MB, ME;
4745  if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4746  // MB ME
4747  // +----------------------+
4748  // |xxxxxxxxxxx00011111000|
4749  // +----------------------+
4750  // 0 32 64
4751  // We can only do it if the MB is larger than 32 and MB <= ME
4752  // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4753  // we didn't rotate it.
4754  SDLoc dl(N);
4755  SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4756  getI64Imm(ME - 32, dl)};
4757  CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
4758  return true;
4759  }
4760 
4761  return false;
4762 }
4763 
// Try to select a 64-bit AND with a wrapped-run-of-ones mask as two rldicl
// instructions. Returns true if the node was selected here.
bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
    return false;

  // Do nothing if it is 16-bit imm as the pattern in the .td file handle
  // it well with "andi.".
  if (isUInt<16>(Imm64))
    return false;

  SDLoc Loc(N);
  SDValue Val = N->getOperand(0);

  // Optimized with two rldicl's as follows:
  // Add missing bits on left to the mask and check that the mask is a
  // wrapped run of ones, i.e.
  // Change pattern |0001111100000011111111|
  // to             |1111111100000011111111|.
  unsigned NumOfLeadingZeros = countLeadingZeros(Imm64);
  if (NumOfLeadingZeros != 0)
    Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);

  unsigned MB, ME;
  if (!isRunOfOnes64(Imm64, MB, ME))
    return false;

  //         ME     MB                   MB-ME+63
  // +----------------------+     +----------------------+
  // |1111111100000011111111| ->  |0000001111111111111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
  unsigned OnesOnLeft = ME + 1;
  unsigned ZerosInBetween = (MB - ME + 63) & 63;
  // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
  // on the left the bits that are already zeros in the mask.
  Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
                                       getI64Imm(OnesOnLeft, Loc),
                                       getI64Imm(ZerosInBetween, Loc)),
                0);
  //        MB-ME+63                      ME     MB
  // +----------------------+     +----------------------+
  // |0000001111111111111111| ->  |0001111100000011111111|
  // +----------------------+     +----------------------+
  //  0                    63      0                    63
  // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
  // left the number of ones we previously added.
  SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
                   getI64Imm(NumOfLeadingZeros, Loc)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}
4817 
// Try to select (and (or x, c1), c2) as a single rlwimi (bitfield insert).
// Returns true if the node was selected here.
bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  unsigned Imm;
  if (!isInt32Immediate(N->getOperand(1), Imm))
    return false;

  SDValue Val = N->getOperand(0);
  unsigned Imm2;
  // ISD::OR doesn't get all the bitfield insertion fun.
  // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
  // bitfield insert.
  if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
    return false;

  // The idea here is to check whether this is equivalent to:
  //   (c1 & m) | (x & ~m)
  // where m is a run-of-ones mask. The logic here is that, for each bit in
  // c1 and c2:
  //  - if both are 1, then the output will be 1.
  //  - if both are 0, then the output will be 0.
  //  - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
  //    come from x.
  //  - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
  //    be 0.
  // If that last condition is never the case, then we can form m from the
  // bits that are the same between c1 and c2.
  unsigned MB, ME;
  if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
    // Insert c1 (already in Val's operand 1) into x over the mask [MB, ME].
    SDLoc dl(N);
    SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
                     getI32Imm(MB, dl), getI32Imm(ME, dl)};
    ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
    return true;
  }

  return false;
}
4855 
// Try to select a 64-bit AND with a low-bit mask (zero-extension mask) as a
// single rldicl, possibly folding an inner logical right shift.
// Returns true if the node was selected here.
bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
  uint64_t Imm64;
  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
    return false;

  // If this is a 64-bit zero-extension mask, emit rldicl.
  unsigned MB = 64 - countTrailingOnes(Imm64);
  unsigned SH = 0;
  unsigned Imm;
  SDValue Val = N->getOperand(0);
  SDLoc dl(N);

  if (Val.getOpcode() == ISD::ANY_EXTEND) {
    // (and (any_extend (srl x, n)), mask): place the narrow value into a
    // 64-bit register via INSERT_SUBREG so the shift can be folded below.
    auto Op0 = Val.getOperand(0);
    if (Op0.getOpcode() == ISD::SRL &&
        isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {

      auto ResultType = Val.getNode()->getValueType(0);
      auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
      SDValue IDVal(ImDef, 0);

      Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
                                           IDVal, Op0.getOperand(0),
                                           getI32Imm(1, dl)),
                    0);
      SH = 64 - Imm;
    }
  }

  // If the operand is a logical right shift, we can fold it into this
  // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
  // for n <= mb. The right shift is really a left rotate followed by a
  // mask, and this mask is a more-restrictive sub-mask of the mask implied
  // by the shift.
  if (Val.getOpcode() == ISD::SRL &&
      isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
    assert(Imm < 64 && "Illegal shift amount");
    Val = Val.getOperand(0);
    SH = 64 - Imm;
  }

  SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
  CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
  return true;
}
4902 
4903 bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
4904  assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4905  uint64_t Imm64;
4906  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
4907  !isMask_64(~Imm64))
4908  return false;
4909 
4910  // If this is a negated 64-bit zero-extension mask,
4911  // i.e. the immediate is a sequence of ones from most significant side
4912  // and all zero for reminder, we should use rldicr.
4913  unsigned MB = 63 - countTrailingOnes(~Imm64);
4914  unsigned SH = 0;
4915  SDLoc dl(N);
4916  SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
4917  CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
4918  return true;
4919 }
4920 
4921 bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
4922  assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
4923  uint64_t Imm64;
4924  unsigned MB, ME;
4925  SDValue N0 = N->getOperand(0);
4926 
4927  // We won't get fewer instructions if the imm is 32-bit integer.
4928  // rldimi requires the imm to have consecutive ones with both sides zero.
4929  // Also, make sure the first Op has only one use, otherwise this may increase
4930  // register pressure since rldimi is destructive.
4931  if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
4932  isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
4933  return false;
4934 
4935  unsigned SH = 63 - ME;
4936  SDLoc Dl(N);
4937  // Use select64Imm for making LI instr instead of directly putting Imm64
4938  SDValue Ops[] = {
4939  N->getOperand(0),
4940  SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
4941  getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
4942  CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
4943  return true;
4944 }
4945 
4946 // Select - Convert the specified operand from a target-independent to a
4947 // target-specific node if it hasn't already been changed.
4949  SDLoc dl(N);
4950  if (N->isMachineOpcode()) {
4951  N->setNodeId(-1);
4952  return; // Already selected.
4953  }
4954 
4955  // In case any misguided DAG-level optimizations form an ADD with a
4956  // TargetConstant operand, crash here instead of miscompiling (by selecting
4957  // an r+r add instead of some kind of r+i add).
4958  if (N->getOpcode() == ISD::ADD &&
4959  N->getOperand(1).getOpcode() == ISD::TargetConstant)
4960  llvm_unreachable("Invalid ADD with TargetConstant operand");
4961 
4962  // Try matching complex bit permutations before doing anything else.
4963  if (tryBitPermutation(N))
4964  return;
4965 
4966  // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
4967  if (tryIntCompareInGPR(N))
4968  return;
4969 
4970  switch (N->getOpcode()) {
4971  default: break;
4972 
4973  case ISD::Constant:
4974  if (N->getValueType(0) == MVT::i64) {
4975  ReplaceNode(N,