LLVM 22.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
16#include "PPC.h"
17#include "PPCISelLowering.h"
19#include "PPCSelectionDAGInfo.h"
20#include "PPCSubtarget.h"
21#include "PPCTargetMachine.h"
22#include "llvm/ADT/APInt.h"
23#include "llvm/ADT/APSInt.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/STLExtras.h"
28#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/InlineAsm.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/IntrinsicsPowerPC.h"
51#include "llvm/IR/Module.h"
56#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <new>
68#include <tuple>
69#include <utility>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "ppc-isel"
74#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
75
76STATISTIC(NumSextSetcc,
77 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
78STATISTIC(NumZextSetcc,
79 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
80STATISTIC(SignExtensionsAdded,
81 "Number of sign extensions for compare inputs added.");
82STATISTIC(ZeroExtensionsAdded,
83 "Number of zero extensions for compare inputs added.");
84STATISTIC(NumLogicOpsOnComparison,
85 "Number of logical ops on i1 values calculated in GPR.");
86STATISTIC(OmittedForNonExtendUses,
87 "Number of compares not eliminated as they have non-extending uses.");
88STATISTIC(NumP9Setb,
89 "Number of compares lowered to setb.");
90
91// FIXME: Remove this once the bug has been fixed!
92cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
93cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
94
95static cl::opt<bool>
96 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
97 cl::desc("use aggressive ppc isel for bit permutations"),
100 "ppc-bit-perm-rewriter-stress-rotates",
101 cl::desc("stress rotate selection in aggressive ppc isel for "
102 "bit permutations"),
103 cl::Hidden);
104
106 "ppc-use-branch-hint", cl::init(true),
107 cl::desc("Enable static hinting of branches on ppc"),
108 cl::Hidden);
109
111 "ppc-tls-opt", cl::init(true),
112 cl::desc("Enable tls optimization peephole"),
113 cl::Hidden);
114
118
120 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
121 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
122 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
123 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
124 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
125 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
126 clEnumValN(ICGPR_NonExtIn, "nonextin",
127 "Only comparisons where inputs don't need [sz]ext."),
128 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
129 clEnumValN(ICGPR_ZextI32, "zexti32",
130 "Only i32 comparisons with zext result."),
131 clEnumValN(ICGPR_ZextI64, "zexti64",
132 "Only i64 comparisons with zext result."),
133 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
134 clEnumValN(ICGPR_SextI32, "sexti32",
135 "Only i32 comparisons with sext result."),
136 clEnumValN(ICGPR_SextI64, "sexti64",
137 "Only i64 comparisons with sext result.")));
138namespace {
139
140 //===--------------------------------------------------------------------===//
141 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
142 /// instructions for SelectionDAG operations.
143 ///
144 class PPCDAGToDAGISel : public SelectionDAGISel {
145 const PPCTargetMachine &TM;
146 const PPCSubtarget *Subtarget = nullptr;
147 const PPCTargetLowering *PPCLowering = nullptr;
148 unsigned GlobalBaseReg = 0;
149
150 public:
151 PPCDAGToDAGISel() = delete;
152
153 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
154 : SelectionDAGISel(tm, OptLevel), TM(tm) {}
155
156 bool runOnMachineFunction(MachineFunction &MF) override {
157 // Make sure we re-emit a set of the global base reg if necessary
158 GlobalBaseReg = 0;
159 Subtarget = &MF.getSubtarget<PPCSubtarget>();
160 PPCLowering = Subtarget->getTargetLowering();
161 if (Subtarget->hasROPProtect()) {
162 // Create a place on the stack for the ROP Protection Hash.
163 // The ROP Protection Hash will always be 8 bytes and aligned to 8
164 // bytes.
165 MachineFrameInfo &MFI = MF.getFrameInfo();
166 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
167 const int Result = MFI.CreateStackObject(8, Align(8), false);
169 }
171
172 return true;
173 }
174
175 void PreprocessISelDAG() override;
176 void PostprocessISelDAG() override;
177
178 /// getI16Imm - Return a target constant with the specified value, of type
179 /// i16.
180 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
181 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
182 }
183
184 /// getI32Imm - Return a target constant with the specified value, of type
185 /// i32.
186 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
187 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
188 }
189
190 /// getI64Imm - Return a target constant with the specified value, of type
191 /// i64.
192 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
193 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
194 }
195
196 /// getSmallIPtrImm - Return a target constant of pointer type.
197 inline SDValue getSmallIPtrImm(int64_t Imm, const SDLoc &dl) {
198 return CurDAG->getSignedTargetConstant(
199 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
200 }
201
202 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
203 /// rotate and mask opcode and mask operation.
204 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
205 unsigned &SH, unsigned &MB, unsigned &ME);
206
207 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
208 /// base register. Return the virtual register that holds this value.
209 SDNode *getGlobalBaseReg();
210
211 void selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset = 0);
212
213 // Select - Convert the specified operand from a target-independent to a
214 // target-specific node if it hasn't already been changed.
215 void Select(SDNode *N) override;
216
217 bool tryBitfieldInsert(SDNode *N);
218 bool tryBitPermutation(SDNode *N);
219 bool tryIntCompareInGPR(SDNode *N);
220
221 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
222 // an X-Form load instruction with the offset being a relocation coming from
223 // the PPCISD::ADD_TLS.
224 bool tryTLSXFormLoad(LoadSDNode *N);
225 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
226 // an X-Form store instruction with the offset being a relocation coming from
227 // the PPCISD::ADD_TLS.
228 bool tryTLSXFormStore(StoreSDNode *N);
229 /// SelectCC - Select a comparison of the specified values with the
230 /// specified condition code, returning the CR# of the expression.
232 const SDLoc &dl, SDValue Chain = SDValue());
233
234 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
235 /// immediate field. Note that the operand at this point is already the
236 /// result of a prior SelectAddressRegImm call.
237 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
238 if (N.getOpcode() == ISD::TargetConstant ||
239 N.getOpcode() == ISD::TargetGlobalAddress) {
240 Out = N;
241 return true;
242 }
243
244 return false;
245 }
246
247 /// SelectDSForm - Returns true if address N can be represented by the
248 /// addressing mode of DSForm instructions (a base register, plus a signed
249 /// 16-bit displacement that is a multiple of 4.
250 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
251 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
252 Align(4)) == PPC::AM_DSForm;
253 }
254
255 /// SelectDQForm - Returns true if address N can be represented by the
256 /// addressing mode of DQForm instructions (a base register, plus a signed
257 /// 16-bit displacement that is a multiple of 16.
258 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
259 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
260 Align(16)) == PPC::AM_DQForm;
261 }
262
263 /// SelectDForm - Returns true if address N can be represented by
264 /// the addressing mode of DForm instructions (a base register, plus a
265 /// signed 16-bit immediate.
266 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
267 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
268 std::nullopt) == PPC::AM_DForm;
269 }
270
271 /// SelectPCRelForm - Returns true if address N can be represented by
272 /// PC-Relative addressing mode.
273 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
274 SDValue &Base) {
275 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
276 std::nullopt) == PPC::AM_PCRel;
277 }
278
279 /// SelectPDForm - Returns true if address N can be represented by Prefixed
280 /// DForm addressing mode (a base register, plus a signed 34-bit immediate.
281 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
282 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
283 std::nullopt) ==
285 }
286
287 /// SelectXForm - Returns true if address N can be represented by the
288 /// addressing mode of XForm instructions (an indexed [r+r] operation).
289 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
290 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
291 std::nullopt) == PPC::AM_XForm;
292 }
293
294 /// SelectForceXForm - Given the specified address, force it to be
295 /// represented as an indexed [r+r] operation (an XForm instruction).
296 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
297 SDValue &Base) {
298 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
300 }
301
302 /// SelectAddrIdx - Given the specified address, check to see if it can be
303 /// represented as an indexed [r+r] operation.
304 /// This is for xform instructions whose associated displacement form is D.
305 /// The last parameter \p 0 means associated D form has no requirment for 16
306 /// bit signed displacement.
307 /// Returns false if it can be represented by [r+imm], which are preferred.
308 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
309 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
310 std::nullopt);
311 }
312
313 /// SelectAddrIdx4 - Given the specified address, check to see if it can be
314 /// represented as an indexed [r+r] operation.
315 /// This is for xform instructions whose associated displacement form is DS.
316 /// The last parameter \p 4 means associated DS form 16 bit signed
317 /// displacement must be a multiple of 4.
318 /// Returns false if it can be represented by [r+imm], which are preferred.
319 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
320 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
321 Align(4));
322 }
323
324 /// SelectAddrIdx16 - Given the specified address, check to see if it can be
325 /// represented as an indexed [r+r] operation.
326 /// This is for xform instructions whose associated displacement form is DQ.
327 /// The last parameter \p 16 means associated DQ form 16 bit signed
328 /// displacement must be a multiple of 16.
329 /// Returns false if it can be represented by [r+imm], which are preferred.
330 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
331 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
332 Align(16));
333 }
334
335 /// SelectAddrIdxOnly - Given the specified address, force it to be
336 /// represented as an indexed [r+r] operation.
337 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
338 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
339 }
340
341 /// SelectAddrImm - Returns true if the address N can be represented by
342 /// a base register plus a signed 16-bit displacement [r+imm].
343 /// The last parameter \p 0 means D form has no requirment for 16 bit signed
344 /// displacement.
345 bool SelectAddrImm(SDValue N, SDValue &Disp,
346 SDValue &Base) {
347 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
348 std::nullopt);
349 }
350
351 /// SelectAddrImmX4 - Returns true if the address N can be represented by
352 /// a base register plus a signed 16-bit displacement that is a multiple of
353 /// 4 (last parameter). Suitable for use by STD and friends.
354 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
355 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
356 }
357
358 /// SelectAddrImmX16 - Returns true if the address N can be represented by
359 /// a base register plus a signed 16-bit displacement that is a multiple of
360 /// 16(last parameter). Suitable for use by STXV and friends.
361 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
362 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
363 Align(16));
364 }
365
366 /// SelectAddrImmX34 - Returns true if the address N can be represented by
367 /// a base register plus a signed 34-bit displacement. Suitable for use by
368 /// PSTXVP and friends.
369 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
370 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
371 }
372
373 // Select an address into a single register.
374 bool SelectAddr(SDValue N, SDValue &Base) {
375 Base = N;
376 return true;
377 }
378
379 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
380 return PPCLowering->SelectAddressPCRel(N, Base);
381 }
382
383 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
384 /// inline asm expressions. It is always correct to compute the value into
385 /// a register. The case of adding a (possibly relocatable) constant to a
386 /// register can be improved, but it is wrong to substitute Reg+Reg for
387 /// Reg in an asm, because the load or store opcode would have to change.
388 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
389 InlineAsm::ConstraintCode ConstraintID,
390 std::vector<SDValue> &OutOps) override {
391 switch(ConstraintID) {
392 default:
393 errs() << "ConstraintID: "
394 << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
395 llvm_unreachable("Unexpected asm memory constraint");
396 case InlineAsm::ConstraintCode::es:
397 case InlineAsm::ConstraintCode::m:
398 case InlineAsm::ConstraintCode::o:
399 case InlineAsm::ConstraintCode::Q:
400 case InlineAsm::ConstraintCode::Z:
401 case InlineAsm::ConstraintCode::Zy:
402 // We need to make sure that this one operand does not end up in r0
403 // (because we might end up lowering this as 0(%op)).
404 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
405 const TargetRegisterClass *TRC = TRI->getPointerRegClass(/*Kind=*/1);
406 SDLoc dl(Op);
407 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
408 SDValue NewOp =
409 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
410 dl, Op.getValueType(),
411 Op, RC), 0);
412
413 OutOps.push_back(NewOp);
414 return false;
415 }
416 return true;
417 }
418
419// Include the pieces autogenerated from the target description.
420#include "PPCGenDAGISel.inc"
421
422private:
423 bool trySETCC(SDNode *N);
424 bool tryFoldSWTestBRCC(SDNode *N);
425 bool trySelectLoopCountIntrinsic(SDNode *N);
426 bool tryAsSingleRLDICL(SDNode *N);
427 bool tryAsSingleRLDCL(SDNode *N);
428 bool tryAsSingleRLDICR(SDNode *N);
429 bool tryAsSingleRLWINM(SDNode *N);
430 bool tryAsSingleRLWINM8(SDNode *N);
431 bool tryAsSingleRLWIMI(SDNode *N);
432 bool tryAsPairOfRLDICL(SDNode *N);
433 bool tryAsSingleRLDIMI(SDNode *N);
434
435 void PeepholePPC64();
436 void PeepholePPC64ZExt();
437 void PeepholeCROps();
438
439 SDValue combineToCMPB(SDNode *N);
440 void foldBoolExts(SDValue &Res, SDNode *&N);
441
442 bool AllUsersSelectZero(SDNode *N);
443 void SwapAllSelectUsers(SDNode *N);
444
445 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
446 void transferMemOperands(SDNode *N, SDNode *Result);
447 };
448
449 class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
450 public:
451 static char ID;
452 explicit PPCDAGToDAGISelLegacy(PPCTargetMachine &tm,
453 CodeGenOptLevel OptLevel)
454 : SelectionDAGISelLegacy(
455 ID, std::make_unique<PPCDAGToDAGISel>(tm, OptLevel)) {}
456 };
457} // end anonymous namespace
458
459char PPCDAGToDAGISelLegacy::ID = 0;
460
461INITIALIZE_PASS(PPCDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
462
463/// getGlobalBaseReg - Output the instructions required to put the
464/// base address to use for accessing globals into a register.
465///
466SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
467 if (!GlobalBaseReg) {
468 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
469 // Insert the set of GlobalBaseReg into the first MBB of the function
470 MachineBasicBlock &FirstMBB = MF->front();
472 const Module *M = MF->getFunction().getParent();
473 DebugLoc dl;
474
475 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
476 if (Subtarget->isTargetELF()) {
477 GlobalBaseReg = PPC::R30;
478 if (!Subtarget->isSecurePlt() &&
479 M->getPICLevel() == PICLevel::SmallPIC) {
480 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
481 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
482 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
483 } else {
484 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
485 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
486 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
487 BuildMI(FirstMBB, MBBI, dl,
488 TII.get(PPC::UpdateGBR), GlobalBaseReg)
489 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
490 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
491 }
492 } else {
494 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
495 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
496 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
497 }
498 } else {
499 // We must ensure that this sequence is dominated by the prologue.
500 // FIXME: This is a bit of a big hammer since we don't get the benefits
501 // of shrink-wrapping whenever we emit this instruction. Considering
502 // this is used in any function where we emit a jump table, this may be
503 // a significant limitation. We should consider inserting this in the
504 // block where it is used and then commoning this sequence up if it
505 // appears in multiple places.
506 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
507 // MovePCtoLR8.
508 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
509 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
510 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
511 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
512 }
513 }
514 return CurDAG->getRegister(GlobalBaseReg,
515 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
516 .getNode();
517}
518
519// Check if a SDValue has the toc-data attribute.
520static bool hasTocDataAttr(SDValue Val) {
522 if (!GA)
523 return false;
524
526 if (!GV)
527 return false;
528
529 if (!GV->hasAttribute("toc-data"))
530 return false;
531 return true;
532}
533
535 const TargetMachine &TM,
536 const SDNode *Node) {
537 // If there isn't an attribute to override the module code model
538 // this will be the effective code model.
539 CodeModel::Model ModuleModel = TM.getCodeModel();
540
542 if (!GA)
543 return ModuleModel;
544
545 const GlobalValue *GV = GA->getGlobal();
546 if (!GV)
547 return ModuleModel;
548
549 return Subtarget.getCodeModel(TM, GV);
550}
551
552/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
553/// operand. If so Imm will receive the 32-bit value.
554static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
555 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
556 Imm = N->getAsZExtVal();
557 return true;
558 }
559 return false;
560}
561
562/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
563/// operand. If so Imm will receive the 64-bit value.
564static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
565 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
566 Imm = N->getAsZExtVal();
567 return true;
568 }
569 return false;
570}
571
572// isInt32Immediate - This method tests to see if a constant operand.
573// If so Imm will receive the 32 bit value.
574static bool isInt32Immediate(SDValue N, unsigned &Imm) {
575 return isInt32Immediate(N.getNode(), Imm);
576}
577
578/// isInt64Immediate - This method tests to see if the value is a 64-bit
579/// constant operand. If so Imm will receive the 64-bit value.
580static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
581 return isInt64Immediate(N.getNode(), Imm);
582}
583
584static unsigned getBranchHint(unsigned PCC,
585 const FunctionLoweringInfo &FuncInfo,
586 const SDValue &DestMBB) {
588
589 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
590
591 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
592 const Instruction *BBTerm = BB->getTerminator();
593
594 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
595
596 const BasicBlock *TBB = BBTerm->getSuccessor(0);
597 const BasicBlock *FBB = BBTerm->getSuccessor(1);
598
599 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
600 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
601
602 // We only want to handle cases which are easy to predict at static time, e.g.
603 // C++ throw statement, that is very likely not taken, or calling never
604 // returned function, e.g. stdlib exit(). So we set Threshold to filter
605 // unwanted cases.
606 //
607 // Below is LLVM branch weight table, we only want to handle case 1, 2
608 //
609 // Case Taken:Nontaken Example
610 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
611 // 2. Invoke-terminating 1:1048575
612 // 3. Coldblock 4:64 __builtin_expect
613 // 4. Loop Branch 124:4 For loop
614 // 5. PH/ZH/FPH 20:12
615 const uint32_t Threshold = 10000;
616
617 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
618 return PPC::BR_NO_HINT;
619
620 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
621 << "::" << BB->getName() << "'\n"
622 << " -> " << TBB->getName() << ": " << TProb << "\n"
623 << " -> " << FBB->getName() << ": " << FProb << "\n");
624
625 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
626
627 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
628 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
629 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
630 std::swap(TProb, FProb);
631
632 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
633}
634
635// isOpcWithIntImmediate - This method tests to see if the node is a specific
636// opcode and that it has a immediate integer right operand.
637// If so Imm will receive the 32 bit value.
638static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
639 return N->getOpcode() == Opc
640 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
641}
642
643void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset) {
644 SDLoc dl(SN);
645 int FI = cast<FrameIndexSDNode>(N)->getIndex();
646 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
647 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
648 if (SN->hasOneUse())
649 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
650 getSmallIPtrImm(Offset, dl));
651 else
652 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
653 getSmallIPtrImm(Offset, dl)));
654}
655
656bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
657 bool isShiftMask, unsigned &SH,
658 unsigned &MB, unsigned &ME) {
659 // Don't even go down this path for i64, since different logic will be
660 // necessary for rldicl/rldicr/rldimi.
661 if (N->getValueType(0) != MVT::i32)
662 return false;
663
664 unsigned Shift = 32;
665 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
666 unsigned Opcode = N->getOpcode();
667 if (N->getNumOperands() != 2 ||
668 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
669 return false;
670
671 if (Opcode == ISD::SHL) {
672 // apply shift left to mask if it comes first
673 if (isShiftMask) Mask = Mask << Shift;
674 // determine which bits are made indeterminant by shift
675 Indeterminant = ~(0xFFFFFFFFu << Shift);
676 } else if (Opcode == ISD::SRL) {
677 // apply shift right to mask if it comes first
678 if (isShiftMask) Mask = Mask >> Shift;
679 // determine which bits are made indeterminant by shift
680 Indeterminant = ~(0xFFFFFFFFu >> Shift);
681 // adjust for the left rotate
682 Shift = 32 - Shift;
683 } else if (Opcode == ISD::ROTL) {
684 Indeterminant = 0;
685 } else {
686 return false;
687 }
688
689 // if the mask doesn't intersect any Indeterminant bits
690 if (Mask && !(Mask & Indeterminant)) {
691 SH = Shift & 31;
692 // make sure the mask is still a mask (wrap arounds may not be)
693 return isRunOfOnes(Mask, MB, ME);
694 }
695 return false;
696}
697
698// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
699// instruction use the thread pointer.
701 assert(
702 Base.getOpcode() == PPCISD::ADD_TLS &&
703 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
704 const PPCSubtarget &Subtarget =
706 SDValue ADDTLSOp1 = Base.getOperand(0);
707 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
708
709 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
710 //
711 // Although ADD_TLS does not explicitly use the thread pointer
712 // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
713 // instruction will have a relocation specifier, @got@tprel, that is used to
714 // generate a GOT entry. The linker replaces this entry with an offset for a
715 // thread local variable, which will be relative to the thread pointer.
716 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
717 return true;
718 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
719 // node is produced instead to represent the aforementioned situation.
720 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
721 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
722 return true;
723
724 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
725 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
726 // later returning it into R3.
727 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
728 return true;
729
730 // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
731 RegisterSDNode *AddFirstOpReg =
732 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
733 if (AddFirstOpReg &&
734 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
735 return true;
736
737 return false;
738}
739
740// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
741// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
742// operation, can be optimized to use an X-Form load or store, allowing the
743// ADD_TLS node to be removed completely.
745
746 // Do not do this transformation at -O0.
748 return false;
749
750 // In order to perform this optimization inside tryTLSXForm[Load|Store],
751 // Base is expected to be an ADD_TLS node.
752 if (Base.getOpcode() != PPCISD::ADD_TLS)
753 return false;
754 for (auto *ADDTLSUse : Base.getNode()->users()) {
755 // The optimization to convert the D-Form load/store into its X-Form
756 // counterpart should only occur if the source value offset of the load/
757 // store is 0. This also means that the offset should always be undefined.
758 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
759 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
760 return false;
761 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
762 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
763 return false;
764 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
765 return false;
766 }
767
768 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
769 return false;
770
771 // Does the ADD_TLS node of the load/store use the thread pointer?
772 // If the thread pointer is not used as one of the operands of ADD_TLS,
773 // then this optimization is not valid.
774 return isThreadPointerAcquisitionNode(Base, CurDAG);
775}
776
777bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
778 SDValue Base = ST->getBasePtr();
779 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
780 return false;
781
782 SDLoc dl(ST);
783 EVT MemVT = ST->getMemoryVT();
784 EVT RegVT = ST->getValue().getValueType();
785
786 unsigned Opcode;
787 switch (MemVT.getSimpleVT().SimpleTy) {
788 default:
789 return false;
790 case MVT::i8: {
791 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
792 break;
793 }
794 case MVT::i16: {
795 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
796 break;
797 }
798 case MVT::i32: {
799 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
800 break;
801 }
802 case MVT::i64: {
803 Opcode = PPC::STDXTLS;
804 break;
805 }
806 case MVT::f32: {
807 Opcode = PPC::STFSXTLS;
808 break;
809 }
810 case MVT::f64: {
811 Opcode = PPC::STFDXTLS;
812 break;
813 }
814 }
815 SDValue Chain = ST->getChain();
816 SDVTList VTs = ST->getVTList();
817 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
818 Chain};
819 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
820 transferMemOperands(ST, MN);
821 ReplaceNode(ST, MN);
822 return true;
823}
824
825bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
826 SDValue Base = LD->getBasePtr();
827 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
828 return false;
829
830 SDLoc dl(LD);
831 EVT MemVT = LD->getMemoryVT();
832 EVT RegVT = LD->getValueType(0);
833 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
834 unsigned Opcode;
835 switch (MemVT.getSimpleVT().SimpleTy) {
836 default:
837 return false;
838 case MVT::i8: {
839 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
840 break;
841 }
842 case MVT::i16: {
843 if (RegVT == MVT::i32)
844 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
845 else
846 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
847 break;
848 }
849 case MVT::i32: {
850 if (RegVT == MVT::i32)
851 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
852 else
853 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
854 break;
855 }
856 case MVT::i64: {
857 Opcode = PPC::LDXTLS;
858 break;
859 }
860 case MVT::f32: {
861 Opcode = PPC::LFSXTLS;
862 break;
863 }
864 case MVT::f64: {
865 Opcode = PPC::LFDXTLS;
866 break;
867 }
868 }
869 SDValue Chain = LD->getChain();
870 SDVTList VTs = LD->getVTList();
871 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
872 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
873 transferMemOperands(LD, MN);
874 ReplaceNode(LD, MN);
875 return true;
876}
877
878/// Turn an OR of two masked values into the rotate left word immediate then
879/// mask insert (rlwimi) instruction.
///
/// Returns true if \p N was replaced by an RLWIMI machine node, and false if
/// the pattern did not match (in which case \p N is not modified here).
880bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
881 SDValue Op0 = N->getOperand(0);
882 SDValue Op1 = N->getOperand(1);
883 SDLoc dl(N);
884
885 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
886 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
887
 // Start with the bits that are known to be zero in each operand.
888 unsigned TargetMask = LKnown.Zero.getZExtValue();
889 unsigned InsertMask = RKnown.Zero.getZExtValue();
890
 // Proceed only if each of the 32 bits is known to be zero in at least one of
 // the two operands, i.e. the operands never both contribute to the same bit.
891 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
892 unsigned Op0Opc = Op0.getOpcode();
893 unsigned Op1Opc = Op1.getOpcode();
894 unsigned Value, SH = 0;
 // Invert the masks: from here on each mask marks the bits that are NOT
 // known to be zero in the corresponding operand.
895 TargetMask = ~TargetMask;
896 InsertMask = ~InsertMask;
897
898 // If the LHS has a foldable shift and the RHS does not, then swap it to the
899 // RHS so that we can fold the shift into the insert.
900 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
901 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
902 Op0.getOperand(0).getOpcode() == ISD::SRL) {
903 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
904 Op1.getOperand(0).getOpcode() != ISD::SRL) {
905 std::swap(Op0, Op1);
906 std::swap(Op0Opc, Op1Opc);
907 std::swap(TargetMask, InsertMask);
908 }
909 }
910 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
911 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
912 Op1.getOperand(0).getOpcode() != ISD::SRL) {
913 std::swap(Op0, Op1);
914 std::swap(Op0Opc, Op1Opc);
915 std::swap(TargetMask, InsertMask);
916 }
917 }
918
 // The inserted bits must form a single run of ones so that they can be
 // described by the mask-begin/mask-end pair passed to the instruction.
919 unsigned MB, ME;
920 if (isRunOfOnes(InsertMask, MB, ME)) {
921 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
 // NOTE(review): the rest of this condition (listing line 922) is missing
 // from this copy of the file; presumably it checks that the shift amount
 // is an immediate and stores it in Value -- confirm against upstream.
923 Op1 = Op1.getOperand(0);
 // A right shift by Value is expressed as a left rotate by 32 - Value.
924 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
925 }
926 if (Op1Opc == ISD::AND) {
927 // The AND mask might not be a constant, and we need to make sure that
928 // if we're going to fold the masking with the insert, all bits not
929 // known to be zero in the mask are known to be one.
930 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
931 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
932
933 unsigned SHOpc = Op1.getOperand(0).getOpcode();
934 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
 // NOTE(review): the rest of this condition (listing line 935) is missing
 // from this copy of the file; presumably it extracts the immediate
 // shift amount into Value -- confirm against upstream.
936 // Note that Value must be in range here (less than 32) because
937 // otherwise there would not be any bits set in InsertMask.
938 Op1 = Op1.getOperand(0).getOperand(0);
939 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
940 }
941 }
942
 // Reduce the rotate amount modulo 32 (32 - Value is 32 when Value is 0).
943 SH &= 31;
944 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
945 getI32Imm(ME, dl) };
946 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
947 return true;
948 }
949 }
950 return false;
951}
952
// Examine every use of N and return the largest width, in bits, that any of
// them keeps: the result width of an ISD::TRUNCATE, the memory width of an
// ISD::STORE, or 32/16/8 for the PPC word/halfword/byte store opcodes listed
// below. Returns 0 as soon as a use is found that fits none of these cases
// (and also when N has no uses at all).
953static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
954 unsigned MaxTruncation = 0;
955 // Iterate over the uses (SDUse) rather than over the user nodes, because
956 // the checks below need the operand number that corresponds to each use in
957 // addition to the user itself.
958 for (SDUse &Use : N->uses()) {
959 SDNode *User = Use.getUser();
 // Machine opcodes and ISD opcodes are switched on together below; each
 // ISD case therefore re-checks isMachineOpcode() before trusting the match.
960 unsigned Opc =
961 User->isMachineOpcode() ? User->getMachineOpcode() : User->getOpcode();
962 switch (Opc) {
963 default: return 0;
964 case ISD::TRUNCATE:
965 if (User->isMachineOpcode())
966 return 0;
967 MaxTruncation = std::max(MaxTruncation,
968 (unsigned)User->getValueType(0).getSizeInBits());
969 continue;
970 case ISD::STORE: {
971 if (User->isMachineOpcode())
972 return 0;
 // NOTE(review): the declaration of STN (listing line 973) is missing from
 // this copy of the file; presumably it casts User to a store node --
 // confirm against upstream.
974 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
 // A full 64-bit store does not truncate, and only a use as operand 0 is
 // accepted.
 // NOTE(review): confirm that operand 0 is the stored value (and not, e.g.,
 // the chain) for ISD::STORE nodes.
975 if (MemVTSize == 64 || Use.getOperandNo() != 0)
976 return 0;
977 MaxTruncation = std::max(MaxTruncation, MemVTSize);
978 continue;
979 }
 // Word stores: only a use as operand 0 counts, and 32 bits are kept.
980 case PPC::STW8:
981 case PPC::STWX8:
982 case PPC::STWU8:
983 case PPC::STWUX8:
984 if (Use.getOperandNo() != 0)
985 return 0;
986 MaxTruncation = std::max(MaxTruncation, 32u);
987 continue;
 // Halfword stores: 16 bits are kept.
988 case PPC::STH8:
989 case PPC::STHX8:
990 case PPC::STHU8:
991 case PPC::STHUX8:
992 if (Use.getOperandNo() != 0)
993 return 0;
994 MaxTruncation = std::max(MaxTruncation, 16u);
995 continue;
 // Byte stores: 8 bits are kept.
996 case PPC::STB8:
997 case PPC::STBX8:
998 case PPC::STBU8:
999 case PPC::STBUX8:
1000 if (Use.getOperandNo() != 0)
1001 return 0;
1002 MaxTruncation = std::max(MaxTruncation, 8u);
1003 continue;
1004 }
1005 }
1006 return MaxTruncation;
1007}
1008
1009// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1010// zeros and return the number of bits by the left of these consecutive zeros.
1011static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1012 unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1013 unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1014 if ((HiTZ + LoLZ) >= Num)
1015 return (32 + HiTZ);
1016 return 0;
1017}
1018
1019// Direct materialization of 64-bit constants by enumerated patterns.
//
// The patterns are tried in the order in which they are listed below and the
// last node of the first matching sequence is returned. On success InstCnt is
// set to the number of instructions in that sequence (1 to 3). If no pattern
// matches, InstCnt is set to 0 and nullptr is returned.
1020static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1021 uint64_t Imm, unsigned &InstCnt) {
 // Trailing/leading zero and one counts of the full 64-bit immediate.
1022 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1023 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1024 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1025 unsigned LO = llvm::countl_one<uint64_t>(Imm);
1026 unsigned Hi32 = Hi_32(Imm);
1027 unsigned Lo32 = Lo_32(Imm);
1028 SDNode *Result = nullptr;
1029 unsigned Shift = 0;
1030
1031 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1032 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1033 };
1034
1035 // Following patterns use 1 instruction to materialize the Imm.
1036 InstCnt = 1;
1037 // 1-1) Patterns : {zeros}{15-bit value}
1038 //                 {ones}{15-bit value}
1039 if (isInt<16>(Imm)) {
1040 SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1041 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1042 }
1043 // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
1044 //                 {ones}{15-bit value}{16 zeros}
1045 if (TZ > 15 && (LZ > 32 || LO > 32))
1046 return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1047 getI32Imm((Imm >> 16) & 0xffff));
1048
1049 // Following patterns use 2 instructions to materialize the Imm.
1050 InstCnt = 2;
1051 assert(LZ < 64 && "Unexpected leading zeros here.");
1052 // Count of ones following the leading zeros.
1053 unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1054 // 2-1) Patterns : {zeros}{31-bit value}
1055 //                 {ones}{31-bit value}
1056 if (isInt<32>(Imm)) {
1057 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1058 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1059 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1060 return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1061 getI32Imm(Imm & 0xffff));
1062 }
1063 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1064 //                 {zeros}{15-bit value}{zeros}
1065 //                 {zeros}{ones}{15-bit value}
1066 //                 {ones}{15-bit value}{zeros}
1067 // We can take advantage of LI's sign-extension semantics to generate leading
1068 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
1069 if ((LZ + FO + TZ) > 48) {
1070 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1071 getI32Imm((Imm >> TZ) & 0xffff));
1072 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1073 getI32Imm(TZ), getI32Imm(LZ));
1074 }
1075 // 2-3) Pattern : {zeros}{15-bit value}{ones}
1076 // Shift right the Imm by (48 - LZ) bits to construct a negative 16 bits value,
1077 // therefore we can take advantage of LI's sign-extension semantics, and then
1078 // mask them off after rotation.
1079 //
1080 //  +--LZ--||-15-bit-||--TO--+     +-------------|--16-bit--+
1081 //  |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
1082 //  +------------------------+     +------------------------+
1083 //  63                      0      63                      0
1084 //          Imm                   (Imm >> (48 - LZ) & 0xffff)
1085 //  +----sext-----|--16-bit--+     +clear-|-----------------+
1086 //  |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
1087 //  +------------------------+     +------------------------+
1088 //  63                      0      63                      0
1089 //    LI8: sext many leading zeros   RLDICL: rotate left (48 - LZ), clear left LZ
1090 if ((LZ + TO) > 48) {
1091 // Since the immediates with (LZ > 32) have been handled by previous
1092 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1093 // the Imm by a negative value.
1094 assert(LZ <= 32 && "Unexpected shift value.");
1095 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1096 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1097 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1098 getI32Imm(48 - LZ), getI32Imm(LZ));
1099 }
1100 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1101 //                 {ones}{15-bit value}{ones}
1102 // We can take advantage of LI's sign-extension semantics to generate leading
1103 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1104 // after rotation.
1105 //
1106 //  +-LZ-FO||-15-bit-||--TO--+     +-------------|--16-bit--+
1107 //  |00011110bbbbbbbbb1111111| ->  |000000000011110bbbbbbbbb|
1108 //  +------------------------+     +------------------------+
1109 //  63                      0      63                      0
1110 //            Imm                    (Imm >> TO) & 0xffff
1111 //  +----sext-----|--16-bit--+     +LZ|---------------------+
1112 //  |111111111111110bbbbbbbbb| ->  |00011110bbbbbbbbb1111111|
1113 //  +------------------------+     +------------------------+
1114 //  63                      0      63                      0
1115 //    LI8: sext many leading zeros   RLDICL: rotate left TO, clear left LZ
1116 if ((LZ + FO + TO) > 48) {
1117 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1118 getI32Imm((Imm >> TO) & 0xffff));
1119 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1120 getI32Imm(TO), getI32Imm(LZ));
1121 }
1122 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1123 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1124 // value, we can use LI for Lo16 without generating leading ones then add the
1125 // Hi16(in Lo32).
1126 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1127 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1128 getI32Imm(Lo32 & 0xffff));
1129 return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1130 getI32Imm(Lo32 >> 16));
1131 }
1132 // 2-6) Patterns : {******}{49 zeros}{******}
1133 //                 {******}{49 ones}{******}
1134 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1135 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1136 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1137 // it back.
1138 //
1139 // 1) findContiguousZerosAtLeast(Imm, 49)
1140 //  +------|--zeros-|------+     +--zeros--||---15 bit--+
1141 //  |bbbbbb0000000000aaaaaa| ->  |0000000000aaaaaabbbbbb|
1142 //  +----------------------+     +----------------------+
1143 //  63                    0      63                    0
1144 //
1145 // 2) findContiguousZerosAtLeast(~Imm, 49)
1146 //  +------|--ones--|------+     +---ones--||---15 bit--+
1147 //  |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
1148 //  +----------------------+     +----------------------+
1149 //  63                    0      63                    0
1150 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1151 (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1152 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1153 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1154 getI32Imm(RotImm & 0xffff));
1155 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1156 getI32Imm(Shift), getI32Imm(0));
1157 }
1158 // 2-7) Patterns : High word == Low word
1159 // This may require 2 to 3 instructions, depending on whether Lo32 can be
1160 // materialized in 1 instruction.
1161 if (Hi32 == Lo32) {
1162 // Handle the first 32 bits.
1163 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1164 uint64_t ImmLo16 = Lo32 & 0xffff;
1165 if (isInt<16>(Lo32))
1166 Result =
1167 CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1168 else if (!ImmLo16)
1169 Result =
1170 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1171 else {
 // Lo32 itself needs two instructions, so the whole sequence needs three.
1172 InstCnt = 3;
1173 Result =
1174 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1175 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1176 SDValue(Result, 0), getI32Imm(ImmLo16));
1177 }
1178 // Use rldimi to insert the Low word into High word.
1179 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1180 getI32Imm(0)};
1181 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1182 }
1183
1184 // Following patterns use 3 instructions to materialize the Imm.
1185 InstCnt = 3;
1186 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1187 //                 {zeros}{31-bit value}{zeros}
1188 //                 {zeros}{ones}{31-bit value}
1189 //                 {ones}{31-bit value}{zeros}
1190 // We can take advantage of LIS's sign-extension semantics to generate leading
1191 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1192 // ones in both sides after rotation.
1193 if ((LZ + FO + TZ) > 32) {
1194 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1195 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1196 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1197 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1198 getI32Imm((Imm >> TZ) & 0xffff));
1199 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1200 getI32Imm(TZ), getI32Imm(LZ));
1201 }
1202 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1203 // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1204 // value, therefore we can take advantage of LIS's sign-extension semantics,
1205 // add the remaining bits with ORI, and then mask them off after rotation.
1206 // This is similar to Pattern 2-3, please refer to the diagram there.
1207 if ((LZ + TO) > 32) {
1208 // Since the immediates with (LZ > 32) have been handled by previous
1209 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1210 // the Imm by a negative value.
1211 assert(LZ <= 32 && "Unexpected shift value.");
1212 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1213 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1214 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1215 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1216 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1217 getI32Imm(32 - LZ), getI32Imm(LZ));
1218 }
1219 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1220 //                 {ones}{31-bit value}{ones}
1221 // We can take advantage of LIS's sign-extension semantics to generate leading
1222 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1223 // ones in left sides (if required) after rotation.
1224 // This is similar to Pattern 2-4, please refer to the diagram there.
1225 if ((LZ + FO + TO) > 32) {
1226 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1227 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1228 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1229 getI32Imm((Imm >> TO) & 0xffff));
1230 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1231 getI32Imm(TO), getI32Imm(LZ));
1232 }
1233 // 3-4) Patterns : {******}{33 zeros}{******}
1234 //                 {******}{33 ones}{******}
1235 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1236 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1237 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1238 // rotate it back.
1239 // This is similar to Pattern 2-6, please refer to the diagram there.
1240 if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1241 (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1242 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1243 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1244 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1245 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1246 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1247 getI32Imm(RotImm & 0xffff));
1248 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1249 getI32Imm(Shift), getI32Imm(0));
1250 }
1251
 // None of the enumerated patterns matched.
1252 InstCnt = 0;
1253 return nullptr;
1254}
1255
1256// Try to select instructions to generate a 64 bit immediate using prefix as
1257// well as non prefix instructions. The function will return the SDNode
1258// to materialize that constant or it will return nullptr if it does not
1259// find one. The variable InstCnt is set to the number of instructions that
1260// were selected.
// NOTE(review): the first line of the function signature (listing line 1261)
// is missing from this copy of the file -- restore it from upstream.
1262 uint64_t Imm, unsigned &InstCnt) {
1263 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1264 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1265 unsigned TO = llvm::countr_one<uint64_t>(Imm);
 // Count of ones following the leading zeros; Imm == 0 (LZ == 64) is guarded
 // so that Imm is never shifted by 64.
1266 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1267 unsigned Hi32 = Hi_32(Imm);
1268 unsigned Lo32 = Lo_32(Imm);
1269
1270 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1271 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1272 };
1273
1274 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1275 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1276 };
1277
1278 // Following patterns use 1 instruction to materialize Imm.
1279 InstCnt = 1;
1280
1281 // The pli instruction can materialize up to 34 bits directly.
1282 // If a constant fits within 34-bits, emit the pli instruction here directly.
1283 if (isInt<34>(Imm))
1284 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1285 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1286
1287 // Require at least two instructions.
1288 InstCnt = 2;
1289 SDNode *Result = nullptr;
1290 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1291 //            {zeros}{33-bit value}{zeros}
1292 //            {zeros}{ones}{33-bit value}
1293 //            {ones}{33-bit value}{zeros}
1294 // We can take advantage of PLI's sign-extension semantics to generate leading
1295 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1296 if ((LZ + FO + TZ) > 30) {
 // Take the 34 bits above the trailing zeros and sign-extend them to 64
 // bits to form the PLI operand.
1297 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1298 APInt Extended = SignedInt34.sext(64);
1299 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1300 getI64Imm(Extended.getZExtValue()));
1301 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1302 getI32Imm(TZ), getI32Imm(LZ));
1303 }
1304 // Pattern : {zeros}{33-bit value}{ones}
1305 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1306 // therefore we can take advantage of PLI's sign-extension semantics, and then
1307 // mask them off after rotation.
1308 //
1309 //  +--LZ--||-33-bit-||--TO--+     +-------------|--34-bit--+
1310 //  |00000001bbbbbbbbb1111111| ->  |00000000000001bbbbbbbbb1|
1311 //  +------------------------+     +------------------------+
1312 //  63                      0      63                      0
1313 //
1314 //  +----sext-----|--34-bit--+     +clear-|-----------------+
1315 //  |11111111111111bbbbbbbbb1| ->  |00000001bbbbbbbbb1111111|
1316 //  +------------------------+     +------------------------+
1317 //  63                      0      63                      0
1318 if ((LZ + TO) > 30) {
 // Any Imm with LZ >= 31 is below 2^33 and was already returned by the
 // isInt<34> check above, so (30 - LZ) cannot be negative here.
1319 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1320 APInt Extended = SignedInt34.sext(64);
1321 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1322 getI64Imm(Extended.getZExtValue()));
1323 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1324 getI32Imm(30 - LZ), getI32Imm(LZ));
1325 }
1326 // Patterns : {zeros}{ones}{33-bit value}{ones}
1327 //            {ones}{33-bit value}{ones}
1328 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1329 // generate leading ones, and then use RLDICL to mask off the ones in left
1330 // sides (if required) after rotation.
1331 if ((LZ + FO + TO) > 30) {
1332 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1333 APInt Extended = SignedInt34.sext(64);
1334 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1335 getI64Imm(Extended.getZExtValue()));
1336 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1337 getI32Imm(TO), getI32Imm(LZ));
1338 }
1339 // Patterns : {******}{31 zeros}{******}
1340 //          : {******}{31 ones}{******}
1341 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1342 // is 33. Rotate right the Imm to construct an int<33> value, we can use PLI
1343 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1344 //
1345 //  +------|--ones--|------+     +---ones--||---33 bit--+
1346 //  |bbbbbb1111111111aaaaaa| ->  |1111111111aaaaaabbbbbb|
1347 //  +----------------------+     +----------------------+
1348 //  63                    0      63                    0
 // Try each rotation amount in turn and use the first one for which the
 // rotated value fits in a signed 34-bit immediate.
 // NOTE(review): the loop stops at Shift == 62, so a right-rotation by 63 is
 // never tried; confirm whether the bound was meant to be 64.
1349 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1350 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1351 if (isInt<34>(RotImm)) {
1352 Result =
1353 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1354 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1355 SDValue(Result, 0), getI32Imm(Shift),
1356 getI32Imm(0));
1357 }
1358 }
1359
1360 // Patterns : High word == Low word
1361 // This is basically a splat of a 32 bit immediate.
1362 if (Hi32 == Lo32) {
1363 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1364 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1365 getI32Imm(0)};
1366 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1367 }
1368
1369 InstCnt = 3;
1370 // Catch-all
1371 // This pattern can form any 64 bit immediate in 3 instructions.
 // Materialize each 32-bit half with its own PLI and combine them with RLDIMI.
1372 SDNode *ResultHi =
1373 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1374 SDNode *ResultLo =
1375 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1376 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1377 getI32Imm(0)};
1378 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1379}
1380
// Select a sequence of machine nodes that materializes the 64-bit constant
// Imm and return the last node of that sequence. If InstCnt is non-null, the
// number of instructions in the chosen sequence is stored through it.
1381static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1382 unsigned *InstCnt = nullptr) {
1383 unsigned InstCntDirect = 0;
1384 // No more than 3 instructions are used if we can select the i64 immediate
1385 // directly.
1386 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1387
1388 const PPCSubtarget &Subtarget =
 // NOTE(review): the initializer of Subtarget (listing line 1389) is missing
 // from this copy of the file -- restore it from upstream.
1390
1391 // If we have prefixed instructions and there is a chance we can
1392 // materialize the constant with fewer prefixed instructions than
1393 // non-prefixed, try that.
1394 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1395 unsigned InstCntDirectP = 0;
1396 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1397 // Use the prefix case in either of two cases:
1398 // 1) We have no result from the non-prefix case to use.
1399 // 2) The non-prefix case uses more instructions than the prefix case.
1400 // If the prefix and non-prefix cases use the same number of instructions
1401 // we will prefer the non-prefix case.
1402 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1403 if (InstCnt)
1404 *InstCnt = InstCntDirectP;
1405 return ResultP;
1406 }
1407 }
1408
 // Otherwise use the direct (non-prefixed) sequence if one was found.
1409 if (Result) {
1410 if (InstCnt)
1411 *InstCnt = InstCntDirect;
1412 return Result;
1413 }
1414 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1415 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1416 };
1417
 // The two 16-bit halves of the low 32-bit word.
1418 uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1419 uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1420
1421 // Try to use 4 instructions to materialize the immediate which is "almost" a
1422 // splat of a 32 bit immediate.
1423 if (Hi16OfLo32 && Lo16OfLo32) {
1424 uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1425 uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1426 bool IsSelected = false;
1427
 // Emits LIS8 Hi16; ORI8 Lo16; RLDIMI 32, 0 (3 instructions), using the
 // LIS8/ORI8 result as both RLDIMI inputs.
1428 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1429 SDNode *Result =
1430 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1431 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1432 SDValue(Result, 0), getI32Imm(Lo16));
1433 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1434 getI32Imm(0)};
1435 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1436 };
1437
 // In each case below three of the four halfwords are equal; the splat is
 // built from the word that is already correct and one more instruction
 // patches the remaining halfword.
 // Halfword layout (high to low) A A B A: splat the low word, patch the top.
1438 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1439 IsSelected = true;
1440 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1441 // Modify Hi16OfHi32.
1442 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1443 getI32Imm(0)};
1444 Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
 // Halfword layout A B A A: splat the high word, patch the lowest halfword.
1445 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1446 IsSelected = true;
1447 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1448 // Modify Lo16OfLo32.
1449 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1450 getI32Imm(16), getI32Imm(31)};
1451 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
 // Halfword layout B A A A: splat the high word, patch Hi16OfLo32.
1452 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1453 IsSelected = true;
1454 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1455 // Modify Hi16OfLo32.
1456 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1457 getI32Imm(0), getI32Imm(15)};
1458 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1459 }
1460 if (IsSelected == true) {
1461 if (InstCnt)
1462 *InstCnt = 4;
1463 return Result;
1464 }
1465 }
1466
1467 // Handle the upper 32 bit value.
 // The masked value has at least 32 trailing zeros, so selectI64ImmDirect is
 // expected to always find a pattern for it (Result is used unchecked below).
1468 Result =
1469 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1470 // Add in the last bits as required.
1471 if (Hi16OfLo32) {
1472 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1473 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1474 ++InstCntDirect;
1475 }
1476 if (Lo16OfLo32) {
1477 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1478 getI32Imm(Lo16OfLo32));
1479 ++InstCntDirect;
1480 }
1481 if (InstCnt)
1482 *InstCnt = InstCntDirect;
1483 return Result;
1484}
1485
1486// Select a 64-bit constant.
// NOTE(review): the function signature (listing line 1487) is missing from
// this copy of the file; the body uses CurDAG and a constant node N --
// restore the signature from upstream.
1488 SDLoc dl(N);
1489
1490 // Get 64 bit value.
1491 int64_t Imm = N->getAsZExtVal();
 // If every use of N only keeps its low MinSize bits, the upper bits may be
 // chosen freely: sign-extend from MinSize bits and, if the result fits in a
 // signed 16-bit immediate, materialize it with a single LI8.
1492 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1493 uint64_t SextImm = SignExtend64(Imm, MinSize);
1494 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1495 if (isInt<16>(SextImm))
1496 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1497 }
 // Otherwise fall back to the general 64-bit immediate selection.
1498 return selectI64Imm(CurDAG, dl, Imm);
1499}
1500
1501namespace {
1502
1503class BitPermutationSelector {
1504 struct ValueBit {
1505 SDValue V;
1506
1507 // The bit number in the value, using a convention where bit 0 is the
1508 // lowest-order bit.
1509 unsigned Idx;
1510
1511 // ConstZero means a bit we need to mask off.
1512 // Variable is a bit comes from an input variable.
1513 // VariableKnownToBeZero is also a bit comes from an input variable,
1514 // but it is known to be already zero. So we do not need to mask them.
1515 enum Kind {
1516 ConstZero,
1517 Variable,
1518 VariableKnownToBeZero
1519 } K;
1520
1521 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1522 : V(V), Idx(I), K(K) {}
1523 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1524
1525 bool isZero() const {
1526 return K == ConstZero || K == VariableKnownToBeZero;
1527 }
1528
1529 bool hasValue() const {
1530 return K == Variable || K == VariableKnownToBeZero;
1531 }
1532
1533 SDValue getValue() const {
1534 assert(hasValue() && "Cannot get the value of a constant bit");
1535 return V;
1536 }
1537
1538 unsigned getValueBitIndex() const {
1539 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1540 return Idx;
1541 }
1542 };
1543
1544 // A bit group has the same underlying value and the same rotate factor.
1545 struct BitGroup {
1546 SDValue V;
1547 unsigned RLAmt;
1548 unsigned StartIdx, EndIdx;
1549
1550 // This rotation amount assumes that the lower 32 bits of the quantity are
1551 // replicated in the high 32 bits by the rotation operator (which is done
1552 // by rlwinm and friends in 64-bit mode).
1553 bool Repl32;
1554 // Did converting to Repl32 == true change the rotation factor? If it did,
1555 // it decreased it by 32.
1556 bool Repl32CR;
1557 // Was this group coalesced after setting Repl32 to true?
1558 bool Repl32Coalesced;
1559
1560 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1561 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1562 Repl32Coalesced(false) {
1563 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1564 << " [" << S << ", " << E << "]\n");
1565 }
1566 };
1567
1568 // Information on each (Value, RLAmt) pair (like the number of groups
1569 // associated with each) used to choose the lowering method.
1570 struct ValueRotInfo {
1571 SDValue V;
1572 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1573 unsigned NumGroups = 0;
1574 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1575 bool Repl32 = false;
1576
1577 ValueRotInfo() = default;
1578
1579 // For sorting (in reverse order) by NumGroups, and then by
1580 // FirstGroupStartIdx.
1581 bool operator < (const ValueRotInfo &Other) const {
1582 // We need to sort so that the non-Repl32 come first because, when we're
1583 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1584 // masking operation.
1585 if (Repl32 < Other.Repl32)
1586 return true;
1587 else if (Repl32 > Other.Repl32)
1588 return false;
1589 else if (NumGroups > Other.NumGroups)
1590 return true;
1591 else if (NumGroups < Other.NumGroups)
1592 return false;
1593 else if (RLAmt == 0 && Other.RLAmt != 0)
1594 return true;
1595 else if (RLAmt != 0 && Other.RLAmt == 0)
1596 return false;
1597 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1598 return true;
1599 return false;
1600 }
1601 };
1602
1603 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1604 using ValueBitsMemoizer =
1605 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1606 ValueBitsMemoizer Memoizer;
1607
1608 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1609 // The bool is true if something interesting was deduced, otherwise if we're
1610 // providing only a generic representation of V (or something else likewise
1611 // uninteresting for instruction selection) through the SmallVector.
1612 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1613 unsigned NumBits) {
1614 auto &ValueEntry = Memoizer[V];
1615 if (ValueEntry)
1616 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1617 ValueEntry.reset(new ValueBitsMemoizedValue());
1618 bool &Interesting = ValueEntry->first;
1619 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1620 Bits.resize(NumBits);
1621
1622 switch (V.getOpcode()) {
1623 default: break;
1624 case ISD::ROTL:
1625 if (isa<ConstantSDNode>(V.getOperand(1))) {
1626 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1627 unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1628
1629 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1630
1631 for (unsigned i = 0; i < NumBits; ++i)
1632 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1633
1634 return std::make_pair(Interesting = true, &Bits);
1635 }
1636 break;
1637 case ISD::SHL:
1638 case PPCISD::SHL:
1639 if (isa<ConstantSDNode>(V.getOperand(1))) {
1640 // sld takes 7 bits, slw takes 6.
1641 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1642
1643 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1644
1645 if (ShiftAmt >= NumBits) {
1646 for (unsigned i = 0; i < NumBits; ++i)
1647 Bits[i] = ValueBit(ValueBit::ConstZero);
1648 } else {
1649 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1650 Bits[i] = LHSBits[i - ShiftAmt];
1651 for (unsigned i = 0; i < ShiftAmt; ++i)
1652 Bits[i] = ValueBit(ValueBit::ConstZero);
1653 }
1654
1655 return std::make_pair(Interesting = true, &Bits);
1656 }
1657 break;
1658 case ISD::SRL:
1659 case PPCISD::SRL:
1660 if (isa<ConstantSDNode>(V.getOperand(1))) {
1661 // srd takes lowest 7 bits, srw takes 6.
1662 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1663
1664 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1665
1666 if (ShiftAmt >= NumBits) {
1667 for (unsigned i = 0; i < NumBits; ++i)
1668 Bits[i] = ValueBit(ValueBit::ConstZero);
1669 } else {
1670 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1671 Bits[i] = LHSBits[i + ShiftAmt];
1672 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1673 Bits[i] = ValueBit(ValueBit::ConstZero);
1674 }
1675
1676 return std::make_pair(Interesting = true, &Bits);
1677 }
1678 break;
1679 case ISD::AND:
1680 if (isa<ConstantSDNode>(V.getOperand(1))) {
1681 uint64_t Mask = V.getConstantOperandVal(1);
1682
1683 const SmallVector<ValueBit, 64> *LHSBits;
1684 // Mark this as interesting, only if the LHS was also interesting. This
1685 // prevents the overall procedure from matching a single immediate 'and'
1686 // (which is non-optimal because such an and might be folded with other
1687 // things if we don't select it here).
1688 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1689
1690 for (unsigned i = 0; i < NumBits; ++i)
1691 if (((Mask >> i) & 1) == 1)
1692 Bits[i] = (*LHSBits)[i];
1693 else {
1694 // AND instruction masks this bit. If the input is already zero,
1695 // we have nothing to do here. Otherwise, make the bit ConstZero.
1696 if ((*LHSBits)[i].isZero())
1697 Bits[i] = (*LHSBits)[i];
1698 else
1699 Bits[i] = ValueBit(ValueBit::ConstZero);
1700 }
1701
1702 return std::make_pair(Interesting, &Bits);
1703 }
1704 break;
1705 case ISD::OR: {
1706 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1707 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1708
1709 bool AllDisjoint = true;
1710 SDValue LastVal = SDValue();
1711 unsigned LastIdx = 0;
1712 for (unsigned i = 0; i < NumBits; ++i) {
1713 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1714 // If both inputs are known to be zero and one is ConstZero and
1715 // another is VariableKnownToBeZero, we can select whichever
1716 // we like. To minimize the number of bit groups, we select
1717 // VariableKnownToBeZero if this bit is the next bit of the same
1718 // input variable from the previous bit. Otherwise, we select
1719 // ConstZero.
1720 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1721 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1722 Bits[i] = LHSBits[i];
1723 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1724 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1725 Bits[i] = RHSBits[i];
1726 else
1727 Bits[i] = ValueBit(ValueBit::ConstZero);
1728 }
1729 else if (LHSBits[i].isZero())
1730 Bits[i] = RHSBits[i];
1731 else if (RHSBits[i].isZero())
1732 Bits[i] = LHSBits[i];
1733 else {
1734 AllDisjoint = false;
1735 break;
1736 }
1737 // We remember the value and bit index of this bit.
1738 if (Bits[i].hasValue()) {
1739 LastVal = Bits[i].getValue();
1740 LastIdx = Bits[i].getValueBitIndex();
1741 }
1742 else {
1743 if (LastVal) LastVal = SDValue();
1744 LastIdx = 0;
1745 }
1746 }
1747
1748 if (!AllDisjoint)
1749 break;
1750
1751 return std::make_pair(Interesting = true, &Bits);
1752 }
1753 case ISD::ZERO_EXTEND: {
1754 // We support only the case with zero extension from i32 to i64 so far.
1755 if (V.getValueType() != MVT::i64 ||
1756 V.getOperand(0).getValueType() != MVT::i32)
1757 break;
1758
1759 const SmallVector<ValueBit, 64> *LHSBits;
1760 const unsigned NumOperandBits = 32;
1761 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1762 NumOperandBits);
1763
1764 for (unsigned i = 0; i < NumOperandBits; ++i)
1765 Bits[i] = (*LHSBits)[i];
1766
1767 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1768 Bits[i] = ValueBit(ValueBit::ConstZero);
1769
1770 return std::make_pair(Interesting, &Bits);
1771 }
1772 case ISD::TRUNCATE: {
1773 EVT FromType = V.getOperand(0).getValueType();
1774 EVT ToType = V.getValueType();
1775 // We support only the case with truncate from i64 to i32.
1776 if (FromType != MVT::i64 || ToType != MVT::i32)
1777 break;
1778 const unsigned NumAllBits = FromType.getSizeInBits();
1780 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1781 NumAllBits);
1782 const unsigned NumValidBits = ToType.getSizeInBits();
1783
1784 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1785 // So, we cannot include this truncate.
1786 bool UseUpper32bit = false;
1787 for (unsigned i = 0; i < NumValidBits; ++i)
1788 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1789 UseUpper32bit = true;
1790 break;
1791 }
1792 if (UseUpper32bit)
1793 break;
1794
1795 for (unsigned i = 0; i < NumValidBits; ++i)
1796 Bits[i] = (*InBits)[i];
1797
1798 return std::make_pair(Interesting, &Bits);
1799 }
1800 case ISD::AssertZext: {
1801 // For AssertZext, we look through the operand and
1802 // mark the bits known to be zero.
1803 const SmallVector<ValueBit, 64> *LHSBits;
1804 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1805 NumBits);
1806
1807 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1808 const unsigned NumValidBits = FromType.getSizeInBits();
1809 for (unsigned i = 0; i < NumValidBits; ++i)
1810 Bits[i] = (*LHSBits)[i];
1811
1812 // These bits are known to be zero but the AssertZext may be from a value
1813 // that already has some constant zero bits (i.e. from a masking and).
1814 for (unsigned i = NumValidBits; i < NumBits; ++i)
1815 Bits[i] = (*LHSBits)[i].hasValue()
1816 ? ValueBit((*LHSBits)[i].getValue(),
1817 (*LHSBits)[i].getValueBitIndex(),
1818 ValueBit::VariableKnownToBeZero)
1819 : ValueBit(ValueBit::ConstZero);
1820
1821 return std::make_pair(Interesting, &Bits);
1822 }
1823 case ISD::LOAD:
1824 LoadSDNode *LD = cast<LoadSDNode>(V);
1825 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1826 EVT VT = LD->getMemoryVT();
1827 const unsigned NumValidBits = VT.getSizeInBits();
1828
1829 for (unsigned i = 0; i < NumValidBits; ++i)
1830 Bits[i] = ValueBit(V, i);
1831
1832 // These bits are known to be zero.
1833 for (unsigned i = NumValidBits; i < NumBits; ++i)
1834 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1835
1836 // Zero-extending load itself cannot be optimized. So, it is not
1837 // interesting by itself though it gives useful information.
1838 return std::make_pair(Interesting = false, &Bits);
1839 }
1840 break;
1841 }
1842
1843 for (unsigned i = 0; i < NumBits; ++i)
1844 Bits[i] = ValueBit(V, i);
1845
1846 return std::make_pair(Interesting = false, &Bits);
1847 }
1848
1849 // For each value (except the constant ones), compute the left-rotate amount
1850 // to get it from its original to final position.
1851 void computeRotationAmounts() {
1852 NeedMask = false;
1853 RLAmt.resize(Bits.size());
1854 for (unsigned i = 0; i < Bits.size(); ++i)
1855 if (Bits[i].hasValue()) {
1856 unsigned VBI = Bits[i].getValueBitIndex();
1857 if (i >= VBI)
1858 RLAmt[i] = i - VBI;
1859 else
1860 RLAmt[i] = Bits.size() - (VBI - i);
1861 } else if (Bits[i].isZero()) {
1862 NeedMask = true;
1863 RLAmt[i] = UINT32_MAX;
1864 } else {
1865 llvm_unreachable("Unknown value bit type");
1866 }
1867 }
1868
1869 // Collect groups of consecutive bits with the same underlying value and
1870 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1871 // they break up groups.
1872 void collectBitGroups(bool LateMask) {
1873 BitGroups.clear();
1874
1875 unsigned LastRLAmt = RLAmt[0];
1876 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1877 unsigned LastGroupStartIdx = 0;
1878 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1879 for (unsigned i = 1; i < Bits.size(); ++i) {
1880 unsigned ThisRLAmt = RLAmt[i];
1881 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1882 if (LateMask && !ThisValue) {
1883 ThisValue = LastValue;
1884 ThisRLAmt = LastRLAmt;
1885 // If we're doing late masking, then the first bit group always starts
1886 // at zero (even if the first bits were zero).
1887 if (BitGroups.empty())
1888 LastGroupStartIdx = 0;
1889 }
1890
1891 // If this bit is known to be zero and the current group is a bit group
1892 // of zeros, we do not need to terminate the current bit group even the
1893 // Value or RLAmt does not match here. Instead, we terminate this group
1894 // when the first non-zero bit appears later.
1895 if (IsGroupOfZeros && Bits[i].isZero())
1896 continue;
1897
1898 // If this bit has the same underlying value and the same rotate factor as
1899 // the last one, then they're part of the same group.
1900 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1901 // We cannot continue the current group if this bits is not known to
1902 // be zero in a bit group of zeros.
1903 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1904 continue;
1905
1906 if (LastValue.getNode())
1907 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1908 i-1));
1909 LastRLAmt = ThisRLAmt;
1910 LastValue = ThisValue;
1911 LastGroupStartIdx = i;
1912 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1913 }
1914 if (LastValue.getNode())
1915 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1916 Bits.size()-1));
1917
1918 if (BitGroups.empty())
1919 return;
1920
1921 // We might be able to combine the first and last groups.
1922 if (BitGroups.size() > 1) {
1923 // If the first and last groups are the same, then remove the first group
1924 // in favor of the last group, making the ending index of the last group
1925 // equal to the ending index of the to-be-removed first group.
1926 if (BitGroups[0].StartIdx == 0 &&
1927 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1928 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1929 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1930 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1931 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1932 BitGroups.erase(BitGroups.begin());
1933 }
1934 }
1935 }
1936
1937 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1938 // associated with each. If the number of groups are same, we prefer a group
1939 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1940 // instruction. If there is a degeneracy, pick the one that occurs
1941 // first (in the final value).
1942 void collectValueRotInfo() {
1943 ValueRots.clear();
1944
1945 for (auto &BG : BitGroups) {
1946 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1947 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1948 VRI.V = BG.V;
1949 VRI.RLAmt = BG.RLAmt;
1950 VRI.Repl32 = BG.Repl32;
1951 VRI.NumGroups += 1;
1952 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1953 }
1954
1955 // Now that we've collected the various ValueRotInfo instances, we need to
1956 // sort them.
1957 ValueRotsVec.clear();
1958 for (auto &I : ValueRots) {
1959 ValueRotsVec.push_back(I.second);
1960 }
1961 llvm::sort(ValueRotsVec);
1962 }
1963
1964 // In 64-bit mode, rlwinm and friends have a rotation operator that
1965 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1966 // indices of these instructions can only be in the lower 32 bits, so they
1967 // can only represent some 64-bit bit groups. However, when they can be used,
1968 // the 32-bit replication can be used to represent, as a single bit group,
1969 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1970 // groups when possible. The conversion updates BitGroups in place; nothing
1971 // is returned.
1972 void assignRepl32BitGroups() {
1973 // If we have bits like this:
1974 //
1975 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1976 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1977 // Groups: | RLAmt = 8 | RLAmt = 40 |
1978 //
1979 // But, making use of a 32-bit operation that replicates the low-order 32
1980 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1981 // of 8.
1982
1983 auto IsAllLow32 = [this](BitGroup & BG) {
1984 if (BG.StartIdx <= BG.EndIdx) {
1985 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1986 if (!Bits[i].hasValue())
1987 continue;
1988 if (Bits[i].getValueBitIndex() >= 32)
1989 return false;
1990 }
1991 } else {
1992 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1993 if (!Bits[i].hasValue())
1994 continue;
1995 if (Bits[i].getValueBitIndex() >= 32)
1996 return false;
1997 }
1998 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1999 if (!Bits[i].hasValue())
2000 continue;
2001 if (Bits[i].getValueBitIndex() >= 32)
2002 return false;
2003 }
2004 }
2005
2006 return true;
2007 };
2008
2009 for (auto &BG : BitGroups) {
2010 // If this bit group has RLAmt of 0 and will not be merged with
2011 // another bit group, we don't benefit from Repl32. We don't mark
2012 // such group to give more freedom for later instruction selection.
2013 if (BG.RLAmt == 0) {
2014 auto PotentiallyMerged = [this](BitGroup & BG) {
2015 for (auto &BG2 : BitGroups)
2016 if (&BG != &BG2 && BG.V == BG2.V &&
2017 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2018 return true;
2019 return false;
2020 };
2021 if (!PotentiallyMerged(BG))
2022 continue;
2023 }
2024 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2025 if (IsAllLow32(BG)) {
2026 if (BG.RLAmt >= 32) {
2027 BG.RLAmt -= 32;
2028 BG.Repl32CR = true;
2029 }
2030
2031 BG.Repl32 = true;
2032
2033 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2034 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2035 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2036 }
2037 }
2038 }
2039
2040 // Now walk through the bit groups, consolidating where possible.
2041 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2042 // We might want to remove this bit group by merging it with the previous
2043 // group (which might be the ending group).
2044 auto IP = (I == BitGroups.begin()) ?
2045 std::prev(BitGroups.end()) : std::prev(I);
2046 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2047 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2048
2049 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2050 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2051 << I->StartIdx << ", " << I->EndIdx
2052 << "] with group with range [" << IP->StartIdx << ", "
2053 << IP->EndIdx << "]\n");
2054
2055 IP->EndIdx = I->EndIdx;
2056 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2057 IP->Repl32Coalesced = true;
2058 I = BitGroups.erase(I);
2059 continue;
2060 } else {
2061 // There is a special case worth handling: If there is a single group
2062 // covering the entire upper 32 bits, and it can be merged with both
2063 // the next and previous groups (which might be the same group), then
2064 // do so. If it is the same group (so there will be only one group in
2065 // total), then we need to reverse the order of the range so that it
2066 // covers the entire 64 bits.
2067 if (I->StartIdx == 32 && I->EndIdx == 63) {
2068 assert(std::next(I) == BitGroups.end() &&
2069 "bit group ends at index 63 but there is another?");
2070 auto IN = BitGroups.begin();
2071
2072 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2073 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2074 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2075 IsAllLow32(*I)) {
2076
2077 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2078 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2079 << ", " << I->EndIdx
2080 << "] with 32-bit replicated groups with ranges ["
2081 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2082 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2083
2084 if (IP == IN) {
2085 // There is only one other group; change it to cover the whole
2086 // range (backward, so that it can still be Repl32 but cover the
2087 // whole 64-bit range).
2088 IP->StartIdx = 31;
2089 IP->EndIdx = 30;
2090 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2091 IP->Repl32Coalesced = true;
2092 I = BitGroups.erase(I);
2093 } else {
2094 // There are two separate groups, one before this group and one
2095 // after us (at the beginning). We're going to remove this group,
2096 // but also the group at the very beginning.
2097 IP->EndIdx = IN->EndIdx;
2098 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2099 IP->Repl32Coalesced = true;
2100 I = BitGroups.erase(I);
2101 BitGroups.erase(BitGroups.begin());
2102 }
2103
2104 // This must be the last group in the vector (and we might have
2105 // just invalidated the iterator above), so break here.
2106 break;
2107 }
2108 }
2109 }
2110
2111 ++I;
2112 }
2113 }
2114
2115 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2116 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2117 }
2118
2119 uint64_t getZerosMask() {
2120 uint64_t Mask = 0;
2121 for (unsigned i = 0; i < Bits.size(); ++i) {
2122 if (Bits[i].hasValue())
2123 continue;
2124 Mask |= (UINT64_C(1) << i);
2125 }
2126
2127 return ~Mask;
2128 }
2129
2130 // This method extends an input value to 64 bit if input is 32-bit integer.
2131 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2132 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2133 // In such case, we extend it to 64 bit to be consistent with other values.
2134 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2135 if (V.getValueSizeInBits() == 64)
2136 return V;
2137
2138 assert(V.getValueSizeInBits() == 32);
2139 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2140 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2141 MVT::i64), 0);
2142 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2143 MVT::i64, ImDef, V,
2144 SubRegIdx), 0);
2145 return ExtVal;
2146 }
2147
2148 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2149 if (V.getValueSizeInBits() == 32)
2150 return V;
2151
2152 assert(V.getValueSizeInBits() == 64);
2153 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2154 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2155 MVT::i32, V, SubRegIdx), 0);
2156 return SubVal;
2157 }
2158
2159 // Depending on the number of groups for a particular value, it might be
2160 // better to rotate, mask explicitly (using andi/andis), and then or the
2161 // result. Select this part of the result first.
2162 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2164 return;
2165
2166 for (ValueRotInfo &VRI : ValueRotsVec) {
2167 unsigned Mask = 0;
2168 for (unsigned i = 0; i < Bits.size(); ++i) {
2169 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2170 continue;
2171 if (RLAmt[i] != VRI.RLAmt)
2172 continue;
2173 Mask |= (1u << i);
2174 }
2175
2176 // Compute the masks for andi/andis that would be necessary.
2177 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2178 assert((ANDIMask != 0 || ANDISMask != 0) &&
2179 "No set bits in mask for value bit groups");
2180 bool NeedsRotate = VRI.RLAmt != 0;
2181
2182 // We're trying to minimize the number of instructions. If we have one
2183 // group, using one of andi/andis can break even. If we have three
2184 // groups, we can use both andi and andis and break even (to use both
2185 // andi and andis we also need to or the results together). We need four
2186 // groups if we also need to rotate. To use andi/andis we need to do more
2187 // than break even because rotate-and-mask instructions tend to be easier
2188 // to schedule.
2189
2190 // FIXME: We've biased here against using andi/andis, which is right for
2191 // POWER cores, but not optimal everywhere. For example, on the A2,
2192 // andi/andis have single-cycle latency whereas the rotate-and-mask
2193 // instructions take two cycles, and it would be better to bias toward
2194 // andi/andis in break-even cases.
2195
2196 unsigned NumAndInsts = (unsigned) NeedsRotate +
2197 (unsigned) (ANDIMask != 0) +
2198 (unsigned) (ANDISMask != 0) +
2199 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2200 (unsigned) (bool) Res;
2201
2202 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2203 << " RL: " << VRI.RLAmt << ":"
2204 << "\n\t\t\tisel using masking: " << NumAndInsts
2205 << " using rotates: " << VRI.NumGroups << "\n");
2206
2207 if (NumAndInsts >= VRI.NumGroups)
2208 continue;
2209
2210 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2211
2212 if (InstCnt) *InstCnt += NumAndInsts;
2213
2214 SDValue VRot;
2215 if (VRI.RLAmt) {
2216 SDValue Ops[] =
2217 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2218 getI32Imm(0, dl), getI32Imm(31, dl) };
2219 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2220 Ops), 0);
2221 } else {
2222 VRot = TruncateToInt32(VRI.V, dl);
2223 }
2224
2225 SDValue ANDIVal, ANDISVal;
2226 if (ANDIMask != 0)
2227 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2228 VRot, getI32Imm(ANDIMask, dl)),
2229 0);
2230 if (ANDISMask != 0)
2231 ANDISVal =
2232 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2233 getI32Imm(ANDISMask, dl)),
2234 0);
2235
2236 SDValue TotalVal;
2237 if (!ANDIVal)
2238 TotalVal = ANDISVal;
2239 else if (!ANDISVal)
2240 TotalVal = ANDIVal;
2241 else
2242 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2243 ANDIVal, ANDISVal), 0);
2244
2245 if (!Res)
2246 Res = TotalVal;
2247 else
2248 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2249 Res, TotalVal), 0);
2250
2251 // Now, remove all groups with this underlying value and rotation
2252 // factor.
2253 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2254 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2255 });
2256 }
2257 }
2258
2259 // Instruction selection for the 32-bit case.
2260 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2261 SDLoc dl(N);
2262 SDValue Res;
2263
2264 if (InstCnt) *InstCnt = 0;
2265
2266 // Take care of cases that should use andi/andis first.
2267 SelectAndParts32(dl, Res, InstCnt);
2268
2269 // If we've not yet selected a 'starting' instruction, and we have no zeros
2270 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2271 // number of groups), and start with this rotated value.
2272 if ((!NeedMask || LateMask) && !Res) {
2273 ValueRotInfo &VRI = ValueRotsVec[0];
2274 if (VRI.RLAmt) {
2275 if (InstCnt) *InstCnt += 1;
2276 SDValue Ops[] =
2277 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2278 getI32Imm(0, dl), getI32Imm(31, dl) };
2279 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2280 0);
2281 } else {
2282 Res = TruncateToInt32(VRI.V, dl);
2283 }
2284
2285 // Now, remove all groups with this underlying value and rotation factor.
2286 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2287 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2288 });
2289 }
2290
2291 if (InstCnt) *InstCnt += BitGroups.size();
2292
2293 // Insert the other groups (one at a time).
2294 for (auto &BG : BitGroups) {
2295 if (!Res) {
2296 SDValue Ops[] =
2297 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2298 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2299 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2300 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2301 } else {
2302 SDValue Ops[] =
2303 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2304 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2305 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2306 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2307 }
2308 }
2309
2310 if (LateMask) {
2311 unsigned Mask = (unsigned) getZerosMask();
2312
2313 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2314 assert((ANDIMask != 0 || ANDISMask != 0) &&
2315 "No set bits in zeros mask?");
2316
2317 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2318 (unsigned) (ANDISMask != 0) +
2319 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2320
2321 SDValue ANDIVal, ANDISVal;
2322 if (ANDIMask != 0)
2323 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2324 Res, getI32Imm(ANDIMask, dl)),
2325 0);
2326 if (ANDISMask != 0)
2327 ANDISVal =
2328 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2329 getI32Imm(ANDISMask, dl)),
2330 0);
2331
2332 if (!ANDIVal)
2333 Res = ANDISVal;
2334 else if (!ANDISVal)
2335 Res = ANDIVal;
2336 else
2337 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2338 ANDIVal, ANDISVal), 0);
2339 }
2340
2341 return Res.getNode();
2342 }
2343
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // Returns 1 or 2: whether the requested 64-bit rotate-and-mask (or
  // rotate-and-insert, when IsIns is set) is counted as a single
  // instruction or as a pair.

  // A replicated-32-bit group is always counted as one instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // Without insertion, a mask touching either end of the register counts as
  // one instruction.
  const bool MaskTouchesAnEnd =
      !IsIns && (InstMaskEnd == 63 || InstMaskStart == 0);
  // A mask whose end is tied to the rotation amount also counts as one.
  const bool MaskEndMatchesRotation = InstMaskEnd == 63 - RLAmt;

  return (MaskTouchesAnEnd || MaskEndMatchesRotation) ? 1 : 2;
}
2361
2362 // For 64-bit values, not all combinations of rotates and masks are
2363 // available. Produce one if it is available.
2364 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2365 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2366 unsigned *InstCnt = nullptr) {
2367 // In the notation used by the instructions, 'start' and 'end' are reversed
2368 // because bits are counted from high to low order.
2369 unsigned InstMaskStart = 64 - MaskEnd - 1,
2370 InstMaskEnd = 64 - MaskStart - 1;
2371
2372 if (InstCnt) *InstCnt += 1;
2373
2374 if (Repl32) {
2375 // This rotation amount assumes that the lower 32 bits of the quantity
2376 // are replicated in the high 32 bits by the rotation operator (which is
2377 // done by rlwinm and friends).
2378 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2379 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2380 SDValue Ops[] =
2381 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2382 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2383 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2384 Ops), 0);
2385 }
2386
2387 if (InstMaskEnd == 63) {
2388 SDValue Ops[] =
2389 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2390 getI32Imm(InstMaskStart, dl) };
2391 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2392 }
2393
2394 if (InstMaskStart == 0) {
2395 SDValue Ops[] =
2396 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2397 getI32Imm(InstMaskEnd, dl) };
2398 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2399 }
2400
2401 if (InstMaskEnd == 63 - RLAmt) {
2402 SDValue Ops[] =
2403 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2404 getI32Imm(InstMaskStart, dl) };
2405 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2406 }
2407
2408 // We cannot do this with a single instruction, so we'll use two. The
2409 // problem is that we're not free to choose both a rotation amount and mask
2410 // start and end independently. We can choose an arbitrary mask start and
2411 // end, but then the rotation amount is fixed. Rotation, however, can be
2412 // inverted, and so by applying an "inverse" rotation first, we can get the
2413 // desired result.
2414 if (InstCnt) *InstCnt += 1;
2415
2416 // The rotation mask for the second instruction must be MaskStart.
2417 unsigned RLAmt2 = MaskStart;
2418 // The first instruction must rotate V so that the overall rotation amount
2419 // is RLAmt.
2420 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2421 if (RLAmt1)
2422 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2423 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2424 }
2425
2426 // For 64-bit values, not all combinations of rotates and masks are
2427 // available. Produce a rotate-mask-and-insert if one is available.
2428 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2429 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2430 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2431 // In the notation used by the instructions, 'start' and 'end' are reversed
2432 // because bits are counted from high to low order.
2433 unsigned InstMaskStart = 64 - MaskEnd - 1,
2434 InstMaskEnd = 64 - MaskStart - 1;
2435
2436 if (InstCnt) *InstCnt += 1;
2437
2438 if (Repl32) {
2439 // This rotation amount assumes that the lower 32 bits of the quantity
2440 // are replicated in the high 32 bits by the rotation operator (which is
2441 // done by rlwinm and friends).
2442 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2443 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2444 SDValue Ops[] =
2445 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2446 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2447 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2448 Ops), 0);
2449 }
2450
2451 if (InstMaskEnd == 63 - RLAmt) {
2452 SDValue Ops[] =
2453 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2454 getI32Imm(InstMaskStart, dl) };
2455 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2456 }
2457
2458 // We cannot do this with a single instruction, so we'll use two. The
2459 // problem is that we're not free to choose both a rotation amount and mask
2460 // start and end independently. We can choose an arbitrary mask start and
2461 // end, but then the rotation amount is fixed. Rotation, however, can be
2462 // inverted, and so by applying an "inverse" rotation first, we can get the
2463 // desired result.
2464 if (InstCnt) *InstCnt += 1;
2465
2466 // The rotation mask for the second instruction must be MaskStart.
2467 unsigned RLAmt2 = MaskStart;
2468 // The first instruction must rotate V so that the overall rotation amount
2469 // is RLAmt.
2470 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2471 if (RLAmt1)
2472 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2473 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2474 }
2475
2476 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2478 return;
2479
2480 // The idea here is the same as in the 32-bit version, but with additional
2481 // complications from the fact that Repl32 might be true. Because we
2482 // aggressively convert bit groups to Repl32 form (which, for small
2483 // rotation factors, involves no other change), and then coalesce, it might
2484 // be the case that a single 64-bit masking operation could handle both
2485 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2486 // form allowed coalescing, then we must use a 32-bit rotation in order to
2487 // completely capture the new combined bit group.
2488
2489 for (ValueRotInfo &VRI : ValueRotsVec) {
2490 uint64_t Mask = 0;
2491
2492 // We need to add to the mask all bits from the associated bit groups.
2493 // If Repl32 is false, we need to add bits from bit groups that have
2494 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2495 // group is trivially convertable if it overlaps only with the lower 32
2496 // bits, and the group has not been coalesced.
2497 auto MatchingBG = [VRI](const BitGroup &BG) {
2498 if (VRI.V != BG.V)
2499 return false;
2500
2501 unsigned EffRLAmt = BG.RLAmt;
2502 if (!VRI.Repl32 && BG.Repl32) {
2503 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2504 !BG.Repl32Coalesced) {
2505 if (BG.Repl32CR)
2506 EffRLAmt += 32;
2507 } else {
2508 return false;
2509 }
2510 } else if (VRI.Repl32 != BG.Repl32) {
2511 return false;
2512 }
2513
2514 return VRI.RLAmt == EffRLAmt;
2515 };
2516
2517 for (auto &BG : BitGroups) {
2518 if (!MatchingBG(BG))
2519 continue;
2520
2521 if (BG.StartIdx <= BG.EndIdx) {
2522 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2523 Mask |= (UINT64_C(1) << i);
2524 } else {
2525 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2526 Mask |= (UINT64_C(1) << i);
2527 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2528 Mask |= (UINT64_C(1) << i);
2529 }
2530 }
2531
2532 // We can use the 32-bit andi/andis technique if the mask does not
2533 // require any higher-order bits. This can save an instruction compared
2534 // to always using the general 64-bit technique.
2535 bool Use32BitInsts = isUInt<32>(Mask);
2536 // Compute the masks for andi/andis that would be necessary.
2537 unsigned ANDIMask = (Mask & UINT16_MAX),
2538 ANDISMask = (Mask >> 16) & UINT16_MAX;
2539
2540 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2541
2542 unsigned NumAndInsts = (unsigned) NeedsRotate +
2543 (unsigned) (bool) Res;
2544 unsigned NumOfSelectInsts = 0;
2545 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2546 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2547 if (Use32BitInsts)
2548 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2549 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2550 else
2551 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2552
2553 unsigned NumRLInsts = 0;
2554 bool FirstBG = true;
2555 bool MoreBG = false;
2556 for (auto &BG : BitGroups) {
2557 if (!MatchingBG(BG)) {
2558 MoreBG = true;
2559 continue;
2560 }
2561 NumRLInsts +=
2562 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2563 !FirstBG);
2564 FirstBG = false;
2565 }
2566
2567 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2568 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2569 << "\n\t\t\tisel using masking: " << NumAndInsts
2570 << " using rotates: " << NumRLInsts << "\n");
2571
2572 // When we'd use andi/andis, we bias toward using the rotates (andi only
2573 // has a record form, and is cracked on POWER cores). However, when using
2574 // general 64-bit constant formation, bias toward the constant form,
2575 // because that exposes more opportunities for CSE.
2576 if (NumAndInsts > NumRLInsts)
2577 continue;
2578 // When merging multiple bit groups, instruction or is used.
2579 // But when rotate is used, rldimi can inert the rotated value into any
2580 // register, so instruction or can be avoided.
2581 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2582 continue;
2583
2584 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2585
2586 if (InstCnt) *InstCnt += NumAndInsts;
2587
2588 SDValue VRot;
2589 // We actually need to generate a rotation if we have a non-zero rotation
2590 // factor or, in the Repl32 case, if we care about any of the
2591 // higher-order replicated bits. In the latter case, we generate a mask
2592 // backward so that it actually includes the entire 64 bits.
2593 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2594 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2595 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2596 else
2597 VRot = VRI.V;
2598
2599 SDValue TotalVal;
2600 if (Use32BitInsts) {
2601 assert((ANDIMask != 0 || ANDISMask != 0) &&
2602 "No set bits in mask when using 32-bit ands for 64-bit value");
2603
2604 SDValue ANDIVal, ANDISVal;
2605 if (ANDIMask != 0)
2606 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2607 ExtendToInt64(VRot, dl),
2608 getI32Imm(ANDIMask, dl)),
2609 0);
2610 if (ANDISMask != 0)
2611 ANDISVal =
2612 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2613 ExtendToInt64(VRot, dl),
2614 getI32Imm(ANDISMask, dl)),
2615 0);
2616
2617 if (!ANDIVal)
2618 TotalVal = ANDISVal;
2619 else if (!ANDISVal)
2620 TotalVal = ANDIVal;
2621 else
2622 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2623 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2624 } else {
2625 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2626 TotalVal =
2627 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2628 ExtendToInt64(VRot, dl), TotalVal),
2629 0);
2630 }
2631
2632 if (!Res)
2633 Res = TotalVal;
2634 else
2635 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2636 ExtendToInt64(Res, dl), TotalVal),
2637 0);
2638
2639 // Now, remove all groups with this underlying value and rotation
2640 // factor.
2641 eraseMatchingBitGroups(MatchingBG);
2642 }
2643 }
2644
2645 // Instruction selection for the 64-bit case.
2646 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2647 SDLoc dl(N);
2648 SDValue Res;
2649
2650 if (InstCnt) *InstCnt = 0;
2651
2652 // Take care of cases that should use andi/andis first.
2653 SelectAndParts64(dl, Res, InstCnt);
2654
2655 // If we've not yet selected a 'starting' instruction, and we have no zeros
2656 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2657 // number of groups), and start with this rotated value.
2658 if ((!NeedMask || LateMask) && !Res) {
2659 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2660 // groups will come first, and so the VRI representing the largest number
2661 // of groups might not be first (it might be the first Repl32 groups).
2662 unsigned MaxGroupsIdx = 0;
2663 if (!ValueRotsVec[0].Repl32) {
2664 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2665 if (ValueRotsVec[i].Repl32) {
2666 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2667 MaxGroupsIdx = i;
2668 break;
2669 }
2670 }
2671
2672 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2673 bool NeedsRotate = false;
2674 if (VRI.RLAmt) {
2675 NeedsRotate = true;
2676 } else if (VRI.Repl32) {
2677 for (auto &BG : BitGroups) {
2678 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2679 BG.Repl32 != VRI.Repl32)
2680 continue;
2681
2682 // We don't need a rotate if the bit group is confined to the lower
2683 // 32 bits.
2684 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2685 continue;
2686
2687 NeedsRotate = true;
2688 break;
2689 }
2690 }
2691
2692 if (NeedsRotate)
2693 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2694 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2695 InstCnt);
2696 else
2697 Res = VRI.V;
2698
2699 // Now, remove all groups with this underlying value and rotation factor.
2700 if (Res)
2701 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2702 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2703 BG.Repl32 == VRI.Repl32;
2704 });
2705 }
2706
2707 // Because 64-bit rotates are more flexible than inserts, we might have a
2708 // preference regarding which one we do first (to save one instruction).
2709 if (!Res)
2710 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2711 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2712 false) <
2713 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2714 true)) {
2715 if (I != BitGroups.begin()) {
2716 BitGroup BG = *I;
2717 BitGroups.erase(I);
2718 BitGroups.insert(BitGroups.begin(), BG);
2719 }
2720
2721 break;
2722 }
2723 }
2724
2725 // Insert the other groups (one at a time).
2726 for (auto &BG : BitGroups) {
2727 if (!Res)
2728 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2729 BG.EndIdx, InstCnt);
2730 else
2731 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2732 BG.StartIdx, BG.EndIdx, InstCnt);
2733 }
2734
2735 if (LateMask) {
2736 uint64_t Mask = getZerosMask();
2737
2738 // We can use the 32-bit andi/andis technique if the mask does not
2739 // require any higher-order bits. This can save an instruction compared
2740 // to always using the general 64-bit technique.
2741 bool Use32BitInsts = isUInt<32>(Mask);
2742 // Compute the masks for andi/andis that would be necessary.
2743 unsigned ANDIMask = (Mask & UINT16_MAX),
2744 ANDISMask = (Mask >> 16) & UINT16_MAX;
2745
2746 if (Use32BitInsts) {
2747 assert((ANDIMask != 0 || ANDISMask != 0) &&
2748 "No set bits in mask when using 32-bit ands for 64-bit value");
2749
2750 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2751 (unsigned) (ANDISMask != 0) +
2752 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2753
2754 SDValue ANDIVal, ANDISVal;
2755 if (ANDIMask != 0)
2756 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2757 ExtendToInt64(Res, dl),
2758 getI32Imm(ANDIMask, dl)),
2759 0);
2760 if (ANDISMask != 0)
2761 ANDISVal =
2762 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2763 ExtendToInt64(Res, dl),
2764 getI32Imm(ANDISMask, dl)),
2765 0);
2766
2767 if (!ANDIVal)
2768 Res = ANDISVal;
2769 else if (!ANDISVal)
2770 Res = ANDIVal;
2771 else
2772 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2773 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2774 } else {
2775 unsigned NumOfSelectInsts = 0;
2776 SDValue MaskVal =
2777 SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2778 Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2779 ExtendToInt64(Res, dl), MaskVal),
2780 0);
2781 if (InstCnt)
2782 *InstCnt += NumOfSelectInsts + /* and */ 1;
2783 }
2784 }
2785
2786 return Res.getNode();
2787 }
2788
2789 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2790 // Fill in BitGroups.
2791 collectBitGroups(LateMask);
2792 if (BitGroups.empty())
2793 return nullptr;
2794
2795 // For 64-bit values, figure out when we can use 32-bit instructions.
2796 if (Bits.size() == 64)
2797 assignRepl32BitGroups();
2798
2799 // Fill in ValueRotsVec.
2800 collectValueRotInfo();
2801
2802 if (Bits.size() == 32) {
2803 return Select32(N, LateMask, InstCnt);
2804 } else {
2805 assert(Bits.size() == 64 && "Not 64 bits here?");
2806 return Select64(N, LateMask, InstCnt);
2807 }
2808
2809 return nullptr;
2810 }
2811
2812 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2813 erase_if(BitGroups, F);
2814 }
2815
2817
// NOTE(review): the embedded listing numbers skip 2816 and 2819, so member
// declarations adjacent to these appear to be missing from this listing
// (e.g. the `Bits` container used by the selection methods is not declared
// here). Confirm against the upstream file.

// Set by computeRotationAmounts(); when false the selectors treat the result
// as having no zeros to fill in.
2818 bool NeedMask = false;
2820
// The bit groups still to be selected; filled by collectBitGroups() and
// pruned via eraseMatchingBitGroups().
2821 SmallVector<BitGroup, 16> BitGroups;
2822
// Per-value rotation summaries. ValueRotsVec is filled by
// collectValueRotInfo(); the map key is presumably (source value, rotation
// amount) - TODO confirm against collectValueRotInfo().
2823 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2824 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2825
// The DAG in which all machine nodes are created.
2826 SelectionDAG *CurDAG = nullptr;
2827
2828public:
2829 BitPermutationSelector(SelectionDAG *DAG)
2830 : CurDAG(DAG) {}
2831
2832 // Here we try to match complex bit permutations into a set of
2833 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2834 // known to produce optimal code for common cases (like i32 byte swapping).
2835 SDNode *Select(SDNode *N) {
2836 Memoizer.clear();
2837 auto Result =
2838 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2839 if (!Result.first)
2840 return nullptr;
2841 Bits = std::move(*Result.second);
2842
2843 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2844 " selection for: ");
2845 LLVM_DEBUG(N->dump(CurDAG));
2846
2847 // Fill it RLAmt and set NeedMask.
2848 computeRotationAmounts();
2849
2850 if (!NeedMask)
2851 return Select(N, false);
2852
2853 // We currently have two techniques for handling results with zeros: early
2854 // masking (the default) and late masking. Late masking is sometimes more
2855 // efficient, but because the structure of the bit groups is different, it
2856 // is hard to tell without generating both and comparing the results. With
2857 // late masking, we ignore zeros in the resulting value when inserting each
2858 // set of bit groups, and then mask in the zeros at the end. With early
2859 // masking, we only insert the non-zero parts of the result at every step.
2860
2861 unsigned InstCnt = 0, InstCntLateMask = 0;
2862 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2863 SDNode *RN = Select(N, false, &InstCnt);
2864 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2865
2866 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2867 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2868 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2869 << " instructions\n");
2870
2871 if (InstCnt <= InstCntLateMask) {
2872 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2873 return RN;
2874 }
2875
2876 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2877 return RNLM;
2878 }
2879};
2880
// Helper that tries to compute the results of integer comparisons (and of
// logical operations on such results) in general-purpose registers. The
// entry point is Select(); the private methods build the individual GPR
// sequences.
2881class IntegerCompareEliminator {
// DAG in which new machine nodes are created.
2882 SelectionDAG *CurDAG;
// Owning instruction selector; used in this class for its immediate helpers
// (getI32Imm / getI64Imm).
2883 PPCDAGToDAGISel *S;
2884 // Conversion type for interpreting results of a 32-bit instruction as
2885 // a 64-bit value or vice versa.
2886 enum ExtOrTruncConversion { Ext, Trunc };
2887
2888 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2889 // in a GPR.
2890 // ZExtOrig - use the original condition code, zero-extend value
2891 // ZExtInvert - invert the condition code, zero-extend value
2892 // SExtOrig - use the original condition code, sign-extend value
2893 // SExtInvert - invert the condition code, sign-extend value
2894 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2895
2896 // Comparisons against zero to emit GPR code sequences for. Each of these
2897 // sequences may need to be emitted for two or more equivalent patterns.
2898 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2899 // matters as well as the extension type: sext (-1/0), zext (1/0).
2900 // GEZExt - (zext (LHS >= 0))
2901 // GESExt - (sext (LHS >= 0))
2902 // LEZExt - (zext (LHS <= 0))
2903 // LESExt - (sext (LHS <= 0))
2904 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2905
2906 SDNode *tryEXTEND(SDNode *N);
2907 SDNode *tryLogicOpOfCompares(SDNode *N);
2908 SDValue computeLogicOpInGPR(SDValue LogicOp);
2909 SDValue signExtendInputIfNeeded(SDValue Input);
2910 SDValue zeroExtendInputIfNeeded(SDValue Input);
2911 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2912 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2913 ZeroCompare CmpTy);
2914 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2915 int64_t RHSValue, SDLoc dl);
2916 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2917 int64_t RHSValue, SDLoc dl);
2918 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2919 int64_t RHSValue, SDLoc dl);
2920 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2921 int64_t RHSValue, SDLoc dl);
2922 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2923
2924public:
// Stores the DAG and the selector. The body checks (per the assert message)
// that the class is only used on 64-bit targets.
2925 IntegerCompareEliminator(SelectionDAG *DAG,
2926 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
// NOTE(review): the embedded listing numbers skip 2927, so the opening of
// this assert expression is missing from the listing; confirm against the
// upstream file.
2928 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2929 "Only expecting to use this on 64 bit targets.");
2930 }
// Entry point: returns a replacement node for \p N, or nullptr when nothing
// was done. Extends are routed to tryEXTEND and AND/OR/XOR to
// tryLogicOpOfCompares; everything is skipped when CmpInGPR is ICGPR_None.
2931 SDNode *Select(SDNode *N) {
2932 if (CmpInGPR == ICGPR_None)
2933 return nullptr;
2934 switch (N->getOpcode()) {
2935 default: break;
2936 case ISD::ZERO_EXTEND:
// NOTE(review): the embedded listing numbers skip 2937-2938 here and
// 2942-2943 below, so the conditions guarding these two early returns are
// missing from the listing; confirm against the upstream file.
2939 return nullptr;
2940 [[fallthrough]];
2941 case ISD::SIGN_EXTEND:
2944 return nullptr;
2945 return tryEXTEND(N);
2946 case ISD::AND:
2947 case ISD::OR:
2948 case ISD::XOR:
2949 return tryLogicOpOfCompares(N);
2950 }
2951 return nullptr;
2952 }
2953};
2954
2955// The obvious case for wanting to keep the value in a GPR. Namely, the
2956// result of the comparison is actually needed in a GPR.
2957SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2958 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2959 N->getOpcode() == ISD::SIGN_EXTEND) &&
2960 "Expecting a zero/sign extend node!");
2961 SDValue WideRes;
2962 // If we are zero-extending the result of a logical operation on i1
2963 // values, we can keep the values in GPRs.
2964 if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2965 N->getOperand(0).getValueType() == MVT::i1 &&
2966 N->getOpcode() == ISD::ZERO_EXTEND)
2967 WideRes = computeLogicOpInGPR(N->getOperand(0));
2968 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2969 return nullptr;
2970 else
2971 WideRes =
2972 getSETCCInGPR(N->getOperand(0),
2973 N->getOpcode() == ISD::SIGN_EXTEND ?
2974 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2975
2976 if (!WideRes)
2977 return nullptr;
2978
2979 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2980 bool Output32Bit = N->getValueType(0) == MVT::i32;
2981
2982 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2983 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2984
2985 SDValue ConvOp = WideRes;
2986 if (Input32Bit != Output32Bit)
2987 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2988 ExtOrTruncConversion::Trunc);
2989 return ConvOp.getNode();
2990}
2991
2992// Attempt to perform logical operations on the results of comparisons while
2993// keeping the values in GPRs. Without doing so, these would end up being
2994// lowered to CR-logical operations which suffer from significant latency and
2995// low ILP.
2996SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2997 if (N->getValueType(0) != MVT::i1)
2998 return nullptr;
2999 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
3000 "Expected a logic operation on setcc results.");
3001 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
3002 if (!LoweredLogical)
3003 return nullptr;
3004
3005 SDLoc dl(N);
3006 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3007 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3008 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3009 SDValue LHS = LoweredLogical.getOperand(0);
3010 SDValue RHS = LoweredLogical.getOperand(1);
3011 SDValue WideOp;
3012 SDValue OpToConvToRecForm;
3013
3014 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3015 // opcode that is input to the XORI.
3016 if (IsBitwiseNegate &&
3017 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3018 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3019 else if (IsBitwiseNegate)
3020 // If the input to the XORI isn't an extension, that's what we're after.
3021 OpToConvToRecForm = LoweredLogical.getOperand(0);
3022 else
3023 // If this is not an XORI, it is a reg-reg logical op and we can convert
3024 // it to record-form.
3025 OpToConvToRecForm = LoweredLogical;
3026
3027 // Get the record-form version of the node we're looking to use to get the
3028 // CR result from.
3029 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3030 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3031
3032 // Convert the right node to record-form. This is either the logical we're
3033 // looking at or it is the input node to the negation (if we're looking at
3034 // a bitwise negation).
3035 if (NewOpc != -1 && IsBitwiseNegate) {
3036 // The input to the XORI has a record-form. Use it.
3037 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3038 "Expected a PPC::XORI8 only for bitwise negation.");
3039 // Emit the record-form instruction.
3040 std::vector<SDValue> Ops;
3041 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3042 Ops.push_back(OpToConvToRecForm.getOperand(i));
3043
3044 WideOp =
3045 SDValue(CurDAG->getMachineNode(NewOpc, dl,
3046 OpToConvToRecForm.getValueType(),
3047 MVT::Glue, Ops), 0);
3048 } else {
3049 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3050 "No record form available for AND8/OR8/XOR8?");
3051 WideOp =
3052 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3053 dl, MVT::i64, MVT::Glue, LHS, RHS),
3054 0);
3055 }
3056
3057 // Select this node to a single bit from CR0 set by the record-form node
3058 // just created. For bitwise negation, use the EQ bit which is the equivalent
3059 // of negating the result (i.e. it is a bit set when the result of the
3060 // operation is zero).
3061 SDValue SRIdxVal =
3062 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3063 SDValue CRBit =
3064 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3065 MVT::i1, CR0Reg, SRIdxVal,
3066 WideOp.getValue(1)), 0);
3067 return CRBit.getNode();
3068}
3069
3070// Lower a logical operation on i1 values into a GPR sequence if possible.
3071// The result can be kept in a GPR if requested.
3072// Three types of inputs can be handled:
3073// - SETCC
3074// - TRUNCATE
3075// - Logical operation (AND/OR/XOR)
3076// There is also a special case that is handled (namely a complement operation
3077// achieved with xor %a, -1).
//
// \p LogicOp must be an i1-typed logic operation. Returns an i64 machine
// node (AND8/OR8/XOR8, or XORI8 with immediate 1 for the complement case),
// or an empty SDValue when an operand cannot be produced in a GPR.
// NumLogicOpsOnComparison is incremented for every operation lowered,
// including nested ones reached through recursion.
3078SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
// NOTE(review): the embedded listing numbers skip 3079, so the opening of
// this assert expression is missing from the listing; confirm against the
// upstream file.
3080 "Can only handle logic operations here.");
3081 assert(LogicOp.getValueType() == MVT::i1 &&
3082 "Can only handle logic operations on i1 values here.");
3083 SDLoc dl(LogicOp);
3084 SDValue LHS, RHS;
3085
3086 // Special case: xor %a, -1
3087 bool IsBitwiseNegation = isBitwiseNot(LogicOp);
3088
3089 // Produces a GPR sequence for each operand of the binary logic operation.
3090 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3091 // the value in a GPR and for logic operations, it will recursively produce
3092 // a GPR sequence for the operation.
// Any other operand kind yields an empty SDValue.
3093 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3094 unsigned OperandOpcode = Operand.getOpcode();
3095 if (OperandOpcode == ISD::SETCC)
3096 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3097 else if (OperandOpcode == ISD::TRUNCATE) {
3098 SDValue InputOp = Operand.getOperand(0);
3099 EVT InVT = InputOp.getValueType();
// Rotate by 0 and keep only bit 63 (mask begin 63), i.e. the low bit of the
// truncated value.
3100 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3101 PPC::RLDICL, dl, InVT, InputOp,
3102 S->getI64Imm(0, dl),
3103 S->getI64Imm(63, dl)), 0);
3104 } else if (ISD::isBitwiseLogicOp(OperandOpcode))
3105 return computeLogicOpInGPR(Operand);
3106 return SDValue();
3107 };
// Both operands are lowered before either result is checked.
3108 LHS = getLogicOperand(LogicOp.getOperand(0));
3109 RHS = getLogicOperand(LogicOp.getOperand(1));
3110
3111 // If a GPR sequence can't be produced for the LHS we can't proceed.
3112 // Not producing a GPR sequence for the RHS is only a problem if this isn't
3113 // a bitwise negation operation.
3114 if (!LHS || (!RHS && !IsBitwiseNegation))
3115 return SDValue();
3116
3117 NumLogicOpsOnComparison++;
3118
3119 // We will use the inputs as 64-bit values.
3120 if (LHS.getValueType() == MVT::i32)
3121 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
3122 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3123 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3124
3125 unsigned NewOpc;
3126 switch (LogicOp.getOpcode()) {
3127 default: llvm_unreachable("Unknown logic operation.");
3128 case ISD::AND: NewOpc = PPC::AND8; break;
3129 case ISD::OR: NewOpc = PPC::OR8; break;
3130 case ISD::XOR: NewOpc = PPC::XOR8; break;
3131 }
3132
// A complement is emitted as an XOR with the immediate 1; the RHS computed
// above (if any) is discarded.
3133 if (IsBitwiseNegation) {
3134 RHS = S->getI64Imm(1, dl);
3135 NewOpc = PPC::XORI8;
3136 }
3137
3138 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
3139
3140}
3141
3142/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3143/// Otherwise just reinterpret it as a 64-bit value.
3144/// Useful when emitting comparison code for 32-bit values without using
3145/// the compare instruction (which only considers the lower 32-bits).
3146SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3147 assert(Input.getValueType() == MVT::i32 &&
3148 "Can only sign-extend 32-bit values here.");
3149 unsigned Opc = Input.getOpcode();
3150
3151 // The value was sign extended and then truncated to 32-bits. No need to
3152 // sign extend it again.
3153 if (Opc == ISD::TRUNCATE &&
3154 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3155 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3156 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3157
3158 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3159 // The input is a sign-extending load. All ppc sign-extending loads
3160 // sign-extend to the full 64-bits.
3161 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3162 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3163
3164 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3165 // We don't sign-extend constants.
3166 if (InputConst)
3167 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3168
3169 SDLoc dl(Input);
3170 SignExtensionsAdded++;
3171 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3172 MVT::i64, Input), 0);
3173}
3174
3175/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3176/// Otherwise just reinterpret it as a 64-bit value.
3177/// Useful when emitting comparison code for 32-bit values without using
3178/// the compare instruction (which only considers the lower 32-bits).
3179SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3180 assert(Input.getValueType() == MVT::i32 &&
3181 "Can only zero-extend 32-bit values here.");
3182 unsigned Opc = Input.getOpcode();
3183
3184 // The only condition under which we can omit the actual extend instruction:
3185 // - The value is a positive constant
3186 // - The value comes from a load that isn't a sign-extending load
3187 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3188 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3189 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3190 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3191 if (IsTruncateOfZExt)
3192 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3193
3194 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3195 if (InputConst && InputConst->getSExtValue() >= 0)
3196 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3197
3198 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3199 // The input is a load that doesn't sign-extend (it will be zero-extended).
3200 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3201 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3202
3203 // None of the above, need to zero-extend.
3204 SDLoc dl(Input);
3205 ZeroExtensionsAdded++;
3206 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3207 S->getI64Imm(0, dl),
3208 S->getI64Imm(32, dl)), 0);
3209}
3210
3211// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3212// course not actual zero/sign extensions that will generate machine code,
3213// they're just a way to reinterpret a 32 bit value in a register as a
3214// 64 bit value and vice-versa.
3215SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3216 ExtOrTruncConversion Conv) {
3217 SDLoc dl(NatWidthRes);
3218
3219 // For reinterpreting 32-bit values as 64 bit values, we generate
3220 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3221 if (Conv == ExtOrTruncConversion::Ext) {
3222 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3223 SDValue SubRegIdx =
3224 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3225 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3226 ImDef, NatWidthRes, SubRegIdx), 0);
3227 }
3228
3229 assert(Conv == ExtOrTruncConversion::Trunc &&
3230 "Unknown convertion between 32 and 64 bit values.");
3231 // For reinterpreting 64-bit values as 32-bit values, we just need to
3232 // EXTRACT_SUBREG (i.e. extract the low word).
3233 SDValue SubRegIdx =
3234 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3235 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3236 NatWidthRes, SubRegIdx), 0);
3237}
3238
3239// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3240// Handle both zero-extensions and sign-extensions.
3241SDValue
3242IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3243 ZeroCompare CmpTy) {
3244 EVT InVT = LHS.getValueType();
3245 bool Is32Bit = InVT == MVT::i32;
3246 SDValue ToExtend;
3247
3248 // Produce the value that needs to be either zero or sign extended.
3249 switch (CmpTy) {
3250 case ZeroCompare::GEZExt:
3251 case ZeroCompare::GESExt:
3252 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3253 dl, InVT, LHS, LHS), 0);
3254 break;
3255 case ZeroCompare::LEZExt:
3256 case ZeroCompare::LESExt: {
3257 if (Is32Bit) {
3258 // Upper 32 bits cannot be undefined for this sequence.
3259 LHS = signExtendInputIfNeeded(LHS);
3260 SDValue Neg =
3261 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3262 ToExtend =
3263 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3264 Neg, S->getI64Imm(1, dl),
3265 S->getI64Imm(63, dl)), 0);
3266 } else {
3267 SDValue Addi =
3268 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3269 S->getI64Imm(~0ULL, dl)), 0);
3270 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3271 Addi, LHS), 0);
3272 }
3273 break;
3274 }
3275 }
3276
3277 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3278 if (!Is32Bit &&
3279 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3280 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3281 ToExtend, S->getI64Imm(1, dl),
3282 S->getI64Imm(63, dl)), 0);
3283 if (!Is32Bit &&
3284 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3285 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3286 S->getI64Imm(63, dl)), 0);
3287
3288 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3289 // For 32-bit sequences, the extensions differ between GE/LE cases.
3290 switch (CmpTy) {
3291 case ZeroCompare::GEZExt: {
3292 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3293 S->getI32Imm(31, dl) };
3294 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3295 ShiftOps), 0);
3296 }
3297 case ZeroCompare::GESExt:
3298 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3299 S->getI32Imm(31, dl)), 0);
3300 case ZeroCompare::LEZExt:
3301 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3302 S->getI32Imm(1, dl)), 0);
3303 case ZeroCompare::LESExt:
3304 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3305 S->getI32Imm(-1, dl)), 0);
3306 }
3307
3308 // The above case covers all the enumerators so it can't have a default clause
3309 // to avoid compiler warnings.
3310 llvm_unreachable("Unknown zero-comparison type.");
3311}
3312
3313/// Produces a zero-extended result of comparing two 32-bit values according to
3314/// the passed condition code.
3315SDValue
3316IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3317 ISD::CondCode CC,
3318 int64_t RHSValue, SDLoc dl) {
3321 return SDValue();
3322 bool IsRHSZero = RHSValue == 0;
3323 bool IsRHSOne = RHSValue == 1;
3324 bool IsRHSNegOne = RHSValue == -1LL;
3325 switch (CC) {
3326 default: return SDValue();
3327 case ISD::SETEQ: {
3328 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3329 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3330 SDValue Xor = IsRHSZero ? LHS :
3331 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3332 SDValue Clz =
3333 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3334 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3335 S->getI32Imm(31, dl) };
3336 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3337 ShiftOps), 0);
3338 }
3339 case ISD::SETNE: {
3340 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3341 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3342 SDValue Xor = IsRHSZero ? LHS :
3343 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3344 SDValue Clz =
3345 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3346 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3347 S->getI32Imm(31, dl) };
3348 SDValue Shift =
3349 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3350 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3351 S->getI32Imm(1, dl)), 0);
3352 }
3353 case ISD::SETGE: {
3354 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3355 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3356 if(IsRHSZero)
3357 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3358
3359 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3360 // by swapping inputs and falling through.
3361 std::swap(LHS, RHS);
3362 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3363 IsRHSZero = RHSConst && RHSConst->isZero();
3364 [[fallthrough]];
3365 }
3366 case ISD::SETLE: {
3367 if (CmpInGPR == ICGPR_NonExtIn)
3368 return SDValue();
3369 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3370 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3371 if(IsRHSZero) {
3372 if (CmpInGPR == ICGPR_NonExtIn)
3373 return SDValue();
3374 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3375 }
3376
3377 // The upper 32-bits of the register can't be undefined for this sequence.
3378 LHS = signExtendInputIfNeeded(LHS);
3379 RHS = signExtendInputIfNeeded(RHS);
3380 SDValue Sub =
3381 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3382 SDValue Shift =
3383 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3384 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3385 0);
3386 return
3387 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3388 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3389 }
3390 case ISD::SETGT: {
3391 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3392 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3393 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3394 // Handle SETLT -1 (which is equivalent to SETGE 0).
3395 if (IsRHSNegOne)
3396 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3397
3398 if (IsRHSZero) {
3399 if (CmpInGPR == ICGPR_NonExtIn)
3400 return SDValue();
3401 // The upper 32-bits of the register can't be undefined for this sequence.
3402 LHS = signExtendInputIfNeeded(LHS);
3403 RHS = signExtendInputIfNeeded(RHS);
3404 SDValue Neg =
3405 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3406 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3407 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3408 }
3409 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3410 // (%b < %a) by swapping inputs and falling through.
3411 std::swap(LHS, RHS);
3412 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3413 IsRHSZero = RHSConst && RHSConst->isZero();
3414 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3415 [[fallthrough]];
3416 }
3417 case ISD::SETLT: {
3418 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3419 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3420 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3421 // Handle SETLT 1 (which is equivalent to SETLE 0).
3422 if (IsRHSOne) {
3423 if (CmpInGPR == ICGPR_NonExtIn)
3424 return SDValue();
3425 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3426 }
3427
3428 if (IsRHSZero) {
3429 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3430 S->getI32Imm(31, dl) };
3431 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3432 ShiftOps), 0);
3433 }
3434
3435 if (CmpInGPR == ICGPR_NonExtIn)
3436 return SDValue();
3437 // The upper 32-bits of the register can't be undefined for this sequence.
3438 LHS = signExtendInputIfNeeded(LHS);
3439 RHS = signExtendInputIfNeeded(RHS);
3440 SDValue SUBFNode =
3441 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3442 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3443 SUBFNode, S->getI64Imm(1, dl),
3444 S->getI64Imm(63, dl)), 0);
3445 }
3446 case ISD::SETUGE:
3447 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3448 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3449 std::swap(LHS, RHS);
3450 [[fallthrough]];
3451 case ISD::SETULE: {
3452 if (CmpInGPR == ICGPR_NonExtIn)
3453 return SDValue();
3454 // The upper 32-bits of the register can't be undefined for this sequence.
3455 LHS = zeroExtendInputIfNeeded(LHS);
3456 RHS = zeroExtendInputIfNeeded(RHS);
3458 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3459 SDValue SrdiNode =
3460 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3461 Subtract, S->getI64Imm(1, dl),
3462 S->getI64Imm(63, dl)), 0);
3463 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3464 S->getI32Imm(1, dl)), 0);
3465 }
3466 case ISD::SETUGT:
3467 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3468 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3469 std::swap(LHS, RHS);
3470 [[fallthrough]];
3471 case ISD::SETULT: {
3472 if (CmpInGPR == ICGPR_NonExtIn)
3473 return SDValue();
3474 // The upper 32-bits of the register can't be undefined for this sequence.
3475 LHS = zeroExtendInputIfNeeded(LHS);
3476 RHS = zeroExtendInputIfNeeded(RHS);
3478 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3479 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3480 Subtract, S->getI64Imm(1, dl),
3481 S->getI64Imm(63, dl)), 0);
3482 }
3483 }
3484}
3485
3486/// Produces a sign-extended result of comparing two 32-bit values according to
3487/// the passed condition code.
3488SDValue
3489IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3490 ISD::CondCode CC,
3491 int64_t RHSValue, SDLoc dl) {
3494 return SDValue();
3495 bool IsRHSZero = RHSValue == 0;
3496 bool IsRHSOne = RHSValue == 1;
3497 bool IsRHSNegOne = RHSValue == -1LL;
3498
3499 switch (CC) {
3500 default: return SDValue();
3501 case ISD::SETEQ: {
3502 // (sext (setcc %a, %b, seteq)) ->
3503 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3504 // (sext (setcc %a, 0, seteq)) ->
3505 // (ashr (shl (ctlz %a), 58), 63)
3506 SDValue CountInput = IsRHSZero ? LHS :
3507 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3508 SDValue Cntlzw =
3509 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3510 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3511 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3512 SDValue Slwi =
3513 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3514 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3515 }
3516 case ISD::SETNE: {
3517 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3518 // flip the bit, finally take 2's complement.
3519 // (sext (setcc %a, %b, setne)) ->
3520 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3521 // Same as above, but the first xor is not needed.
3522 // (sext (setcc %a, 0, setne)) ->
3523 // (neg (xor (lshr (ctlz %a), 5), 1))
3524 SDValue Xor = IsRHSZero ? LHS :
3525 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3526 SDValue Clz =
3527 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3528 SDValue ShiftOps[] =
3529 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3530 SDValue Shift =
3531 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3532 SDValue Xori =
3533 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3534 S->getI32Imm(1, dl)), 0);
3535 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3536 }
3537 case ISD::SETGE: {
3538 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3539 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3540 if (IsRHSZero)
3541 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3542
3543 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3544 // by swapping inputs and falling through.
3545 std::swap(LHS, RHS);
3546 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3547 IsRHSZero = RHSConst && RHSConst->isZero();
3548 [[fallthrough]];
3549 }
3550 case ISD::SETLE: {
3551 if (CmpInGPR == ICGPR_NonExtIn)
3552 return SDValue();
3553 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3554 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3555 if (IsRHSZero)
3556 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3557
3558 // The upper 32-bits of the register can't be undefined for this sequence.
3559 LHS = signExtendInputIfNeeded(LHS);
3560 RHS = signExtendInputIfNeeded(RHS);
3561 SDValue SUBFNode =
3562 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3563 LHS, RHS), 0);
3564 SDValue Srdi =
3565 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3566 SUBFNode, S->getI64Imm(1, dl),
3567 S->getI64Imm(63, dl)), 0);
3568 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3569 S->getI32Imm(-1, dl)), 0);
3570 }
3571 case ISD::SETGT: {
3572 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3573 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3574 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3575 if (IsRHSNegOne)
3576 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3577 if (IsRHSZero) {
3578 if (CmpInGPR == ICGPR_NonExtIn)
3579 return SDValue();
3580 // The upper 32-bits of the register can't be undefined for this sequence.
3581 LHS = signExtendInputIfNeeded(LHS);
3582 RHS = signExtendInputIfNeeded(RHS);
3583 SDValue Neg =
3584 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3585 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3586 S->getI64Imm(63, dl)), 0);
3587 }
3588 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3589 // (%b < %a) by swapping inputs and falling through.
3590 std::swap(LHS, RHS);
3591 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3592 IsRHSZero = RHSConst && RHSConst->isZero();
3593 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3594 [[fallthrough]];
3595 }
3596 case ISD::SETLT: {
3597 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3598 // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3599 // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3600 if (IsRHSOne) {
3601 if (CmpInGPR == ICGPR_NonExtIn)
3602 return SDValue();
3603 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3604 }
3605 if (IsRHSZero)
3606 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3607 S->getI32Imm(31, dl)), 0);
3608
3609 if (CmpInGPR == ICGPR_NonExtIn)
3610 return SDValue();
3611 // The upper 32-bits of the register can't be undefined for this sequence.
3612 LHS = signExtendInputIfNeeded(LHS);
3613 RHS = signExtendInputIfNeeded(RHS);
3614 SDValue SUBFNode =
3615 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3616 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3617 SUBFNode, S->getI64Imm(63, dl)), 0);
3618 }
3619 case ISD::SETUGE:
3620 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3621 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3622 std::swap(LHS, RHS);
3623 [[fallthrough]];
3624 case ISD::SETULE: {
3625 if (CmpInGPR == ICGPR_NonExtIn)
3626 return SDValue();
3627 // The upper 32-bits of the register can't be undefined for this sequence.
3628 LHS = zeroExtendInputIfNeeded(LHS);
3629 RHS = zeroExtendInputIfNeeded(RHS);
3631 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3632 SDValue Shift =
3633 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3634 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3635 0);
3636 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3637 S->getI32Imm(-1, dl)), 0);
3638 }
3639 case ISD::SETUGT:
3640 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3641 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3642 std::swap(LHS, RHS);
3643 [[fallthrough]];
3644 case ISD::SETULT: {
3645 if (CmpInGPR == ICGPR_NonExtIn)
3646 return SDValue();
3647 // The upper 32-bits of the register can't be undefined for this sequence.
3648 LHS = zeroExtendInputIfNeeded(LHS);
3649 RHS = zeroExtendInputIfNeeded(RHS);
3651 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3652 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3653 Subtract, S->getI64Imm(63, dl)), 0);
3654 }
3655 }
3656}
3657
3658/// Produces a zero-extended result of comparing two 64-bit values according to
3659/// the passed condition code.
3660SDValue
3661IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3662 ISD::CondCode CC,
3663 int64_t RHSValue, SDLoc dl) {
3666 return SDValue();
3667 bool IsRHSZero = RHSValue == 0;
3668 bool IsRHSOne = RHSValue == 1;
3669 bool IsRHSNegOne = RHSValue == -1LL;
3670 switch (CC) {
3671 default: return SDValue();
3672 case ISD::SETEQ: {
3673 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3674 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3675 SDValue Xor = IsRHSZero ? LHS :
3676 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3677 SDValue Clz =
3678 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3679 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3680 S->getI64Imm(58, dl),
3681 S->getI64Imm(63, dl)), 0);
3682 }
3683 case ISD::SETNE: {
3684 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3685 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3686 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3687 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3688 SDValue Xor = IsRHSZero ? LHS :
3689 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3690 SDValue AC =
3691 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3692 Xor, S->getI32Imm(~0U, dl)), 0);
3693 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3694 Xor, AC.getValue(1)), 0);
3695 }
3696 case ISD::SETGE: {
3697 // {subc.reg, subc.CA} = (subcarry %a, %b)
3698 // (zext (setcc %a, %b, setge)) ->
3699 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3700 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3701 if (IsRHSZero)
3702 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3703 std::swap(LHS, RHS);
3704 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3705 IsRHSZero = RHSConst && RHSConst->isZero();
3706 [[fallthrough]];
3707 }
3708 case ISD::SETLE: {
3709 // {subc.reg, subc.CA} = (subcarry %b, %a)
3710 // (zext (setcc %a, %b, setge)) ->
3711 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3712 // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3713 if (IsRHSZero)
3714 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3715 SDValue ShiftL =
3716 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3717 S->getI64Imm(1, dl),
3718 S->getI64Imm(63, dl)), 0);
3719 SDValue ShiftR =
3720 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3721 S->getI64Imm(63, dl)), 0);
3722 SDValue SubtractCarry =
3723 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3724 LHS, RHS), 1);
3725 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3726 ShiftR, ShiftL, SubtractCarry), 0);
3727 }
3728 case ISD::SETGT: {
3729 // {subc.reg, subc.CA} = (subcarry %b, %a)
3730 // (zext (setcc %a, %b, setgt)) ->
3731 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3732 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3733 if (IsRHSNegOne)
3734 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3735 if (IsRHSZero) {
3736 SDValue Addi =
3737 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3738 S->getI64Imm(~0ULL, dl)), 0);
3739 SDValue Nor =
3740 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3741 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3742 S->getI64Imm(1, dl),
3743 S->getI64Imm(63, dl)), 0);
3744 }
3745 std::swap(LHS, RHS);
3746 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3747 IsRHSZero = RHSConst && RHSConst->isZero();
3748 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3749 [[fallthrough]];
3750 }
3751 case ISD::SETLT: {
3752 // {subc.reg, subc.CA} = (subcarry %a, %b)
3753 // (zext (setcc %a, %b, setlt)) ->
3754 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3755 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3756 if (IsRHSOne)
3757 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3758 if (IsRHSZero)
3759 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3760 S->getI64Imm(1, dl),
3761 S->getI64Imm(63, dl)), 0);
3762 SDValue SRADINode =
3763 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3764 LHS, S->getI64Imm(63, dl)), 0);
3765 SDValue SRDINode =
3766 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3767 RHS, S->getI64Imm(1, dl),
3768 S->getI64Imm(63, dl)), 0);
3769 SDValue SUBFC8Carry =
3770 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3771 RHS, LHS), 1);
3772 SDValue ADDE8Node =
3773 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3774 SRDINode, SRADINode, SUBFC8Carry), 0);
3775 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3776 ADDE8Node, S->getI64Imm(1, dl)), 0);
3777 }
3778 case ISD::SETUGE:
3779 // {subc.reg, subc.CA} = (subcarry %a, %b)
3780 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3781 std::swap(LHS, RHS);
3782 [[fallthrough]];
3783 case ISD::SETULE: {
3784 // {subc.reg, subc.CA} = (subcarry %b, %a)
3785 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3786 SDValue SUBFC8Carry =
3787 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3788 LHS, RHS), 1);
3789 SDValue SUBFE8Node =
3790 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3791 LHS, LHS, SUBFC8Carry), 0);
3792 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3793 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3794 }
3795 case ISD::SETUGT:
3796 // {subc.reg, subc.CA} = (subcarry %b, %a)
3797 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3798 std::swap(LHS, RHS);
3799 [[fallthrough]];
3800 case ISD::SETULT: {
3801 // {subc.reg, subc.CA} = (subcarry %a, %b)
3802 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3803 SDValue SubtractCarry =
3804 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3805 RHS, LHS), 1);
3806 SDValue ExtSub =
3807 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3808 LHS, LHS, SubtractCarry), 0);
3809 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3810 ExtSub), 0);
3811 }
3812 }
3813}
3814
3815/// Produces a sign-extended result of comparing two 64-bit values according to
3816/// the passed condition code.
3817SDValue
3818IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3819 ISD::CondCode CC,
3820 int64_t RHSValue, SDLoc dl) {
3823 return SDValue();
3824 bool IsRHSZero = RHSValue == 0;
3825 bool IsRHSOne = RHSValue == 1;
3826 bool IsRHSNegOne = RHSValue == -1LL;
3827 switch (CC) {
3828 default: return SDValue();
3829 case ISD::SETEQ: {
3830 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3831 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3832 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3833 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3834 SDValue AddInput = IsRHSZero ? LHS :
3835 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3836 SDValue Addic =
3837 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3838 AddInput, S->getI32Imm(~0U, dl)), 0);
3839 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3840 Addic, Addic.getValue(1)), 0);
3841 }
3842 case ISD::SETNE: {
3843 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3844 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3845 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3846 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3847 SDValue Xor = IsRHSZero ? LHS :
3848 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3849 SDValue SC =
3850 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3851 Xor, S->getI32Imm(0, dl)), 0);
3852 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3853 SC, SC.getValue(1)), 0);
3854 }
3855 case ISD::SETGE: {
3856 // {subc.reg, subc.CA} = (subcarry %a, %b)
3857 // (zext (setcc %a, %b, setge)) ->
3858 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3859 // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3860 if (IsRHSZero)
3861 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3862 std::swap(LHS, RHS);
3863 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3864 IsRHSZero = RHSConst && RHSConst->isZero();
3865 [[fallthrough]];
3866 }
3867 case ISD::SETLE: {
3868 // {subc.reg, subc.CA} = (subcarry %b, %a)
3869 // (zext (setcc %a, %b, setge)) ->
3870 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3871 // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3872 if (IsRHSZero)
3873 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3874 SDValue ShiftR =
3875 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3876 S->getI64Imm(63, dl)), 0);
3877 SDValue ShiftL =
3878 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3879 S->getI64Imm(1, dl),
3880 S->getI64Imm(63, dl)), 0);
3881 SDValue SubtractCarry =
3882 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3883 LHS, RHS), 1);
3884 SDValue Adde =
3885 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3886 ShiftR, ShiftL, SubtractCarry), 0);
3887 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3888 }
3889 case ISD::SETGT: {
3890 // {subc.reg, subc.CA} = (subcarry %b, %a)
3891 // (zext (setcc %a, %b, setgt)) ->
3892 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3893 // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3894 if (IsRHSNegOne)
3895 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3896 if (IsRHSZero) {
3897 SDValue Add =
3898 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3899 S->getI64Imm(-1, dl)), 0);
3900 SDValue Nor =
3901 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3902 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3903 S->getI64Imm(63, dl)), 0);
3904 }
3905 std::swap(LHS, RHS);
3906 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3907 IsRHSZero = RHSConst && RHSConst->isZero();
3908 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3909 [[fallthrough]];
3910 }
3911 case ISD::SETLT: {
3912 // {subc.reg, subc.CA} = (subcarry %a, %b)
3913 // (zext (setcc %a, %b, setlt)) ->
3914 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3915 // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3916 if (IsRHSOne)
3917 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3918 if (IsRHSZero) {
3919 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3920 S->getI64Imm(63, dl)), 0);
3921 }
3922 SDValue SRADINode =
3923 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3924 LHS, S->getI64Imm(63, dl)), 0);
3925 SDValue SRDINode =
3926 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3927 RHS, S->getI64Imm(1, dl),
3928 S->getI64Imm(63, dl)), 0);
3929 SDValue SUBFC8Carry =
3930 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3931 RHS, LHS), 1);
3932 SDValue ADDE8Node =
3933 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3934 SRDINode, SRADINode, SUBFC8Carry), 0);
3935 SDValue XORI8Node =
3936 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3937 ADDE8Node, S->getI64Imm(1, dl)), 0);
3938 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3939 XORI8Node), 0);
3940 }
3941 case ISD::SETUGE:
3942 // {subc.reg, subc.CA} = (subcarry %a, %b)
3943 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3944 std::swap(LHS, RHS);
3945 [[fallthrough]];
3946 case ISD::SETULE: {
3947 // {subc.reg, subc.CA} = (subcarry %b, %a)
3948 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3949 SDValue SubtractCarry =
3950 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3951 LHS, RHS), 1);
3952 SDValue ExtSub =
3953 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3954 LHS, SubtractCarry), 0);
3955 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3956 ExtSub, ExtSub), 0);
3957 }
3958 case ISD::SETUGT:
3959 // {subc.reg, subc.CA} = (subcarry %b, %a)
3960 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3961 std::swap(LHS, RHS);
3962 [[fallthrough]];
3963 case ISD::SETULT: {
3964 // {subc.reg, subc.CA} = (subcarry %a, %b)
3965 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3966 SDValue SubCarry =
3967 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3968 RHS, LHS), 1);
3969 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3970 LHS, LHS, SubCarry), 0);
3971 }
3972 }
3973}
3974
3975/// Do all uses of this SDValue need the result in a GPR?
3976/// This is meant to be used on values that have type i1 since
3977/// it is somewhat meaningless to ask if values of other types
3978/// should be kept in GPR's.
3979static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3980 assert(Compare.getOpcode() == ISD::SETCC &&
3981 "An ISD::SETCC node required here.");
3982
3983 // For values that have a single use, the caller should obviously already have
3984 // checked if that use is an extending use. We check the other uses here.
3985 if (Compare.hasOneUse())
3986 return true;
3987 // We want the value in a GPR if it is being extended, used for a select, or
3988 // used in logical operations.
3989 for (auto *CompareUse : Compare.getNode()->users())
3990 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3991 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3992 CompareUse->getOpcode() != ISD::SELECT &&
3993 !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
3994 OmittedForNonExtendUses++;
3995 return false;
3996 }
3997 return true;
3998}
3999
4000/// Returns an equivalent of a SETCC node but with the result the same width as
4001/// the inputs. This can also be used for SELECT_CC if either the true or false
4002/// values is a power of two while the other is zero.
4003SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
4004 SetccInGPROpts ConvOpts) {
4005 assert((Compare.getOpcode() == ISD::SETCC ||
4006 Compare.getOpcode() == ISD::SELECT_CC) &&
4007 "An ISD::SETCC node required here.");
4008
4009 // Don't convert this comparison to a GPR sequence because there are uses
4010 // of the i1 result (i.e. uses that require the result in the CR).
4011 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4012 return SDValue();
4013
4014 SDValue LHS = Compare.getOperand(0);
4015 SDValue RHS = Compare.getOperand(1);
4016
4017 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4018 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
4019 ISD::CondCode CC =
4020 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4021 EVT InputVT = LHS.getValueType();
4022 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4023 return SDValue();
4024
4025 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4026 ConvOpts == SetccInGPROpts::SExtInvert)
4027 CC = ISD::getSetCCInverse(CC, InputVT);
4028
4029 bool Inputs32Bit = InputVT == MVT::i32;
4030
4031 SDLoc dl(Compare);
4032 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4033 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4034 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4035 ConvOpts == SetccInGPROpts::SExtInvert;
4036
4037 if (IsSext && Inputs32Bit)
4038 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4039 else if (Inputs32Bit)
4040 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4041 else if (IsSext)
4042 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4043 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4044}
4045
4046} // end anonymous namespace
4047
4048bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4049 if (N->getValueType(0) != MVT::i32 &&
4050 N->getValueType(0) != MVT::i64)
4051 return false;
4052
4053 // This optimization will emit code that assumes 64-bit registers
4054 // so we don't want to run it in 32-bit mode. Also don't run it
4055 // on functions that are not to be optimized.
4056 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4057 return false;
4058
4059 // For POWER10, it is more profitable to use the set boolean extension
4060 // instructions rather than the integer compare elimination codegen.
4061 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4062 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4063 return false;
4064
4065 switch (N->getOpcode()) {
4066 default: break;
4067 case ISD::ZERO_EXTEND:
4068 case ISD::SIGN_EXTEND:
4069 case ISD::AND:
4070 case ISD::OR:
4071 case ISD::XOR: {
4072 IntegerCompareEliminator ICmpElim(CurDAG, this);
4073 if (SDNode *New = ICmpElim.Select(N)) {
4074 ReplaceNode(N, New);
4075 return true;
4076 }
4077 }
4078 }
4079 return false;
4080}
4081
4082bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4083 if (N->getValueType(0) != MVT::i32 &&
4084 N->getValueType(0) != MVT::i64)
4085 return false;
4086
4087 if (!UseBitPermRewriter)
4088 return false;
4089
4090 switch (N->getOpcode()) {
4091 default: break;
4092 case ISD::SRL:
4093 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4094 // uses the BRH instruction.
4095 if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4096 N->getOperand(0).getOpcode() == ISD::BSWAP) {
4097 auto &OpRight = N->getOperand(1);
4098 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
4099 if (SRLConst && SRLConst->getSExtValue() == 16)
4100 return false;
4101 }
4102 [[fallthrough]];
4103 case ISD::ROTL:
4104 case ISD::SHL:
4105 case ISD::AND:
4106 case ISD::OR: {
4107 BitPermutationSelector BPS(CurDAG);
4108 if (SDNode *New = BPS.Select(N)) {
4109 ReplaceNode(N, New);
4110 return true;
4111 }
4112 return false;
4113 }
4114 }
4115
4116 return false;
4117}
4118
4119/// SelectCC - Select a comparison of the specified values with the specified
4120/// condition code, returning the CR# of the expression.
4121SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4122 const SDLoc &dl, SDValue Chain) {
4123 // Always select the LHS.
4124 unsigned Opc;
4125
4126 if (LHS.getValueType() == MVT::i32) {
4127 unsigned Imm;
4128 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4129 if (isInt32Immediate(RHS, Imm)) {
4130 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4131 if (isUInt<16>(Imm))
4132 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4133 getI32Imm(Imm & 0xFFFF, dl)),
4134 0);
4135 // If this is a 16-bit signed immediate, fold it.
4136 if (isInt<16>((int)Imm))
4137 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4138 getI32Imm(Imm & 0xFFFF, dl)),
4139 0);
4140
4141 // For non-equality comparisons, the default code would materialize the
4142 // constant, then compare against it, like this:
4143 // lis r2, 4660
4144 // ori r2, r2, 22136
4145 // cmpw cr0, r3, r2
4146 // Since we are just comparing for equality, we can emit this instead:
4147 // xoris r0,r3,0x1234
4148 // cmplwi cr0,r0,0x5678
4149 // beq cr0,L6
4150 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4151 getI32Imm(Imm >> 16, dl)), 0);
4152 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4153 getI32Imm(Imm & 0xFFFF, dl)), 0);
4154 }
4155 Opc = PPC::CMPLW;
4156 } else if (ISD::isUnsignedIntSetCC(CC)) {
4157 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4158 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4159 getI32Imm(Imm & 0xFFFF, dl)), 0);
4160 Opc = PPC::CMPLW;
4161 } else {
4162 int16_t SImm;
4163 if (isIntS16Immediate(RHS, SImm))
4164 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4165 getI32Imm((int)SImm & 0xFFFF,
4166 dl)),
4167 0);
4168 Opc = PPC::CMPW;
4169 }
4170 } else if (LHS.getValueType() == MVT::i64) {
4171 uint64_t Imm;
4172 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4173 if (isInt64Immediate(RHS.getNode(), Imm)) {
4174 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4175 if (isUInt<16>(Imm))
4176 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4177 getI32Imm(Imm & 0xFFFF, dl)),
4178 0);
4179 // If this is a 16-bit signed immediate, fold it.
4180 if (isInt<16>(Imm))
4181 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4182 getI32Imm(Imm & 0xFFFF, dl)),
4183 0);
4184
4185 // For non-equality comparisons, the default code would materialize the
4186 // constant, then compare against it, like this:
4187 // lis r2, 4660
4188 // ori r2, r2, 22136
4189 // cmpd cr0, r3, r2
4190 // Since we are just comparing for equality, we can emit this instead:
4191 // xoris r0,r3,0x1234
4192 // cmpldi cr0,r0,0x5678
4193 // beq cr0,L6
4194 if (isUInt<32>(Imm)) {
4195 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4196 getI64Imm(Imm >> 16, dl)), 0);
4197 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4198 getI64Imm(Imm & 0xFFFF, dl)),
4199 0);
4200 }
4201 }
4202 Opc = PPC::CMPLD;
4203 } else if (ISD::isUnsignedIntSetCC(CC)) {
4204 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4205 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4206 getI64Imm(Imm & 0xFFFF, dl)), 0);
4207 Opc = PPC::CMPLD;
4208 } else {
4209 int16_t SImm;
4210 if (isIntS16Immediate(RHS, SImm))
4211 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4212 getI64Imm(SImm & 0xFFFF, dl)),
4213 0);
4214 Opc = PPC::CMPD;
4215 }
4216 } else if (LHS.getValueType() == MVT::f32) {
4217 if (Subtarget->hasSPE()) {
4218 switch (CC) {
4219 default:
4220 case ISD::SETEQ:
4221 case ISD::SETNE:
4222 Opc = PPC::EFSCMPEQ;
4223 break;
4224 case ISD::SETLT:
4225 case ISD::SETGE:
4226 case ISD::SETOLT:
4227 case ISD::SETOGE:
4228 case ISD::SETULT:
4229 case ISD::SETUGE:
4230 Opc = PPC::EFSCMPLT;
4231 break;
4232 case ISD::SETGT:
4233 case ISD::SETLE:
4234 case ISD::SETOGT:
4235 case ISD::SETOLE:
4236 case ISD::SETUGT:
4237 case ISD::SETULE:
4238 Opc = PPC::EFSCMPGT;
4239 break;
4240 }
4241 } else
4242 Opc = PPC::FCMPUS;
4243 } else if (LHS.getValueType() == MVT::f64) {
4244 if (Subtarget->hasSPE()) {
4245 switch (CC) {
4246 default:
4247 case ISD::SETEQ:
4248 case ISD::SETNE:
4249 Opc = PPC::EFDCMPEQ;
4250 break;
4251 case ISD::SETLT:
4252 case ISD::SETGE:
4253 case ISD::SETOLT:
4254 case ISD::SETOGE:
4255 case ISD::SETULT:
4256 case ISD::SETUGE:
4257 Opc = PPC::EFDCMPLT;
4258 break;
4259 case ISD::SETGT:
4260 case ISD::SETLE:
4261 case ISD::SETOGT:
4262 case ISD::SETOLE:
4263 case ISD::SETUGT:
4264 case ISD::SETULE:
4265 Opc = PPC::EFDCMPGT;
4266 break;
4267 }
4268 } else
4269 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4270 } else {
4271 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4272 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4273 Opc = PPC::XSCMPUQP;
4274 }
4275 if (Chain)
4276 return SDValue(
4277 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4278 0);
4279 else
4280 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4281}
4282
4284 const PPCSubtarget *Subtarget) {
// NOTE(review): the listing jumps from 4282 to 4284, so the first line of this
// function's signature (its name, return type and leading parameters) is not
// visible here. The body reads `CC` and `VT`, which are presumably declared on
// that missing line - confirm against the original file.
//
// Translates an ISD condition code into one of the PPC::PRED_* predicates.
// When UseSPE is set (the subtarget has SPE and VT is a floating-point type)
// the equality and less-than codes map to PRED_GT and the not-equal and
// greater-or-equal codes map to PRED_LE instead of their usual predicates.
4285 // For SPE instructions, the result is in GT bit of the CR
4286 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4287
4288 switch (CC) {
// These four codes are not expected to reach this function at all.
4289 case ISD::SETUEQ:
4290 case ISD::SETONE:
4291 case ISD::SETOLE:
4292 case ISD::SETOGE:
4293 llvm_unreachable("Should be lowered by legalize!");
4294 default: llvm_unreachable("Unknown condition!");
4295 case ISD::SETOEQ:
4296 case ISD::SETEQ:
4297 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4298 case ISD::SETUNE:
4299 case ISD::SETNE:
4300 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4301 case ISD::SETOLT:
4302 case ISD::SETLT:
4303 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
// The LE and GT groups return the same predicate with or without SPE.
4304 case ISD::SETULE:
4305 case ISD::SETLE:
4306 return PPC::PRED_LE;
4307 case ISD::SETOGT:
4308 case ISD::SETGT:
4309 return PPC::PRED_GT;
4310 case ISD::SETUGE:
4311 case ISD::SETGE:
4312 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4313 case ISD::SETO: return PPC::PRED_NU;
4314 case ISD::SETUO: return PPC::PRED_UN;
4315 // These two are invalid for floating point. Assume we have int.
4316 case ISD::SETULT: return PPC::PRED_LT;
4317 case ISD::SETUGT: return PPC::PRED_GT;
4318 }
4319}
4320
4321/// getCRIdxForSetCC - Return the index of the condition register field
4322/// associated with the SetCC condition, and whether or not the field is
4323/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4324static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4325 Invert = false;
4326 switch (CC) {
4327 default: llvm_unreachable("Unknown condition!");
4328 case ISD::SETOLT:
4329 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4330 case ISD::SETOGT:
4331 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4332 case ISD::SETOEQ:
4333 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4334 case ISD::SETUO: return 3; // Bit #3 = SETUO
4335 case ISD::SETUGE:
4336 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4337 case ISD::SETULE:
4338 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4339 case ISD::SETUNE:
4340 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4341 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4342 case ISD::SETUEQ:
4343 case ISD::SETOGE:
4344 case ISD::SETOLE:
4345 case ISD::SETONE:
4346 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4347 // These are invalid for floating point. Assume integer.
4348 case ISD::SETULT: return 0;
4349 case ISD::SETUGT: return 1;
4350 }
4351}
4352
4353// getVCmpInst: return the vector compare instruction for the specified
4354// vector type and condition code. Since this is for altivec specific code,
4355// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4356// and v4f32).
4357static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4358 bool HasVSX, bool &Swap, bool &Negate) {
4359 Swap = false;
4360 Negate = false;
4361
4362 if (VecVT.isFloatingPoint()) {
4363 /* Handle some cases by swapping input operands. */
4364 switch (CC) {
4365 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4366 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4367 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4368 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4369 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4370 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4371 default: break;
4372 }
4373 /* Handle some cases by negating the result. */
4374 switch (CC) {
4375 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4376 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4377 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4378 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4379 default: break;
4380 }
4381 /* We have instructions implementing the remaining cases. */
4382 switch (CC) {
4383 case ISD::SETEQ:
4384 case ISD::SETOEQ:
4385 if (VecVT == MVT::v4f32)
4386 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4387 else if (VecVT == MVT::v2f64)
4388 return PPC::XVCMPEQDP;
4389 break;
4390 case ISD::SETGT:
4391 case ISD::SETOGT:
4392 if (VecVT == MVT::v4f32)
4393 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4394 else if (VecVT == MVT::v2f64)
4395 return PPC::XVCMPGTDP;
4396 break;
4397 case ISD::SETGE:
4398 case ISD::SETOGE:
4399 if (VecVT == MVT::v4f32)
4400 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4401 else if (VecVT == MVT::v2f64)
4402 return PPC::XVCMPGEDP;
4403 break;
4404 default:
4405 break;
4406 }
4407 llvm_unreachable("Invalid floating-point vector compare condition");
4408 } else {
4409 /* Handle some cases by swapping input operands. */
4410 switch (CC) {
4411 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4412 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4413 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4414 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4415 default: break;
4416 }
4417 /* Handle some cases by negating the result. */
4418 switch (CC) {
4419 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4420 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4421 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4422 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4423 default: break;
4424 }
4425 /* We have instructions implementing the remaining cases. */
4426 switch (CC) {
4427 case ISD::SETEQ:
4428 case ISD::SETUEQ:
4429 if (VecVT == MVT::v16i8)
4430 return PPC::VCMPEQUB;
4431 else if (VecVT == MVT::v8i16)
4432 return PPC::VCMPEQUH;
4433 else if (VecVT == MVT::v4i32)
4434 return PPC::VCMPEQUW;
4435 else if (VecVT == MVT::v2i64)
4436 return PPC::VCMPEQUD;
4437 else if (VecVT == MVT::v1i128)
4438 return PPC::VCMPEQUQ;
4439 break;
4440 case ISD::SETGT:
4441 if (VecVT == MVT::v16i8)
4442 return PPC::VCMPGTSB;
4443 else if (VecVT == MVT::v8i16)
4444 return PPC::VCMPGTSH;
4445 else if (VecVT == MVT::v4i32)
4446 return PPC::VCMPGTSW;
4447 else if (VecVT == MVT::v2i64)
4448 return PPC::VCMPGTSD;
4449 else if (VecVT == MVT::v1i128)
4450 return PPC::VCMPGTSQ;
4451 break;
4452 case ISD::SETUGT:
4453 if (VecVT == MVT::v16i8)
4454 return PPC::VCMPGTUB;
4455 else if (VecVT == MVT::v8i16)
4456 return PPC::VCMPGTUH;
4457 else if (VecVT == MVT::v4i32)
4458 return PPC::VCMPGTUW;
4459 else if (VecVT == MVT::v2i64)
4460 return PPC::VCMPGTUD;
4461 else if (VecVT == MVT::v1i128)
4462 return PPC::VCMPGTUQ;
4463 break;
4464 default:
4465 break;
4466 }
4467 llvm_unreachable("Invalid integer vector compare condition");
4468 }
4469}
4470
// Try to select a setcc node (plain or strict floating-point) by hand.
// Returns true when N has been morphed into machine nodes, and false when the
// node is left untouched (vector compares on SPE subtargets, and scalar
// compares when the subtarget uses CR bits).
//
// For a strict node the operands are (chain, lhs, rhs, cc); otherwise they are
// (lhs, rhs, cc).
4471bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4472 SDLoc dl(N);
4473 unsigned Imm;
4474 bool IsStrict = N->isStrictFPOpcode();
4475 ISD::CondCode CC =
4476 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
4477 EVT PtrVT =
// NOTE(review): listing line 4478 (the initialiser of PtrVT) is missing from
// this view - confirm against the original file.
4479 bool isPPC64 = (PtrVT == MVT::i64);
4480 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4481
4482 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4483 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4484
// Fast paths: non-strict compare against the 32-bit constants 0 and -1 when
// CR bits are not in use. Each handled case builds a short GPR-only sequence.
4485 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4486 // We can codegen setcc op, imm very efficiently compared to a brcond.
4487 // Check for those cases here.
4488 // setcc op, 0
4489 if (Imm == 0) {
4490 SDValue Op = LHS;
4491 switch (CC) {
4492 default: break;
4493 case ISD::SETEQ: {
// cntlzw followed by a rotate that keeps only bit 31 of (count rotated
// left by 27).
4494 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4495 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4496 getI32Imm(31, dl) };
4497 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4498 return true;
4499 }
4500 case ISD::SETNE: {
// Not used on 64-bit targets; falls through to the generic path below.
4501 if (isPPC64) break;
4502 SDValue AD =
4503 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4504 Op, getI32Imm(~0U, dl)), 0);
4505 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4506 return true;
4507 }
4508 case ISD::SETLT: {
// Rotate left by one and keep bit 31 only.
4509 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4510 getI32Imm(31, dl) };
4511 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4512 return true;
4513 }
4514 case ISD::SETGT: {
// NEG, then ANDC with the original value, then the same rotate-and-mask as
// the SETLT case.
4515 SDValue T =
4516 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4517 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4518 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4519 getI32Imm(31, dl) };
4520 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4521 return true;
4522 }
4523 }
4524 } else if (Imm == ~0U) { // setcc op, -1
4525 SDValue Op = LHS;
4526 switch (CC) {
4527 default: break;
4528 case ISD::SETEQ:
4529 if (isPPC64) break;
// ADDIC op, 1 produces a glue result that feeds ADDZE of a zero loaded by LI.
4530 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4531 Op, getI32Imm(1, dl)), 0);
4532 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4533 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4534 MVT::i32,
4535 getI32Imm(0, dl)),
4536 0), Op.getValue(1));
4537 return true;
4538 case ISD::SETNE: {
4539 if (isPPC64) break;
// NOR op, op complements the value; the ADDIC/SUBFE pair is then the same as
// in the "setcc op, 0" SETNE case above.
4540 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4541 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4542 Op, getI32Imm(~0U, dl));
4543 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4544 SDValue(AD, 1));
4545 return true;
4546 }
4547 case ISD::SETLT: {
4548 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4549 getI32Imm(1, dl)), 0);
4550 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4551 Op), 0);
4552 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4553 getI32Imm(31, dl) };
4554 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4555 return true;
4556 }
4557 case ISD::SETGT: {
// Same rotate-and-mask as SETLT against 0, then XORI with 1 to flip it.
4558 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4559 getI32Imm(31, dl) };
4560 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4561 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4562 return true;
4563 }
4564 }
4565 }
4566 }
4567
4568 // Altivec Vector compare instructions do not set any CR register by default and
4569 // vector compare operations return the same type as the operands.
4570 if (!IsStrict && LHS.getValueType().isVector()) {
4571 if (Subtarget->hasSPE())
4572 return false;
4573
4574 EVT VecVT = LHS.getValueType();
4575 bool Swap, Negate;
4576 unsigned int VCmpInst =
4577 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4578 if (Swap)
4579 std::swap(LHS, RHS);
4580
4581 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
// A negated compare is emitted as the compare followed by a NOR of the
// result with itself (XXLNOR with VSX, VNOR otherwise).
4582 if (Negate) {
4583 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4584 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4585 ResVT, VCmp, VCmp);
4586 return true;
4587 }
4588
4589 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4590 return true;
4591 }
4592
4593 if (Subtarget->useCRBits())
4594 return false;
4595
// Generic path: emit the compare, copy its result into CR7, read CR7 with
// MFOCRF and extract the bit chosen by getCRIdxForSetCC.
4596 bool Inv;
4597 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4598 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
// For a strict node, users of N's second result are redirected to the second
// result of the compare node.
4599 if (IsStrict)
4600 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4601 SDValue IntCR;
4602
4603 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4604 // The correct compare instruction is already set by SelectCC()
4605 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4606 Idx = 1;
4607 }
4608
4609 // Force the ccreg into CR7.
4610 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4611
4612 SDValue InGlue; // Null incoming flag value.
4613 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4614 InGlue).getValue(1);
4615
4616 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4617 CCReg), 0);
4618
// The rotate amount depends on Idx and the mask keeps bit 31 only, which
// presumably moves the selected CR7 bit into the least significant bit of
// the result - confirm against the MFOCRF/RLWINM definitions.
4619 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4620 getI32Imm(31, dl), getI32Imm(31, dl) };
4621 if (!Inv) {
4622 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4623 return true;
4624 }
4625
4626 // Get the specified bit.
4627 SDValue Tmp =
4628 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
// Inverted condition: flip the extracted bit.
4629 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4630 return true;
4631}
4632
4633/// Does this node represent a load/store node whose address can be represented
4634/// with a register plus an immediate that's a multiple of \p Val:
4635bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4636 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4637 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4638 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4639 SDValue AddrOp;
4640 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4641 AddrOp = N->getOperand(1);
4642 else if (STN)
4643 AddrOp = STN->getOperand(2);
4644
4645 // If the address points a frame object or a frame object with an offset,
4646 // we need to check the object alignment.
4647 short Imm = 0;
4648 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4649 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4650 AddrOp)) {
4651 // If op0 is a frame index that is under aligned, we can't do it either,
4652 // because it is translated to r31 or r1 + slot + offset. We won't know the
4653 // slot number until the stack frame is finalized.
4654 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4655 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4656 if ((SlotAlign % Val) != 0)
4657 return false;
4658
4659 // If we have an offset, we need further check on the offset.
4660 if (AddrOp.getOpcode() != ISD::ADD)
4661 return true;
4662 }
4663
4664 if (AddrOp.getOpcode() == ISD::ADD)
4665 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4666
4667 // If the address comes from the outside, the offset will be zero.
4668 return AddrOp.getOpcode() == ISD::CopyFromReg;
4669}
4670
4671void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4672 // Transfer memoperands.
4673 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4674 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4675}
4676
// Decide whether the SELECT_CC node N matches one of the three-way-compare
// shapes listed below, which the debug message calls "can be lowered to a
// SETB". Returns true on a match.
//
// CC is the condition code of the outer select_cc. On a match NeedSwapOps is
// assigned; IsUnCmp is only ever set to true here (never cleared), so callers
// presumably initialise it to false - confirm at the call site.
4677static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4678 bool &NeedSwapOps, bool &IsUnCmp) {
4679
4680 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4681
4682 SDValue LHS = N->getOperand(0);
4683 SDValue RHS = N->getOperand(1);
4684 SDValue TrueRes = N->getOperand(2);
4685 SDValue FalseRes = N->getOperand(3);
// The true value must be a constant and the result type must be i32 or i64.
4686 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4687 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4688 N->getSimpleValueType(0) != MVT::i32))
4689 return false;
4690
4691 // We are looking for any of:
4692 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4693 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4694 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4695 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4696 int64_t TrueResVal = TrueConst->getSExtValue();
4697 if ((TrueResVal < -1 || TrueResVal > 1) ||
4698 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4699 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4700 (TrueResVal == 0 &&
4701 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4702 return false;
4703
// The inner comparison is either the false value itself (nested select_cc) or
// the operand of the extend.
4704 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4705 ? FalseRes
4706 : FalseRes.getOperand(0);
4707 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4708 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4709 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4710 return false;
4711
4712 // Without this setb optimization, the outer SELECT_CC will be manually
4713 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4714 // transforms pseudo instruction to isel instruction. When there are more than
4715 // one use for result like zext/sext, with current optimization we only see
4716 // isel is replaced by setb but can't see any significant gain. Since
4717 // setb has longer latency than original isel, we should avoid this. Another
4718 // point is that setb requires comparison always kept, it can break the
4719 // opportunity to get the comparison away if we have in future.
4720 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4721 return false;
4722
4723 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4724 SDValue InnerRHS = SetOrSelCC.getOperand(1);
// The condition code is operand 4 of a select_cc and operand 2 of a setcc.
4725 ISD::CondCode InnerCC =
4726 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4727 // If the inner comparison is a select_cc, make sure the true/false values are
4728 // 1/-1 and canonicalize it if needed.
4729 if (InnerIsSel) {
4730 ConstantSDNode *SelCCTrueConst =
4731 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4732 ConstantSDNode *SelCCFalseConst =
4733 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4734 if (!SelCCTrueConst || !SelCCFalseConst)
4735 return false;
4736 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4737 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4738 // The values must be -1/1 (requiring a swap) or 1/-1.
4739 if (SelCCTVal == -1 && SelCCFVal == 1) {
4740 std::swap(InnerLHS, InnerRHS);
4741 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4742 return false;
4743 }
4744
4745 // Canonicalize unsigned case
4746 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4747 IsUnCmp = true;
4748 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4749 }
4750
// The inner comparison must use the same two operands as the outer one, in
// the same or in swapped order.
4751 bool InnerSwapped = false;
4752 if (LHS == InnerRHS && RHS == InnerLHS)
4753 InnerSwapped = true;
4754 else if (LHS != InnerLHS || RHS != InnerRHS)
4755 return false;
4756
4757 switch (CC) {
4758 // (select_cc lhs, rhs, 0, \
4759 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4760 case ISD::SETEQ:
4761 if (!InnerIsSel)
4762 return false;
4763 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4764 return false;
4765 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4766 break;
4767
4768 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4769 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4770 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4771 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4772 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4773 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4774 case ISD::SETULT:
// An unsigned outer compare needs an unsigned inner compare, unless the inner
// one is setne.
4775 if (!IsUnCmp && InnerCC != ISD::SETNE)
4776 return false;
4777 IsUnCmp = true;
4778 [[fallthrough]];
4779 case ISD::SETLT:
4780 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4781 (InnerCC == ISD::SETLT && InnerSwapped))
4782 NeedSwapOps = (TrueResVal == 1);
4783 else
4784 return false;
4785 break;
4786
4787 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4788 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4789 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4790 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4791 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4792 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4793 case ISD::SETUGT:
4794 if (!IsUnCmp && InnerCC != ISD::SETNE)
4795 return false;
4796 IsUnCmp = true;
4797 [[fallthrough]];
4798 case ISD::SETGT:
4799 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4800 (InnerCC == ISD::SETGT && InnerSwapped))
4801 NeedSwapOps = (TrueResVal == -1);
4802 else
4803 return false;
4804 break;
4805
4806 default:
4807 return false;
4808 }
4809
4810 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4811 LLVM_DEBUG(N->dump());
4812
4813 return true;
4814}
4815
4816// Return true if it's a software square-root/divide operand.
4817static bool isSWTestOp(SDValue N) {
4818 if (N.getOpcode() == PPCISD::FTSQRT)
4819 return true;
4820 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4821 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4822 return false;
4823 switch (N.getConstantOperandVal(0)) {
4824 case Intrinsic::ppc_vsx_xvtdivdp:
4825 case Intrinsic::ppc_vsx_xvtdivsp:
4826 case Intrinsic::ppc_vsx_xvtsqrtdp:
4827 case Intrinsic::ppc_vsx_xvtsqrtsp:
4828 return true;
4829 }
4830 return false;
4831}
4832
// Try to fold a BR_CC that tests one bit of a software sqrt/divide test
// result (see isSWTestOp) into a single BCC on that result. Returns true when
// N has been morphed into a BCC, false when the pattern does not match.
//
// Operands of N as used below: 0 is passed last to the BCC (presumably the
// chain), 1 is the condition code, 2 and 3 are the compared values, and 4 is
// passed through to the BCC (presumably the branch destination).
4833bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4834 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4835 // We are looking for following patterns, where `truncate to i1` actually has
4836 // the same semantic with `and 1`.
4837 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4838 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4839 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4840 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4841 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4842 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4843 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4844 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4845 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4846 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4847 return false;
4848
// The comparison must be against the constant zero.
4849 SDValue CmpRHS = N->getOperand(3);
4850 if (!isNullConstant(CmpRHS))
4851 return false;
4852
// The left side must have at least one operand, and that first operand must be
// a software test op.
4853 SDValue CmpLHS = N->getOperand(2);
4854 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4855 return false;
4856
// PCC stays 0 when neither the AND form nor the truncate-to-i1 form matches;
// the `if (PCC)` below then rejects the node.
4857 unsigned PCC = 0;
4858 bool IsCCNE = CC == ISD::SETNE;
4859 if (CmpLHS.getOpcode() == ISD::AND &&
// NOTE(review): listing line 4860 (the rest of this condition) is missing from
// this view. The switch below reads operand 1 as a constant, so the missing
// line presumably checks that operand - confirm against the original file.
4861 switch (CmpLHS.getConstantOperandVal(1)) {
4862 case 1:
4863 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4864 break;
4865 case 2:
4866 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4867 break;
4868 case 4:
4869 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4870 break;
4871 case 8:
4872 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4873 break;
4874 default:
4875 return false;
4876 }
4877 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4878 CmpLHS.getValueType() == MVT::i1)
4879 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4880
4881 if (PCC) {
4882 SDLoc dl(N);
// BCC operands: predicate, the software test op, operand 4 of N, operand 0
// of N.
4883 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4884 N->getOperand(0)};
4885 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4886 return true;
4887 }
4888 return false;
4889}
4890
// Try to select a branch whose compared value is
// (and (loop_decrement intrinsic), non-zero constant) into a DecreaseCTRloop /
// DecreaseCTR8loop node feeding a BC or BCn. Returns true when N has been
// morphed, false when the pattern does not match.
//
// Operands of N as used below: 0 is merged into the new chain, 1 is the
// condition code, 2 and 3 are the compared values, and 4 is passed through to
// the branch (presumably the branch destination).
4891bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4892 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4893 // value, for example when crbits is disabled. If so, select the
4894 // loop_decrement intrinsics now.
4895 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4896 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4897
// LHS must be an AND with a constant, non-zero second operand.
4898 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4899 isNullConstant(LHS.getOperand(1)))
4900 return false;
4901
// The first operand of the AND must be the chained loop_decrement intrinsic.
4902 if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4903 LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4904 return false;
4905
// NOTE(review): listing line 4906 (the condition guarding the next return) is
// missing from this view. RHS is read as a constant further down, so the
// missing line presumably validates RHS - confirm against the original file.
4907 return false;
4908
4909 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4910 "Counter decrement comparison is not EQ or NE");
4911
4912 SDValue OldDecrement = LHS.getOperand(0);
4913 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4914
4915 SDLoc DecrementLoc(OldDecrement);
4916 SDValue ChainInput = OldDecrement.getOperand(0);
// The replacement decrement node takes the constant 1, typed i64 on PPC64 and
// i32 otherwise, and produces an i1.
4917 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4918 : getI32Imm(1, DecrementLoc)};
4919 unsigned DecrementOpcode =
4920 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4921 SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4922 MVT::i1, DecrementOps);
4923
// Branch with BC when the test is "== non-zero" or "!= zero"; otherwise use
// BCn.
4924 unsigned Val = RHS->getAsZExtVal();
4925 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4926 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4927
// Redirect users of the AND to its constant operand, then delete the AND.
4928 ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4929 CurDAG->RemoveDeadNode(LHS.getNode());
4930
4931 // Mark the old loop_decrement intrinsic as dead.
4932 ReplaceUses(OldDecrement.getValue(1), ChainInput);
4933 CurDAG->RemoveDeadNode(OldDecrement.getNode());
4934
// The branch is chained on both the old intrinsic's incoming chain and N's
// own chain operand.
4935 SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4936 ChainInput, N->getOperand(0));
4937
4938 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4939 N->getOperand(4), Chain);
4940 return true;
4941}
4942
4943bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4944 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4945 unsigned Imm;
4946 if (!isInt32Immediate(N->getOperand(1), Imm))
4947 return false;
4948
4949 SDLoc dl(N);
4950 SDValue Val = N->getOperand(0);
4951 unsigned SH, MB, ME;
4952 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4953 // with a mask, emit rlwinm
4954 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4955 Val = Val.getOperand(0);
4956 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4957 getI32Imm(ME, dl)};
4958 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4959 return true;
4960 }
4961
4962 // If this is just a masked value where the input is not handled, and
4963 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4964 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4965 // The result of LBARX/LHARX do not need to be cleared as the instructions
4966 // implicitly clear the upper bits.
4967 unsigned AlreadyCleared = 0;
4968 if (Val.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4969 auto IntrinsicID = Val.getConstantOperandVal(1);
4970 if (IntrinsicID == Intrinsic::ppc_lbarx)
4971 AlreadyCleared = 24;
4972 else if (IntrinsicID == Intrinsic::ppc_lharx)
4973 AlreadyCleared = 16;
4974 if (AlreadyCleared != 0 && AlreadyCleared == MB && ME == 31) {
4975 ReplaceUses(SDValue(N, 0), N->getOperand(0));
4976 return true;
4977 }
4978 }
4979
4980 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4981 getI32Imm(ME, dl)};
4982 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4983 return true;
4984 }
4985
4986 // AND X, 0 -> 0, not "rlwinm 32".
4987 if (Imm == 0) {
4988 ReplaceUses(SDValue(N, 0), N->getOperand(1));
4989 return true;
4990 }
4991
4992 return false;
4993}
4994
4995bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4996 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4997 uint64_t Imm64;
4998 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4999 return false;
5000
5001 unsigned MB, ME;
5002 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
5003 // MB ME
5004 // +----------------------+
5005 // |xxxxxxxxxxx00011111000|
5006 // +----------------------+
5007 // 0 32 64
5008 // We can only do it if the MB is larger than 32 and MB <= ME
5009 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
5010 // we didn't rotate it.
5011 SDLoc dl(N);
5012 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
5013 getI64Imm(ME - 32, dl)};
5014 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
5015 return true;
5016 }
5017
5018 return false;
5019}
5020
5021bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5022 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5023 uint64_t Imm64;
5024 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5025 return false;
5026
5027 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5028 // it well with "andi.".
5029 if (isUInt<16>(Imm64))
5030 return false;
5031
5032 SDLoc Loc(N);
5033 SDValue Val = N->getOperand(0);
5034
5035 // Optimized with two rldicl's as follows:
5036 // Add missing bits on left to the mask and check that the mask is a
5037 // wrapped run of ones, i.e.
5038 // Change pattern |0001111100000011111111|
5039 // to |1111111100000011111111|.
5040 unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5041 if (NumOfLeadingZeros != 0)
5042 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5043
5044 unsigned MB, ME;
5045 if (!isRunOfOnes64(Imm64, MB, ME))
5046 return false;
5047
5048 // ME MB MB-ME+63
5049 // +----------------------+ +----------------------+
5050 // |1111111100000011111111| -> |0000001111111111111111|
5051 // +----------------------+ +----------------------+
5052 // 0 63 0 63
5053 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5054 unsigned OnesOnLeft = ME + 1;
5055 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5056 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5057 // on the left the bits that are already zeros in the mask.
5058 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5059 getI64Imm(OnesOnLeft, Loc),
5060 getI64Imm(ZerosInBetween, Loc)),
5061 0);
5062 // MB-ME+63 ME MB
5063 // +----------------------+ +----------------------+
5064 // |0000001111111111111111| -> |0001111100000011111111|
5065 // +----------------------+ +----------------------+
5066 // 0 63 0 63
5067 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5068 // left the number of ones we previously added.
5069 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5070 getI64Imm(NumOfLeadingZeros, Loc)};
5071 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5072 return true;
5073}
5074
5075bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5076 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5077 unsigned Imm;
5078 if (!isInt32Immediate(N->getOperand(1), Imm))
5079 return false;
5080
5081 SDValue Val = N->getOperand(0);
5082 unsigned Imm2;
5083 // ISD::OR doesn't get all the bitfield insertion fun.
5084 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5085 // bitfield insert.
5086 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5087 return false;
5088
5089 // The idea here is to check whether this is equivalent to:
5090 // (c1 & m) | (x & ~m)
5091 // where m is a run-of-ones mask. The logic here is that, for each bit in
5092 // c1 and c2:
5093 // - if both are 1, then the output will be 1.
5094 // - if both are 0, then the output will be 0.
5095 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5096 // come from x.
5097 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5098 // be 0.
5099 // If that last condition is never the case, then we can form m from the
5100 // bits that are the same between c1 and c2.
5101 unsigned MB, ME;
5102 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5103 SDLoc dl(N);
5104 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5105 getI32Imm(MB, dl), getI32Imm(ME, dl)};
5106 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5107 return true;
5108 }
5109
5110 return false;
5111}
5112
5113bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5114 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5115
5116 uint64_t Imm64;
5117 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5118 return false;
5119
5120 SDValue Val = N->getOperand(0);
5121
5122 if (Val.getOpcode() != ISD::ROTL)
5123 return false;
5124
5125 // Looking to try to avoid a situation like this one:
5126 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5127 // %and1 = and i64 %2, 9223372036854775807
5128 // In this function we are looking to try to match RLDCL. However, the above
5129 // DAG would better match RLDICL instead which is not what we are looking
5130 // for here.
5131 SDValue RotateAmt = Val.getOperand(1);
5132 if (RotateAmt.getOpcode() == ISD::Constant)
5133 return false;
5134
5135 unsigned MB = 64 - llvm::countr_one(Imm64);
5136 SDLoc dl(N);
5137 SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5138 CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5139 return true;
5140}
5141
5142bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5143 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5144 uint64_t Imm64;
5145 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5146 return false;
5147
5148 // If this is a 64-bit zero-extension mask, emit rldicl.
5149 unsigned MB = 64 - llvm::countr_one(Imm64);
5150 unsigned SH = 0;
5151 unsigned Imm;
5152 SDValue Val = N->getOperand(0);
5153 SDLoc dl(N);
5154
5155 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5156 auto Op0 = Val.getOperand(0);
5157 if (Op0.getOpcode() == ISD::SRL &&
5158 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5159
5160 auto ResultType = Val.getNode()->getValueType(0);
5161 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5162 SDValue IDVal(ImDef, 0);
5163
5164 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5165 IDVal, Op0.getOperand(0),
5166 getI32Imm(1, dl)),
5167 0);
5168 SH = 64 - Imm;
5169 }
5170 }
5171
5172 // If the operand is a logical right shift, we can fold it into this
5173 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5174 // for n <= mb. The right shift is really a left rotate followed by a
5175 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5176 // by the shift.
5177 if (Val.getOpcode() == ISD::SRL &&
5178 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5179 assert(Imm < 64 && "Illegal shift amount");
5180 Val = Val.getOperand(0);
5181 SH = 64 - Imm;
5182 }
5183
5184 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5185 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5186 return true;
5187}
5188
5189bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5190 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5191 uint64_t Imm64;
5192 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5193 !isMask_64(~Imm64))
5194 return false;
5195
5196 // If this is a negated 64-bit zero-extension mask,
5197 // i.e. the immediate is a sequence of ones from most significant side
5198 // and all zero for reminder, we should use rldicr.
5199 unsigned MB = 63 - llvm::countr_one(~Imm64);
5200 unsigned SH = 0;
5201 SDLoc dl(N);
5202 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5203 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5204 return true;
5205}
5206
5207bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5208 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5209 uint64_t Imm64;
5210 unsigned MB, ME;
5211 SDValue N0 = N->getOperand(0);
5212
5213 // We won't get fewer instructions if the imm is 32-bit integer.
5214 // rldimi requires the imm to have consecutive ones with both sides zero.
5215 // Also, make sure the first Op has only one use, otherwise this may increase
5216 // register pressure since rldimi is destructive.
5217 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5218 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5219 return false;
5220
5221 unsigned SH = 63 - ME;
5222 SDLoc Dl(N);
5223 // Use select64Imm for making LI instr instead of directly putting Imm64
5224 SDValue Ops[] = {
5225 N->getOperand(0),
5226 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5227 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5228 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5229 return true;
5230}
5231
5232// Select - Convert the specified operand from a target-independent to a
5233// target-specific node if it hasn't already been changed.
5234void PPCDAGToDAGISel::Select(SDNode *N) {
5235 SDLoc dl(N);
5236 if (N->isMachineOpcode()) {
5237 N->setNodeId(-1);
5238 return; // Already selected.
5239 }
5240
5241 // In case any misguided DAG-level optimizations form an ADD with a
5242 // TargetConstant operand, crash here instead of miscompiling (by selecting
5243 // an r+r add instead of some kind of r+i add).
5244 if (N->getOpcode() == ISD::ADD &&
5245 N->getOperand(1).getOpcode() == ISD::TargetConstant)
5246 llvm_unreachable("Invalid ADD with TargetConstant operand");
5247
5248 // Try matching complex bit permutations before doing anything else.
5249 if (tryBitPermutation(N))
5250 return;
5251
5252 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5253 if (tryIntCompareInGPR(N))
5254 return;
5255
5256 switch (N->getOpcode()) {
5257 default: break;
5258
5259 case ISD::Constant:
5260 if (N->getValueType(0) == MVT::i64) {
5261 ReplaceNode(N, selectI64Imm(CurDAG, N));
5262 return;
5263 }
5264 break;
5265
5266 case ISD::INTRINSIC_VOID: {
5267 auto IntrinsicID = N->getConstantOperandVal(1);
5268 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5269 IntrinsicID != Intrinsic::ppc_trapd &&
5270 IntrinsicID != Intrinsic::ppc_trap)
5271 break;
5272 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5273 IntrinsicID == Intrinsic::ppc_trapd)
5274 ? PPC::TDI
5275 : PPC::TWI;
5276 SmallVector<SDValue, 4> OpsWithMD;
5277 unsigned MDIndex;
5278 if (IntrinsicID == Intrinsic::ppc_tdw ||
5279 IntrinsicID == Intrinsic::ppc_tw) {
5280 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5281 int16_t SImmOperand2;
5282 int16_t SImmOperand3;
5283 int16_t SImmOperand4;
5284 bool isOperand2IntS16Immediate =
5285 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5286 bool isOperand3IntS16Immediate =
5287 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5288 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5289 // reg or imm + imm. The imm + imm form will be optimized to either an
5290 // unconditional trap or a nop in a later pass.
5291 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5292 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5293 else if (isOperand3IntS16Immediate)
5294 // The 2nd and 3rd operands are reg + imm.
5295 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5296 else {
5297 // The 2nd and 3rd operands are imm + reg.
5298 bool isOperand4IntS16Immediate =
5299 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5300 (void)isOperand4IntS16Immediate;
5301 assert(isOperand4IntS16Immediate &&
5302 "The 4th operand is not an Immediate");
5303 // We need to flip the condition immediate TO.
5304 int16_t TO = int(SImmOperand4) & 0x1F;
5305 // We swap the first and second bit of TO if they are not same.
5306 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5307 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5308 // We swap the fourth and fifth bit of TO if they are not same.
5309 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5310 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5311 Ops[0] = getI32Imm(TO, dl);
5312 Ops[1] = N->getOperand(3);
5313 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5314 }
5315 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5316 MDIndex = 5;
5317 } else {
5318 OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5319 MDIndex = 3;
5320 }
5321
5322 if (N->getNumOperands() > MDIndex) {
5323 SDValue MDV = N->getOperand(MDIndex);
5324 const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5325 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5326 assert((isa<MDString>(MD->getOperand(0)) &&
5327 cast<MDString>(MD->getOperand(0))->getString() ==
5328 "ppc-trap-reason") &&
5329 "Unsupported annotation data type!");
5330 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5331 assert(isa<MDString>(MD->getOperand(i)) &&
5332 "Invalid data type for annotation ppc-trap-reason!");
5333 OpsWithMD.push_back(
5334 getI32Imm(std::stoi(cast<MDString>(
5335 MD->getOperand(i))->getString().str()), dl));
5336 }
5337 }
5338 OpsWithMD.push_back(N->getOperand(0)); // chain
5339 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5340 return;
5341 }
5342
5344 // We emit the PPC::FSELS instruction here because of type conflicts with
5345 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5346 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5347 // value for the comparison. When selecting through a .td file, a type
5348 // error is raised. Must check this first so we never break on the
5349 // !Subtarget->isISA3_1() check.
5350 auto IntID = N->getConstantOperandVal(0);
5351 if (IntID == Intrinsic::ppc_fsels) {
5352 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5353 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5354 return;
5355 }
5356
5357 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5358 auto Pred = N->getConstantOperandVal(1);
5359 unsigned Opcode =
5360 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5361 unsigned SubReg = 0;
5362 unsigned ShiftVal = 0;
5363 bool Reverse = false;
5364 switch (Pred) {
5365 case 0:
5366 SubReg = PPC::sub_eq;
5367 ShiftVal = 1;
5368 break;
5369 case 1:
5370 SubReg = PPC::sub_eq;
5371 ShiftVal = 1;
5372 Reverse = true;
5373 break;
5374 case 2:
5375 SubReg = PPC::sub_lt;
5376 ShiftVal = 3;
5377 break;
5378 case 3:
5379 SubReg = PPC::sub_lt;
5380 ShiftVal = 3;
5381 Reverse = true;
5382 break;
5383 case 4:
5384 SubReg = PPC::sub_gt;
5385 ShiftVal = 2;
5386 break;
5387 case 5:
5388 SubReg = PPC::sub_gt;
5389 ShiftVal = 2;
5390 Reverse = true;
5391 break;
5392 case 6:
5393 SubReg = PPC::sub_un;
5394 break;
5395 case 7:
5396 SubReg = PPC::sub_un;
5397 Reverse = true;
5398 break;
5399 }
5400
5401 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5402 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5403 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5404 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5405 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5406 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5407 // MFOCRF and shift/negate the value.
5408 if (Subtarget->isISA3_1()) {
5409 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5410 SDValue CRBit = SDValue(
5411 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5412 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5413 0);
5414 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5415 CRBit);
5416 } else {
5417 SDValue Move =
5418 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5419 BCDOp.getValue(1)),
5420 0);
5421 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5422 getI32Imm(31, dl), getI32Imm(31, dl)};
5423 if (!Reverse)
5424 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5425 else {
5426 SDValue Shift = SDValue(
5427 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5428 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5429 }
5430 }
5431 return;
5432 }
5433
5434 if (!Subtarget->isISA3_1())
5435 break;
5436 unsigned Opcode = 0;
5437 switch (IntID) {
5438 default:
5439 break;
5440 case Intrinsic::ppc_altivec_vstribr_p:
5441 Opcode = PPC::VSTRIBR_rec;
5442 break;
5443 case Intrinsic::ppc_altivec_vstribl_p:
5444 Opcode = PPC::VSTRIBL_rec;
5445 break;
5446 case Intrinsic::ppc_altivec_vstrihr_p:
5447 Opcode = PPC::VSTRIHR_rec;
5448 break;
5449 case Intrinsic::ppc_altivec_vstrihl_p:
5450 Opcode = PPC::VSTRIHL_rec;
5451 break;
5452 }
5453 if (!Opcode)
5454 break;
5455
5456 // Generate the appropriate vector string isolate intrinsic to match.
5457 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5458 SDValue VecStrOp =
5459 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5460 // Vector string isolate instructions update the EQ bit of CR6.
5461 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5462 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5463 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5464 SDValue CRBit = SDValue(
5465 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5466 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5467 0);
5468 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5469 return;
5470 }
5471
5472 case ISD::SETCC:
5473 case ISD::STRICT_FSETCC:
5475 if (trySETCC(N))
5476 return;
5477 break;
5478 // These nodes will be transformed into GETtlsADDR32 node, which
5479 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5480 case PPCISD::ADDI_TLSLD_L_ADDR:
5481 case PPCISD::ADDI_TLSGD_L_ADDR: {
5482 const Module *Mod = MF->getFunction().getParent();
5483 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5484 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5485 Mod->getPICLevel() == PICLevel::SmallPIC)
5486 break;
5487 // Attach global base pointer on GETtlsADDR32 node in order to
5488 // generate secure plt code for TLS symbols.
5489 getGlobalBaseReg();
5490 } break;
5491 case PPCISD::CALL:
5492 case PPCISD::CALL_RM: {
5493 if (Subtarget->isPPC64() || !TM.isPositionIndependent() ||
5494 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF())
5495 break;
5496
5497 SDValue Op = N->getOperand(1);
5498
5499 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5500 if (GA->getTargetFlags() == PPCII::MO_PLT)
5501 getGlobalBaseReg();
5502 }
5503 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5504 if (ES->getTargetFlags() == PPCII::MO_PLT)
5505 getGlobalBaseReg();
5506 }
5507 } break;
5508
5510 ReplaceNode(N, getGlobalBaseReg());
5511 return;
5512
5513 case ISD::FrameIndex:
5514 selectFrameIndex(N, N);
5515 return;
5516
5517 case PPCISD::MFOCRF: {
5518 SDValue InGlue = N->getOperand(1);
5519 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5520 N->getOperand(0), InGlue));
5521 return;
5522 }
5523
5525 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5526 MVT::Other, N->getOperand(0)));
5527 return;
5528
5529 case PPCISD::SRA_ADDZE: {
5530 SDValue N0 = N->getOperand(0);
5531 SDValue ShiftAmt =
5532 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5533 getConstantIntValue(), dl,
5534 N->getValueType(0));
5535 if (N->getValueType(0) == MVT::i64) {
5536 SDNode *Op =
5537 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5538 N0, ShiftAmt);
5539 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5540 SDValue(Op, 1));
5541 return;
5542 } else {
5543 assert(N->getValueType(0) == MVT::i32 &&
5544 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5545 SDNode *Op =
5546 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5547 N0, ShiftAmt);
5548 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5549 SDValue(Op, 1));
5550 return;
5551 }
5552 }
5553
5554 case ISD::STORE: {
5555 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5556 // X-form stores.
5557 StoreSDNode *ST = cast<StoreSDNode>(N);
5558 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5559 ST->getAddressingMode() != ISD::PRE_INC)
5560 if (tryTLSXFormStore(ST))
5561 return;
5562 break;
5563 }
5564 case ISD::LOAD: {
5565 // Handle preincrement loads.
5566 LoadSDNode *LD = cast<LoadSDNode>(N);
5567 EVT LoadedVT = LD->getMemoryVT();
5568
5569 // Normal loads are handled by code generated from the .td file.
5570 if (LD->getAddressingMode() != ISD::PRE_INC) {
5571 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5572 // X-form loads.
5573 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5574 if (tryTLSXFormLoad(LD))
5575 return;
5576 break;
5577 }
5578
5579 SDValue Offset = LD->getOffset();
5580 if (Offset.getOpcode() == ISD::TargetConstant ||
5581 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5582
5583 unsigned Opcode;
5584 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5585 if (LD->getValueType(0) != MVT::i64) {
5586 // Handle PPC32 integer and normal FP loads.
5587 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5588 switch (LoadedVT.getSimpleVT().SimpleTy) {
5589 default: llvm_unreachable("Invalid PPC load type!");
5590 case MVT::f64: Opcode = PPC::LFDU; break;
5591 case MVT::f32: Opcode = PPC::LFSU; break;
5592 case MVT::i32: Opcode = PPC::LWZU; break;
5593 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5594 case MVT::i1:
5595 case MVT::i8: Opcode = PPC::LBZU; break;
5596 }
5597 } else {
5598 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5599 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5600 switch (LoadedVT.getSimpleVT().SimpleTy) {
5601 default: llvm_unreachable("Invalid PPC load type!");
5602 case MVT::i64: Opcode = PPC::LDU; break;
5603 case MVT::i32: Opcode = PPC::LWZU8; break;
5604 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5605 case MVT::i1:
5606 case MVT::i8: Opcode = PPC::LBZU8; break;
5607 }
5608 }
5609
5610 SDValue Chain = LD->getChain();
5611 SDValue Base = LD->getBasePtr();
5612 SDValue Ops[] = { Offset, Base, Chain };
5613 SDNode *MN = CurDAG->getMachineNode(
5614 Opcode, dl, LD->getValueType(0),
5615 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5616 transferMemOperands(N, MN);
5617 ReplaceNode(N, MN);
5618 return;
5619 } else {
5620 unsigned Opcode;
5621 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5622 if (LD->getValueType(0) != MVT::i64) {
5623 // Handle PPC32 integer and normal FP loads.
5624 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5625 switch (LoadedVT.getSimpleVT().SimpleTy) {
5626 default: llvm_unreachable("Invalid PPC load type!");
5627 case MVT::f64: Opcode = PPC::LFDUX; break;
5628 case MVT::f32: Opcode = PPC::LFSUX; break;
5629 case MVT::i32: Opcode = PPC::LWZUX; break;
5630 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5631 case MVT::i1:
5632 case MVT::i8: Opcode = PPC::LBZUX; break;
5633 }
5634 } else {
5635 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5636 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5637 "Invalid sext update load");
5638 switch (LoadedVT.getSimpleVT().SimpleTy) {
5639 default: llvm_unreachable("Invalid PPC load type!");
5640 case MVT::i64: Opcode = PPC::LDUX; break;
5641 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5642 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5643 case MVT::i1:
5644 case MVT::i8: Opcode = PPC::LBZUX8; break;
5645 }
5646 }
5647
5648 SDValue Chain = LD->getChain();
5649 SDValue Base = LD->getBasePtr();
5650 SDValue Ops[] = { Base, Offset, Chain };
5651 SDNode *MN = CurDAG->getMachineNode(
5652 Opcode, dl, LD->getValueType(0),
5653 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5654 transferMemOperands(N, MN);
5655 ReplaceNode(N, MN);
5656 return;
5657 }
5658 }
5659
5660 case ISD::AND:
5661 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5662 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5663 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5664 tryAsPairOfRLDICL(N))
5665 return;
5666
5667 // Other cases are autogenerated.
5668 break;
5669 case ISD::OR: {
5670 if (N->getValueType(0) == MVT::i32)
5671 if (tryBitfieldInsert(N))
5672 return;
5673
5674 int16_t Imm;
5675 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5676 isIntS16Immediate(N->getOperand(1), Imm)) {
5677 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5678
5679 // If this is equivalent to an add, then we can fold it with the
5680 // FrameIndex calculation.
5681 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5682 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5683 return;
5684 }
5685 }
5686
5687 // If this is 'or' against an imm with consecutive ones and both sides zero,
5688 // try to emit rldimi
5689 if (tryAsSingleRLDIMI(N))
5690 return;
5691
5692 // OR with a 32-bit immediate can be handled by ori + oris
5693 // without creating an immediate in a GPR.
5694 uint64_t Imm64 = 0;
5695 bool IsPPC64 = Subtarget->isPPC64();
5696 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5697 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5698 // If ImmHi (ImmLo) is zero, only one ori (oris) is generated later.
5699 uint64_t ImmHi = Imm64 >> 16;
5700 uint64_t ImmLo = Imm64 & 0xFFFF;
5701 if (ImmHi != 0 && ImmLo != 0) {
5702 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5703 N->getOperand(0),
5704 getI16Imm(ImmLo, dl));
5705 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5706 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5707 return;
5708 }
5709 }
5710
5711 // Other cases are autogenerated.
5712 break;
5713 }
5714 case ISD::XOR: {
5715 // XOR with a 32-bit immediate can be handled by xori + xoris
5716 // without creating an immediate in a GPR.
5717 uint64_t Imm64 = 0;
5718 bool IsPPC64 = Subtarget->isPPC64();
5719 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5720 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5721 // If ImmHi (ImmLo) is zero, only one xori (xoris) is generated later.
5722 uint64_t ImmHi = Imm64 >> 16;
5723 uint64_t ImmLo = Imm64 & 0xFFFF;
5724 if (ImmHi != 0 && ImmLo != 0) {
5725 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5726 N->getOperand(0),
5727 getI16Imm(ImmLo, dl));
5728 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5729 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5730 return;
5731 }
5732 }
5733
5734 break;
5735 }
5736 case ISD::ADD: {
5737 int16_t Imm;
5738 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5739 isIntS16Immediate(N->getOperand(1), Imm)) {
5740 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5741 return;
5742 }
5743
5744 break;
5745 }
5746 case ISD::SHL: {
5747 unsigned Imm, SH, MB, ME;
5748 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5749 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5750 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5751 getI32Imm(SH, dl), getI32Imm(MB, dl),
5752 getI32Imm(ME, dl) };
5753 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5754 return;
5755 }
5756
5757 // Other cases are autogenerated.
5758 break;
5759 }
5760 case ISD::SRL: {
5761 unsigned Imm, SH, MB, ME;
5762 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5763 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5764 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5765 getI32Imm(SH, dl), getI32Imm(MB, dl),
5766 getI32Imm(ME, dl) };
5767 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5768 return;
5769 }
5770
5771 // Other cases are autogenerated.
5772 break;
5773 }
5774 case ISD::MUL: {
5775 SDValue Op1 = N->getOperand(1);
5776 if (Op1.getOpcode() != ISD::Constant ||
5777 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5778 break;
5779
5780 // If the multiplier fits int16, we can handle it with mulli.
5781 int64_t Imm = Op1->getAsZExtVal();
5782 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5783 if (isInt<16>(Imm) || !Shift)
5784 break;
5785
5786 // If the shifted value fits int16, we can do this transformation:
5787 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5788 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5789 uint64_t ImmSh = Imm >> Shift;
5790 if (!isInt<16>(ImmSh))
5791 break;
5792
5793 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5794 if (Op1.getValueType() == MVT::i64) {
5795 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5796 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5797 N->getOperand(0), SDImm);
5798
5799 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5800 getI32Imm(63 - Shift, dl)};
5801 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5802 return;
5803 } else {
5804 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5805 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5806 N->getOperand(0), SDImm);
5807
5808 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5809 getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5810 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5811 return;
5812 }
5813 break;
5814 }
5815 // FIXME: Remove this once the ANDI glue bug is fixed:
5818 if (!ANDIGlueBug)
5819 break;
5820
5821 EVT InVT = N->getOperand(0).getValueType();
5822 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5823 "Invalid input type for ANDI_rec_1_EQ_BIT");
5824
5825 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5826 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5827 N->getOperand(0),
5828 CurDAG->getTargetConstant(1, dl, InVT)),
5829 0);
5830 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5831 SDValue SRIdxVal = CurDAG->getTargetConstant(
5832 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5833 dl, MVT::i32);
5834
5835 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5836 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5837 return;
5838 }
5839 case ISD::SELECT_CC: {
5840 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5841 EVT PtrVT =
5843 bool isPPC64 = (PtrVT == MVT::i64);
5844
5845 // If this is a select of i1 operands, we'll pattern match it.
5846 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5847 break;
5848
5849 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5850 bool NeedSwapOps = false;
5851 bool IsUnCmp = false;
5852 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5853 SDValue LHS = N->getOperand(0);
5854 SDValue RHS = N->getOperand(1);
5855 if (NeedSwapOps)
5856 std::swap(LHS, RHS);
5857
5858 // Make use of SelectCC to generate the comparison to set CR bits, for
5859 // equality comparisons having one literal operand, SelectCC probably
5860 // doesn't need to materialize the whole literal and just use xoris to
5861 // check it first, it leads the following comparison result can't
5862 // exactly represent GT/LT relationship. So to avoid this we specify
5863 // SETGT/SETUGT here instead of SETEQ.
5864 SDValue GenCC =
5865 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5866 CurDAG->SelectNodeTo(
5867 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5868 N->getValueType(0), GenCC);
5869 NumP9Setb++;
5870 return;
5871 }
5872 }
5873
5874 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5875 if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5876 isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5877 CC == ISD::SETNE &&
5878 // FIXME: Implement this optzn for PPC64.
5879 N->getValueType(0) == MVT::i32) {
5880 SDNode *Tmp =
5881 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5882 N->getOperand(0), getI32Imm(~0U, dl));
5883 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5884 N->getOperand(0), SDValue(Tmp, 1));
5885 return;
5886 }
5887
5888 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5889
5890 if (N->getValueType(0) == MVT::i1) {
5891 // An i1 select is: (c & t) | (!c & f).
5892 bool Inv;
5893 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5894
5895 unsigned SRI;
5896 switch (Idx) {
5897 default: llvm_unreachable("Invalid CC index");
5898 case 0: SRI = PPC::sub_lt; break;
5899 case 1: SRI = PPC::sub_gt; break;
5900 case 2: SRI = PPC::sub_eq; break;
5901 case 3: SRI = PPC::sub_un; break;
5902 }
5903
5904 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5905
5906 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5907 CCBit, CCBit), 0);
5908 SDValue C = Inv ? NotCCBit : CCBit,
5909 NotC = Inv ? CCBit : NotCCBit;
5910
5911 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5912 C, N->getOperand(2)), 0);
5913 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5914 NotC, N->getOperand(3)), 0);
5915
5916 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5917 return;
5918 }
5919
5920 unsigned BROpc =
5921 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5922
5923 unsigned SelectCCOp;
5924 if (N->getValueType(0) == MVT::i32)
5925 SelectCCOp = PPC::SELECT_CC_I4;
5926 else if (N->getValueType(0) == MVT::i64)
5927 SelectCCOp = PPC::SELECT_CC_I8;
5928 else if (N->getValueType(0) == MVT::f32) {
5929 if (Subtarget->hasP8Vector())
5930 SelectCCOp = PPC::SELECT_CC_VSSRC;
5931 else if (Subtarget->hasSPE())
5932 SelectCCOp = PPC::SELECT_CC_SPE4;
5933 else
5934 SelectCCOp = PPC::SELECT_CC_F4;
5935 } else if (N->getValueType(0) == MVT::f64) {
5936 if (Subtarget->hasVSX())
5937 SelectCCOp = PPC::SELECT_CC_VSFRC;
5938 else if (Subtarget->hasSPE())
5939 SelectCCOp = PPC::SELECT_CC_SPE;
5940 else
5941 SelectCCOp = PPC::SELECT_CC_F8;
5942 } else if (N->getValueType(0) == MVT::f128)
5943 SelectCCOp = PPC::SELECT_CC_F16;
5944 else if (Subtarget->hasSPE())
5945 SelectCCOp = PPC::SELECT_CC_SPE;
5946 else if (N->getValueType(0) == MVT::v2f64 ||
5947 N->getValueType(0) == MVT::v2i64)
5948 SelectCCOp = PPC::SELECT_CC_VSRC;
5949 else
5950 SelectCCOp = PPC::SELECT_CC_VRRC;
5951
5952 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5953 getI32Imm(BROpc, dl) };
5954 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5955 return;
5956 }
5958 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5959 N->getValueType(0) == MVT::v2i64)) {
5960 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5961
5962 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5963 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5964 unsigned DM[2];
5965
5966 for (int i = 0; i < 2; ++i)
5967 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5968 DM[i] = 0;
5969 else
5970 DM[i] = 1;
5971
5972 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5974 isa<LoadSDNode>(Op1.getOperand(0))) {
5975 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5977
5978 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5979 (LD->getMemoryVT() == MVT::f64 ||
5980 LD->getMemoryVT() == MVT::i64) &&
5981 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5982 SDValue Chain = LD->getChain();
5983 SDValue Ops[] = { Base, Offset, Chain };
5984 MachineMemOperand *MemOp = LD->getMemOperand();
5985 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5986 N->getValueType(0), Ops);
5987 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5988 return;
5989 }
5990 }
5991
5992 // For little endian, we must swap the input operands and adjust
5993 // the mask elements (reverse and invert them).
5994 if (Subtarget->isLittleEndian()) {
5995 std::swap(Op1, Op2);
5996 unsigned tmp = DM[0];
5997 DM[0] = 1 - DM[1];
5998 DM[1] = 1 - tmp;
5999 }
6000
6001 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
6002 MVT::i32);
6003 SDValue Ops[] = { Op1, Op2, DMV };
6004 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
6005 return;
6006 }
6007
6008 break;
6009 case PPCISD::BDNZ:
6010 case PPCISD::BDZ: {
6011 bool IsPPC64 = Subtarget->isPPC64();
6012 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
6013 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
6014 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
6015 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
6016 MVT::Other, Ops);
6017 return;
6018 }
6019 case PPCISD::COND_BRANCH: {
6020 // Op #0 is the Chain.
6021 // Op #1 is the PPC::PRED_* number.
6022 // Op #2 is the CR#
6023 // Op #3 is the Dest MBB
6024 // Op #4 is the Flag.
6025 // Prevent PPC::PRED_* from being selected into LI.
6026 unsigned PCC = N->getConstantOperandVal(1);
6027 if (EnableBranchHint)
6028 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6029
6030 SDValue Pred = getI32Imm(PCC, dl);
6031 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6032 N->getOperand(0), N->getOperand(4) };
6033 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6034 return;
6035 }
6036 case ISD::BR_CC: {
6037 if (tryFoldSWTestBRCC(N))
6038 return;
6039 if (trySelectLoopCountIntrinsic(N))
6040 return;
6041 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6042 unsigned PCC =
6043 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6044
6045 if (N->getOperand(2).getValueType() == MVT::i1) {
6046 unsigned Opc;
6047 bool Swap;
6048 switch (PCC) {
6049 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6050 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6051 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6052 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6053 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6054 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6055 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6056 }
6057
6058 // A signed comparison of i1 values produces the opposite result to an
6059 // unsigned one if the condition code includes less-than or greater-than.
6060 // This is because 1 is the most negative signed i1 number and the most
6061 // positive unsigned i1 number. The CR-logical operations used for such
6062 // comparisons are non-commutative so for signed comparisons vs. unsigned
6063 // ones, the input operands just need to be swapped.
6064 if (ISD::isSignedIntSetCC(CC))
6065 Swap = !Swap;
6066
6067 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6068 N->getOperand(Swap ? 3 : 2),
6069 N->getOperand(Swap ? 2 : 3)), 0);
6070 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6071 N->getOperand(0));
6072 return;
6073 }
6074
6075 if (EnableBranchHint)
6076 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6077
6078 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6079 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6080 N->getOperand(4), N->getOperand(0) };
6081 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6082 return;
6083 }
6084 case ISD::BRIND: {
6085 // FIXME: Should custom lower this.
6086 SDValue Chain = N->getOperand(0);
6087 SDValue Target = N->getOperand(1);
6088 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6089 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6090 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6091 Chain), 0);
6092 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6093 return;
6094 }
6095 case PPCISD::TOC_ENTRY: {
6096 const bool isPPC64 = Subtarget->isPPC64();
6097 const bool isELFABI = Subtarget->isSVR4ABI();
6098 const bool isAIXABI = Subtarget->isAIXABI();
6099
6100 // PowerPC only support small, medium and large code model.
6101 const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6102
6103 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6104 "PowerPC doesn't support tiny or kernel code models.");
6105
6106 if (isAIXABI && CModel == CodeModel::Medium)
6107 report_fatal_error("Medium code model is not supported on AIX.");
6108
6109 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6110 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6111 // small code model, we need to check for a toc-data attribute.
6112 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6113 break;
6114
6115 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6116 EVT OperandTy) {
6117 SDValue GA = TocEntry->getOperand(0);
6118 SDValue TocBase = TocEntry->getOperand(1);
6119 SDNode *MN = nullptr;
6120 if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
6121 // toc-data access doesn't involve in loading from got, no need to
6122 // keep memory operands.
6123 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
6124 else {
6125 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6126 transferMemOperands(TocEntry, MN);
6127 }
6128 ReplaceNode(TocEntry, MN);
6129 };
6130
6131 // Handle 32-bit small code model.
6132 if (!isPPC64 && CModel == CodeModel::Small) {
6133 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
6134 // PPC::ADDItoc, or PPC::LWZtoc
6135 if (isELFABI) {
6137 "32-bit ELF can only have TOC entries in position independent"
6138 " code.");
6139 // 32-bit ELF always uses a small code model toc access.
6140 replaceWith(PPC::LWZtoc, N, MVT::i32);
6141 return;
6142 }
6143
6144 assert(isAIXABI && "ELF ABI already handled");
6145
6146 if (hasTocDataAttr(N->getOperand(0))) {
6147 replaceWith(PPC::ADDItoc, N, MVT::i32);
6148 return;
6149 }
6150
6151 replaceWith(PPC::LWZtoc, N, MVT::i32);
6152 return;
6153 }
6154
6155 if (isPPC64 && CModel == CodeModel::Small) {
6156 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6157
6158 if (hasTocDataAttr(N->getOperand(0))) {
6159 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6160 return;
6161 }
6162 // Break if it doesn't have toc data attribute. Proceed with common
6163 // SelectCode.
6164 break;
6165 }
6166
6167 assert(CModel != CodeModel::Small && "All small code models handled.");
6168
6169 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6170 " ELF/AIX or 32-bit AIX in the following.");
6171
6172 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode,
6173 // 64-bit medium (ELF-only), or 64-bit large (ELF and AIX) code model code
6174 // that does not contain TOC data symbols. We generate two instructions as
6175 // described below. The first source operand is a symbol reference. If it
6176 // must be referenced via the TOC according to Subtarget, we generate:
6177 // [32-bit AIX]
6178 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6179 // [64-bit ELF/AIX]
6180 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6181 // Otherwise for medium code model ELF we generate:
6182 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6183
6184 // And finally for AIX with toc-data we generate:
6185 // [32-bit AIX]
6186 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6187 // [64-bit AIX]
6188 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6189
6190 SDValue GA = N->getOperand(0);
6191 SDValue TOCbase = N->getOperand(1);
6192
6193 EVT VT = Subtarget->getScalarIntVT();
6194 SDNode *Tmp = CurDAG->getMachineNode(
6195 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6196
6197 // On AIX, if the symbol has the toc-data attribute it will be defined
6198 // in the TOC entry, so we use an ADDItocL/ADDItocL8.
6199 if (isAIXABI && hasTocDataAttr(GA)) {
6200 ReplaceNode(
6201 N, CurDAG->getMachineNode(isPPC64 ? PPC::ADDItocL8 : PPC::ADDItocL,
6202 dl, VT, SDValue(Tmp, 0), GA));
6203 return;
6204 }
6205
6206 if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6207 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6208 // the address.
6209 SDNode *MN = CurDAG->getMachineNode(
6210 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6211
6212 transferMemOperands(N, MN);
6213 ReplaceNode(N, MN);
6214 return;
6215 }
6216
6217 assert(isPPC64 && "TOC_ENTRY already handled for 32-bit.");
6218 // Build the address relative to the TOC-pointer.
6219 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6220 SDValue(Tmp, 0), GA));
6221 return;
6222 }
6224 // Generate a PIC-safe GOT reference.
6225 assert(Subtarget->is32BitELFABI() &&
6226 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6227 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6228 PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6229 MVT::i32);
6230 return;
6231
6232 case PPCISD::VADD_SPLAT: {
6233 // This expands into one of three sequences, depending on whether
6234 // the first operand is odd or even, positive or negative.
6235 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6236 isa<ConstantSDNode>(N->getOperand(1)) &&
6237 "Invalid operand on VADD_SPLAT!");
6238
6239 int Elt = N->getConstantOperandVal(0);
6240 int EltSize = N->getConstantOperandVal(1);
6241 unsigned Opc1, Opc2, Opc3;
6242 EVT VT;
6243
6244 if (EltSize == 1) {
6245 Opc1 = PPC::VSPLTISB;
6246 Opc2 = PPC::VADDUBM;
6247 Opc3 = PPC::VSUBUBM;
6248 VT = MVT::v16i8;
6249 } else if (EltSize == 2) {
6250 Opc1 = PPC::VSPLTISH;
6251 Opc2 = PPC::VADDUHM;
6252 Opc3 = PPC::VSUBUHM;
6253 VT = MVT::v8i16;
6254 } else {
6255 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6256 Opc1 = PPC::VSPLTISW;
6257 Opc2 = PPC::VADDUWM;
6258 Opc3 = PPC::VSUBUWM;
6259 VT = MVT::v4i32;
6260 }
6261
6262 if ((Elt & 1) == 0) {
6263 // Elt is even, in the range [-32,-18] + [16,30].
6264 //
6265 // Convert: VADD_SPLAT elt, size
6266 // Into: tmp = VSPLTIS[BHW] elt
6267 // VADDU[BHW]M tmp, tmp
6268 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6269 SDValue EltVal = getI32Imm(Elt >> 1, dl);
6270 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6271 SDValue TmpVal = SDValue(Tmp, 0);
6272 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6273 return;
6274 } else if (Elt > 0) {
6275 // Elt is odd and positive, in the range [17,31].
6276 //
6277 // Convert: VADD_SPLAT elt, size
6278 // Into: tmp1 = VSPLTIS[BHW] elt-16
6279 // tmp2 = VSPLTIS[BHW] -16
6280 // VSUBU[BHW]M tmp1, tmp2
6281 SDValue EltVal = getI32Imm(Elt - 16, dl);
6282 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6283 EltVal = getI32Imm(-16, dl);
6284 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6285 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6286 SDValue(Tmp2, 0)));
6287 return;
6288 } else {
6289 // Elt is odd and negative, in the range [-31,-17].
6290 //
6291 // Convert: VADD_SPLAT elt, size
6292 // Into: tmp1 = VSPLTIS[BHW] elt+16
6293 // tmp2 = VSPLTIS[BHW] -16
6294 // VADDU[BHW]M tmp1, tmp2
6295 SDValue EltVal = getI32Imm(Elt + 16, dl);
6296 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6297 EltVal = getI32Imm(-16, dl);
6298 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6299 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6300 SDValue(Tmp2, 0)));
6301 return;
6302 }
6303 }
6304 case PPCISD::LD_SPLAT: {
6305 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6306 // no direct move, we don't need to use stack for this case. If target has
6307 // direct move, we should be able to get the best selection in the .td file.
6308 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6309 break;
6310
6311 EVT Type = N->getValueType(0);
6312 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6313 break;
6314
6315 // If the alignment for the load is 16 or bigger, we don't need the
6316 // permutated mask to get the required value. The value must be the 0
6317 // element in big endian target or 7/15 in little endian target in the
6318 // result vsx register of lvx instruction.
6319 // Select the instruction in the .td file.
6320 if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6321 isOffsetMultipleOf(N, 16))
6322 break;
6323
6324 SDValue ZeroReg =
6325 CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6326 Subtarget->getScalarIntVT());
6327 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6328 // v16i8 LD_SPLAT addr
6329 // ======>
6330 // Mask = LVSR/LVSL 0, addr
6331 // LoadLow = LVX 0, addr
6332 // Perm = VPERM LoadLow, LoadLow, Mask
6333 // Splat = VSPLTB 15/0, Perm
6334 //
6335 // v8i16 LD_SPLAT addr
6336 // ======>
6337 // Mask = LVSR/LVSL 0, addr
6338 // LoadLow = LVX 0, addr
6339 // LoadHigh = LVX (LI, 1), addr
6340 // Perm = VPERM LoadLow, LoadHigh, Mask
6341 // Splat = VSPLTH 7/0, Perm
6342 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6343 unsigned SplatElemIndex =
6344 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6345
6346 SDNode *Mask = CurDAG->getMachineNode(
6347 Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6348 N->getOperand(1));
6349
6350 SDNode *LoadLow =
6351 CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6352 {ZeroReg, N->getOperand(1), N->getOperand(0)});
6353
6354 SDNode *LoadHigh = LoadLow;
6355 if (Type == MVT::v8i16) {
6356 LoadHigh = CurDAG->getMachineNode(
6357 PPC::LVX, dl, MVT::v16i8, MVT::Other,
6358 {SDValue(CurDAG->getMachineNode(
6359 LIOpcode, dl, MVT::i32,
6360 CurDAG->getTargetConstant(1, dl, MVT::i8)),
6361 0),
6362 N->getOperand(1), SDValue(LoadLow, 1)});
6363 }
6364
6365 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6366 transferMemOperands(N, LoadHigh);
6367
6368 SDNode *Perm =
6369 CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6370 SDValue(LoadHigh, 0), SDValue(Mask, 0));
6371 CurDAG->SelectNodeTo(N, SplatOp, Type,
6372 CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6373 SDValue(Perm, 0));
6374 return;
6375 }
6376 }
6377
6378 SelectCode(N);
6379}
6380
6381// If the target supports the cmpb instruction, do the idiom recognition here.
6382// We don't do this as a DAG combine because we don't want to do it as nodes
6383// are being combined (because we might miss part of the eventual idiom). We
6384// don't want to do it during instruction selection because we want to reuse
6385// the logic for lowering the masking operations already part of the
6386// instruction selector.
6387SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6388 SDLoc dl(N);
6389
6390 assert(N->getOpcode() == ISD::OR &&
6391 "Only OR nodes are supported for CMPB");
6392
6393 SDValue Res;
6394 if (!Subtarget->hasCMPB())
6395 return Res;
6396
6397 if (N->getValueType(0) != MVT::i32 &&
6398 N->getValueType(0) != MVT::i64)
6399 return Res;
6400
6401 EVT VT = N->getValueType(0);
6402
6403 SDValue RHS, LHS;
6404 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6405 uint64_t Mask = 0, Alt = 0;
6406
6407 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6408 uint64_t &Mask, uint64_t &Alt,
6409 SDValue &LHS, SDValue &RHS) {
6410 if (O.getOpcode() != ISD::SELECT_CC)
6411 return false;
6412 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6413
6414 if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6415 !isa<ConstantSDNode>(O.getOperand(3)))
6416 return false;
6417
6418 uint64_t PM = O.getConstantOperandVal(2);
6419 uint64_t PAlt = O.getConstantOperandVal(3);
6420 for (b = 0; b < 8; ++b) {
6421 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6422 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6423 break;
6424 }
6425
6426 if (b == 8)
6427 return false;
6428 Mask |= PM;
6429 Alt |= PAlt;
6430
6431 if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6432 O.getConstantOperandVal(1) != 0) {
6433 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6434 if (Op0.getOpcode() == ISD::TRUNCATE)
6435 Op0 = Op0.getOperand(0);
6436 if (Op1.getOpcode() == ISD::TRUNCATE)
6437 Op1 = Op1.getOperand(0);
6438
6439 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6440 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6442
6443 unsigned Bits = Op0.getValueSizeInBits();
6444 if (b != Bits/8-1)
6445 return false;
6446 if (Op0.getConstantOperandVal(1) != Bits-8)
6447 return false;
6448
6449 LHS = Op0.getOperand(0);
6450 RHS = Op1.getOperand(0);
6451 return true;
6452 }
6453
6454 // When we have small integers (i16 to be specific), the form present
6455 // post-legalization uses SETULT in the SELECT_CC for the
6456 // higher-order byte, depending on the fact that the
6457 // even-higher-order bytes are known to all be zero, for example:
6458 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6459 // (so when the second byte is the same, because all higher-order
6460 // bits from bytes 3 and 4 are known to be zero, the result of the
6461 // xor can be at most 255)
6462 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6463 isa<ConstantSDNode>(O.getOperand(1))) {
6464
6465 uint64_t ULim = O.getConstantOperandVal(1);
6466 if (ULim != (UINT64_C(1) << b*8))
6467 return false;
6468
6469 // Now we need to make sure that the upper bytes are known to be
6470 // zero.
6471 unsigned Bits = Op0.getValueSizeInBits();
6472 if (!CurDAG->MaskedValueIsZero(
6473 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6474 return false;
6475
6476 LHS = Op0.getOperand(0);
6477 RHS = Op0.getOperand(1);
6478 return true;
6479 }
6480
6481 return false;
6482 }
6483
6484 if (CC != ISD::SETEQ)
6485 return false;
6486
6487 SDValue Op = O.getOperand(0);
6488 if (Op.getOpcode() == ISD::AND) {
6489 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6490 return false;
6491 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6492 return false;
6493
6494 SDValue XOR = Op.getOperand(0);
6495 if (XOR.getOpcode() == ISD::TRUNCATE)
6496 XOR = XOR.getOperand(0);
6497 if (XOR.getOpcode() != ISD::XOR)
6498 return false;
6499
6500 LHS = XOR.getOperand(0);
6501 RHS = XOR.getOperand(1);
6502 return true;
6503 } else if (Op.getOpcode() == ISD::SRL) {
6504 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6505 return false;
6506 unsigned Bits = Op.getValueSizeInBits();
6507 if (b != Bits/8-1)
6508 return false;
6509 if (Op.getConstantOperandVal(1) != Bits-8)
6510 return false;
6511
6512 SDValue XOR = Op.getOperand(0);
6513 if (XOR.getOpcode() == ISD::TRUNCATE)
6514 XOR = XOR.getOperand(0);
6515 if (XOR.getOpcode() != ISD::XOR)
6516 return false;
6517
6518 LHS = XOR.getOperand(0);
6519 RHS = XOR.getOperand(1);
6520 return true;
6521 }
6522
6523 return false;
6524 };
6525
6527 while (!Queue.empty()) {
6528 SDValue V = Queue.pop_back_val();
6529
6530 for (const SDValue &O : V.getNode()->ops()) {
6531 unsigned b = 0;
6532 uint64_t M = 0, A = 0;
6533 SDValue OLHS, ORHS;
6534 if (O.getOpcode() == ISD::OR) {
6535 Queue.push_back(O);
6536 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6537 if (!LHS) {
6538 LHS = OLHS;
6539 RHS = ORHS;
6540 BytesFound[b] = true;
6541 Mask |= M;
6542 Alt |= A;
6543 } else if ((LHS == ORHS && RHS == OLHS) ||
6544 (RHS == ORHS && LHS == OLHS)) {
6545 BytesFound[b] = true;
6546 Mask |= M;
6547 Alt |= A;
6548 } else {
6549 return Res;
6550 }
6551 } else {
6552 return Res;
6553 }
6554 }
6555 }
6556
6557 unsigned LastB = 0, BCnt = 0;
6558 for (unsigned i = 0; i < 8; ++i)
6559 if (BytesFound[LastB]) {
6560 ++BCnt;
6561 LastB = i;
6562 }
6563
6564 if (!LastB || BCnt < 2)
6565 return Res;
6566
6567 // Because we'll be zero-extending the output anyway if don't have a specific
6568 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6569 if (LHS.getValueType() != VT) {
6570 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6571 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6572 }
6573
6574 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6575
6576 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6577 if (NonTrivialMask && !Alt) {
6578 // Res = Mask & CMPB
6579 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6580 CurDAG->getConstant(Mask, dl, VT));
6581 } else if (Alt) {
6582 // Res = (CMPB & Mask) | (~CMPB & Alt)
6583 // Which, as suggested here:
6584 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6585 // can be written as:
6586 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6587 // useful because the (Alt ^ Mask) can be pre-computed.
6588 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6589 CurDAG->getConstant(Mask ^ Alt, dl, VT));
6590 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6591 CurDAG->getConstant(Alt, dl, VT));
6592 }
6593
6594 return Res;
6595}
6596
6597// When CR bit registers are enabled, an extension of an i1 variable to a i32
6598// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6599// involves constant materialization of a 0 or a 1 or both. If the result of
6600// the extension is then operated upon by some operator that can be constant
6601// folded with a constant 0 or 1, and that constant can be materialized using
6602// only one instruction (like a zero or one), then we should fold in those
6603// operations with the select.
6604void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6605 if (!Subtarget->useCRBits())
6606 return;
6607
6608 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6609 N->getOpcode() != ISD::SIGN_EXTEND &&
6610 N->getOpcode() != ISD::ANY_EXTEND)
6611 return;
6612
6613 if (N->getOperand(0).getValueType() != MVT::i1)
6614 return;
6615
6616 if (!N->hasOneUse())
6617 return;
6618
6619 SDLoc dl(N);
6620 EVT VT = N->getValueType(0);
6621 SDValue Cond = N->getOperand(0);
6622 SDValue ConstTrue = CurDAG->getSignedConstant(
6623 N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6624 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6625
6626 do {
6627 SDNode *User = *N->user_begin();
6628 if (User->getNumOperands() != 2)
6629 break;
6630
6631 auto TryFold = [this, N, User, dl](SDValue Val) {
6632 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6633 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6634 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6635
6636 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6637 User->getValueType(0), {O0, O1});
6638 };
6639
6640 // FIXME: When the semantics of the interaction between select and undef
6641 // are clearly defined, it may turn out to be unnecessary to break here.
6642 SDValue TrueRes = TryFold(ConstTrue);
6643 if (!TrueRes || TrueRes.isUndef())
6644 break;
6645 SDValue FalseRes = TryFold(ConstFalse);
6646 if (!FalseRes || FalseRes.isUndef())
6647 break;
6648
6649 // For us to materialize these using one instruction, we must be able to
6650 // represent them as signed 16-bit integers.
6651 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6652 if (!isInt<16>(True) || !isInt<16>(False))
6653 break;
6654
6655 // We can replace User with a new SELECT node, and try again to see if we
6656 // can fold the select with its user.
6657 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6658 N = User;
6659 ConstTrue = TrueRes;
6660 ConstFalse = FalseRes;
6661 } while (N->hasOneUse());
6662}
6663
6664void PPCDAGToDAGISel::PreprocessISelDAG() {
6665 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6666
6667 bool MadeChange = false;
6668 while (Position != CurDAG->allnodes_begin()) {
6669 SDNode *N = &*--Position;
6670 if (N->use_empty())
6671 continue;
6672
6673 SDValue Res;
6674 switch (N->getOpcode()) {
6675 default: break;
6676 case ISD::OR:
6677 Res = combineToCMPB(N);
6678 break;
6679 }
6680
6681 if (!Res)
6682 foldBoolExts(Res, N);
6683
6684 if (Res) {
6685 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6686 LLVM_DEBUG(N->dump(CurDAG));
6687 LLVM_DEBUG(dbgs() << "\nNew: ");
6688 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6689 LLVM_DEBUG(dbgs() << "\n");
6690
6691 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6692 MadeChange = true;
6693 }
6694 }
6695
6696 if (MadeChange)
6697 CurDAG->RemoveDeadNodes();
6698}
6699
6700/// PostprocessISelDAG - Perform some late peephole optimizations
6701/// on the DAG representation.
6702void PPCDAGToDAGISel::PostprocessISelDAG() {
6703 // Skip peepholes at -O0.
6704 if (TM.getOptLevel() == CodeGenOptLevel::None)
6705 return;
6706
6707 PeepholePPC64();
6708 PeepholeCROps();
6709 PeepholePPC64ZExt();
6710}
6711
6712// Check if all users of this node will become isel where the second operand
6713// is the constant zero. If this is so, and if we can negate the condition,
6714// then we can flip the true and false operands. This will allow the zero to
6715// be folded with the isel so that we don't need to materialize a register
6716// containing zero.
6717bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6718 for (const SDNode *User : N->users()) {
6719 if (!User->isMachineOpcode())
6720 return false;
6721 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6722 User->getMachineOpcode() != PPC::SELECT_I8)
6723 return false;
6724
6725 SDNode *Op1 = User->getOperand(1).getNode();
6726 SDNode *Op2 = User->getOperand(2).getNode();
6727 // If we have a degenerate select with two equal operands, swapping will
6728 // not do anything, and we may run into an infinite loop.
6729 if (Op1 == Op2)
6730 return false;
6731
6732 if (!Op2->isMachineOpcode())
6733 return false;
6734
6735 if (Op2->getMachineOpcode() != PPC::LI &&
6736 Op2->getMachineOpcode() != PPC::LI8)
6737 return false;
6738
6739 if (!isNullConstant(Op2->getOperand(0)))
6740 return false;
6741 }
6742
6743 return true;
6744}
6745
6746void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6747 SmallVector<SDNode *, 4> ToReplace;
6748 for (SDNode *User : N->users()) {
6749 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6750 User->getMachineOpcode() == PPC::SELECT_I8) &&
6751 "Must have all select users");
6752 ToReplace.push_back(User);
6753 }
6754
6755 for (SDNode *User : ToReplace) {
6756 SDNode *ResNode =
6757 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6758 User->getValueType(0), User->getOperand(0),
6759 User->getOperand(2),
6760 User->getOperand(1));
6761
6762 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6763 LLVM_DEBUG(User->dump(CurDAG));
6764 LLVM_DEBUG(dbgs() << "\nNew: ");
6765 LLVM_DEBUG(ResNode->dump(CurDAG));
6766 LLVM_DEBUG(dbgs() << "\n");
6767
6768 ReplaceUses(User, ResNode);
6769 }
6770}
6771
6772void PPCDAGToDAGISel::PeepholeCROps() {
6773 bool IsModified;
6774 do {
6775 IsModified = false;
6776 for (SDNode &Node : CurDAG->allnodes()) {
6777 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6778 if (!MachineNode || MachineNode->use_empty())
6779 continue;
6780 SDNode *ResNode = MachineNode;
6781
6782 bool Op1Set = false, Op1Unset = false,
6783 Op1Not = false,
6784 Op2Set = false, Op2Unset = false,
6785 Op2Not = false;
6786
6787 unsigned Opcode = MachineNode->getMachineOpcode();
6788 switch (Opcode) {
6789 default: break;
6790 case PPC::CRAND:
6791 case PPC::CRNAND:
6792 case PPC::CROR:
6793 case PPC::CRXOR:
6794 case PPC::CRNOR:
6795 case PPC::CREQV:
6796 case PPC::CRANDC:
6797 case PPC::CRORC: {
6798 SDValue Op = MachineNode->getOperand(1);
6799 if (Op.isMachineOpcode()) {
6800 if (Op.getMachineOpcode() == PPC::CRSET)
6801 Op2Set = true;
6802 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6803 Op2Unset = true;
6804 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6805 Op.getOperand(0) == Op.getOperand(1)) ||
6806 Op.getMachineOpcode() == PPC::CRNOT)
6807 Op2Not = true;
6808 }
6809 [[fallthrough]];
6810 }
6811 case PPC::BC:
6812 case PPC::BCn:
6813 case PPC::SELECT_I4:
6814 case PPC::SELECT_I8:
6815 case PPC::SELECT_F4:
6816 case PPC::SELECT_F8:
6817 case PPC::SELECT_SPE:
6818 case PPC::SELECT_SPE4:
6819 case PPC::SELECT_VRRC:
6820 case PPC::SELECT_VSFRC:
6821 case PPC::SELECT_VSSRC:
6822 case PPC::SELECT_VSRC: {
6823 SDValue Op = MachineNode->getOperand(0);
6824 if (Op.isMachineOpcode()) {
6825 if (Op.getMachineOpcode() == PPC::CRSET)
6826 Op1Set = true;
6827 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6828 Op1Unset = true;
6829 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6830 Op.getOperand(0) == Op.getOperand(1)) ||
6831 Op.getMachineOpcode() == PPC::CRNOT)
6832 Op1Not = true;
6833 }
6834 }
6835 break;
6836 }
6837
6838 bool SelectSwap = false;
6839 switch (Opcode) {
6840 default: break;
6841 case PPC::CRAND:
6842 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6843 // x & x = x
6844 ResNode = MachineNode->getOperand(0).getNode();
6845 else if (Op1Set)
6846 // 1 & y = y
6847 ResNode = MachineNode->getOperand(1).getNode();
6848 else if (Op2Set)
6849 // x & 1 = x
6850 ResNode = MachineNode->getOperand(0).getNode();
6851 else if (Op1Unset || Op2Unset)
6852 // x & 0 = 0 & y = 0
6853 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6854 MVT::i1);
6855 else if (Op1Not)
6856 // ~x & y = andc(y, x)
6857 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6858 MVT::i1, MachineNode->getOperand(1),
6859 MachineNode->getOperand(0).
6860 getOperand(0));
6861 else if (Op2Not)
6862 // x & ~y = andc(x, y)
6863 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6864 MVT::i1, MachineNode->getOperand(0),
6865 MachineNode->getOperand(1).
6866 getOperand(0));
6867 else if (AllUsersSelectZero(MachineNode)) {
6868 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6869 MVT::i1, MachineNode->getOperand(0),
6870 MachineNode->getOperand(1));
6871 SelectSwap = true;
6872 }
6873 break;
6874 case PPC::CRNAND:
6875 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6876 // nand(x, x) -> nor(x, x)
6877 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6878 MVT::i1, MachineNode->getOperand(0),
6879 MachineNode->getOperand(0));
6880 else if (Op1Set)
6881 // nand(1, y) -> nor(y, y)
6882 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6883 MVT::i1, MachineNode->getOperand(1),
6884 MachineNode->getOperand(1));
6885 else if (Op2Set)
6886 // nand(x, 1) -> nor(x, x)
6887 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6888 MVT::i1, MachineNode->getOperand(0),
6889 MachineNode->getOperand(0));
6890 else if (Op1Unset || Op2Unset)
6891 // nand(x, 0) = nand(0, y) = 1
6892 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6893 MVT::i1);
6894 else if (Op1Not)
6895 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6896 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6897 MVT::i1, MachineNode->getOperand(0).
6898 getOperand(0),
6899 MachineNode->getOperand(1));
6900 else if (Op2Not)
6901 // nand(x, ~y) = ~x | y = orc(y, x)
6902 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6903 MVT::i1, MachineNode->getOperand(1).
6904 getOperand(0),
6905 MachineNode->getOperand(0));
6906 else if (AllUsersSelectZero(MachineNode)) {
6907 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6908 MVT::i1, MachineNode->getOperand(0),
6909 MachineNode->getOperand(1));
6910 SelectSwap = true;
6911 }
6912 break;
6913 case PPC::CROR:
6914 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6915 // x | x = x
6916 ResNode = MachineNode->getOperand(0).getNode();
6917 else if (Op1Set || Op2Set)
6918 // x | 1 = 1 | y = 1
6919 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6920 MVT::i1);
6921 else if (Op1Unset)
6922 // 0 | y = y
6923 ResNode = MachineNode->getOperand(1).getNode();
6924 else if (Op2Unset)
6925 // x | 0 = x
6926 ResNode = MachineNode->getOperand(0).getNode();
6927 else if (Op1Not)
6928 // ~x | y = orc(y, x)
6929 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6930 MVT::i1, MachineNode->getOperand(1),
6931 MachineNode->getOperand(0).
6932 getOperand(0));
6933 else if (Op2Not)
6934 // x | ~y = orc(x, y)
6935 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6936 MVT::i1, MachineNode->getOperand(0),
6937 MachineNode->getOperand(1).
6938 getOperand(0));
6939 else if (AllUsersSelectZero(MachineNode)) {
6940 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6941 MVT::i1, MachineNode->getOperand(0),
6942 MachineNode->getOperand(1));
6943 SelectSwap = true;
6944 }
6945 break;
6946 case PPC::CRXOR:
6947 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6948 // xor(x, x) = 0
6949 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6950 MVT::i1);
6951 else if (Op1Set)
6952 // xor(1, y) -> nor(y, y)
6953 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6954 MVT::i1, MachineNode->getOperand(1),
6955 MachineNode->getOperand(1));
6956 else if (Op2Set)
6957 // xor(x, 1) -> nor(x, x)
6958 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6959 MVT::i1, MachineNode->getOperand(0),
6960 MachineNode->getOperand(0));
6961 else if (Op1Unset)
6962 // xor(0, y) = y
6963 ResNode = MachineNode->getOperand(1).getNode();
6964 else if (Op2Unset)
6965 // xor(x, 0) = x
6966 ResNode = MachineNode->getOperand(0).getNode();
6967 else if (Op1Not)
6968 // xor(~x, y) = eqv(x, y)
6969 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6970 MVT::i1, MachineNode->getOperand(0).
6971 getOperand(0),
6972 MachineNode->getOperand(1));
6973 else if (Op2Not)
6974 // xor(x, ~y) = eqv(x, y)
6975 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6976 MVT::i1, MachineNode->getOperand(0),
6977 MachineNode->getOperand(1).
6978 getOperand(0));
6979 else if (AllUsersSelectZero(MachineNode)) {
6980 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6981 MVT::i1, MachineNode->getOperand(0),
6982 MachineNode->getOperand(1));
6983 SelectSwap = true;
6984 }
6985 break;
6986 case PPC::CRNOR:
6987 if (Op1Set || Op2Set)
6988 // nor(1, y) -> 0
6989 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6990 MVT::i1);
6991 else if (Op1Unset)
6992 // nor(0, y) = ~y -> nor(y, y)
6993 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6994 MVT::i1, MachineNode->getOperand(1),
6995 MachineNode->getOperand(1));
6996 else if (Op2Unset)
6997 // nor(x, 0) = ~x
6998 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6999 MVT::i1, MachineNode->getOperand(0),
7000 MachineNode->getOperand(0));
7001 else if (Op1Not)
7002 // nor(~x, y) = andc(x, y)
7003 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7004 MVT::i1, MachineNode->getOperand(0).
7005 getOperand(0),
7006 MachineNode->getOperand(1));
7007 else if (Op2Not)
7008 // nor(x, ~y) = andc(y, x)
7009 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7010 MVT::i1, MachineNode->getOperand(1).
7011 getOperand(0),
7012 MachineNode->getOperand(0));
7013 else if (AllUsersSelectZero(MachineNode)) {
7014 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7015 MVT::i1, MachineNode->getOperand(0),
7016 MachineNode->getOperand(1));
7017 SelectSwap = true;
7018 }
7019 break;
7020 case PPC::CREQV:
7021 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7022 // eqv(x, x) = 1
7023 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7024 MVT::i1);
7025 else if (Op1Set)
7026 // eqv(1, y) = y
7027 ResNode = MachineNode->getOperand(1).getNode();
7028 else if (Op2Set)
7029 // eqv(x, 1) = x
7030 ResNode = MachineNode->getOperand(0).getNode();
7031 else if (Op1Unset)
7032 // eqv(0, y) = ~y -> nor(y, y)
7033 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7034 MVT::i1, MachineNode->getOperand(1),
7035 MachineNode->getOperand(1));
7036 else if (Op2Unset)
7037 // eqv(x, 0) = ~x
7038 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7039 MVT::i1, MachineNode->getOperand(0),
7040 MachineNode->getOperand(0));
7041 else if (Op1Not)
7042 // eqv(~x, y) = xor(x, y)
7043 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7044 MVT::i1, MachineNode->getOperand(0).
7045 getOperand(0),
7046 MachineNode->getOperand(1));
7047 else if (Op2Not)
7048 // eqv(x, ~y) = xor(x, y)
7049 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7050 MVT::i1, MachineNode->getOperand(0),
7051 MachineNode->getOperand(1).
7052 getOperand(0));
7053 else if (AllUsersSelectZero(MachineNode)) {
7054 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7055 MVT::i1, MachineNode->getOperand(0),
7056 MachineNode->getOperand(1));
7057 SelectSwap = true;
7058 }
7059 break;
7060 case PPC::CRANDC:
7061 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7062 // andc(x, x) = 0
7063 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7064 MVT::i1);
7065 else if (Op1Set)
7066 // andc(1, y) = ~y
7067 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7068 MVT::i1, MachineNode->getOperand(1),
7069 MachineNode->getOperand(1));
7070 else if (Op1Unset || Op2Set)
7071 // andc(0, y) = andc(x, 1) = 0
7072 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7073 MVT::i1);
7074 else if (Op2Unset)
7075 // andc(x, 0) = x
7076 ResNode = MachineNode->getOperand(0).getNode();
7077 else if (Op1Not)
7078 // andc(~x, y) = ~(x | y) = nor(x, y)
7079 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7080 MVT::i1, MachineNode->getOperand(0).
7081 getOperand(0),
7082 MachineNode->getOperand(1));
7083 else if (Op2Not)
7084 // andc(x, ~y) = x & y
7085 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7086 MVT::i1, MachineNode->getOperand(0),
7087 MachineNode->getOperand(1).
7088 getOperand(0));
7089 else if (AllUsersSelectZero(MachineNode)) {
7090 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7091 MVT::i1, MachineNode->getOperand(1),
7092 MachineNode->getOperand(0));
7093 SelectSwap = true;
7094 }
7095 break;
7096 case PPC::CRORC:
7097 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7098 // orc(x, x) = 1
7099 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7100 MVT::i1);
7101 else if (Op1Set || Op2Unset)
7102 // orc(1, y) = orc(x, 0) = 1
7103 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7104 MVT::i1);
7105 else if (Op2Set)
7106 // orc(x, 1) = x
7107 ResNode = MachineNode->getOperand(0).getNode();
7108 else if (Op1Unset)
7109 // orc(0, y) = ~y
7110 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7111 MVT::i1, MachineNode->getOperand(1),
7112 MachineNode->getOperand(1));
7113 else if (Op1Not)
7114 // orc(~x, y) = ~(x & y) = nand(x, y)
7115 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7116 MVT::i1, MachineNode->getOperand(0).
7117 getOperand(0),
7118 MachineNode->getOperand(1));
7119 else if (Op2Not)
7120 // orc(x, ~y) = x | y
7121 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7122 MVT::i1, MachineNode->getOperand(0),
7123 MachineNode->getOperand(1).
7124 getOperand(0));
7125 else if (AllUsersSelectZero(MachineNode)) {
7126 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7127 MVT::i1, MachineNode->getOperand(1),
7128 MachineNode->getOperand(0));
7129 SelectSwap = true;
7130 }
7131 break;
7132 case PPC::SELECT_I4:
7133 case PPC::SELECT_I8:
7134 case PPC::SELECT_F4:
7135 case PPC::SELECT_F8:
7136 case PPC::SELECT_SPE:
7137 case PPC::SELECT_SPE4:
7138 case PPC::SELECT_VRRC:
7139 case PPC::SELECT_VSFRC:
7140 case PPC::SELECT_VSSRC:
7141 case PPC::SELECT_VSRC:
7142 if (Op1Set)
7143 ResNode = MachineNode->getOperand(1).getNode();
7144 else if (Op1Unset)
7145 ResNode = MachineNode->getOperand(2).getNode();
7146 else if (Op1Not)
7147 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7148 SDLoc(MachineNode),
7149 MachineNode->getValueType(0),
7150 MachineNode->getOperand(0).
7151 getOperand(0),
7152 MachineNode->getOperand(2),
7153 MachineNode->getOperand(1));
7154 break;
7155 case PPC::BC:
7156 case PPC::BCn:
7157 if (Op1Not)
7158 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7159 PPC::BC,
7160 SDLoc(MachineNode),
7161 MVT::Other,
7162 MachineNode->getOperand(0).
7163 getOperand(0),
7164 MachineNode->getOperand(1),
7165 MachineNode->getOperand(2));
7166 // FIXME: Handle Op1Set, Op1Unset here too.
7167 break;
7168 }
7169
7170 // If we're inverting this node because it is used only by selects that
7171 // we'd like to swap, then swap the selects before the node replacement.
7172 if (SelectSwap)
7173 SwapAllSelectUsers(MachineNode);
7174
7175 if (ResNode != MachineNode) {
7176 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7177 LLVM_DEBUG(MachineNode->dump(CurDAG));
7178 LLVM_DEBUG(dbgs() << "\nNew: ");
7179 LLVM_DEBUG(ResNode->dump(CurDAG));
7180 LLVM_DEBUG(dbgs() << "\n");
7181
7182 ReplaceUses(MachineNode, ResNode);
7183 IsModified = true;
7184 }
7185 }
7186 if (IsModified)
7187 CurDAG->RemoveDeadNodes();
7188 } while (IsModified);
7189}
7190
7191// Gather the set of 32-bit operations that are known to have their
7192// higher-order 32 bits zero, where ToPromote contains all such operations.
7194 SmallPtrSetImpl<SDNode *> &ToPromote) {
7195 if (!Op32.isMachineOpcode())
7196 return false;
7197
7198 // First, check for the "frontier" instructions (those that will clear the
7199 // higher-order 32 bits.
7200
7201 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7202 // around. If it does not, then these instructions will clear the
7203 // higher-order bits.
7204 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7205 Op32.getMachineOpcode() == PPC::RLWNM) &&
7206 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7207 ToPromote.insert(Op32.getNode());
7208 return true;
7209 }
7210
7211 // SLW and SRW always clear the higher-order bits.
7212 if (Op32.getMachineOpcode() == PPC::SLW ||
7213 Op32.getMachineOpcode() == PPC::SRW) {
7214 ToPromote.insert(Op32.getNode());
7215 return true;
7216 }
7217
7218 // For LI and LIS, we need the immediate to be positive (so that it is not
7219 // sign extended).
7220 if (Op32.getMachineOpcode() == PPC::LI ||
7221 Op32.getMachineOpcode() == PPC::LIS) {
7222 if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7223 return false;
7224
7225 ToPromote.insert(Op32.getNode());
7226 return true;
7227 }
7228
7229 // LHBRX and LWBRX always clear the higher-order bits.
7230 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7231 Op32.getMachineOpcode() == PPC::LWBRX) {
7232 ToPromote.insert(Op32.getNode());
7233 return true;
7234 }
7235
7236 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7237 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7238 Op32.getMachineOpcode() == PPC::CNTTZW) {
7239 ToPromote.insert(Op32.getNode());
7240 return true;
7241 }
7242
7243 // Next, check for those instructions we can look through.
7244
7245 // Assuming the mask does not wrap around, then the higher-order bits are
7246 // taken directly from the first operand.
7247 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7248 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7249 SmallPtrSet<SDNode *, 16> ToPromote1;
7250 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7251 return false;
7252
7253 ToPromote.insert(Op32.getNode());
7254 ToPromote.insert_range(ToPromote1);
7255 return true;
7256 }
7257
7258 // For OR, the higher-order bits are zero if that is true for both operands.
7259 // For SELECT_I4, the same is true (but the relevant operand numbers are
7260 // shifted by 1).
7261 if (Op32.getMachineOpcode() == PPC::OR ||
7262 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7263 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7264 SmallPtrSet<SDNode *, 16> ToPromote1;
7265 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7266 return false;
7267 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7268 return false;
7269
7270 ToPromote.insert(Op32.getNode());
7271 ToPromote.insert_range(ToPromote1);
7272 return true;
7273 }
7274
7275 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7276 // zero, and also for the constant to be positive (so that it is not sign
7277 // extended).
7278 if (Op32.getMachineOpcode() == PPC::ORI ||
7279 Op32.getMachineOpcode() == PPC::ORIS) {
7280 SmallPtrSet<SDNode *, 16> ToPromote1;
7281 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7282 return false;
7283 if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7284 return false;
7285
7286 ToPromote.insert(Op32.getNode());
7287 ToPromote.insert_range(ToPromote1);
7288 return true;
7289 }
7290
7291 // The higher-order bits of AND are zero if that is true for at least one of
7292 // the operands.
7293 if (Op32.getMachineOpcode() == PPC::AND) {
7294 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7295 bool Op0OK =
7296 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7297 bool Op1OK =
7298 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7299 if (!Op0OK && !Op1OK)
7300 return false;
7301
7302 ToPromote.insert(Op32.getNode());
7303
7304 if (Op0OK)
7305 ToPromote.insert_range(ToPromote1);
7306
7307 if (Op1OK)
7308 ToPromote.insert_range(ToPromote2);
7309
7310 return true;
7311 }
7312
7313 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7314 // of the first operand, or if the second operand is positive (so that it is
7315 // not sign extended).
7316 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7317 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7318 SmallPtrSet<SDNode *, 16> ToPromote1;
7319 bool Op0OK =
7320 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7321 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7322 if (!Op0OK && !Op1OK)
7323 return false;
7324
7325 ToPromote.insert(Op32.getNode());
7326
7327 if (Op0OK)
7328 ToPromote.insert_range(ToPromote1);
7329
7330 return true;
7331 }
7332
7333 return false;
7334}
7335
7336void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7337 if (!Subtarget->isPPC64())
7338 return;
7339
7340 // When we zero-extend from i32 to i64, we use a pattern like this:
7341 // def : Pat<(i64 (zext i32:$in)),
7342 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7343 // 0, 32)>;
7344 // There are several 32-bit shift/rotate instructions, however, that will
7345 // clear the higher-order bits of their output, rendering the RLDICL
7346 // unnecessary. When that happens, we remove it here, and redefine the
7347 // relevant 32-bit operation to be a 64-bit operation.
7348
7349 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7350
7351 bool MadeChange = false;
7352 while (Position != CurDAG->allnodes_begin()) {
7353 SDNode *N = &*--Position;
7354 // Skip dead nodes and any non-machine opcodes.
7355 if (N->use_empty() || !N->isMachineOpcode())
7356 continue;
7357
7358 if (N->getMachineOpcode() != PPC::RLDICL)
7359 continue;
7360
7361 if (N->getConstantOperandVal(1) != 0 ||
7362 N->getConstantOperandVal(2) != 32)
7363 continue;
7364
7365 SDValue ISR = N->getOperand(0);
7366 if (!ISR.isMachineOpcode() ||
7367 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7368 continue;
7369
7370 if (!ISR.hasOneUse())
7371 continue;
7372
7373 if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7374 continue;
7375
7376 SDValue IDef = ISR.getOperand(0);
7377 if (!IDef.isMachineOpcode() ||
7378 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7379 continue;
7380
7381 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7382 // can get rid of it.
7383
7384 SDValue Op32 = ISR->getOperand(1);
7385 if (!Op32.isMachineOpcode())
7386 continue;
7387
7388 // There are some 32-bit instructions that always clear the high-order 32
7389 // bits, there are also some instructions (like AND) that we can look
7390 // through.
7391 SmallPtrSet<SDNode *, 16> ToPromote;
7392 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7393 continue;
7394
7395 // If the ToPromote set contains nodes that have uses outside of the set
7396 // (except for the original INSERT_SUBREG), then abort the transformation.
7397 bool OutsideUse = false;
7398 for (SDNode *PN : ToPromote) {
7399 for (SDNode *UN : PN->users()) {
7400 if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7401 OutsideUse = true;
7402 break;
7403 }
7404 }
7405
7406 if (OutsideUse)
7407 break;
7408 }
7409 if (OutsideUse)
7410 continue;
7411
7412 MadeChange = true;
7413
7414 // We now know that this zero extension can be removed by promoting to
7415 // nodes in ToPromote to 64-bit operations, where for operations in the
7416 // frontier of the set, we need to insert INSERT_SUBREGs for their
7417 // operands.
7418 for (SDNode *PN : ToPromote) {
7419 unsigned NewOpcode;
7420 switch (PN->getMachineOpcode()) {
7421 default:
7422 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7423 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7424 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7425 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7426 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7427 case PPC::LI: NewOpcode = PPC::LI8; break;
7428 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7429 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7430 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7431 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7432 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7433 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7434 case PPC::OR: NewOpcode = PPC::OR8; break;
7435 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7436 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7437 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7438 case PPC::AND: NewOpcode = PPC::AND8; break;
7439 case PPC::ANDI_rec:
7440 NewOpcode = PPC::ANDI8_rec;
7441 break;
7442 case PPC::ANDIS_rec:
7443 NewOpcode = PPC::ANDIS8_rec;
7444 break;
7445 }
7446
7447 // Note: During the replacement process, the nodes will be in an
7448 // inconsistent state (some instructions will have operands with values
7449 // of the wrong type). Once done, however, everything should be right
7450 // again.
7451
7453 for (const SDValue &V : PN->ops()) {
7454 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7455 !isa<ConstantSDNode>(V)) {
7456 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7457 SDNode *ReplOp =
7458 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7459 ISR.getNode()->getVTList(), ReplOpOps);
7460 Ops.push_back(SDValue(ReplOp, 0));
7461 } else {
7462 Ops.push_back(V);
7463 }
7464 }
7465
7466 // Because all to-be-promoted nodes only have users that are other
7467 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7468 // the i32 result value type with i64.
7469
7470 SmallVector<EVT, 2> NewVTs;
7471 SDVTList VTs = PN->getVTList();
7472 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7473 if (VTs.VTs[i] == MVT::i32)
7474 NewVTs.push_back(MVT::i64);
7475 else
7476 NewVTs.push_back(VTs.VTs[i]);
7477
7478 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7479 LLVM_DEBUG(PN->dump(CurDAG));
7480
7481 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7482
7483 LLVM_DEBUG(dbgs() << "\nNew: ");
7484 LLVM_DEBUG(PN->dump(CurDAG));
7485 LLVM_DEBUG(dbgs() << "\n");
7486 }
7487
7488 // Now we replace the original zero extend and its associated INSERT_SUBREG
7489 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7490 // return an i64).
7491
7492 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7493 LLVM_DEBUG(N->dump(CurDAG));
7494 LLVM_DEBUG(dbgs() << "\nNew: ");
7495 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7496 LLVM_DEBUG(dbgs() << "\n");
7497
7498 ReplaceUses(N, Op32.getNode());
7499 }
7500
7501 if (MadeChange)
7502 CurDAG->RemoveDeadNodes();
7503}
7504
7505static bool isVSXSwap(SDValue N) {
7506 if (!N->isMachineOpcode())
7507 return false;
7508 unsigned Opc = N->getMachineOpcode();
7509
7510 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7511 // operand is 2.
7512 if (Opc == PPC::XXPERMDIs) {
7513 return isa<ConstantSDNode>(N->getOperand(1)) &&
7514 N->getConstantOperandVal(1) == 2;
7515 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7516 return N->getOperand(0) == N->getOperand(1) &&
7517 isa<ConstantSDNode>(N->getOperand(2)) &&
7518 N->getConstantOperandVal(2) == 2;
7519 }
7520
7521 return false;
7522}
7523
7524// TODO: Make this complete and replace with a table-gen bit.
7526 if (!N->isMachineOpcode())
7527 return false;
7528 unsigned Opc = N->getMachineOpcode();
7529
7530 switch (Opc) {
7531 default:
7532 return false;
7533 case PPC::VAVGSB:
7534 case PPC::VAVGUB:
7535 case PPC::VAVGSH:
7536 case PPC::VAVGUH:
7537 case PPC::VAVGSW:
7538 case PPC::VAVGUW:
7539 case PPC::VMAXFP:
7540 case PPC::VMAXSB:
7541 case PPC::VMAXUB:
7542 case PPC::VMAXSH:
7543 case PPC::VMAXUH:
7544 case PPC::VMAXSW:
7545 case PPC::VMAXUW:
7546 case PPC::VMINFP:
7547 case PPC::VMINSB:
7548 case PPC::VMINUB:
7549 case PPC::VMINSH:
7550 case PPC::VMINUH:
7551 case PPC::VMINSW:
7552 case PPC::VMINUW:
7553 case PPC::VADDFP:
7554 case PPC::VADDUBM:
7555 case PPC::VADDUHM:
7556 case PPC::VADDUWM:
7557 case PPC::VSUBFP:
7558 case PPC::VSUBUBM:
7559 case PPC::VSUBUHM:
7560 case PPC::VSUBUWM:
7561 case PPC::VAND:
7562 case PPC::VANDC:
7563 case PPC::VOR:
7564 case PPC::VORC:
7565 case PPC::VXOR:
7566 case PPC::VNOR:
7567 case PPC::VMULUWM:
7568 return true;
7569 }
7570}
7571
7572// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7573// lane-insensitive.
7574static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7575 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7576 // TODO: Can we put this a common method for DAG?
7577 auto SkipRCCopy = [](SDValue V) {
7578 while (V->isMachineOpcode() &&
7579 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7580 // All values in the chain should have single use.
7581 if (V->use_empty() || !V->user_begin()->isOnlyUserOf(V.getNode()))
7582 return SDValue();
7583 V = V->getOperand(0);
7584 }
7585 return V.hasOneUse() ? V : SDValue();
7586 };
7587
7588 SDValue VecOp = SkipRCCopy(N->getOperand(0));
7589 if (!VecOp || !isLaneInsensitive(VecOp))
7590 return;
7591
7592 SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7593 RHS = SkipRCCopy(VecOp.getOperand(1));
7594 if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7595 return;
7596
7597 // These swaps may still have chain-uses here, count on dead code elimination
7598 // in following passes to remove them.
7599 DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7600 DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7601 DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7602}
7603
7604// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7605static bool hasAIXSmallTLSAttr(SDValue Val) {
7607 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
7608 if (GV->hasAttribute("aix-small-tls"))
7609 return true;
7610
7611 return false;
7612}
7613
7614// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7615// accesses?
7617 SDValue ADDIToFold) {
7618 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7619 // accesses), is truly an ADDI.
7620 if (!ADDIToFold.isMachineOpcode() ||
7621 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7622 return false;
7623
7624 // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
7625 // attribute or when the 'aix-small-tls' global variable attribute is present.
7626 const PPCSubtarget &Subtarget =
7628 SDValue TLSVarNode = ADDIToFold.getOperand(1);
7629 if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
7630 Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
7631 return false;
7632
7633 // The second operand of the ADDIToFold should be the global TLS address
7634 // (the local-exec TLS variable). We only perform the folding if the TLS
7635 // variable is the second operand.
7637 if (!GA)
7638 return false;
7639
7640 if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
7641 // The first operand of the ADDIToFold should be the thread pointer.
7642 // This transformation is only performed if the first operand of the
7643 // addi is the thread pointer.
7644 SDValue TPRegNode = ADDIToFold.getOperand(0);
7645 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7646 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7647 return false;
7648 }
7649
7650 // The local-[exec|dynamic] TLS variable should only have the
7651 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
7652 // performed otherwise if the flag is not set.
7653 unsigned TargetFlags = GA->getTargetFlags();
7654 if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
7655 TargetFlags == PPCII::MO_TLSLD_FLAG))
7656 return false;
7657
7658 // If all conditions are satisfied, the ADDI is valid for folding.
7659 return true;
7660}
7661
7662// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
7663// another addi, fold this sequence into a single addi if possible. Before this
7664// optimization, the sequence appears as:
7665// addi rN, r13, sym@[le|ld]
7666// addi rM, rN, imm
7667// After this optimization, we can fold the two addi into a single one:
7668// addi rM, r13, sym@[le|ld] + imm
7670 if (N->getMachineOpcode() != PPC::ADDI8)
7671 return;
7672
7673 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7674 // we want optimized out.
7675 SDValue InitialADDI = N->getOperand(0);
7676
7677 if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
7678 return;
7679
7680 // The second operand of the InitialADDI should be the global TLS address
7681 // (the local-[exec|dynamic] TLS variable), with the
7682 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
7683 // isEligibleToFoldADDIForFasterLocalAccesses().
7684 SDValue TLSVarNode = InitialADDI.getOperand(1);
7686 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7687 "local-[exec|dynamic] accesses!");
7688 unsigned TargetFlags = GA->getTargetFlags();
7689
7690 // The second operand of the addi that we want to preserve will be an
7691 // immediate. We add this immediate, together with the address of the TLS
7692 // variable found in InitialADDI, in order to preserve the correct TLS address
7693 // information during assembly printing. The offset is likely to be non-zero
7694 // when we end up in this case.
7695 int Offset = N->getConstantOperandVal(1);
7696 TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7697 Offset, TargetFlags);
7698
7699 (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
7700 if (InitialADDI.getNode()->use_empty())
7701 DAG->RemoveDeadNode(InitialADDI.getNode());
7702}
7703
7704void PPCDAGToDAGISel::PeepholePPC64() {
7705 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7706
7707 while (Position != CurDAG->allnodes_begin()) {
7708 SDNode *N = &*--Position;
7709 // Skip dead nodes and any non-machine opcodes.
7710 if (N->use_empty() || !N->isMachineOpcode())
7711 continue;
7712
7713 if (isVSXSwap(SDValue(N, 0)))
7714 reduceVSXSwap(N, CurDAG);
7715
7716 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7717 // accesses.
7719
7720 unsigned FirstOp;
7721 unsigned StorageOpcode = N->getMachineOpcode();
7722 bool RequiresMod4Offset = false;
7723
7724 switch (StorageOpcode) {
7725 default: continue;
7726
7727 case PPC::LWA:
7728 case PPC::LD:
7729 case PPC::DFLOADf64:
7730 case PPC::DFLOADf32:
7731 RequiresMod4Offset = true;
7732 [[fallthrough]];
7733 case PPC::LBZ:
7734 case PPC::LBZ8:
7735 case PPC::LFD:
7736 case PPC::LFS:
7737 case PPC::LHA:
7738 case PPC::LHA8:
7739 case PPC::LHZ:
7740 case PPC::LHZ8:
7741 case PPC::LWZ:
7742 case PPC::LWZ8:
7743 FirstOp = 0;
7744 break;
7745
7746 case PPC::STD:
7747 case PPC::DFSTOREf64:
7748 case PPC::DFSTOREf32:
7749 RequiresMod4Offset = true;
7750 [[fallthrough]];
7751 case PPC::STB:
7752 case PPC::STB8:
7753 case PPC::STFD:
7754 case PPC::STFS:
7755 case PPC::STH:
7756 case PPC::STH8:
7757 case PPC::STW:
7758 case PPC::STW8:
7759 FirstOp = 1;
7760 break;
7761 }
7762
7763 // If this is a load or store with a zero offset, or within the alignment,
7764 // we may be able to fold an add-immediate into the memory operation.
7765 // The check against alignment is below, as it can't occur until we check
7766 // the arguments to N
7767 if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
7768 continue;
7769
7770 SDValue Base = N->getOperand(FirstOp + 1);
7771 if (!Base.isMachineOpcode())
7772 continue;
7773
7774 unsigned Flags = 0;
7775 bool ReplaceFlags = true;
7776
7777 // When the feeding operation is an add-immediate of some sort,
7778 // determine whether we need to add relocation information to the
7779 // target flags on the immediate operand when we fold it into the
7780 // load instruction.
7781 //
7782 // For something like ADDItocL8, the relocation information is
7783 // inferred from the opcode; when we process it in the AsmPrinter,
7784 // we add the necessary relocation there. A load, though, can receive
7785 // relocation from various flavors of ADDIxxx, so we need to carry
7786 // the relocation information in the target flags.
7787 switch (Base.getMachineOpcode()) {
7788 default: continue;
7789
7790 case PPC::ADDI8:
7791 case PPC::ADDI:
7792 // In some cases (such as TLS) the relocation information
7793 // is already in place on the operand, so copying the operand
7794 // is sufficient.
7795 ReplaceFlags = false;
7796 break;
7797 case PPC::ADDIdtprelL:
7799 break;
7800 case PPC::ADDItlsldL:
7802 break;
7803 case PPC::ADDItocL8:
7804 // Skip the following peephole optimizations for ADDItocL8 on AIX which
7805 // is used for toc-data access.
7806 if (Subtarget->isAIXABI())
7807 continue;
7809 break;
7810 }
7811
7812 SDValue ImmOpnd = Base.getOperand(1);
7813
7814 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7815 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7816 // we might have needed different @ha relocation values for the offset
7817 // pointers).
7818 int MaxDisplacement = 7;
7819 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7820 const GlobalValue *GV = GA->getGlobal();
7821 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7822 MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
7823 }
7824
7825 bool UpdateHBase = false;
7826 SDValue HBase = Base.getOperand(0);
7827
7828 int Offset = N->getConstantOperandVal(FirstOp);
7829 if (ReplaceFlags) {
7830 if (Offset < 0 || Offset > MaxDisplacement) {
7831 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7832 // one use, then we can do this for any offset, we just need to also
7833 // update the offset (i.e. the symbol addend) on the addis also.
7834 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7835 continue;
7836
7837 if (!HBase.isMachineOpcode() ||
7838 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7839 continue;
7840
7841 if (!Base.hasOneUse() || !HBase.hasOneUse())
7842 continue;
7843
7844 SDValue HImmOpnd = HBase.getOperand(1);
7845 if (HImmOpnd != ImmOpnd)
7846 continue;
7847
7848 UpdateHBase = true;
7849 }
7850 } else {
7851 // Global addresses can be folded, but only if they are sufficiently
7852 // aligned.
7853 if (RequiresMod4Offset) {
7854 if (GlobalAddressSDNode *GA =
7856 const GlobalValue *GV = GA->getGlobal();
7857 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7858 if (Alignment < 4)
7859 continue;
7860 }
7861 }
7862
7863 // If we're directly folding the addend from an addi instruction, then:
7864 // 1. In general, the offset on the memory access must be zero.
7865 // 2. If the addend is a constant, then it can be combined with a
7866 // non-zero offset, but only if the result meets the encoding
7867 // requirements.
7868 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
7869 Offset += C->getSExtValue();
7870
7871 if (RequiresMod4Offset && (Offset % 4) != 0)
7872 continue;
7873
7874 if (!isInt<16>(Offset))
7875 continue;
7876
7877 ImmOpnd = CurDAG->getSignedTargetConstant(Offset, SDLoc(ImmOpnd),
7878 ImmOpnd.getValueType());
7879 } else if (Offset != 0) {
7880 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7881 // accesses.
7883 // Add the non-zero offset information into the load or store
7884 // instruction to be used for non-TOC-based local-[exec|dynamic]
7885 // accesses.
7886 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7887 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7888 "addi into local-[exec|dynamic] accesses!");
7889 ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7890 MVT::i64, Offset,
7891 GA->getTargetFlags());
7892 } else
7893 continue;
7894 }
7895 }
7896
7897 // We found an opportunity. Reverse the operands from the add
7898 // immediate and substitute them into the load or store. If
7899 // needed, update the target flags for the immediate operand to
7900 // reflect the necessary relocation information.
7901 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7902 LLVM_DEBUG(Base->dump(CurDAG));
7903 LLVM_DEBUG(dbgs() << "\nN: ");
7904 LLVM_DEBUG(N->dump(CurDAG));
7905 LLVM_DEBUG(dbgs() << "\n");
7906
7907 // If the relocation information isn't already present on the
7908 // immediate operand, add it now.
7909 if (ReplaceFlags) {
7910 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7911 SDLoc dl(GA);
7912 const GlobalValue *GV = GA->getGlobal();
7913 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7914 // We can't perform this optimization for data whose alignment
7915 // is insufficient for the instruction encoding.
7916 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7917 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7918 continue;
7919 }
7920 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7921 } else if (ConstantPoolSDNode *CP =
7923 const Constant *C = CP->getConstVal();
7924 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7925 Offset, Flags);
7926 }
7927 }
7928
7929 if (FirstOp == 1) // Store
7930 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7931 Base.getOperand(0), N->getOperand(3));
7932 else // Load
7933 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7934 N->getOperand(2));
7935
7936 if (UpdateHBase)
7937 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
7938 ImmOpnd);
7939
7940 // The add-immediate may now be dead, in which case remove it.
7941 if (Base.getNode()->use_empty())
7942 CurDAG->RemoveDeadNode(Base.getNode());
7943 }
7944}
7945
7946/// createPPCISelDag - This pass converts a legalized DAG into a
7947/// PowerPC-specific DAG, ready for instruction scheduling.
7948///
7950 CodeGenOptLevel OptLevel) {
7951 return new PPCDAGToDAGISelLegacy(TM, OptLevel);
7952}
unsigned SubReg
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
MachineBasicBlock MachineBasicBlock::iterator MBBI
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG, SDValue ADDIToFold)
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base)
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static bool hasTocDataAttr(SDValue Val)
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG)
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG)
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
static bool isLaneInsensitive(SDValue N)
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget, const TargetMachine &TM, const SDNode *Node)
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget)
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB)
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
static SDNode * selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static bool hasAIXSmallTLSAttr(SDValue Val)
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
static bool isSWTestOp(SDValue N)
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt=nullptr)
ICmpInGPRType
@ ICGPR_ZextI32
@ ICGPR_I64
@ ICGPR_All
@ ICGPR_None
@ ICGPR_NonExtIn
@ ICGPR_Sext
@ ICGPR_I32
@ ICGPR_SextI64
@ ICGPR_ZextI64
@ ICGPR_SextI32
@ ICGPR_Zext
static bool isVSXSwap(SDValue N)
static uint32_t findContiguousZerosAtLeast(uint64_t Imm, unsigned Num)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
LLVM_ABI APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition APInt.cpp:1154
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
MachineBasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
int64_t getSExtValue() const
A debug info location.
Definition DebugLoc.h:124
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
BranchProbabilityInfo * BPI
MachineBasicBlock * MBB
MBB - The current block.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists.
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:470
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1442
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1448
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setROPProtectionHashSaveIndex(int Idx)
static int getRecordFormOpcode(unsigned Opcode)
bool is32BitELFABI() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCInstrInfo * getInstrInfo() const override
MCRegister getThreadPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
bool isTargetELF() const
CodeModel::Model getCodeModel(const TargetMachine &TM, const GlobalValue *GV) const
Calculates the effective code model for argument GV.
bool isELFv2ABI() const
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
LLVM_ABI void dump() const
Dump this node, for debugging.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
allnodes_const_iterator allnodes_begin() const
allnodes_const_iterator allnodes_end() const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
ilist< SDNode >::iterator allnodes_iterator
int getMaskElt(unsigned Idx) const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Primary interface to the complete machine description for the target machine.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool isPositionIndependent() const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned getID() const
Return the register class ID number.
virtual const TargetLowering * getTargetLowering() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:174
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
@ MO_TLSLD_LO
Definition PPC.h:184
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_DTPREL_LO
These values identify relocations on immediates folded into memory operations.
Definition PPC.h:183
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TOC_LO
Definition PPC.h:185
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
@ Define
Register definition.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:362
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:293
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
int countl_one(T Value)
Count the number of ones from the most significant bit to the first zero bit.
Definition bit.h:280
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
Definition ModRef.h:68
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
unsigned int NumVTs