LLVM 23.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
16#include "PPC.h"
17#include "PPCISelLowering.h"
19#include "PPCSelectionDAGInfo.h"
20#include "PPCSubtarget.h"
21#include "PPCTargetMachine.h"
22#include "llvm/ADT/APInt.h"
23#include "llvm/ADT/APSInt.h"
24#include "llvm/ADT/DenseMap.h"
25#include "llvm/ADT/STLExtras.h"
28#include "llvm/ADT/Statistic.h"
44#include "llvm/IR/BasicBlock.h"
45#include "llvm/IR/DebugLoc.h"
46#include "llvm/IR/Function.h"
47#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/InlineAsm.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/IntrinsicsPowerPC.h"
51#include "llvm/IR/Module.h"
56#include "llvm/Support/Debug.h"
61#include <algorithm>
62#include <cassert>
63#include <cstdint>
64#include <iterator>
65#include <limits>
66#include <memory>
67#include <new>
68#include <tuple>
69#include <utility>
70
71using namespace llvm;
72
73#define DEBUG_TYPE "ppc-isel"
74#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
75
76STATISTIC(NumSextSetcc,
77 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
78STATISTIC(NumZextSetcc,
79 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
80STATISTIC(SignExtensionsAdded,
81 "Number of sign extensions for compare inputs added.");
82STATISTIC(ZeroExtensionsAdded,
83 "Number of zero extensions for compare inputs added.");
84STATISTIC(NumLogicOpsOnComparison,
85 "Number of logical ops on i1 values calculated in GPR.");
86STATISTIC(OmittedForNonExtendUses,
87 "Number of compares not eliminated as they have non-extending uses.");
88STATISTIC(NumP9Setb,
89 "Number of compares lowered to setb.");
90
91// FIXME: Remove this once the bug has been fixed!
92cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
93cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
94
95static cl::opt<bool>
96 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
97 cl::desc("use aggressive ppc isel for bit permutations"),
100 "ppc-bit-perm-rewriter-stress-rotates",
101 cl::desc("stress rotate selection in aggressive ppc isel for "
102 "bit permutations"),
103 cl::Hidden);
104
106 "ppc-use-branch-hint", cl::init(true),
107 cl::desc("Enable static hinting of branches on ppc"),
108 cl::Hidden);
109
111 "ppc-tls-opt", cl::init(true),
112 cl::desc("Enable tls optimization peephole"),
113 cl::Hidden);
114
118
120 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
121 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
122 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
123 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
124 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
125 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
126 clEnumValN(ICGPR_NonExtIn, "nonextin",
127 "Only comparisons where inputs don't need [sz]ext."),
128 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
129 clEnumValN(ICGPR_ZextI32, "zexti32",
130 "Only i32 comparisons with zext result."),
131 clEnumValN(ICGPR_ZextI64, "zexti64",
132 "Only i64 comparisons with zext result."),
133 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
134 clEnumValN(ICGPR_SextI32, "sexti32",
135 "Only i32 comparisons with sext result."),
136 clEnumValN(ICGPR_SextI64, "sexti64",
137 "Only i64 comparisons with sext result.")));
138namespace {
139
140 //===--------------------------------------------------------------------===//
141 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
142 /// instructions for SelectionDAG operations.
143 ///
144 class PPCDAGToDAGISel : public SelectionDAGISel {
145 const PPCTargetMachine &TM;
146 const PPCSubtarget *Subtarget = nullptr;
147 const PPCTargetLowering *PPCLowering = nullptr;
148 unsigned GlobalBaseReg = 0;
149
150 public:
151 PPCDAGToDAGISel() = delete;
152
153 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
154 : SelectionDAGISel(tm, OptLevel), TM(tm) {}
155
// Per-function entry point. Clears the cached global base register, caches
// the PPC subtarget and its target lowering for the function being selected,
// and, when the subtarget reports ROP protection, creates an 8-byte, 8-byte
// aligned stack object for the ROP protection hash.
// NOTE(review): listing numbers 168 and 170 are absent from this excerpt, so
// whatever consumes `FI` and `Result`, and any statement that preceded the
// final `return true`, is not visible here; confirm against the upstream file.
156 bool runOnMachineFunction(MachineFunction &MF) override {
157 // Make sure we re-emit a set of the global base reg if necessary
158 GlobalBaseReg = 0;
159 Subtarget = &MF.getSubtarget<PPCSubtarget>();
160 PPCLowering = Subtarget->getTargetLowering();
161 if (Subtarget->hasROPProtect()) {
162 // Create a place on the stack for the ROP Protection Hash.
163 // The ROP Protection Hash will always be 8 bytes and aligned to 8
164 // bytes.
165 MachineFrameInfo &MFI = MF.getFrameInfo();
166 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
// Third argument `false` is passed positionally to CreateStackObject.
167 const int Result = MFI.CreateStackObject(8, Align(8), false);
169 }
171
172 return true;
173 }
174
175 void PreprocessISelDAG() override;
176 void PostprocessISelDAG() override;
177
178 /// getI16Imm - Return a target constant with the specified value, of type
179 /// i16.
180 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
181 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
182 }
183
184 /// getI32Imm - Return a target constant with the specified value, of type
185 /// i32.
186 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
187 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
188 }
189
190 /// getI64Imm - Return a target constant with the specified value, of type
191 /// i64.
192 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
193 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
194 }
195
196 /// getSmallIPtrImm - Return a target constant of pointer type.
197 inline SDValue getSmallIPtrImm(int64_t Imm, const SDLoc &dl) {
198 return CurDAG->getSignedTargetConstant(
199 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
200 }
201
202 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
203 /// rotate and mask opcode and mask operation.
204 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
205 unsigned &SH, unsigned &MB, unsigned &ME);
206
207 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
208 /// base register. Return the virtual register that holds this value.
209 SDNode *getGlobalBaseReg();
210
211 void selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset = 0);
212
213 // Select - Convert the specified operand from a target-independent to a
214 // target-specific node if it hasn't already been changed.
215 void Select(SDNode *N) override;
216
217 bool tryBitfieldInsert(SDNode *N);
218 bool tryBitPermutation(SDNode *N);
219 bool tryIntCompareInGPR(SDNode *N);
220
221 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
222 // an X-Form load instruction with the offset being a relocation coming from
223 // the PPCISD::ADD_TLS.
224 bool tryTLSXFormLoad(LoadSDNode *N);
225 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
226 // an X-Form store instruction with the offset being a relocation coming from
227 // the PPCISD::ADD_TLS.
228 bool tryTLSXFormStore(StoreSDNode *N);
229 /// SelectCC - Select a comparison of the specified values with the
230 /// specified condition code, returning the CR# of the expression.
232 const SDLoc &dl, SDValue Chain = SDValue());
233
234 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
235 /// immediate field. Note that the operand at this point is already the
236 /// result of a prior SelectAddressRegImm call.
237 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
238 if (N.getOpcode() == ISD::TargetConstant ||
239 N.getOpcode() == ISD::TargetGlobalAddress) {
240 Out = N;
241 return true;
242 }
243
244 return false;
245 }
246
247 /// SelectDSForm - Returns true if address N can be represented by the
248 /// addressing mode of DSForm instructions (a base register, plus a signed
249 /// 16-bit displacement that is a multiple of 4.
250 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
251 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
252 Align(4)) == PPC::AM_DSForm;
253 }
254
255 /// SelectDQForm - Returns true if address N can be represented by the
256 /// addressing mode of DQForm instructions (a base register, plus a signed
257 /// 16-bit displacement that is a multiple of 16.
258 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
259 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
260 Align(16)) == PPC::AM_DQForm;
261 }
262
263 /// SelectDForm - Returns true if address N can be represented by
264 /// the addressing mode of DForm instructions (a base register, plus a
265 /// signed 16-bit immediate.
266 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
267 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
268 std::nullopt) == PPC::AM_DForm;
269 }
270
271 /// SelectPCRelForm - Returns true if address N can be represented by
272 /// PC-Relative addressing mode.
273 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
274 SDValue &Base) {
275 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
276 std::nullopt) == PPC::AM_PCRel;
277 }
278
279 /// SelectPDForm - Returns true if address N can be represented by Prefixed
280 /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
// NOTE(review): this excerpt skips listing number 284, so the value that the
// SelectOptimalAddrMode result is compared against is not visible below.
// Restore that operand from the upstream file before building.
281 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
282 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
283 std::nullopt) ==
285 }
286
287 /// SelectXForm - Returns true if address N can be represented by the
288 /// addressing mode of XForm instructions (an indexed [r+r] operation).
289 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
290 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
291 std::nullopt) == PPC::AM_XForm;
292 }
293
294 /// SelectForceXForm - Given the specified address, force it to be
295 /// represented as an indexed [r+r] operation (an XForm instruction).
// \p Parent is not referenced in the visible body; only N, Disp, Base and
// the current DAG are forwarded to SelectForceXFormMode.
// NOTE(review): this excerpt skips listing number 299, so the value that the
// SelectForceXFormMode result is compared against is not visible below.
// Restore that operand from the upstream file before building.
296 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
297 SDValue &Base) {
298 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
300 }
301
302 /// SelectAddrIdx - Given the specified address, check to see if it can be
303 /// represented as an indexed [r+r] operation.
304 /// This is for xform instructions whose associated displacement form is D.
305 /// The last parameter \p 0 means associated D form has no requirment for 16
306 /// bit signed displacement.
307 /// Returns false if it can be represented by [r+imm], which are preferred.
308 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
309 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
310 std::nullopt);
311 }
312
313 /// SelectAddrIdx4 - Given the specified address, check to see if it can be
314 /// represented as an indexed [r+r] operation.
315 /// This is for xform instructions whose associated displacement form is DS.
316 /// The last parameter \p 4 means associated DS form 16 bit signed
317 /// displacement must be a multiple of 4.
318 /// Returns false if it can be represented by [r+imm], which are preferred.
319 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
320 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
321 Align(4));
322 }
323
324 /// SelectAddrIdx16 - Given the specified address, check to see if it can be
325 /// represented as an indexed [r+r] operation.
326 /// This is for xform instructions whose associated displacement form is DQ.
327 /// The last parameter \p 16 means associated DQ form 16 bit signed
328 /// displacement must be a multiple of 16.
329 /// Returns false if it can be represented by [r+imm], which are preferred.
330 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
331 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
332 Align(16));
333 }
334
335 /// SelectAddrIdxOnly - Given the specified address, force it to be
336 /// represented as an indexed [r+r] operation.
337 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
338 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
339 }
340
341 /// SelectAddrImm - Returns true if the address N can be represented by
342 /// a base register plus a signed 16-bit displacement [r+imm].
343 /// The last parameter \p 0 means D form has no requirment for 16 bit signed
344 /// displacement.
345 bool SelectAddrImm(SDValue N, SDValue &Disp,
346 SDValue &Base) {
347 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
348 std::nullopt);
349 }
350
351 /// SelectAddrImmX4 - Returns true if the address N can be represented by
352 /// a base register plus a signed 16-bit displacement that is a multiple of
353 /// 4 (last parameter). Suitable for use by STD and friends.
354 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
355 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
356 }
357
358 /// SelectAddrImmX16 - Returns true if the address N can be represented by
359 /// a base register plus a signed 16-bit displacement that is a multiple of
360 /// 16(last parameter). Suitable for use by STXV and friends.
361 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
362 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
363 Align(16));
364 }
365
366 /// SelectAddrImmX34 - Returns true if the address N can be represented by
367 /// a base register plus a signed 34-bit displacement. Suitable for use by
368 /// PSTXVP and friends.
369 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
370 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
371 }
372
373 // Select an address into a single register.
374 bool SelectAddr(SDValue N, SDValue &Base) {
375 Base = N;
376 return true;
377 }
378
379 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
380 return PPCLowering->SelectAddressPCRel(N, Base);
381 }
382
383 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
384 /// inline asm expressions. It is always correct to compute the value into
385 /// a register. The case of adding a (possibly relocatable) constant to a
386 /// register can be improved, but it is wrong to substitute Reg+Reg for
387 /// Reg in an asm, because the load or store opcode would have to change.
388 bool SelectInlineAsmMemoryOperand(const SDValue &Op,
389 InlineAsm::ConstraintCode ConstraintID,
390 std::vector<SDValue> &OutOps) override {
391 switch(ConstraintID) {
392 default:
393 errs() << "ConstraintID: "
394 << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
395 llvm_unreachable("Unexpected asm memory constraint");
396 case InlineAsm::ConstraintCode::es:
397 case InlineAsm::ConstraintCode::m:
398 case InlineAsm::ConstraintCode::o:
399 case InlineAsm::ConstraintCode::Q:
400 case InlineAsm::ConstraintCode::Z:
401 case InlineAsm::ConstraintCode::Zy:
402 // We need to make sure that this one operand does not end up in r0
403 // (because we might end up lowering this as 0(%op)).
404 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
405 const TargetRegisterClass *TRC = TRI->getPointerRegClass(/*Kind=*/1);
406 SDLoc dl(Op);
407 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
408 SDValue NewOp =
409 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
410 dl, Op.getValueType(),
411 Op, RC), 0);
412
413 OutOps.push_back(NewOp);
414 return false;
415 }
416 return true;
417 }
418
419// Include the pieces autogenerated from the target description.
420#include "PPCGenDAGISel.inc"
421
422private:
423 bool trySETCC(SDNode *N);
424 bool tryFoldSWTestBRCC(SDNode *N);
425 bool trySelectLoopCountIntrinsic(SDNode *N);
426 bool tryAsSingleRLDICL(SDNode *N);
427 bool tryAsSingleRLDCL(SDNode *N);
428 bool tryAsSingleRLDICR(SDNode *N);
429 bool tryAsSingleRLWINM(SDNode *N);
430 bool tryAsSingleRLWINM8(SDNode *N);
431 bool tryAsSingleRLWIMI(SDNode *N);
432 bool tryAsPairOfRLDICL(SDNode *N);
433 bool tryAsSingleRLDIMI(SDNode *N);
434
435 void PeepholePPC64();
436 void PeepholePPC64ZExt();
437 void PeepholeCROps();
438
439 SDValue combineToCMPB(SDNode *N);
440 void foldBoolExts(SDValue &Res, SDNode *&N);
441
442 bool AllUsersSelectZero(SDNode *N);
443 void SwapAllSelectUsers(SDNode *N);
444
445 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
446 void transferMemOperands(SDNode *N, SDNode *Result);
447 };
448
449 class PPCDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
450 public:
451 static char ID;
452 explicit PPCDAGToDAGISelLegacy(PPCTargetMachine &tm,
453 CodeGenOptLevel OptLevel)
454 : SelectionDAGISelLegacy(
455 ID, std::make_unique<PPCDAGToDAGISel>(tm, OptLevel)) {}
456 };
457} // end anonymous namespace
458
459char PPCDAGToDAGISelLegacy::ID = 0;
460
461INITIALIZE_PASS(PPCDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
462
463/// getGlobalBaseReg - Output the instructions required to put the
464/// base address to use for accessing globals into a register.
465///
// The materialization code is only emitted while GlobalBaseReg is still 0;
// later calls reuse the cached register. The return value is the node of a
// register value of pointer type that refers to GlobalBaseReg.
// NOTE(review): this excerpt skips listing numbers 471 and 493. `MBBI` is used
// below without a visible declaration, and the left-hand side of the
// createVirtualRegister expression in the non-ELF 32-bit branch is missing.
// Restore both lines from the upstream file before building.
466SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
467 if (!GlobalBaseReg) {
468 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
469 // Insert the set of GlobalBaseReg into the first MBB of the function
470 MachineBasicBlock &FirstMBB = MF->front();
472 const Module *M = MF->getFunction().getParent();
// Default-constructed debug location used for every instruction built below.
473 DebugLoc dl;
474
// Pointer type is i32.
475 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
476 if (Subtarget->isTargetELF()) {
// ELF with i32 pointers: the physical register R30 holds the base.
477 GlobalBaseReg = PPC::R30;
478 if (!Subtarget->isSecurePlt() &&
479 M->getPICLevel() == PICLevel::SmallPIC) {
// Not secure-PLT and small PIC level: MoveGOTtoLR followed by MFLR.
480 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
481 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
482 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
483 } else {
// Otherwise: MovePCtoLR, MFLR, then UpdateGBR, which also defines a
// fresh GPRC virtual register (TempReg).
484 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
485 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
486 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
487 BuildMI(FirstMBB, MBBI, dl,
488 TII.get(PPC::UpdateGBR), GlobalBaseReg)
489 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
490 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
491 }
492 } else {
// Non-ELF with i32 pointers: MovePCtoLR followed by MFLR.
494 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
495 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
496 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
497 }
498 } else {
// Pointer type is not i32: use the 8-suffixed opcodes and a G8RC-class
// virtual register.
499 // We must ensure that this sequence is dominated by the prologue.
500 // FIXME: This is a bit of a big hammer since we don't get the benefits
501 // of shrink-wrapping whenever we emit this instruction. Considering
502 // this is used in any function where we emit a jump table, this may be
503 // a significant limitation. We should consider inserting this in the
504 // block where it is used and then commoning this sequence up if it
505 // appears in multiple places.
506 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
507 // MovePCtoLR8.
508 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
509 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
510 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
511 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
512 }
513 }
514 return CurDAG->getRegister(GlobalBaseReg,
515 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
516 .getNode();
517}
518
519// Check if a SDValue has the toc-data attribute.
// Returns true only when GA and GV are both non-null and GV carries the
// "toc-data" attribute; every other path returns false.
// NOTE(review): this excerpt skips listing numbers 521 and 525, so the
// declarations of `GA` and `GV` (and how they are derived from `Val`) are not
// visible. Restore them from the upstream file before building.
520static bool hasTocDataAttr(SDValue Val) {
522 if (!GA)
523 return false;
524
526 if (!GV)
527 return false;
528
529 if (!GV->hasAttribute("toc-data"))
530 return false;
531 return true;
532}
533
535 const TargetMachine &TM,
536 const SDNode *Node) {
537 // If there isn't an attribute to override the module code model
538 // this will be the effective code model.
539 CodeModel::Model ModuleModel = TM.getCodeModel();
540
542 if (!GA)
543 return ModuleModel;
544
545 const GlobalValue *GV = GA->getGlobal();
546 if (!GV)
547 return ModuleModel;
548
549 return Subtarget.getCodeModel(TM, GV);
550}
551
552/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
553/// operand. If so Imm will receive the 32-bit value.
554static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
555 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
556 Imm = N->getAsZExtVal();
557 return true;
558 }
559 return false;
560}
561
562/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
563/// operand. If so Imm will receive the 64-bit value.
564static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
565 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
566 Imm = N->getAsZExtVal();
567 return true;
568 }
569 return false;
570}
571
572// isInt32Immediate - This method tests to see if a constant operand.
573// If so Imm will receive the 32 bit value.
574static bool isInt32Immediate(SDValue N, unsigned &Imm) {
575 return isInt32Immediate(N.getNode(), Imm);
576}
577
578/// isInt64Immediate - This method tests to see if the value is a 64-bit
579/// constant operand. If so Imm will receive the 64-bit value.
580static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
581 return isInt64Immediate(N.getNode(), Imm);
582}
583
// getBranchHint - Pick a static branch hint for a conditional branch to
// DestMBB using the edge probabilities in FuncInfo.BPI.
// Returns PPC::BR_NO_HINT when no probability info is available, when the
// current block's terminator does not have exactly two successors, or when
// the two edge probabilities are not lopsided enough (see Threshold below).
// Otherwise returns BR_TAKEN_HINT or BR_NONTAKEN_HINT depending on whether the
// edge to DestMBB is the more probable one.
// PCC is not referenced in the visible body.
// NOTE(review): this excerpt skips listing number 587, directly after the
// signature; the statement that stood there is not visible. Confirm against
// the upstream file.
584static unsigned getBranchHint(unsigned PCC,
585 const FunctionLoweringInfo &FuncInfo,
586 const SDValue &DestMBB) {
588
589 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
590
591 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
592 const Instruction *BBTerm = BB->getTerminator();
593
594 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
595
// Successor 0 is treated as the "true" target and successor 1 as the "false"
// target until the destination check at the end of the function.
596 const BasicBlock *TBB = BBTerm->getSuccessor(0);
597 const BasicBlock *FBB = BBTerm->getSuccessor(1);
598
599 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
600 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
601
602 // We only want to handle cases which are easy to predict at static time, e.g.
603 // C++ throw statement, that is very likely not taken, or calling never
604 // returned function, e.g. stdlib exit(). So we set Threshold to filter
605 // unwanted cases.
606 //
607 // Below is LLVM branch weight table, we only want to handle case 1, 2
608 //
609 // Case Taken:Nontaken Example
610 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
611 // 2. Invoke-terminating 1:1048575
612 // 3. Coldblock 4:64 __builtin_expect
613 // 4. Loop Branch 124:4 For loop
614 // 5. PH/ZH/FPH 20:12
615 const uint32_t Threshold = 10000;
616
// No hint unless the larger probability, divided by Threshold, is still at
// least as large as the smaller one.
617 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
618 return PPC::BR_NO_HINT;
619
620 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
621 << "::" << BB->getName() << "'\n"
622 << " -> " << TBB->getName() << ": " << TProb << "\n"
623 << " -> " << FBB->getName() << ": " << FProb << "\n");
624
625 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
626
627 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
628 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
629 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
630 std::swap(TProb, FProb);
631
632 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
633}
634
635// isOpcWithIntImmediate - This method tests to see if the node is a specific
636// opcode and that it has a immediate integer right operand.
637// If so Imm will receive the 32 bit value.
638static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
639 return N->getOpcode() == Opc
640 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
641}
642
643void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, int64_t Offset) {
644 SDLoc dl(SN);
645 int FI = cast<FrameIndexSDNode>(N)->getIndex();
646 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
647 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
648 if (SN->hasOneUse())
649 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
650 getSmallIPtrImm(Offset, dl));
651 else
652 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
653 getSmallIPtrImm(Offset, dl)));
654}
655
656bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
657 bool isShiftMask, unsigned &SH,
658 unsigned &MB, unsigned &ME) {
659 // Don't even go down this path for i64, since different logic will be
660 // necessary for rldicl/rldicr/rldimi.
661 if (N->getValueType(0) != MVT::i32)
662 return false;
663
664 unsigned Shift = 32;
665 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
666 unsigned Opcode = N->getOpcode();
667 if (N->getNumOperands() != 2 ||
668 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
669 return false;
670
671 if (Opcode == ISD::SHL) {
672 // apply shift left to mask if it comes first
673 if (isShiftMask) Mask = Mask << Shift;
674 // determine which bits are made indeterminant by shift
675 Indeterminant = ~(0xFFFFFFFFu << Shift);
676 } else if (Opcode == ISD::SRL) {
677 // apply shift right to mask if it comes first
678 if (isShiftMask) Mask = Mask >> Shift;
679 // determine which bits are made indeterminant by shift
680 Indeterminant = ~(0xFFFFFFFFu >> Shift);
681 // adjust for the left rotate
682 Shift = 32 - Shift;
683 } else if (Opcode == ISD::ROTL) {
684 Indeterminant = 0;
685 } else {
686 return false;
687 }
688
689 // if the mask doesn't intersect any Indeterminant bits
690 if (Mask && !(Mask & Indeterminant)) {
691 SH = Shift & 31;
692 // make sure the mask is still a mask (wrap arounds may not be)
693 return isRunOfOnes(Mask, MB, ME);
694 }
695 return false;
696}
697
698// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
699// instruction use the thread pointer.
701 assert(
702 Base.getOpcode() == PPCISD::ADD_TLS &&
703 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
704 const PPCSubtarget &Subtarget =
706 SDValue ADDTLSOp1 = Base.getOperand(0);
707 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
708
709 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
710 //
711 // Although ADD_TLS does not explicitly use the thread pointer
712 // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
713 // instruction will have a relocation specifier, @got@tprel, that is used to
714 // generate a GOT entry. The linker replaces this entry with an offset for a
715 // thread local variable, which will be relative to the thread pointer.
716 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
717 return true;
718 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
719 // node is produced instead to represent the aforementioned situation.
720 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
721 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
722 return true;
723
724 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
725 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
726 // later returning it into R3.
727 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
728 return true;
729
730 // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
731 RegisterSDNode *AddFirstOpReg =
732 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
733 if (AddFirstOpReg &&
734 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
735 return true;
736
737 return false;
738}
739
740// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
741// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
742// operation, can be optimized to use an X-Form load or store, allowing the
743// ADD_TLS node to be removed completely.
745
746 // Do not do this transformation at -O0.
748 return false;
749
750 // In order to perform this optimization inside tryTLSXForm[Load|Store],
751 // Base is expected to be an ADD_TLS node.
752 if (Base.getOpcode() != PPCISD::ADD_TLS)
753 return false;
754 for (auto *ADDTLSUse : Base.getNode()->users()) {
755 // The optimization to convert the D-Form load/store into its X-Form
756 // counterpart should only occur if the source value offset of the load/
757 // store is 0. This also means that the offset should always be undefined.
758 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
759 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
760 return false;
761 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
762 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
763 return false;
764 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
765 return false;
766 }
767
768 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
769 return false;
770
771 // Does the ADD_TLS node of the load/store use the thread pointer?
772 // If the thread pointer is not used as one of the operands of ADD_TLS,
773 // then this optimization is not valid.
774 return isThreadPointerAcquisitionNode(Base, CurDAG);
775}
776
777bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
778 SDValue Base = ST->getBasePtr();
779 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
780 return false;
781
782 SDLoc dl(ST);
783 EVT MemVT = ST->getMemoryVT();
784 EVT RegVT = ST->getValue().getValueType();
785
786 unsigned Opcode;
787 switch (MemVT.getSimpleVT().SimpleTy) {
788 default:
789 return false;
790 case MVT::i8: {
791 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
792 break;
793 }
794 case MVT::i16: {
795 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
796 break;
797 }
798 case MVT::i32: {
799 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
800 break;
801 }
802 case MVT::i64: {
803 Opcode = PPC::STDXTLS;
804 break;
805 }
806 case MVT::f32: {
807 Opcode = PPC::STFSXTLS;
808 break;
809 }
810 case MVT::f64: {
811 Opcode = PPC::STFDXTLS;
812 break;
813 }
814 }
815 SDValue Chain = ST->getChain();
816 SDVTList VTs = ST->getVTList();
817 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
818 Chain};
819 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
820 transferMemOperands(ST, MN);
821 ReplaceNode(ST, MN);
822 return true;
823}
824
825bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
826 SDValue Base = LD->getBasePtr();
827 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
828 return false;
829
830 SDLoc dl(LD);
831 EVT MemVT = LD->getMemoryVT();
832 EVT RegVT = LD->getValueType(0);
833 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
834 unsigned Opcode;
835 switch (MemVT.getSimpleVT().SimpleTy) {
836 default:
837 return false;
838 case MVT::i8: {
839 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
840 break;
841 }
842 case MVT::i16: {
843 if (RegVT == MVT::i32)
844 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
845 else
846 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
847 break;
848 }
849 case MVT::i32: {
850 if (RegVT == MVT::i32)
851 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
852 else
853 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
854 break;
855 }
856 case MVT::i64: {
857 Opcode = PPC::LDXTLS;
858 break;
859 }
860 case MVT::f32: {
861 Opcode = PPC::LFSXTLS;
862 break;
863 }
864 case MVT::f64: {
865 Opcode = PPC::LFDXTLS;
866 break;
867 }
868 }
869 SDValue Chain = LD->getChain();
870 SDVTList VTs = LD->getVTList();
871 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
872 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
873 transferMemOperands(LD, MN);
874 ReplaceNode(LD, MN);
875 return true;
876}
877
878/// Turn an or of two masked values into the rotate left word immediate then
879/// mask insert (rlwimi) instruction.
///
/// The match is driven by known-bits analysis of both operands: every bit of
/// the 32-bit mask must be known zero in at least one operand, and
/// isRunOfOnes() must accept the mask of bits the inserted operand may set.
/// On success \p N is replaced by a PPC::RLWIMI machine node and true is
/// returned; otherwise false is returned.
880bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
881 SDValue Op0 = N->getOperand(0);
882 SDValue Op1 = N->getOperand(1);
883 SDLoc dl(N);
884
885 // If either operand is a constant, let ORI/ORIS/ADDI/ADDIS tablegen
886 // patterns handle it — they produce a single instruction without the
887 // tied-register constraint that RLWIMI requires.
    // NOTE(review): the embedded numbering jumps from 887 to 889, so the
    // condition guarding the return below appears to be missing from this
    // listing. Restore it from the upstream file before building.
889 return false;
890
891 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
892 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
893
    // Start from the bits known to be zero in each operand.
894 unsigned TargetMask = LKnown.Zero.getZExtValue();
895 unsigned InsertMask = RKnown.Zero.getZExtValue();
896
    // Proceed only if every bit is known zero in at least one operand, i.e.
    // the two operands cannot both contribute to the same bit of the OR.
897 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
898 unsigned Op0Opc = Op0.getOpcode();
899 unsigned Op1Opc = Op1.getOpcode();
900 unsigned Value, SH = 0;
    // Invert so that each mask now holds the bits its operand may set.
901 TargetMask = ~TargetMask;
902 InsertMask = ~InsertMask;
903
904 // If the LHS has a foldable shift and the RHS does not, then swap it to the
905 // RHS so that we can fold the shift into the insert.
906 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
907 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
908 Op0.getOperand(0).getOpcode() == ISD::SRL) {
909 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
910 Op1.getOperand(0).getOpcode() != ISD::SRL) {
911 std::swap(Op0, Op1);
912 std::swap(Op0Opc, Op1Opc);
913 std::swap(TargetMask, InsertMask);
914 }
915 }
916 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
917 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
918 Op1.getOperand(0).getOpcode() != ISD::SRL) {
919 std::swap(Op0, Op1);
920 std::swap(Op0Opc, Op1Opc);
921 std::swap(TargetMask, InsertMask);
922 }
923 }
924
    // MB/ME are filled in by isRunOfOnes() and passed on as the last two
    // RLWIMI operands.
925 unsigned MB, ME;
926 if (isRunOfOnes(InsertMask, MB, ME)) {
927 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
    // NOTE(review): embedded line 928 is missing here. It presumably held the
    // rest of this condition, where Value is assigned; confirm upstream.
929 Op1 = Op1.getOperand(0);
    // A right shift by Value is expressed as a left rotate by 32 - Value.
930 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
931 }
932 if (Op1Opc == ISD::AND) {
933 // The AND mask might not be a constant, and we need to make sure that
934 // if we're going to fold the masking with the insert, all bits not
935 // known to be zero in the mask are known to be one.
936 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
937 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
938
939 unsigned SHOpc = Op1.getOperand(0).getOpcode();
940 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
    // NOTE(review): embedded line 941 is missing here. It presumably held the
    // rest of this condition, where Value is assigned; confirm upstream.
942 // Note that Value must be in range here (less than 32) because
943 // otherwise there would not be any bits set in InsertMask.
944 Op1 = Op1.getOperand(0).getOperand(0);
945 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
946 }
947 }
948
    // Reduce the rotate amount modulo 32 (32 - 0 above would otherwise give
    // 32).
949 SH &= 31;
950 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
951 getI32Imm(ME, dl) };
952 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
953 return true;
954 }
955 }
956 return false;
957}
958
// Report how narrow every use of N is.
//
// Returns 0 as soon as a use is found that is not one of:
//  - an ISD::TRUNCATE node,
//  - an ISD::STORE that uses N as operand 0 and whose memory type is not
//    64 bits wide,
//  - one of the PPC::ST{W,H,B}{,X,U,UX}8 machine nodes using N as operand 0.
// Otherwise returns the largest width, in bits, among those uses (which is 0
// when N has no uses at all). CurDAG is not referenced by the visible body.
959static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
960 unsigned MaxTruncation = 0;
961 // Iterate over the uses (SDUse) rather than over the user nodes: the
962 // operand number of each use is needed below, and iterating over users
963 // would only provide an SDNode*.
964 for (SDUse &Use : N->uses()) {
965 SDNode *User = Use.getUser();
    // Machine nodes and target-independent nodes are dispatched through the
    // same switch, using whichever opcode applies to this user.
966 unsigned Opc =
967 User->isMachineOpcode() ? User->getMachineOpcode() : User->getOpcode();
968 switch (Opc) {
969 default: return 0;
970 case ISD::TRUNCATE:
    // Opc may be a machine opcode here, so make sure the user really is a
    // target-independent node before treating it as a truncate.
971 if (User->isMachineOpcode())
972 return 0;
973 MaxTruncation = std::max(MaxTruncation,
974 (unsigned)User->getValueType(0).getSizeInBits());
975 continue;
976 case ISD::STORE: {
977 if (User->isMachineOpcode())
978 return 0;
    // NOTE(review): embedded line 979 is missing from this listing; it
    // presumably declares STN (the user viewed as a store node). Confirm
    // against the upstream file.
980 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
    // NOTE(review): this requires the use to be operand 0 of the store;
    // verify that this is the intended operand index for ISD::STORE.
981 if (MemVTSize == 64 || Use.getOperandNo() != 0)
982 return 0;
983 MaxTruncation = std::max(MaxTruncation, MemVTSize);
984 continue;
985 }
986 case PPC::STW8:
987 case PPC::STWX8:
988 case PPC::STWU8:
989 case PPC::STWUX8:
990 if (Use.getOperandNo() != 0)
991 return 0;
992 MaxTruncation = std::max(MaxTruncation, 32u);
993 continue;
994 case PPC::STH8:
995 case PPC::STHX8:
996 case PPC::STHU8:
997 case PPC::STHUX8:
998 if (Use.getOperandNo() != 0)
999 return 0;
1000 MaxTruncation = std::max(MaxTruncation, 16u);
1001 continue;
1002 case PPC::STB8:
1003 case PPC::STBX8:
1004 case PPC::STBU8:
1005 case PPC::STBUX8:
1006 if (Use.getOperandNo() != 0)
1007 return 0;
1008 MaxTruncation = std::max(MaxTruncation, 8u);
1009 continue;
1010 }
1011 }
1012 return MaxTruncation;
1013}
1014
1015// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1016// zeros and return the number of bits by the left of these consecutive zeros.
1017static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1018 unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1019 unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1020 if ((HiTZ + LoLZ) >= Num)
1021 return (32 + HiTZ);
1022 return 0;
1023}
1024
1025// Direct materialization of 64-bit constants by enumerated patterns.
//
// The patterns are tried in the order written and the first one that matches
// is used, so the order of the checks below is significant. On success the
// last node of the emitted sequence is returned and InstCnt is set to the
// number of instructions in it (1, 2 or 3). If no pattern matches, nullptr is
// returned and InstCnt is set to 0.
1026static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1027 uint64_t Imm, unsigned &InstCnt) {
     // TZ/LZ: trailing/leading zero counts of Imm; TO/LO: trailing/leading
     // one counts of Imm.
1028 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1029 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1030 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1031 unsigned LO = llvm::countl_one<uint64_t>(Imm);
1032 unsigned Hi32 = Hi_32(Imm);
1033 unsigned Lo32 = Lo_32(Imm);
1034 SDNode *Result = nullptr;
1035 unsigned Shift = 0;
1036
     // Helper building an i32 target constant at dl.
1037 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1038 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1039 };
1040
1041 // Following patterns use 1 instruction to materialize the Imm.
1042 InstCnt = 1;
1043 // 1-1) Patterns : {zeros}{15-bit value}
1044 // {ones}{15-bit value}
1045 if (isInt<16>(Imm)) {
1046 SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1047 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1048 }
1049 // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
1050 // {ones}{15-bit value}{16 zeros}
1051 if (TZ > 15 && (LZ > 32 || LO > 32))
1052 return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1053 getI32Imm((Imm >> 16) & 0xffff));
1054
1055 // Following patterns use 2 instructions to materialize the Imm.
1056 InstCnt = 2;
1057 assert(LZ < 64 && "Unexpected leading zeros here.");
1058 // Count of ones following the leading zeros.
1059 unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1060 // 2-1) Patterns : {zeros}{31-bit value}
1061 // {ones}{31-bit value}
1062 if (isInt<32>(Imm)) {
1063 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1064 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1065 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1066 return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1067 getI32Imm(Imm & 0xffff));
1068 }
1069 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1070 // {zeros}{15-bit value}{zeros}
1071 // {zeros}{ones}{15-bit value}
1072 // {ones}{15-bit value}{zeros}
1073 // We can take advantage of LI's sign-extension semantics to generate leading
1074 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
1075 if ((LZ + FO + TZ) > 48) {
1076 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1077 getI32Imm((Imm >> TZ) & 0xffff));
1078 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1079 getI32Imm(TZ), getI32Imm(LZ));
1080 }
1081 // 2-3) Pattern : {zeros}{15-bit value}{ones}
1082 // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
1083 // therefore we can take advantage of LI's sign-extension semantics, and then
1084 // mask them off after rotation.
1085 //
1086 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
1087 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1088 // +------------------------+ +------------------------+
1089 // 63 0 63 0
1090 // Imm (Imm >> (48 - LZ) & 0xffff)
1091 // +----sext-----|--16-bit--+ +clear-|-----------------+
1092 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1093 // +------------------------+ +------------------------+
1094 // 63 0 63 0
1095 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1096 if ((LZ + TO) > 48) {
1097 // Since the immediates with (LZ > 32) have been handled by previous
1098 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1099 // the Imm by a negative value.
1100 assert(LZ <= 32 && "Unexpected shift value.");
1101 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1102 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1103 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1104 getI32Imm(48 - LZ), getI32Imm(LZ));
1105 }
1106 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1107 // {ones}{15-bit value}{ones}
1108 // We can take advantage of LI's sign-extension semantics to generate leading
1109 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1110 // after rotation.
1111 //
1112 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1113 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1114 // +------------------------+ +------------------------+
1115 // 63 0 63 0
1116 // Imm (Imm >> TO) & 0xffff
1117 // +----sext-----|--16-bit--+ +LZ|---------------------+
1118 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1119 // +------------------------+ +------------------------+
1120 // 63 0 63 0
1121 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1122 if ((LZ + FO + TO) > 48) {
1123 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1124 getI32Imm((Imm >> TO) & 0xffff));
1125 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1126 getI32Imm(TO), getI32Imm(LZ));
1127 }
1128 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1129 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1130 // value, we can use LI for Lo16 without generating leading ones then add the
1131 // Hi16(in Lo32).
1132 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1133 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1134 getI32Imm(Lo32 & 0xffff));
1135 return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1136 getI32Imm(Lo32 >> 16));
1137 }
1138 // 2-6) Patterns : {******}{49 zeros}{******}
1139 // {******}{49 ones}{******}
1140 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1141 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1142 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1143 // it back.
1144 //
1145 // 1) findContiguousZerosAtLeast(Imm, 49)
1146 // +------|--zeros-|------+ +--zeros--||---15 bit--+
1147 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1148 // +----------------------+ +----------------------+
1149 // 63 0 63 0
1150 //
1151 // 2) findContiguousZerosAtLeast(~Imm, 49)
1152 // +------|--ones--|------+ +---ones--||---15 bit--+
1153 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1154 // +----------------------+ +----------------------+
1155 // 63 0 63 0
1156 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1157 (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1158 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1159 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1160 getI32Imm(RotImm & 0xffff));
1161 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1162 getI32Imm(Shift), getI32Imm(0));
1163 }
1164 // 2-7) Patterns : High word == Low word
1165 // This may require 2 to 3 instructions, depending on whether Lo32 can be
1166 // materialized in 1 instruction.
1167 if (Hi32 == Lo32) {
1168 // Handle the first 32 bits.
1169 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1170 uint64_t ImmLo16 = Lo32 & 0xffff;
1171 if (isInt<16>(Lo32))
1172 Result =
1173 CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1174 else if (!ImmLo16)
1175 Result =
1176 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1177 else {
      // Lo32 itself needs two instructions, so the whole sequence takes 3.
1178 InstCnt = 3;
1179 Result =
1180 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1181 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1182 SDValue(Result, 0), getI32Imm(ImmLo16));
1183 }
1184 // Use rldimi to insert the Low word into High word.
1185 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1186 getI32Imm(0)};
1187 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1188 }
1189
1190 // Following patterns use 3 instructions to materialize the Imm.
1191 InstCnt = 3;
1192 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1193 // {zeros}{31-bit value}{zeros}
1194 // {zeros}{ones}{31-bit value}
1195 // {ones}{31-bit value}{zeros}
1196 // We can take advantage of LIS's sign-extension semantics to generate leading
1197 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1198 // ones in both sides after rotation.
1199 if ((LZ + FO + TZ) > 32) {
1200 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1201 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1202 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1203 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1204 getI32Imm((Imm >> TZ) & 0xffff));
1205 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1206 getI32Imm(TZ), getI32Imm(LZ));
1207 }
1208 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1209 // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1210 // value, therefore we can take advantage of LIS's sign-extension semantics,
1211 // add the remaining bits with ORI, and then mask them off after rotation.
1212 // This is similar to Pattern 2-3, please refer to the diagram there.
1213 if ((LZ + TO) > 32) {
1214 // Since the immediates with (LZ > 32) have been handled by previous
1215 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1216 // the Imm by a negative value.
1217 assert(LZ <= 32 && "Unexpected shift value.");
1218 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1219 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1220 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1221 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1222 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1223 getI32Imm(32 - LZ), getI32Imm(LZ));
1224 }
1225 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1226 // {ones}{31-bit value}{ones}
1227 // We can take advantage of LIS's sign-extension semantics to generate leading
1228 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1229 // ones in left sides (if required) after rotation.
1230 // This is similar to Pattern 2-4, please refer to the diagram there.
1231 if ((LZ + FO + TO) > 32) {
1232 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1233 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1234 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1235 getI32Imm((Imm >> TO) & 0xffff));
1236 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1237 getI32Imm(TO), getI32Imm(LZ));
1238 }
1239 // 3-4) Patterns : {******}{33 zeros}{******}
1240 // {******}{33 ones}{******}
1241 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1242 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1243 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1244 // rotate it back.
1245 // This is similar to Pattern 2-6, please refer to the diagram there.
1246 if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1247 (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1248 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1249 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1250 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1251 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1252 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1253 getI32Imm(RotImm & 0xffff));
1254 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1255 getI32Imm(Shift), getI32Imm(0));
1256 }
1257
     // No enumerated pattern matched; report that to the caller.
1258 InstCnt = 0;
1259 return nullptr;
1260}
1261
1262// Try to select instructions to generate a 64 bit immediate using prefix as
1263// well as non prefix instructions. The function will return the SDNode
1264// to materialize that constant or it will return nullptr if it does not
1265// find one. The variable InstCnt is set to the number of instructions that
1266// were selected.
// In the visible body every path returns a node: the final catch-all builds
// the constant from two PLI8 nodes combined with RLDIMI (InstCnt = 3).
// NOTE(review): embedded line 1267, the first line of the signature (function
// name plus the CurDAG and dl parameters used below), is missing from this
// listing. Restore it from the upstream file.
1268 uint64_t Imm, unsigned &InstCnt) {
     // TZ/LZ: trailing/leading zero counts; TO: trailing one count; FO: count
     // of ones that follow the leading zeros (0 when Imm is 0).
1269 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1270 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1271 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1272 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1273 unsigned Hi32 = Hi_32(Imm);
1274 unsigned Lo32 = Lo_32(Imm);
1275
1276 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1277 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1278 };
1279
1280 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1281 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1282 };
1283
1284 // Following patterns use 1 instruction to materialize Imm.
1285 InstCnt = 1;
1286
1287 // The pli instruction can materialize up to 34 bits directly.
1288 // If a constant fits within 34-bits, emit the pli instruction here directly.
1289 if (isInt<34>(Imm))
1290 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1291 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1292
1293 // Require at least two instructions.
1294 InstCnt = 2;
1295 SDNode *Result = nullptr;
1296 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1297 // {zeros}{33-bit value}{zeros}
1298 // {zeros}{ones}{33-bit value}
1299 // {ones}{33-bit value}{zeros}
1300 // We can take advantage of PLI's sign-extension semantics to generate leading
1301 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1302 if ((LZ + FO + TZ) > 30) {
     // Take the 34 bits above the trailing zeros and sign-extend them to 64
     // bits to form the PLI8 operand.
1303 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1304 APInt Extended = SignedInt34.sext(64);
1305 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1306 getI64Imm(Extended.getZExtValue()));
1307 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1308 getI32Imm(TZ), getI32Imm(LZ));
1309 }
1310 // Pattern : {zeros}{33-bit value}{ones}
1311 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1312 // therefore we can take advantage of PLI's sign-extension semantics, and then
1313 // mask them off after rotation.
1314 //
1315 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1316 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1317 // +------------------------+ +------------------------+
1318 // 63 0 63 0
1319 //
1320 // +----sext-----|--34-bit--+ +clear-|-----------------+
1321 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1322 // +------------------------+ +------------------------+
1323 // 63 0 63 0
1324 if ((LZ + TO) > 30) {
1325 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1326 APInt Extended = SignedInt34.sext(64);
1327 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1328 getI64Imm(Extended.getZExtValue()));
1329 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1330 getI32Imm(30 - LZ), getI32Imm(LZ));
1331 }
1332 // Patterns : {zeros}{ones}{33-bit value}{ones}
1333 // {ones}{33-bit value}{ones}
1334 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1335 // generate leading ones, and then use RLDICL to mask off the ones in left
1336 // sides (if required) after rotation.
1337 if ((LZ + FO + TO) > 30) {
1338 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1339 APInt Extended = SignedInt34.sext(64);
1340 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1341 getI64Imm(Extended.getZExtValue()));
1342 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1343 getI32Imm(TO), getI32Imm(LZ));
1344 }
1345 // Patterns : {******}{31 zeros}{******}
1346 // : {******}{31 ones}{******}
1347 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1348 // is 33. Rotate right the Imm to construct an int<33> value, we can use PLI
1349 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1350 //
1351 // +------|--ones--|------+ +---ones--||---33 bit--+
1352 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1353 // +----------------------+ +----------------------+
1354 // 63 0 63 0
     // Try rotate amounts 0..62 and use the first one for which the rotated
     // value fits in a signed 34-bit immediate.
1355 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1356 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1357 if (isInt<34>(RotImm)) {
1358 Result =
1359 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1360 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1361 SDValue(Result, 0), getI32Imm(Shift),
1362 getI32Imm(0));
1363 }
1364 }
1365
1366 // Patterns : High word == Low word
1367 // This is basically a splat of a 32 bit immediate.
1368 if (Hi32 == Lo32) {
1369 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1370 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1371 getI32Imm(0)};
1372 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1373 }
1374
1375 InstCnt = 3;
1376 // Catch-all
1377 // This pattern can form any 64 bit immediate in 3 instructions.
1378 SDNode *ResultHi =
1379 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1380 SDNode *ResultLo =
1381 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1382 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1383 getI32Imm(0)};
1384 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1385}
1386
// Materialize the 64-bit immediate Imm and return the last node of the
// emitted sequence. If InstCnt is non-null it receives the number of
// instructions used.
//
// Strategy, in order:
//  1. selectI64ImmDirect(); when the subtarget has prefixed instructions and
//     the direct result is not a single instruction, selectI64ImmDirectPrefix()
//     is also tried and used if it is strictly shorter (or if the direct
//     attempt produced nothing).
//  2. A 4-instruction sequence for values that are "almost" a splat of a
//     32-bit immediate.
//  3. Materialize the upper 32 bits with selectI64ImmDirect() and OR in the
//     low 32 bits with ORIS8/ORI8 as needed.
1387static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1388 unsigned *InstCnt = nullptr) {
1389 unsigned InstCntDirect = 0;
1390 // No more than 3 instructions are used if we can select the i64 immediate
1391 // directly.
1392 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1393
1394 const PPCSubtarget &Subtarget =
     // NOTE(review): embedded line 1395, the initializer of Subtarget, is
     // missing from this listing. Restore it from the upstream file.
1396
1397 // If we have prefixed instructions and there is a chance we can
1398 // materialize the constant with fewer prefixed instructions than
1399 // non-prefixed, try that.
1400 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1401 unsigned InstCntDirectP = 0;
1402 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1403 // Use the prefix case in either of two cases:
1404 // 1) We have no result from the non-prefix case to use.
1405 // 2) The non-prefix case uses more instructions than the prefix case.
1406 // If the prefix and non-prefix cases use the same number of instructions
1407 // we will prefer the non-prefix case.
1408 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1409 if (InstCnt)
1410 *InstCnt = InstCntDirectP;
1411 return ResultP;
1412 }
1413 }
1414
1415 if (Result) {
1416 if (InstCnt)
1417 *InstCnt = InstCntDirect;
1418 return Result;
1419 }
1420 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1421 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1422 };
1423
     // The two 16-bit halves of the low 32-bit word of Imm.
1424 uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1425 uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1426
1427 // Try to use 4 instructions to materialize the immediate which is "almost" a
1428 // splat of a 32 bit immediate.
1429 if (Hi16OfLo32 && Lo16OfLo32) {
1430 uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1431 uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1432 bool IsSelected = false;
1433
     // Emits LIS8 + ORI8 to form the 32-bit value Hi16:Lo16, then an RLDIMI
     // that takes the same node for both register operands (3 instructions).
1434 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1435 SDNode *Result =
1436 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1437 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1438 SDValue(Result, 0), getI32Imm(Lo16));
1439 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1440 getI32Imm(0)};
1441 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1442 };
1443
     // Each branch handles the case where three of the four 16-bit pieces are
     // equal: build a splat, then patch one piece with a fourth instruction.
1444 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1445 IsSelected = true;
1446 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1447 // Modify Hi16OfHi32.
1448 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1449 getI32Imm(0)};
1450 Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1451 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1452 IsSelected = true;
1453 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1454 // Modify Lo16OfLo32.
1455 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1456 getI32Imm(16), getI32Imm(31)};
1457 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1458 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1459 IsSelected = true;
1460 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1461 // Modify Hi16OfLo32.
1462 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1463 getI32Imm(0), getI32Imm(15)};
1464 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1465 }
1466 if (IsSelected == true) {
1467 if (InstCnt)
1468 *InstCnt = 4;
1469 return Result;
1470 }
1471 }
1472
1473 // Handle the upper 32 bit value.
1474 Result =
1475 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1476 // Add in the last bits as required.
1477 if (Hi16OfLo32) {
1478 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1479 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1480 ++InstCntDirect;
1481 }
1482 if (Lo16OfLo32) {
1483 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1484 getI32Imm(Lo16OfLo32));
1485 ++InstCntDirect;
1486 }
1487 if (InstCnt)
1488 *InstCnt = InstCntDirect;
1489 return Result;
1490}
1491
1492// Select a 64-bit constant.
// The constant is read from node N. When allUsesTruncate() reports a non-zero
// width for N, the value is first sign-extended from that width, and a single
// LI8 is emitted if the result fits in a signed 16-bit immediate. In every
// other case the full 64-bit value goes through the immediate-based
// selectI64Imm overload.
// NOTE(review): embedded line 1493, the function signature (which declares
// the CurDAG and N used below), is missing from this listing. Restore it from
// the upstream file.
1494 SDLoc dl(N);
1495
1496 // Get 64 bit value.
1497 int64_t Imm = N->getAsZExtVal();
1498 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
     // Only the low MinSize bits are consumed by the uses, so sign-extending
     // from MinSize may yield a value that a single LI8 can produce.
1499 uint64_t SextImm = SignExtend64(Imm, MinSize);
1500 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1501 if (isInt<16>(SextImm))
1502 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1503 }
1504 return selectI64Imm(CurDAG, dl, Imm);
1505}
1506
1507namespace {
1508
1509class BitPermutationSelector {
1510 struct ValueBit {
1511 SDValue V;
1512
1513 // The bit number in the value, using a convention where bit 0 is the
1514 // lowest-order bit.
1515 unsigned Idx;
1516
1517 // ConstZero means a bit we need to mask off.
1518 // Variable is a bit comes from an input variable.
1519 // VariableKnownToBeZero is also a bit comes from an input variable,
1520 // but it is known to be already zero. So we do not need to mask them.
1521 enum Kind {
1522 ConstZero,
1523 Variable,
1524 VariableKnownToBeZero
1525 } K;
1526
1527 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1528 : V(V), Idx(I), K(K) {}
1529 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1530
1531 bool isZero() const {
1532 return K == ConstZero || K == VariableKnownToBeZero;
1533 }
1534
1535 bool hasValue() const {
1536 return K == Variable || K == VariableKnownToBeZero;
1537 }
1538
1539 SDValue getValue() const {
1540 assert(hasValue() && "Cannot get the value of a constant bit");
1541 return V;
1542 }
1543
1544 unsigned getValueBitIndex() const {
1545 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1546 return Idx;
1547 }
1548 };
1549
1550 // A bit group has the same underlying value and the same rotate factor.
1551 struct BitGroup {
1552 SDValue V;
1553 unsigned RLAmt;
1554 unsigned StartIdx, EndIdx;
1555
1556 // This rotation amount assumes that the lower 32 bits of the quantity are
1557 // replicated in the high 32 bits by the rotation operator (which is done
1558 // by rlwinm and friends in 64-bit mode).
1559 bool Repl32;
1560 // Did converting to Repl32 == true change the rotation factor? If it did,
1561 // it decreased it by 32.
1562 bool Repl32CR;
1563 // Was this group coalesced after setting Repl32 to true?
1564 bool Repl32Coalesced;
1565
1566 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1567 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1568 Repl32Coalesced(false) {
1569 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1570 << " [" << S << ", " << E << "]\n");
1571 }
1572 };
1573
1574 // Information on each (Value, RLAmt) pair (like the number of groups
1575 // associated with each) used to choose the lowering method.
1576 struct ValueRotInfo {
1577 SDValue V;
1578 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1579 unsigned NumGroups = 0;
1580 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1581 bool Repl32 = false;
1582
1583 ValueRotInfo() = default;
1584
1585 // For sorting (in reverse order) by NumGroups, and then by
1586 // FirstGroupStartIdx.
1587 bool operator < (const ValueRotInfo &Other) const {
1588 // We need to sort so that the non-Repl32 come first because, when we're
1589 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1590 // masking operation.
1591 if (Repl32 < Other.Repl32)
1592 return true;
1593 else if (Repl32 > Other.Repl32)
1594 return false;
1595 else if (NumGroups > Other.NumGroups)
1596 return true;
1597 else if (NumGroups < Other.NumGroups)
1598 return false;
1599 else if (RLAmt == 0 && Other.RLAmt != 0)
1600 return true;
1601 else if (RLAmt != 0 && Other.RLAmt == 0)
1602 return false;
1603 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1604 return true;
1605 return false;
1606 }
1607 };
1608
1609 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1610 using ValueBitsMemoizer =
1611 DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
1612 ValueBitsMemoizer Memoizer;
1613
1614 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1615 // The bool is true if something interesting was deduced, otherwise if we're
1616 // providing only a generic representation of V (or something else likewise
1617 // uninteresting for instruction selection) through the SmallVector.
1618 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1619 unsigned NumBits) {
1620 auto &ValueEntry = Memoizer[V];
1621 if (ValueEntry)
1622 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1623 ValueEntry.reset(new ValueBitsMemoizedValue());
1624 bool &Interesting = ValueEntry->first;
1625 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1626 Bits.resize(NumBits);
1627
1628 switch (V.getOpcode()) {
1629 default: break;
1630 case ISD::ROTL:
1631 if (isa<ConstantSDNode>(V.getOperand(1))) {
1632 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1633 unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1634
1635 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1636
1637 for (unsigned i = 0; i < NumBits; ++i)
1638 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1639
1640 return std::make_pair(Interesting = true, &Bits);
1641 }
1642 break;
1643 case ISD::SHL:
1644 case PPCISD::SHL:
1645 if (isa<ConstantSDNode>(V.getOperand(1))) {
1646 // sld takes 7 bits, slw takes 6.
1647 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1648
1649 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1650
1651 if (ShiftAmt >= NumBits) {
1652 for (unsigned i = 0; i < NumBits; ++i)
1653 Bits[i] = ValueBit(ValueBit::ConstZero);
1654 } else {
1655 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1656 Bits[i] = LHSBits[i - ShiftAmt];
1657 for (unsigned i = 0; i < ShiftAmt; ++i)
1658 Bits[i] = ValueBit(ValueBit::ConstZero);
1659 }
1660
1661 return std::make_pair(Interesting = true, &Bits);
1662 }
1663 break;
1664 case ISD::SRL:
1665 case PPCISD::SRL:
1666 if (isa<ConstantSDNode>(V.getOperand(1))) {
1667 // srd takes lowest 7 bits, srw takes 6.
1668 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1669
1670 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1671
1672 if (ShiftAmt >= NumBits) {
1673 for (unsigned i = 0; i < NumBits; ++i)
1674 Bits[i] = ValueBit(ValueBit::ConstZero);
1675 } else {
1676 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1677 Bits[i] = LHSBits[i + ShiftAmt];
1678 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1679 Bits[i] = ValueBit(ValueBit::ConstZero);
1680 }
1681
1682 return std::make_pair(Interesting = true, &Bits);
1683 }
1684 break;
1685 case ISD::AND:
1686 if (isa<ConstantSDNode>(V.getOperand(1))) {
1687 uint64_t Mask = V.getConstantOperandVal(1);
1688
1689 const SmallVector<ValueBit, 64> *LHSBits;
1690 // Mark this as interesting, only if the LHS was also interesting. This
1691 // prevents the overall procedure from matching a single immediate 'and'
1692 // (which is non-optimal because such an and might be folded with other
1693 // things if we don't select it here).
1694 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1695
1696 for (unsigned i = 0; i < NumBits; ++i)
1697 if (((Mask >> i) & 1) == 1)
1698 Bits[i] = (*LHSBits)[i];
1699 else {
1700 // AND instruction masks this bit. If the input is already zero,
1701 // we have nothing to do here. Otherwise, make the bit ConstZero.
1702 if ((*LHSBits)[i].isZero())
1703 Bits[i] = (*LHSBits)[i];
1704 else
1705 Bits[i] = ValueBit(ValueBit::ConstZero);
1706 }
1707
1708 return std::make_pair(Interesting, &Bits);
1709 }
1710 break;
1711 case ISD::OR: {
1712 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1713 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1714
1715 bool AllDisjoint = true;
1716 SDValue LastVal = SDValue();
1717 unsigned LastIdx = 0;
1718 for (unsigned i = 0; i < NumBits; ++i) {
1719 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1720 // If both inputs are known to be zero and one is ConstZero and
1721 // another is VariableKnownToBeZero, we can select whichever
1722 // we like. To minimize the number of bit groups, we select
1723 // VariableKnownToBeZero if this bit is the next bit of the same
1724 // input variable from the previous bit. Otherwise, we select
1725 // ConstZero.
1726 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1727 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1728 Bits[i] = LHSBits[i];
1729 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1730 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1731 Bits[i] = RHSBits[i];
1732 else
1733 Bits[i] = ValueBit(ValueBit::ConstZero);
1734 }
1735 else if (LHSBits[i].isZero())
1736 Bits[i] = RHSBits[i];
1737 else if (RHSBits[i].isZero())
1738 Bits[i] = LHSBits[i];
1739 else {
1740 AllDisjoint = false;
1741 break;
1742 }
1743 // We remember the value and bit index of this bit.
1744 if (Bits[i].hasValue()) {
1745 LastVal = Bits[i].getValue();
1746 LastIdx = Bits[i].getValueBitIndex();
1747 }
1748 else {
1749 if (LastVal) LastVal = SDValue();
1750 LastIdx = 0;
1751 }
1752 }
1753
1754 if (!AllDisjoint)
1755 break;
1756
1757 return std::make_pair(Interesting = true, &Bits);
1758 }
1759 case ISD::ZERO_EXTEND: {
1760 // We support only the case with zero extension from i32 to i64 so far.
1761 if (V.getValueType() != MVT::i64 ||
1762 V.getOperand(0).getValueType() != MVT::i32)
1763 break;
1764
1765 const SmallVector<ValueBit, 64> *LHSBits;
1766 const unsigned NumOperandBits = 32;
1767 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1768 NumOperandBits);
1769
1770 for (unsigned i = 0; i < NumOperandBits; ++i)
1771 Bits[i] = (*LHSBits)[i];
1772
1773 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1774 Bits[i] = ValueBit(ValueBit::ConstZero);
1775
1776 return std::make_pair(Interesting, &Bits);
1777 }
1778 case ISD::TRUNCATE: {
1779 EVT FromType = V.getOperand(0).getValueType();
1780 EVT ToType = V.getValueType();
1781 // We support only the case with truncate from i64 to i32.
1782 if (FromType != MVT::i64 || ToType != MVT::i32)
1783 break;
1784 const unsigned NumAllBits = FromType.getSizeInBits();
1786 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1787 NumAllBits);
1788 const unsigned NumValidBits = ToType.getSizeInBits();
1789
1790 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1791 // So, we cannot include this truncate.
1792 bool UseUpper32bit = false;
1793 for (unsigned i = 0; i < NumValidBits; ++i)
1794 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1795 UseUpper32bit = true;
1796 break;
1797 }
1798 if (UseUpper32bit)
1799 break;
1800
1801 for (unsigned i = 0; i < NumValidBits; ++i)
1802 Bits[i] = (*InBits)[i];
1803
1804 return std::make_pair(Interesting, &Bits);
1805 }
1806 case ISD::AssertZext: {
1807 // For AssertZext, we look through the operand and
1808 // mark the bits known to be zero.
1809 const SmallVector<ValueBit, 64> *LHSBits;
1810 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1811 NumBits);
1812
1813 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1814 const unsigned NumValidBits = FromType.getSizeInBits();
1815 for (unsigned i = 0; i < NumValidBits; ++i)
1816 Bits[i] = (*LHSBits)[i];
1817
1818 // These bits are known to be zero but the AssertZext may be from a value
1819 // that already has some constant zero bits (i.e. from a masking and).
1820 for (unsigned i = NumValidBits; i < NumBits; ++i)
1821 Bits[i] = (*LHSBits)[i].hasValue()
1822 ? ValueBit((*LHSBits)[i].getValue(),
1823 (*LHSBits)[i].getValueBitIndex(),
1824 ValueBit::VariableKnownToBeZero)
1825 : ValueBit(ValueBit::ConstZero);
1826
1827 return std::make_pair(Interesting, &Bits);
1828 }
1829 case ISD::LOAD:
1830 LoadSDNode *LD = cast<LoadSDNode>(V);
1831 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1832 EVT VT = LD->getMemoryVT();
1833 const unsigned NumValidBits = VT.getSizeInBits();
1834
1835 for (unsigned i = 0; i < NumValidBits; ++i)
1836 Bits[i] = ValueBit(V, i);
1837
1838 // These bits are known to be zero.
1839 for (unsigned i = NumValidBits; i < NumBits; ++i)
1840 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1841
1842 // Zero-extending load itself cannot be optimized. So, it is not
1843 // interesting by itself though it gives useful information.
1844 return std::make_pair(Interesting = false, &Bits);
1845 }
1846 break;
1847 }
1848
1849 for (unsigned i = 0; i < NumBits; ++i)
1850 Bits[i] = ValueBit(V, i);
1851
1852 return std::make_pair(Interesting = false, &Bits);
1853 }
1854
1855 // For each value (except the constant ones), compute the left-rotate amount
1856 // to get it from its original to final position.
1857 void computeRotationAmounts() {
1858 NeedMask = false;
1859 RLAmt.resize(Bits.size());
1860 for (unsigned i = 0; i < Bits.size(); ++i)
1861 if (Bits[i].hasValue()) {
1862 unsigned VBI = Bits[i].getValueBitIndex();
1863 if (i >= VBI)
1864 RLAmt[i] = i - VBI;
1865 else
1866 RLAmt[i] = Bits.size() - (VBI - i);
1867 } else if (Bits[i].isZero()) {
1868 NeedMask = true;
1869 RLAmt[i] = UINT32_MAX;
1870 } else {
1871 llvm_unreachable("Unknown value bit type");
1872 }
1873 }
1874
1875 // Collect groups of consecutive bits with the same underlying value and
1876 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1877 // they break up groups.
1878 void collectBitGroups(bool LateMask) {
1879 BitGroups.clear();
1880
1881 unsigned LastRLAmt = RLAmt[0];
1882 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1883 unsigned LastGroupStartIdx = 0;
1884 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1885 for (unsigned i = 1; i < Bits.size(); ++i) {
1886 unsigned ThisRLAmt = RLAmt[i];
1887 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1888 if (LateMask && !ThisValue) {
1889 ThisValue = LastValue;
1890 ThisRLAmt = LastRLAmt;
1891 // If we're doing late masking, then the first bit group always starts
1892 // at zero (even if the first bits were zero).
1893 if (BitGroups.empty())
1894 LastGroupStartIdx = 0;
1895 }
1896
1897 // If this bit is known to be zero and the current group is a bit group
1898 // of zeros, we do not need to terminate the current bit group even the
1899 // Value or RLAmt does not match here. Instead, we terminate this group
1900 // when the first non-zero bit appears later.
1901 if (IsGroupOfZeros && Bits[i].isZero())
1902 continue;
1903
1904 // If this bit has the same underlying value and the same rotate factor as
1905 // the last one, then they're part of the same group.
1906 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1907 // We cannot continue the current group if this bits is not known to
1908 // be zero in a bit group of zeros.
1909 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1910 continue;
1911
1912 if (LastValue.getNode())
1913 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1914 i-1));
1915 LastRLAmt = ThisRLAmt;
1916 LastValue = ThisValue;
1917 LastGroupStartIdx = i;
1918 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1919 }
1920 if (LastValue.getNode())
1921 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1922 Bits.size()-1));
1923
1924 if (BitGroups.empty())
1925 return;
1926
1927 // We might be able to combine the first and last groups.
1928 if (BitGroups.size() > 1) {
1929 // If the first and last groups are the same, then remove the first group
1930 // in favor of the last group, making the ending index of the last group
1931 // equal to the ending index of the to-be-removed first group.
1932 if (BitGroups[0].StartIdx == 0 &&
1933 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1934 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1935 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1936 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1937 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1938 BitGroups.erase(BitGroups.begin());
1939 }
1940 }
1941 }
1942
1943 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1944 // associated with each. If the number of groups are same, we prefer a group
1945 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1946 // instruction. If there is a degeneracy, pick the one that occurs
1947 // first (in the final value).
1948 void collectValueRotInfo() {
1949 ValueRots.clear();
1950
1951 for (auto &BG : BitGroups) {
1952 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1953 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1954 VRI.V = BG.V;
1955 VRI.RLAmt = BG.RLAmt;
1956 VRI.Repl32 = BG.Repl32;
1957 VRI.NumGroups += 1;
1958 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1959 }
1960
1961 // Now that we've collected the various ValueRotInfo instances, we need to
1962 // sort them.
1963 ValueRotsVec.clear();
1964 for (auto &I : ValueRots) {
1965 ValueRotsVec.push_back(I.second);
1966 }
1967 llvm::sort(ValueRotsVec);
1968 }
1969
1970 // In 64-bit mode, rlwinm and friends have a rotation operator that
1971 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1972 // indices of these instructions can only be in the lower 32 bits, so they
1973 // can only represent some 64-bit bit groups. However, when they can be used,
1974 // the 32-bit replication can be used to represent, as a single bit group,
1975 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1976 // groups when possible. Returns true if any of the bit groups were
1977 // converted.
1978 void assignRepl32BitGroups() {
1979 // If we have bits like this:
1980 //
1981 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1982 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1983 // Groups: | RLAmt = 8 | RLAmt = 40 |
1984 //
1985 // But, making use of a 32-bit operation that replicates the low-order 32
1986 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1987 // of 8.
1988
1989 auto IsAllLow32 = [this](BitGroup & BG) {
1990 if (BG.StartIdx <= BG.EndIdx) {
1991 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1992 if (!Bits[i].hasValue())
1993 continue;
1994 if (Bits[i].getValueBitIndex() >= 32)
1995 return false;
1996 }
1997 } else {
1998 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1999 if (!Bits[i].hasValue())
2000 continue;
2001 if (Bits[i].getValueBitIndex() >= 32)
2002 return false;
2003 }
2004 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
2005 if (!Bits[i].hasValue())
2006 continue;
2007 if (Bits[i].getValueBitIndex() >= 32)
2008 return false;
2009 }
2010 }
2011
2012 return true;
2013 };
2014
2015 for (auto &BG : BitGroups) {
2016 // If this bit group has RLAmt of 0 and will not be merged with
2017 // another bit group, we don't benefit from Repl32. We don't mark
2018 // such group to give more freedom for later instruction selection.
2019 if (BG.RLAmt == 0) {
2020 auto PotentiallyMerged = [this](BitGroup & BG) {
2021 for (auto &BG2 : BitGroups)
2022 if (&BG != &BG2 && BG.V == BG2.V &&
2023 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2024 return true;
2025 return false;
2026 };
2027 if (!PotentiallyMerged(BG))
2028 continue;
2029 }
2030 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2031 if (IsAllLow32(BG)) {
2032 if (BG.RLAmt >= 32) {
2033 BG.RLAmt -= 32;
2034 BG.Repl32CR = true;
2035 }
2036
2037 BG.Repl32 = true;
2038
2039 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2040 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2041 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2042 }
2043 }
2044 }
2045
2046 // Now walk through the bit groups, consolidating where possible.
2047 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2048 // We might want to remove this bit group by merging it with the previous
2049 // group (which might be the ending group).
2050 auto IP = (I == BitGroups.begin()) ?
2051 std::prev(BitGroups.end()) : std::prev(I);
2052 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2053 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2054
2055 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2056 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2057 << I->StartIdx << ", " << I->EndIdx
2058 << "] with group with range [" << IP->StartIdx << ", "
2059 << IP->EndIdx << "]\n");
2060
2061 IP->EndIdx = I->EndIdx;
2062 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2063 IP->Repl32Coalesced = true;
2064 I = BitGroups.erase(I);
2065 continue;
2066 } else {
2067 // There is a special case worth handling: If there is a single group
2068 // covering the entire upper 32 bits, and it can be merged with both
2069 // the next and previous groups (which might be the same group), then
2070 // do so. If it is the same group (so there will be only one group in
2071 // total), then we need to reverse the order of the range so that it
2072 // covers the entire 64 bits.
2073 if (I->StartIdx == 32 && I->EndIdx == 63) {
2074 assert(std::next(I) == BitGroups.end() &&
2075 "bit group ends at index 63 but there is another?");
2076 auto IN = BitGroups.begin();
2077
2078 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2079 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2080 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2081 IsAllLow32(*I)) {
2082
2083 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2084 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2085 << ", " << I->EndIdx
2086 << "] with 32-bit replicated groups with ranges ["
2087 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2088 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2089
2090 if (IP == IN) {
2091 // There is only one other group; change it to cover the whole
2092 // range (backward, so that it can still be Repl32 but cover the
2093 // whole 64-bit range).
2094 IP->StartIdx = 31;
2095 IP->EndIdx = 30;
2096 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2097 IP->Repl32Coalesced = true;
2098 I = BitGroups.erase(I);
2099 } else {
2100 // There are two separate groups, one before this group and one
2101 // after us (at the beginning). We're going to remove this group,
2102 // but also the group at the very beginning.
2103 IP->EndIdx = IN->EndIdx;
2104 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2105 IP->Repl32Coalesced = true;
2106 I = BitGroups.erase(I);
2107 BitGroups.erase(BitGroups.begin());
2108 }
2109
2110 // This must be the last group in the vector (and we might have
2111 // just invalidated the iterator above), so break here.
2112 break;
2113 }
2114 }
2115 }
2116
2117 ++I;
2118 }
2119 }
2120
2121 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2122 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2123 }
2124
2125 uint64_t getZerosMask() {
2126 uint64_t Mask = 0;
2127 for (unsigned i = 0; i < Bits.size(); ++i) {
2128 if (Bits[i].hasValue())
2129 continue;
2130 Mask |= (UINT64_C(1) << i);
2131 }
2132
2133 return ~Mask;
2134 }
2135
2136 // This method extends an input value to 64 bit if input is 32-bit integer.
2137 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2138 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2139 // In such case, we extend it to 64 bit to be consistent with other values.
2140 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2141 if (V.getValueSizeInBits() == 64)
2142 return V;
2143
2144 assert(V.getValueSizeInBits() == 32);
2145 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2146 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2147 MVT::i64), 0);
2148 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2149 MVT::i64, ImDef, V,
2150 SubRegIdx), 0);
2151 return ExtVal;
2152 }
2153
2154 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2155 if (V.getValueSizeInBits() == 32)
2156 return V;
2157
2158 assert(V.getValueSizeInBits() == 64);
2159 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2160 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2161 MVT::i32, V, SubRegIdx), 0);
2162 return SubVal;
2163 }
2164
2165 // Depending on the number of groups for a particular value, it might be
2166 // better to rotate, mask explicitly (using andi/andis), and then or the
2167 // result. Select this part of the result first.
2168 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2170 return;
2171
2172 for (ValueRotInfo &VRI : ValueRotsVec) {
2173 unsigned Mask = 0;
2174 for (unsigned i = 0; i < Bits.size(); ++i) {
2175 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2176 continue;
2177 if (RLAmt[i] != VRI.RLAmt)
2178 continue;
2179 Mask |= (1u << i);
2180 }
2181
2182 // Compute the masks for andi/andis that would be necessary.
2183 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2184 assert((ANDIMask != 0 || ANDISMask != 0) &&
2185 "No set bits in mask for value bit groups");
2186 bool NeedsRotate = VRI.RLAmt != 0;
2187
2188 // We're trying to minimize the number of instructions. If we have one
2189 // group, using one of andi/andis can break even. If we have three
2190 // groups, we can use both andi and andis and break even (to use both
2191 // andi and andis we also need to or the results together). We need four
2192 // groups if we also need to rotate. To use andi/andis we need to do more
2193 // than break even because rotate-and-mask instructions tend to be easier
2194 // to schedule.
2195
2196 // FIXME: We've biased here against using andi/andis, which is right for
2197 // POWER cores, but not optimal everywhere. For example, on the A2,
2198 // andi/andis have single-cycle latency whereas the rotate-and-mask
2199 // instructions take two cycles, and it would be better to bias toward
2200 // andi/andis in break-even cases.
2201
2202 unsigned NumAndInsts = (unsigned) NeedsRotate +
2203 (unsigned) (ANDIMask != 0) +
2204 (unsigned) (ANDISMask != 0) +
2205 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2206 (unsigned) (bool) Res;
2207
2208 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2209 << " RL: " << VRI.RLAmt << ":"
2210 << "\n\t\t\tisel using masking: " << NumAndInsts
2211 << " using rotates: " << VRI.NumGroups << "\n");
2212
2213 if (NumAndInsts >= VRI.NumGroups)
2214 continue;
2215
2216 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2217
2218 if (InstCnt) *InstCnt += NumAndInsts;
2219
2220 SDValue VRot;
2221 if (VRI.RLAmt) {
2222 SDValue Ops[] =
2223 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2224 getI32Imm(0, dl), getI32Imm(31, dl) };
2225 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2226 Ops), 0);
2227 } else {
2228 VRot = TruncateToInt32(VRI.V, dl);
2229 }
2230
2231 SDValue ANDIVal, ANDISVal;
2232 if (ANDIMask != 0)
2233 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2234 VRot, getI32Imm(ANDIMask, dl)),
2235 0);
2236 if (ANDISMask != 0)
2237 ANDISVal =
2238 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2239 getI32Imm(ANDISMask, dl)),
2240 0);
2241
2242 SDValue TotalVal;
2243 if (!ANDIVal)
2244 TotalVal = ANDISVal;
2245 else if (!ANDISVal)
2246 TotalVal = ANDIVal;
2247 else
2248 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2249 ANDIVal, ANDISVal), 0);
2250
2251 if (!Res)
2252 Res = TotalVal;
2253 else
2254 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2255 Res, TotalVal), 0);
2256
2257 // Now, remove all groups with this underlying value and rotation
2258 // factor.
2259 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2260 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2261 });
2262 }
2263 }
2264
2265 // Instruction selection for the 32-bit case.
2266 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2267 SDLoc dl(N);
2268 SDValue Res;
2269
2270 if (InstCnt) *InstCnt = 0;
2271
2272 // Take care of cases that should use andi/andis first.
2273 SelectAndParts32(dl, Res, InstCnt);
2274
2275 // If we've not yet selected a 'starting' instruction, and we have no zeros
2276 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2277 // number of groups), and start with this rotated value.
2278 if ((!NeedMask || LateMask) && !Res) {
2279 ValueRotInfo &VRI = ValueRotsVec[0];
2280 if (VRI.RLAmt) {
2281 if (InstCnt) *InstCnt += 1;
2282 SDValue Ops[] =
2283 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2284 getI32Imm(0, dl), getI32Imm(31, dl) };
2285 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2286 0);
2287 } else {
2288 Res = TruncateToInt32(VRI.V, dl);
2289 }
2290
2291 // Now, remove all groups with this underlying value and rotation factor.
2292 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2293 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2294 });
2295 }
2296
2297 if (InstCnt) *InstCnt += BitGroups.size();
2298
2299 // Insert the other groups (one at a time).
2300 for (auto &BG : BitGroups) {
2301 if (!Res) {
2302 SDValue Ops[] =
2303 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2304 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2305 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2306 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2307 } else {
2308 SDValue Ops[] =
2309 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2310 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2311 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2312 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2313 }
2314 }
2315
2316 if (LateMask) {
2317 unsigned Mask = (unsigned) getZerosMask();
2318
2319 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2320 assert((ANDIMask != 0 || ANDISMask != 0) &&
2321 "No set bits in zeros mask?");
2322
2323 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2324 (unsigned) (ANDISMask != 0) +
2325 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2326
2327 SDValue ANDIVal, ANDISVal;
2328 if (ANDIMask != 0)
2329 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2330 Res, getI32Imm(ANDIMask, dl)),
2331 0);
2332 if (ANDISMask != 0)
2333 ANDISVal =
2334 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2335 getI32Imm(ANDISMask, dl)),
2336 0);
2337
2338 if (!ANDIVal)
2339 Res = ANDISVal;
2340 else if (!ANDISVal)
2341 Res = ANDIVal;
2342 else
2343 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2344 ANDIVal, ANDISVal), 0);
2345 }
2346
2347 return Res.getNode();
2348 }
2349
// Count the instructions (1 or 2) needed for a 64-bit rotate-and-mask of the
// bit range [MaskStart, MaskEnd] with rotation RLAmt. IsIns says whether the
// operation is an insertion, which rules out the two forms whose mask merely
// touches one end of the register.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // A replicated-32-bit group always takes a single instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // Non-inserting form whose mask reaches either end of the register.
  const bool MaskTouchesAnEnd = InstMaskEnd == 63 || InstMaskStart == 0;
  // Form whose mask end is tied to the rotation amount.
  const bool MaskEndTracksRotate = InstMaskEnd == 63 - RLAmt;

  const bool SingleInst = (!IsIns && MaskTouchesAnEnd) || MaskEndTracksRotate;
  return SingleInst ? 1 : 2;
}
2367
2368 // For 64-bit values, not all combinations of rotates and masks are
2369 // available. Produce one if it is available.
2370 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2371 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2372 unsigned *InstCnt = nullptr) {
2373 // In the notation used by the instructions, 'start' and 'end' are reversed
2374 // because bits are counted from high to low order.
2375 unsigned InstMaskStart = 64 - MaskEnd - 1,
2376 InstMaskEnd = 64 - MaskStart - 1;
2377
2378 if (InstCnt) *InstCnt += 1;
2379
2380 if (Repl32) {
2381 // This rotation amount assumes that the lower 32 bits of the quantity
2382 // are replicated in the high 32 bits by the rotation operator (which is
2383 // done by rlwinm and friends).
2384 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2385 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2386 SDValue Ops[] =
2387 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2388 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2389 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2390 Ops), 0);
2391 }
2392
2393 if (InstMaskEnd == 63) {
2394 SDValue Ops[] =
2395 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2396 getI32Imm(InstMaskStart, dl) };
2397 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2398 }
2399
2400 if (InstMaskStart == 0) {
2401 SDValue Ops[] =
2402 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2403 getI32Imm(InstMaskEnd, dl) };
2404 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2405 }
2406
2407 if (InstMaskEnd == 63 - RLAmt) {
2408 SDValue Ops[] =
2409 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2410 getI32Imm(InstMaskStart, dl) };
2411 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2412 }
2413
2414 // We cannot do this with a single instruction, so we'll use two. The
2415 // problem is that we're not free to choose both a rotation amount and mask
2416 // start and end independently. We can choose an arbitrary mask start and
2417 // end, but then the rotation amount is fixed. Rotation, however, can be
2418 // inverted, and so by applying an "inverse" rotation first, we can get the
2419 // desired result.
2420 if (InstCnt) *InstCnt += 1;
2421
2422 // The rotation mask for the second instruction must be MaskStart.
2423 unsigned RLAmt2 = MaskStart;
2424 // The first instruction must rotate V so that the overall rotation amount
2425 // is RLAmt.
2426 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2427 if (RLAmt1)
2428 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2429 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2430 }
2431
2432 // For 64-bit values, not all combinations of rotates and masks are
2433 // available. Produce a rotate-mask-and-insert if one is available.
2434 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2435 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2436 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2437 // In the notation used by the instructions, 'start' and 'end' are reversed
2438 // because bits are counted from high to low order.
2439 unsigned InstMaskStart = 64 - MaskEnd - 1,
2440 InstMaskEnd = 64 - MaskStart - 1;
2441
2442 if (InstCnt) *InstCnt += 1;
2443
2444 if (Repl32) {
2445 // This rotation amount assumes that the lower 32 bits of the quantity
2446 // are replicated in the high 32 bits by the rotation operator (which is
2447 // done by rlwinm and friends).
2448 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2449 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2450 SDValue Ops[] =
2451 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2452 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2453 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2454 Ops), 0);
2455 }
2456
2457 if (InstMaskEnd == 63 - RLAmt) {
2458 SDValue Ops[] =
2459 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2460 getI32Imm(InstMaskStart, dl) };
2461 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2462 }
2463
2464 // We cannot do this with a single instruction, so we'll use two. The
2465 // problem is that we're not free to choose both a rotation amount and mask
2466 // start and end independently. We can choose an arbitrary mask start and
2467 // end, but then the rotation amount is fixed. Rotation, however, can be
2468 // inverted, and so by applying an "inverse" rotation first, we can get the
2469 // desired result.
2470 if (InstCnt) *InstCnt += 1;
2471
2472 // The rotation mask for the second instruction must be MaskStart.
2473 unsigned RLAmt2 = MaskStart;
2474 // The first instruction must rotate V so that the overall rotation amount
2475 // is RLAmt.
2476 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2477 if (RLAmt1)
2478 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2479 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2480 }
2481
2482 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2484 return;
2485
2486 // The idea here is the same as in the 32-bit version, but with additional
2487 // complications from the fact that Repl32 might be true. Because we
2488 // aggressively convert bit groups to Repl32 form (which, for small
2489 // rotation factors, involves no other change), and then coalesce, it might
2490 // be the case that a single 64-bit masking operation could handle both
2491 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2492 // form allowed coalescing, then we must use a 32-bit rotaton in order to
2493 // completely capture the new combined bit group.
2494
2495 for (ValueRotInfo &VRI : ValueRotsVec) {
2496 uint64_t Mask = 0;
2497
2498 // We need to add to the mask all bits from the associated bit groups.
2499 // If Repl32 is false, we need to add bits from bit groups that have
2500 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2501 // group is trivially convertable if it overlaps only with the lower 32
2502 // bits, and the group has not been coalesced.
2503 auto MatchingBG = [VRI](const BitGroup &BG) {
2504 if (VRI.V != BG.V)
2505 return false;
2506
2507 unsigned EffRLAmt = BG.RLAmt;
2508 if (!VRI.Repl32 && BG.Repl32) {
2509 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2510 !BG.Repl32Coalesced) {
2511 if (BG.Repl32CR)
2512 EffRLAmt += 32;
2513 } else {
2514 return false;
2515 }
2516 } else if (VRI.Repl32 != BG.Repl32) {
2517 return false;
2518 }
2519
2520 return VRI.RLAmt == EffRLAmt;
2521 };
2522
2523 for (auto &BG : BitGroups) {
2524 if (!MatchingBG(BG))
2525 continue;
2526
2527 if (BG.StartIdx <= BG.EndIdx) {
2528 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2529 Mask |= (UINT64_C(1) << i);
2530 } else {
2531 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2532 Mask |= (UINT64_C(1) << i);
2533 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2534 Mask |= (UINT64_C(1) << i);
2535 }
2536 }
2537
2538 // We can use the 32-bit andi/andis technique if the mask does not
2539 // require any higher-order bits. This can save an instruction compared
2540 // to always using the general 64-bit technique.
2541 bool Use32BitInsts = isUInt<32>(Mask);
2542 // Compute the masks for andi/andis that would be necessary.
2543 unsigned ANDIMask = (Mask & UINT16_MAX),
2544 ANDISMask = (Mask >> 16) & UINT16_MAX;
2545
2546 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2547
2548 unsigned NumAndInsts = (unsigned) NeedsRotate +
2549 (unsigned) (bool) Res;
2550 unsigned NumOfSelectInsts = 0;
2551 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2552 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2553 if (Use32BitInsts)
2554 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2555 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2556 else
2557 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2558
2559 unsigned NumRLInsts = 0;
2560 bool FirstBG = true;
2561 bool MoreBG = false;
2562 for (auto &BG : BitGroups) {
2563 if (!MatchingBG(BG)) {
2564 MoreBG = true;
2565 continue;
2566 }
2567 NumRLInsts +=
2568 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2569 !FirstBG);
2570 FirstBG = false;
2571 }
2572
2573 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2574 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2575 << "\n\t\t\tisel using masking: " << NumAndInsts
2576 << " using rotates: " << NumRLInsts << "\n");
2577
2578 // When we'd use andi/andis, we bias toward using the rotates (andi only
2579 // has a record form, and is cracked on POWER cores). However, when using
2580 // general 64-bit constant formation, bias toward the constant form,
2581 // because that exposes more opportunities for CSE.
2582 if (NumAndInsts > NumRLInsts)
2583 continue;
2584 // When merging multiple bit groups, instruction or is used.
2585 // But when rotate is used, rldimi can inert the rotated value into any
2586 // register, so instruction or can be avoided.
2587 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2588 continue;
2589
2590 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2591
2592 if (InstCnt) *InstCnt += NumAndInsts;
2593
2594 SDValue VRot;
2595 // We actually need to generate a rotation if we have a non-zero rotation
2596 // factor or, in the Repl32 case, if we care about any of the
2597 // higher-order replicated bits. In the latter case, we generate a mask
2598 // backward so that it actually includes the entire 64 bits.
2599 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2600 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2601 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2602 else
2603 VRot = VRI.V;
2604
2605 SDValue TotalVal;
2606 if (Use32BitInsts) {
2607 assert((ANDIMask != 0 || ANDISMask != 0) &&
2608 "No set bits in mask when using 32-bit ands for 64-bit value");
2609
2610 SDValue ANDIVal, ANDISVal;
2611 if (ANDIMask != 0)
2612 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2613 ExtendToInt64(VRot, dl),
2614 getI32Imm(ANDIMask, dl)),
2615 0);
2616 if (ANDISMask != 0)
2617 ANDISVal =
2618 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2619 ExtendToInt64(VRot, dl),
2620 getI32Imm(ANDISMask, dl)),
2621 0);
2622
2623 if (!ANDIVal)
2624 TotalVal = ANDISVal;
2625 else if (!ANDISVal)
2626 TotalVal = ANDIVal;
2627 else
2628 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2629 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2630 } else {
2631 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2632 TotalVal =
2633 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2634 ExtendToInt64(VRot, dl), TotalVal),
2635 0);
2636 }
2637
2638 if (!Res)
2639 Res = TotalVal;
2640 else
2641 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2642 ExtendToInt64(Res, dl), TotalVal),
2643 0);
2644
2645 // Now, remove all groups with this underlying value and rotation
2646 // factor.
2647 eraseMatchingBitGroups(MatchingBG);
2648 }
2649 }
2650
2651 // Instruction selection for the 64-bit case.
2652 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2653 SDLoc dl(N);
2654 SDValue Res;
2655
2656 if (InstCnt) *InstCnt = 0;
2657
2658 // Take care of cases that should use andi/andis first.
2659 SelectAndParts64(dl, Res, InstCnt);
2660
2661 // If we've not yet selected a 'starting' instruction, and we have no zeros
2662 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2663 // number of groups), and start with this rotated value.
2664 if ((!NeedMask || LateMask) && !Res) {
2665 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2666 // groups will come first, and so the VRI representing the largest number
2667 // of groups might not be first (it might be the first Repl32 groups).
2668 unsigned MaxGroupsIdx = 0;
2669 if (!ValueRotsVec[0].Repl32) {
2670 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2671 if (ValueRotsVec[i].Repl32) {
2672 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2673 MaxGroupsIdx = i;
2674 break;
2675 }
2676 }
2677
2678 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
2679 bool NeedsRotate = false;
2680 if (VRI.RLAmt) {
2681 NeedsRotate = true;
2682 } else if (VRI.Repl32) {
2683 for (auto &BG : BitGroups) {
2684 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2685 BG.Repl32 != VRI.Repl32)
2686 continue;
2687
2688 // We don't need a rotate if the bit group is confined to the lower
2689 // 32 bits.
2690 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2691 continue;
2692
2693 NeedsRotate = true;
2694 break;
2695 }
2696 }
2697
2698 if (NeedsRotate)
2699 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2700 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2701 InstCnt);
2702 else
2703 Res = VRI.V;
2704
2705 // Now, remove all groups with this underlying value and rotation factor.
2706 if (Res)
2707 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2708 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2709 BG.Repl32 == VRI.Repl32;
2710 });
2711 }
2712
2713 // Because 64-bit rotates are more flexible than inserts, we might have a
2714 // preference regarding which one we do first (to save one instruction).
2715 if (!Res)
2716 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2717 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2718 false) <
2719 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2720 true)) {
2721 if (I != BitGroups.begin()) {
2722 BitGroup BG = *I;
2723 BitGroups.erase(I);
2724 BitGroups.insert(BitGroups.begin(), BG);
2725 }
2726
2727 break;
2728 }
2729 }
2730
2731 // Insert the other groups (one at a time).
2732 for (auto &BG : BitGroups) {
2733 if (!Res)
2734 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2735 BG.EndIdx, InstCnt);
2736 else
2737 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2738 BG.StartIdx, BG.EndIdx, InstCnt);
2739 }
2740
2741 if (LateMask) {
2742 uint64_t Mask = getZerosMask();
2743
2744 // We can use the 32-bit andi/andis technique if the mask does not
2745 // require any higher-order bits. This can save an instruction compared
2746 // to always using the general 64-bit technique.
2747 bool Use32BitInsts = isUInt<32>(Mask);
2748 // Compute the masks for andi/andis that would be necessary.
2749 unsigned ANDIMask = (Mask & UINT16_MAX),
2750 ANDISMask = (Mask >> 16) & UINT16_MAX;
2751
2752 if (Use32BitInsts) {
2753 assert((ANDIMask != 0 || ANDISMask != 0) &&
2754 "No set bits in mask when using 32-bit ands for 64-bit value");
2755
2756 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2757 (unsigned) (ANDISMask != 0) +
2758 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2759
2760 SDValue ANDIVal, ANDISVal;
2761 if (ANDIMask != 0)
2762 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2763 ExtendToInt64(Res, dl),
2764 getI32Imm(ANDIMask, dl)),
2765 0);
2766 if (ANDISMask != 0)
2767 ANDISVal =
2768 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2769 ExtendToInt64(Res, dl),
2770 getI32Imm(ANDISMask, dl)),
2771 0);
2772
2773 if (!ANDIVal)
2774 Res = ANDISVal;
2775 else if (!ANDISVal)
2776 Res = ANDIVal;
2777 else
2778 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2779 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2780 } else {
2781 unsigned NumOfSelectInsts = 0;
2782 SDValue MaskVal =
2783 SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2784 Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2785 ExtendToInt64(Res, dl), MaskVal),
2786 0);
2787 if (InstCnt)
2788 *InstCnt += NumOfSelectInsts + /* and */ 1;
2789 }
2790 }
2791
2792 return Res.getNode();
2793 }
2794
2795 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2796 // Fill in BitGroups.
2797 collectBitGroups(LateMask);
2798 if (BitGroups.empty())
2799 return nullptr;
2800
2801 // For 64-bit values, figure out when we can use 32-bit instructions.
2802 if (Bits.size() == 64)
2803 assignRepl32BitGroups();
2804
2805 // Fill in ValueRotsVec.
2806 collectValueRotInfo();
2807
2808 if (Bits.size() == 32) {
2809 return Select32(N, LateMask, InstCnt);
2810 } else {
2811 assert(Bits.size() == 64 && "Not 64 bits here?");
2812 return Select64(N, LateMask, InstCnt);
2813 }
2814
2815 return nullptr;
2816 }
2817
2818 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2819 erase_if(BitGroups, F);
2820 }
2821
2823
2824 bool NeedMask = false;
2826
2827 SmallVector<BitGroup, 16> BitGroups;
2828
2829 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
2830 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2831
2832 SelectionDAG *CurDAG = nullptr;
2833
2834public:
2835 BitPermutationSelector(SelectionDAG *DAG)
2836 : CurDAG(DAG) {}
2837
2838 // Here we try to match complex bit permutations into a set of
2839 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2840 // known to produce optimal code for common cases (like i32 byte swapping).
2841 SDNode *Select(SDNode *N) {
2842 Memoizer.clear();
2843 auto Result =
2844 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2845 if (!Result.first)
2846 return nullptr;
2847 Bits = std::move(*Result.second);
2848
2849 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2850 " selection for: ");
2851 LLVM_DEBUG(N->dump(CurDAG));
2852
2853 // Fill it RLAmt and set NeedMask.
2854 computeRotationAmounts();
2855
2856 if (!NeedMask)
2857 return Select(N, false);
2858
2859 // We currently have two techniques for handling results with zeros: early
2860 // masking (the default) and late masking. Late masking is sometimes more
2861 // efficient, but because the structure of the bit groups is different, it
2862 // is hard to tell without generating both and comparing the results. With
2863 // late masking, we ignore zeros in the resulting value when inserting each
2864 // set of bit groups, and then mask in the zeros at the end. With early
2865 // masking, we only insert the non-zero parts of the result at every step.
2866
2867 unsigned InstCnt = 0, InstCntLateMask = 0;
2868 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2869 SDNode *RN = Select(N, false, &InstCnt);
2870 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2871
2872 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2873 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2874 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2875 << " instructions\n");
2876
2877 if (InstCnt <= InstCntLateMask) {
2878 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2879 return RN;
2880 }
2881
2882 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2883 return RNLM;
2884 }
2885};
2886
2887class IntegerCompareEliminator {
2888 SelectionDAG *CurDAG;
2889 PPCDAGToDAGISel *S;
2890 // Conversion type for interpreting results of a 32-bit instruction as
2891 // a 64-bit value or vice versa.
2892 enum ExtOrTruncConversion { Ext, Trunc };
2893
2894 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2895 // in a GPR.
2896 // ZExtOrig - use the original condition code, zero-extend value
2897 // ZExtInvert - invert the condition code, zero-extend value
2898 // SExtOrig - use the original condition code, sign-extend value
2899 // SExtInvert - invert the condition code, sign-extend value
2900 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2901
2902 // Comparisons against zero to emit GPR code sequences for. Each of these
2903 // sequences may need to be emitted for two or more equivalent patterns.
2904 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2905 // matters as well as the extension type: sext (-1/0), zext (1/0).
2906 // GEZExt - (zext (LHS >= 0))
2907 // GESExt - (sext (LHS >= 0))
2908 // LEZExt - (zext (LHS <= 0))
2909 // LESExt - (sext (LHS <= 0))
2910 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2911
2912 SDNode *tryEXTEND(SDNode *N);
2913 SDNode *tryLogicOpOfCompares(SDNode *N);
2914 SDValue computeLogicOpInGPR(SDValue LogicOp);
2915 SDValue signExtendInputIfNeeded(SDValue Input);
2916 SDValue zeroExtendInputIfNeeded(SDValue Input);
2917 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2918 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2919 ZeroCompare CmpTy);
2920 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2921 int64_t RHSValue, SDLoc dl);
2922 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2923 int64_t RHSValue, SDLoc dl);
2924 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2925 int64_t RHSValue, SDLoc dl);
2926 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2927 int64_t RHSValue, SDLoc dl);
2928 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2929
2930public:
2931 IntegerCompareEliminator(SelectionDAG *DAG,
2932 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2934 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2935 "Only expecting to use this on 64 bit targets.");
2936 }
2937 SDNode *Select(SDNode *N) {
2938 if (CmpInGPR == ICGPR_None)
2939 return nullptr;
2940 switch (N->getOpcode()) {
2941 default: break;
2942 case ISD::ZERO_EXTEND:
2945 return nullptr;
2946 [[fallthrough]];
2947 case ISD::SIGN_EXTEND:
2950 return nullptr;
2951 return tryEXTEND(N);
2952 case ISD::AND:
2953 case ISD::OR:
2954 case ISD::XOR:
2955 return tryLogicOpOfCompares(N);
2956 }
2957 return nullptr;
2958 }
2959};
2960
2961// The obvious case for wanting to keep the value in a GPR. Namely, the
2962// result of the comparison is actually needed in a GPR.
2963SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2964 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2965 N->getOpcode() == ISD::SIGN_EXTEND) &&
2966 "Expecting a zero/sign extend node!");
2967 SDValue WideRes;
2968 // If we are zero-extending the result of a logical operation on i1
2969 // values, we can keep the values in GPRs.
2970 if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2971 N->getOperand(0).getValueType() == MVT::i1 &&
2972 N->getOpcode() == ISD::ZERO_EXTEND)
2973 WideRes = computeLogicOpInGPR(N->getOperand(0));
2974 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2975 return nullptr;
2976 else
2977 WideRes =
2978 getSETCCInGPR(N->getOperand(0),
2979 N->getOpcode() == ISD::SIGN_EXTEND ?
2980 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2981
2982 if (!WideRes)
2983 return nullptr;
2984
2985 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2986 bool Output32Bit = N->getValueType(0) == MVT::i32;
2987
2988 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2989 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2990
2991 SDValue ConvOp = WideRes;
2992 if (Input32Bit != Output32Bit)
2993 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2994 ExtOrTruncConversion::Trunc);
2995 return ConvOp.getNode();
2996}
2997
2998// Attempt to perform logical operations on the results of comparisons while
2999// keeping the values in GPRs. Without doing so, these would end up being
3000// lowered to CR-logical operations which suffer from significant latency and
3001// low ILP.
3002SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
3003 if (N->getValueType(0) != MVT::i1)
3004 return nullptr;
3005 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
3006 "Expected a logic operation on setcc results.");
3007 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
3008 if (!LoweredLogical)
3009 return nullptr;
3010
3011 SDLoc dl(N);
3012 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3013 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3014 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3015 SDValue LHS = LoweredLogical.getOperand(0);
3016 SDValue RHS = LoweredLogical.getOperand(1);
3017 SDValue WideOp;
3018 SDValue OpToConvToRecForm;
3019
3020 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3021 // opcode that is input to the XORI.
3022 if (IsBitwiseNegate &&
3023 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3024 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3025 else if (IsBitwiseNegate)
3026 // If the input to the XORI isn't an extension, that's what we're after.
3027 OpToConvToRecForm = LoweredLogical.getOperand(0);
3028 else
3029 // If this is not an XORI, it is a reg-reg logical op and we can convert
3030 // it to record-form.
3031 OpToConvToRecForm = LoweredLogical;
3032
3033 // Get the record-form version of the node we're looking to use to get the
3034 // CR result from.
3035 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3036 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3037
3038 // Convert the right node to record-form. This is either the logical we're
3039 // looking at or it is the input node to the negation (if we're looking at
3040 // a bitwise negation).
3041 if (NewOpc != -1 && IsBitwiseNegate) {
3042 // The input to the XORI has a record-form. Use it.
3043 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3044 "Expected a PPC::XORI8 only for bitwise negation.");
3045 // Emit the record-form instruction.
3046 std::vector<SDValue> Ops;
3047 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3048 Ops.push_back(OpToConvToRecForm.getOperand(i));
3049
3050 WideOp =
3051 SDValue(CurDAG->getMachineNode(NewOpc, dl,
3052 OpToConvToRecForm.getValueType(),
3053 MVT::Glue, Ops), 0);
3054 } else {
3055 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3056 "No record form available for AND8/OR8/XOR8?");
3057 WideOp =
3058 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3059 dl, MVT::i64, MVT::Glue, LHS, RHS),
3060 0);
3061 }
3062
3063 // Select this node to a single bit from CR0 set by the record-form node
3064 // just created. For bitwise negation, use the EQ bit which is the equivalent
3065 // of negating the result (i.e. it is a bit set when the result of the
3066 // operation is zero).
3067 SDValue SRIdxVal =
3068 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3069 SDValue CRBit =
3070 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3071 MVT::i1, CR0Reg, SRIdxVal,
3072 WideOp.getValue(1)), 0);
3073 return CRBit.getNode();
3074}
3075
3076// Lower a logical operation on i1 values into a GPR sequence if possible.
3077// The result can be kept in a GPR if requested.
3078// Three types of inputs can be handled:
3079// - SETCC
3080// - TRUNCATE
3081// - Logical operation (AND/OR/XOR)
3082// There is also a special case that is handled (namely a complement operation
3083// achieved with xor %a, -1).
3084SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
3086 "Can only handle logic operations here.");
3087 assert(LogicOp.getValueType() == MVT::i1 &&
3088 "Can only handle logic operations on i1 values here.");
3089 SDLoc dl(LogicOp);
3090 SDValue LHS, RHS;
3091
3092 // Special case: xor %a, -1
3093 bool IsBitwiseNegation = isBitwiseNot(LogicOp);
3094
3095 // Produces a GPR sequence for each operand of the binary logic operation.
3096 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3097 // the value in a GPR and for logic operations, it will recursively produce
3098 // a GPR sequence for the operation.
3099 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3100 unsigned OperandOpcode = Operand.getOpcode();
3101 if (OperandOpcode == ISD::SETCC)
3102 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3103 else if (OperandOpcode == ISD::TRUNCATE) {
3104 SDValue InputOp = Operand.getOperand(0);
3105 EVT InVT = InputOp.getValueType();
3106 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3107 PPC::RLDICL, dl, InVT, InputOp,
3108 S->getI64Imm(0, dl),
3109 S->getI64Imm(63, dl)), 0);
3110 } else if (ISD::isBitwiseLogicOp(OperandOpcode))
3111 return computeLogicOpInGPR(Operand);
3112 return SDValue();
3113 };
3114 LHS = getLogicOperand(LogicOp.getOperand(0));
3115 RHS = getLogicOperand(LogicOp.getOperand(1));
3116
3117 // If a GPR sequence can't be produced for the LHS we can't proceed.
3118 // Not producing a GPR sequence for the RHS is only a problem if this isn't
3119 // a bitwise negation operation.
3120 if (!LHS || (!RHS && !IsBitwiseNegation))
3121 return SDValue();
3122
3123 NumLogicOpsOnComparison++;
3124
3125 // We will use the inputs as 64-bit values.
3126 if (LHS.getValueType() == MVT::i32)
3127 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
3128 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3129 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3130
3131 unsigned NewOpc;
3132 switch (LogicOp.getOpcode()) {
3133 default: llvm_unreachable("Unknown logic operation.");
3134 case ISD::AND: NewOpc = PPC::AND8; break;
3135 case ISD::OR: NewOpc = PPC::OR8; break;
3136 case ISD::XOR: NewOpc = PPC::XOR8; break;
3137 }
3138
3139 if (IsBitwiseNegation) {
3140 RHS = S->getI64Imm(1, dl);
3141 NewOpc = PPC::XORI8;
3142 }
3143
3144 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
3145
3146}
3147
3148/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3149/// Otherwise just reinterpret it as a 64-bit value.
3150/// Useful when emitting comparison code for 32-bit values without using
3151/// the compare instruction (which only considers the lower 32-bits).
3152SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3153 assert(Input.getValueType() == MVT::i32 &&
3154 "Can only sign-extend 32-bit values here.");
3155 unsigned Opc = Input.getOpcode();
3156
3157 // The value was sign extended and then truncated to 32-bits. No need to
3158 // sign extend it again.
3159 if (Opc == ISD::TRUNCATE &&
3160 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3161 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3162 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3163
3164 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3165 // The input is a sign-extending load. All ppc sign-extending loads
3166 // sign-extend to the full 64-bits.
3167 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3168 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3169
3170 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3171 // We don't sign-extend constants.
3172 if (InputConst)
3173 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3174
3175 SDLoc dl(Input);
3176 SignExtensionsAdded++;
3177 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3178 MVT::i64, Input), 0);
3179}
3180
3181/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3182/// Otherwise just reinterpret it as a 64-bit value.
3183/// Useful when emitting comparison code for 32-bit values without using
3184/// the compare instruction (which only considers the lower 32-bits).
3185SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3186 assert(Input.getValueType() == MVT::i32 &&
3187 "Can only zero-extend 32-bit values here.");
3188 unsigned Opc = Input.getOpcode();
3189
3190 // The only condition under which we can omit the actual extend instruction:
3191 // - The value is a positive constant
3192 // - The value comes from a load that isn't a sign-extending load
3193 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3194 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3195 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3196 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3197 if (IsTruncateOfZExt)
3198 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3199
3200 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3201 if (InputConst && InputConst->getSExtValue() >= 0)
3202 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3203
3204 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3205 // The input is a load that doesn't sign-extend (it will be zero-extended).
3206 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3207 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3208
3209 // None of the above, need to zero-extend.
3210 SDLoc dl(Input);
3211 ZeroExtensionsAdded++;
3212 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3213 S->getI64Imm(0, dl),
3214 S->getI64Imm(32, dl)), 0);
3215}
3216
3217// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3218// course not actual zero/sign extensions that will generate machine code,
3219// they're just a way to reinterpret a 32 bit value in a register as a
3220// 64 bit value and vice-versa.
3221SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3222 ExtOrTruncConversion Conv) {
3223 SDLoc dl(NatWidthRes);
3224
3225 // For reinterpreting 32-bit values as 64 bit values, we generate
3226 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3227 if (Conv == ExtOrTruncConversion::Ext) {
3228 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3229 SDValue SubRegIdx =
3230 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3231 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3232 ImDef, NatWidthRes, SubRegIdx), 0);
3233 }
3234
3235 assert(Conv == ExtOrTruncConversion::Trunc &&
3236 "Unknown convertion between 32 and 64 bit values.");
3237 // For reinterpreting 64-bit values as 32-bit values, we just need to
3238 // EXTRACT_SUBREG (i.e. extract the low word).
3239 SDValue SubRegIdx =
3240 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3241 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3242 NatWidthRes, SubRegIdx), 0);
3243}
3244
3245// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3246// Handle both zero-extensions and sign-extensions.
3247SDValue
3248IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3249 ZeroCompare CmpTy) {
3250 EVT InVT = LHS.getValueType();
3251 bool Is32Bit = InVT == MVT::i32;
3252 SDValue ToExtend;
3253
3254 // Produce the value that needs to be either zero or sign extended.
3255 switch (CmpTy) {
3256 case ZeroCompare::GEZExt:
3257 case ZeroCompare::GESExt:
3258 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3259 dl, InVT, LHS, LHS), 0);
3260 break;
3261 case ZeroCompare::LEZExt:
3262 case ZeroCompare::LESExt: {
3263 if (Is32Bit) {
3264 // Upper 32 bits cannot be undefined for this sequence.
3265 LHS = signExtendInputIfNeeded(LHS);
3266 SDValue Neg =
3267 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3268 ToExtend =
3269 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3270 Neg, S->getI64Imm(1, dl),
3271 S->getI64Imm(63, dl)), 0);
3272 } else {
3273 SDValue Addi =
3274 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3275 S->getI64Imm(~0ULL, dl)), 0);
3276 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3277 Addi, LHS), 0);
3278 }
3279 break;
3280 }
3281 }
3282
3283 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
3284 if (!Is32Bit &&
3285 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3286 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3287 ToExtend, S->getI64Imm(1, dl),
3288 S->getI64Imm(63, dl)), 0);
3289 if (!Is32Bit &&
3290 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3291 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3292 S->getI64Imm(63, dl)), 0);
3293
3294 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3295 // For 32-bit sequences, the extensions differ between GE/LE cases.
3296 switch (CmpTy) {
3297 case ZeroCompare::GEZExt: {
3298 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3299 S->getI32Imm(31, dl) };
3300 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3301 ShiftOps), 0);
3302 }
3303 case ZeroCompare::GESExt:
3304 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3305 S->getI32Imm(31, dl)), 0);
3306 case ZeroCompare::LEZExt:
3307 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3308 S->getI32Imm(1, dl)), 0);
3309 case ZeroCompare::LESExt:
3310 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3311 S->getI32Imm(-1, dl)), 0);
3312 }
3313
3314 // The above case covers all the enumerators so it can't have a default clause
3315 // to avoid compiler warnings.
3316 llvm_unreachable("Unknown zero-comparison type.");
3317}
3318
3319/// Produces a zero-extended result of comparing two 32-bit values according to
3320/// the passed condition code.
3321SDValue
3322IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3323 ISD::CondCode CC,
3324 int64_t RHSValue, SDLoc dl) {
3327 return SDValue();
3328 bool IsRHSZero = RHSValue == 0;
3329 bool IsRHSOne = RHSValue == 1;
3330 bool IsRHSNegOne = RHSValue == -1LL;
3331 switch (CC) {
3332 default: return SDValue();
3333 case ISD::SETEQ: {
3334 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3335 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3336 SDValue Xor = IsRHSZero ? LHS :
3337 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3338 SDValue Clz =
3339 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3340 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3341 S->getI32Imm(31, dl) };
3342 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3343 ShiftOps), 0);
3344 }
3345 case ISD::SETNE: {
3346 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3347 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3348 SDValue Xor = IsRHSZero ? LHS :
3349 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3350 SDValue Clz =
3351 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3352 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3353 S->getI32Imm(31, dl) };
3354 SDValue Shift =
3355 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3356 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3357 S->getI32Imm(1, dl)), 0);
3358 }
3359 case ISD::SETGE: {
3360 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3361 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3362 if(IsRHSZero)
3363 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3364
3365 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3366 // by swapping inputs and falling through.
3367 std::swap(LHS, RHS);
3368 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3369 IsRHSZero = RHSConst && RHSConst->isZero();
3370 [[fallthrough]];
3371 }
3372 case ISD::SETLE: {
3373 if (CmpInGPR == ICGPR_NonExtIn)
3374 return SDValue();
3375 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3376 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3377 if(IsRHSZero) {
3378 if (CmpInGPR == ICGPR_NonExtIn)
3379 return SDValue();
3380 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3381 }
3382
3383 // The upper 32-bits of the register can't be undefined for this sequence.
3384 LHS = signExtendInputIfNeeded(LHS);
3385 RHS = signExtendInputIfNeeded(RHS);
3386 SDValue Sub =
3387 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3388 SDValue Shift =
3389 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3390 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3391 0);
3392 return
3393 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3394 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3395 }
3396 case ISD::SETGT: {
3397 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3398 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3399 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3400 // Handle SETLT -1 (which is equivalent to SETGE 0).
3401 if (IsRHSNegOne)
3402 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3403
3404 if (IsRHSZero) {
3405 if (CmpInGPR == ICGPR_NonExtIn)
3406 return SDValue();
3407 // The upper 32-bits of the register can't be undefined for this sequence.
3408 LHS = signExtendInputIfNeeded(LHS);
3409 RHS = signExtendInputIfNeeded(RHS);
3410 SDValue Neg =
3411 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3412 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3413 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3414 }
3415 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3416 // (%b < %a) by swapping inputs and falling through.
3417 std::swap(LHS, RHS);
3418 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3419 IsRHSZero = RHSConst && RHSConst->isZero();
3420 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3421 [[fallthrough]];
3422 }
3423 case ISD::SETLT: {
3424 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3425 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3426 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3427 // Handle SETLT 1 (which is equivalent to SETLE 0).
3428 if (IsRHSOne) {
3429 if (CmpInGPR == ICGPR_NonExtIn)
3430 return SDValue();
3431 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3432 }
3433
3434 if (IsRHSZero) {
3435 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3436 S->getI32Imm(31, dl) };
3437 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3438 ShiftOps), 0);
3439 }
3440
3441 if (CmpInGPR == ICGPR_NonExtIn)
3442 return SDValue();
3443 // The upper 32-bits of the register can't be undefined for this sequence.
3444 LHS = signExtendInputIfNeeded(LHS);
3445 RHS = signExtendInputIfNeeded(RHS);
3446 SDValue SUBFNode =
3447 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3448 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3449 SUBFNode, S->getI64Imm(1, dl),
3450 S->getI64Imm(63, dl)), 0);
3451 }
3452 case ISD::SETUGE:
3453 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3454 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3455 std::swap(LHS, RHS);
3456 [[fallthrough]];
3457 case ISD::SETULE: {
3458 if (CmpInGPR == ICGPR_NonExtIn)
3459 return SDValue();
3460 // The upper 32-bits of the register can't be undefined for this sequence.
3461 LHS = zeroExtendInputIfNeeded(LHS);
3462 RHS = zeroExtendInputIfNeeded(RHS);
3464 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3465 SDValue SrdiNode =
3466 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3467 Subtract, S->getI64Imm(1, dl),
3468 S->getI64Imm(63, dl)), 0);
3469 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3470 S->getI32Imm(1, dl)), 0);
3471 }
3472 case ISD::SETUGT:
3473 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3474 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3475 std::swap(LHS, RHS);
3476 [[fallthrough]];
3477 case ISD::SETULT: {
3478 if (CmpInGPR == ICGPR_NonExtIn)
3479 return SDValue();
3480 // The upper 32-bits of the register can't be undefined for this sequence.
3481 LHS = zeroExtendInputIfNeeded(LHS);
3482 RHS = zeroExtendInputIfNeeded(RHS);
3484 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3485 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3486 Subtract, S->getI64Imm(1, dl),
3487 S->getI64Imm(63, dl)), 0);
3488 }
3489 }
3490}
3491
3492/// Produces a sign-extended result of comparing two 32-bit values according to
3493/// the passed condition code.
3494SDValue
3495IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3496 ISD::CondCode CC,
3497 int64_t RHSValue, SDLoc dl) {
3500 return SDValue();
3501 bool IsRHSZero = RHSValue == 0;
3502 bool IsRHSOne = RHSValue == 1;
3503 bool IsRHSNegOne = RHSValue == -1LL;
3504
3505 switch (CC) {
3506 default: return SDValue();
3507 case ISD::SETEQ: {
3508 // (sext (setcc %a, %b, seteq)) ->
3509 // (ashr (shl (ctlz (xor %a, %b)), 58), 63)
3510 // (sext (setcc %a, 0, seteq)) ->
3511 // (ashr (shl (ctlz %a), 58), 63)
3512 SDValue CountInput = IsRHSZero ? LHS :
3513 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3514 SDValue Cntlzw =
3515 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3516 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3517 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3518 SDValue Slwi =
3519 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3520 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3521 }
3522 case ISD::SETNE: {
3523 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3524 // flip the bit, finally take 2's complement.
3525 // (sext (setcc %a, %b, setne)) ->
3526 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3527 // Same as above, but the first xor is not needed.
3528 // (sext (setcc %a, 0, setne)) ->
3529 // (neg (xor (lshr (ctlz %a), 5), 1))
3530 SDValue Xor = IsRHSZero ? LHS :
3531 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3532 SDValue Clz =
3533 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3534 SDValue ShiftOps[] =
3535 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3536 SDValue Shift =
3537 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3538 SDValue Xori =
3539 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3540 S->getI32Imm(1, dl)), 0);
3541 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3542 }
3543 case ISD::SETGE: {
3544 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3545 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3546 if (IsRHSZero)
3547 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3548
3549 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3550 // by swapping inputs and falling through.
3551 std::swap(LHS, RHS);
3552 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3553 IsRHSZero = RHSConst && RHSConst->isZero();
3554 [[fallthrough]];
3555 }
3556 case ISD::SETLE: {
3557 if (CmpInGPR == ICGPR_NonExtIn)
3558 return SDValue();
3559 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %b, %a), 63), -1)
3560 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3561 if (IsRHSZero)
3562 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3563
3564 // The upper 32-bits of the register can't be undefined for this sequence.
3565 LHS = signExtendInputIfNeeded(LHS);
3566 RHS = signExtendInputIfNeeded(RHS);
3567 SDValue SUBFNode =
3568 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3569 LHS, RHS), 0);
3570 SDValue Srdi =
3571 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3572 SUBFNode, S->getI64Imm(1, dl),
3573 S->getI64Imm(63, dl)), 0);
3574 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3575 S->getI32Imm(-1, dl)), 0);
3576 }
3577 case ISD::SETGT: {
3578 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3579 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3580 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
3581 if (IsRHSNegOne)
3582 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3583 if (IsRHSZero) {
3584 if (CmpInGPR == ICGPR_NonExtIn)
3585 return SDValue();
3586 // The upper 32-bits of the register can't be undefined for this sequence.
3587 LHS = signExtendInputIfNeeded(LHS);
3588 RHS = signExtendInputIfNeeded(RHS);
3589 SDValue Neg =
3590 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3591 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3592 S->getI64Imm(63, dl)), 0);
3593 }
3594 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3595 // (%b < %a) by swapping inputs and falling through.
3596 std::swap(LHS, RHS);
3597 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3598 IsRHSZero = RHSConst && RHSConst->isZero();
3599 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3600 [[fallthrough]];
3601 }
3602 case ISD::SETLT: {
3603 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %a, %b), 63)
3604 // (sext (setcc %a, 1, setgt)) -> (add (lshr (- %a), 63), -1)
3605 // (sext (setcc %a, 0, setgt)) -> (ashr %a, 31)
3606 if (IsRHSOne) {
3607 if (CmpInGPR == ICGPR_NonExtIn)
3608 return SDValue();
3609 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3610 }
3611 if (IsRHSZero)
3612 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3613 S->getI32Imm(31, dl)), 0);
3614
3615 if (CmpInGPR == ICGPR_NonExtIn)
3616 return SDValue();
3617 // The upper 32-bits of the register can't be undefined for this sequence.
3618 LHS = signExtendInputIfNeeded(LHS);
3619 RHS = signExtendInputIfNeeded(RHS);
3620 SDValue SUBFNode =
3621 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3622 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3623 SUBFNode, S->getI64Imm(63, dl)), 0);
3624 }
3625 case ISD::SETUGE:
3626 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3627 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3628 std::swap(LHS, RHS);
3629 [[fallthrough]];
3630 case ISD::SETULE: {
3631 if (CmpInGPR == ICGPR_NonExtIn)
3632 return SDValue();
3633 // The upper 32-bits of the register can't be undefined for this sequence.
3634 LHS = zeroExtendInputIfNeeded(LHS);
3635 RHS = zeroExtendInputIfNeeded(RHS);
3637 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3638 SDValue Shift =
3639 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3640 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3641 0);
3642 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3643 S->getI32Imm(-1, dl)), 0);
3644 }
3645 case ISD::SETUGT:
3646 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3647 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %a, %b), 63)
3648 std::swap(LHS, RHS);
3649 [[fallthrough]];
3650 case ISD::SETULT: {
3651 if (CmpInGPR == ICGPR_NonExtIn)
3652 return SDValue();
3653 // The upper 32-bits of the register can't be undefined for this sequence.
3654 LHS = zeroExtendInputIfNeeded(LHS);
3655 RHS = zeroExtendInputIfNeeded(RHS);
3657 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3658 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3659 Subtract, S->getI64Imm(63, dl)), 0);
3660 }
3661 }
3662}
3663
3664/// Produces a zero-extended result of comparing two 64-bit values according to
3665/// the passed condition code.
3666SDValue
3667IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3668 ISD::CondCode CC,
3669 int64_t RHSValue, SDLoc dl) {
3672 return SDValue();
3673 bool IsRHSZero = RHSValue == 0;
3674 bool IsRHSOne = RHSValue == 1;
3675 bool IsRHSNegOne = RHSValue == -1LL;
3676 switch (CC) {
3677 default: return SDValue();
3678 case ISD::SETEQ: {
3679 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3680 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3681 SDValue Xor = IsRHSZero ? LHS :
3682 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3683 SDValue Clz =
3684 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3685 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3686 S->getI64Imm(58, dl),
3687 S->getI64Imm(63, dl)), 0);
3688 }
3689 case ISD::SETNE: {
3690 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3691 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3692 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3693 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3694 SDValue Xor = IsRHSZero ? LHS :
3695 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3696 SDValue AC =
3697 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3698 Xor, S->getI32Imm(~0U, dl)), 0);
3699 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3700 Xor, AC.getValue(1)), 0);
3701 }
3702 case ISD::SETGE: {
3703 // {subc.reg, subc.CA} = (subcarry %a, %b)
3704 // (zext (setcc %a, %b, setge)) ->
3705 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3706 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3707 if (IsRHSZero)
3708 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3709 std::swap(LHS, RHS);
3710 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3711 IsRHSZero = RHSConst && RHSConst->isZero();
3712 [[fallthrough]];
3713 }
3714 case ISD::SETLE: {
3715 // {subc.reg, subc.CA} = (subcarry %b, %a)
3716 // (zext (setcc %a, %b, setge)) ->
3717 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3718 // (zext (setcc %a, 0, setge)) -> (lshr (or %a, (add %a, -1)), 63)
3719 if (IsRHSZero)
3720 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3721 SDValue ShiftL =
3722 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3723 S->getI64Imm(1, dl),
3724 S->getI64Imm(63, dl)), 0);
3725 SDValue ShiftR =
3726 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3727 S->getI64Imm(63, dl)), 0);
3728 SDValue SubtractCarry =
3729 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3730 LHS, RHS), 1);
3731 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3732 ShiftR, ShiftL, SubtractCarry), 0);
3733 }
3734 case ISD::SETGT: {
3735 // {subc.reg, subc.CA} = (subcarry %b, %a)
3736 // (zext (setcc %a, %b, setgt)) ->
3737 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3738 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
3739 if (IsRHSNegOne)
3740 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3741 if (IsRHSZero) {
3742 SDValue Addi =
3743 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3744 S->getI64Imm(~0ULL, dl)), 0);
3745 SDValue Nor =
3746 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3747 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3748 S->getI64Imm(1, dl),
3749 S->getI64Imm(63, dl)), 0);
3750 }
3751 std::swap(LHS, RHS);
3752 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3753 IsRHSZero = RHSConst && RHSConst->isZero();
3754 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3755 [[fallthrough]];
3756 }
3757 case ISD::SETLT: {
3758 // {subc.reg, subc.CA} = (subcarry %a, %b)
3759 // (zext (setcc %a, %b, setlt)) ->
3760 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3761 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
3762 if (IsRHSOne)
3763 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3764 if (IsRHSZero)
3765 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3766 S->getI64Imm(1, dl),
3767 S->getI64Imm(63, dl)), 0);
3768 SDValue SRADINode =
3769 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3770 LHS, S->getI64Imm(63, dl)), 0);
3771 SDValue SRDINode =
3772 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3773 RHS, S->getI64Imm(1, dl),
3774 S->getI64Imm(63, dl)), 0);
3775 SDValue SUBFC8Carry =
3776 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3777 RHS, LHS), 1);
3778 SDValue ADDE8Node =
3779 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3780 SRDINode, SRADINode, SUBFC8Carry), 0);
3781 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3782 ADDE8Node, S->getI64Imm(1, dl)), 0);
3783 }
3784 case ISD::SETUGE:
3785 // {subc.reg, subc.CA} = (subcarry %a, %b)
3786 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3787 std::swap(LHS, RHS);
3788 [[fallthrough]];
3789 case ISD::SETULE: {
3790 // {subc.reg, subc.CA} = (subcarry %b, %a)
3791 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3792 SDValue SUBFC8Carry =
3793 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3794 LHS, RHS), 1);
3795 SDValue SUBFE8Node =
3796 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3797 LHS, LHS, SUBFC8Carry), 0);
3798 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3799 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3800 }
3801 case ISD::SETUGT:
3802 // {subc.reg, subc.CA} = (subcarry %b, %a)
3803 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3804 std::swap(LHS, RHS);
3805 [[fallthrough]];
3806 case ISD::SETULT: {
3807 // {subc.reg, subc.CA} = (subcarry %a, %b)
3808 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3809 SDValue SubtractCarry =
3810 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3811 RHS, LHS), 1);
3812 SDValue ExtSub =
3813 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3814 LHS, LHS, SubtractCarry), 0);
3815 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3816 ExtSub), 0);
3817 }
3818 }
3819}
3820
3821/// Produces a sign-extended result of comparing two 64-bit values according to
3822/// the passed condition code.
3823SDValue
3824IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3825 ISD::CondCode CC,
3826 int64_t RHSValue, SDLoc dl) {
3829 return SDValue();
3830 bool IsRHSZero = RHSValue == 0;
3831 bool IsRHSOne = RHSValue == 1;
3832 bool IsRHSNegOne = RHSValue == -1LL;
3833 switch (CC) {
3834 default: return SDValue();
3835 case ISD::SETEQ: {
3836 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3837 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3838 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3839 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3840 SDValue AddInput = IsRHSZero ? LHS :
3841 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3842 SDValue Addic =
3843 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3844 AddInput, S->getI32Imm(~0U, dl)), 0);
3845 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3846 Addic, Addic.getValue(1)), 0);
3847 }
3848 case ISD::SETNE: {
3849 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3850 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3851 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3852 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3853 SDValue Xor = IsRHSZero ? LHS :
3854 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3855 SDValue SC =
3856 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3857 Xor, S->getI32Imm(0, dl)), 0);
3858 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3859 SC, SC.getValue(1)), 0);
3860 }
3861 case ISD::SETGE: {
3862 // {subc.reg, subc.CA} = (subcarry %a, %b)
3863 // (zext (setcc %a, %b, setge)) ->
3864 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3865 // (zext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3866 if (IsRHSZero)
3867 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3868 std::swap(LHS, RHS);
3869 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3870 IsRHSZero = RHSConst && RHSConst->isZero();
3871 [[fallthrough]];
3872 }
3873 case ISD::SETLE: {
3874 // {subc.reg, subc.CA} = (subcarry %b, %a)
3875 // (zext (setcc %a, %b, setge)) ->
3876 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3877 // (zext (setcc %a, 0, setge)) -> (ashr (or %a, (add %a, -1)), 63)
3878 if (IsRHSZero)
3879 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3880 SDValue ShiftR =
3881 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3882 S->getI64Imm(63, dl)), 0);
3883 SDValue ShiftL =
3884 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3885 S->getI64Imm(1, dl),
3886 S->getI64Imm(63, dl)), 0);
3887 SDValue SubtractCarry =
3888 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3889 LHS, RHS), 1);
3890 SDValue Adde =
3891 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3892 ShiftR, ShiftL, SubtractCarry), 0);
3893 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3894 }
3895 case ISD::SETGT: {
3896 // {subc.reg, subc.CA} = (subcarry %b, %a)
3897 // (zext (setcc %a, %b, setgt)) ->
3898 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3899 // (zext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
3900 if (IsRHSNegOne)
3901 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3902 if (IsRHSZero) {
3903 SDValue Add =
3904 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3905 S->getI64Imm(-1, dl)), 0);
3906 SDValue Nor =
3907 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3908 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3909 S->getI64Imm(63, dl)), 0);
3910 }
3911 std::swap(LHS, RHS);
3912 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3913 IsRHSZero = RHSConst && RHSConst->isZero();
3914 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3915 [[fallthrough]];
3916 }
3917 case ISD::SETLT: {
3918 // {subc.reg, subc.CA} = (subcarry %a, %b)
3919 // (zext (setcc %a, %b, setlt)) ->
3920 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3921 // (zext (setcc %a, 0, setlt)) -> (ashr %a, 63)
3922 if (IsRHSOne)
3923 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3924 if (IsRHSZero) {
3925 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3926 S->getI64Imm(63, dl)), 0);
3927 }
3928 SDValue SRADINode =
3929 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3930 LHS, S->getI64Imm(63, dl)), 0);
3931 SDValue SRDINode =
3932 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3933 RHS, S->getI64Imm(1, dl),
3934 S->getI64Imm(63, dl)), 0);
3935 SDValue SUBFC8Carry =
3936 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3937 RHS, LHS), 1);
3938 SDValue ADDE8Node =
3939 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3940 SRDINode, SRADINode, SUBFC8Carry), 0);
3941 SDValue XORI8Node =
3942 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3943 ADDE8Node, S->getI64Imm(1, dl)), 0);
3944 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3945 XORI8Node), 0);
3946 }
3947 case ISD::SETUGE:
3948 // {subc.reg, subc.CA} = (subcarry %a, %b)
3949 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3950 std::swap(LHS, RHS);
3951 [[fallthrough]];
3952 case ISD::SETULE: {
3953 // {subc.reg, subc.CA} = (subcarry %b, %a)
3954 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3955 SDValue SubtractCarry =
3956 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3957 LHS, RHS), 1);
3958 SDValue ExtSub =
3959 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3960 LHS, SubtractCarry), 0);
3961 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3962 ExtSub, ExtSub), 0);
3963 }
3964 case ISD::SETUGT:
3965 // {subc.reg, subc.CA} = (subcarry %b, %a)
3966 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3967 std::swap(LHS, RHS);
3968 [[fallthrough]];
3969 case ISD::SETULT: {
3970 // {subc.reg, subc.CA} = (subcarry %a, %b)
3971 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3972 SDValue SubCarry =
3973 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3974 RHS, LHS), 1);
3975 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3976 LHS, LHS, SubCarry), 0);
3977 }
3978 }
3979}
3980
3981/// Do all uses of this SDValue need the result in a GPR?
3982/// This is meant to be used on values that have type i1 since
3983/// it is somewhat meaningless to ask if values of other types
3984/// should be kept in GPR's.
3985static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3986 assert(Compare.getOpcode() == ISD::SETCC &&
3987 "An ISD::SETCC node required here.");
3988
3989 // For values that have a single use, the caller should obviously already have
3990 // checked if that use is an extending use. We check the other uses here.
3991 if (Compare.hasOneUse())
3992 return true;
3993 // We want the value in a GPR if it is being extended, used for a select, or
3994 // used in logical operations.
3995 for (auto *CompareUse : Compare.getNode()->users())
3996 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3997 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3998 CompareUse->getOpcode() != ISD::SELECT &&
3999 !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
4000 OmittedForNonExtendUses++;
4001 return false;
4002 }
4003 return true;
4004}
4005
4006/// Returns an equivalent of a SETCC node but with the result the same width as
4007/// the inputs. This can also be used for SELECT_CC if either the true or false
4008/// values is a power of two while the other is zero.
4009SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
4010 SetccInGPROpts ConvOpts) {
4011 assert((Compare.getOpcode() == ISD::SETCC ||
4012 Compare.getOpcode() == ISD::SELECT_CC) &&
4013 "An ISD::SETCC node required here.");
4014
4015 // Don't convert this comparison to a GPR sequence because there are uses
4016 // of the i1 result (i.e. uses that require the result in the CR).
4017 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4018 return SDValue();
4019
4020 SDValue LHS = Compare.getOperand(0);
4021 SDValue RHS = Compare.getOperand(1);
4022
4023 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4024 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
4025 ISD::CondCode CC =
4026 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4027 EVT InputVT = LHS.getValueType();
4028 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4029 return SDValue();
4030
4031 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4032 ConvOpts == SetccInGPROpts::SExtInvert)
4033 CC = ISD::getSetCCInverse(CC, InputVT);
4034
4035 bool Inputs32Bit = InputVT == MVT::i32;
4036
4037 SDLoc dl(Compare);
4038 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4039 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4040 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4041 ConvOpts == SetccInGPROpts::SExtInvert;
4042
4043 if (IsSext && Inputs32Bit)
4044 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4045 else if (Inputs32Bit)
4046 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4047 else if (IsSext)
4048 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4049 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4050}
4051
4052} // end anonymous namespace
4053
4054bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4055 if (N->getValueType(0) != MVT::i32 &&
4056 N->getValueType(0) != MVT::i64)
4057 return false;
4058
4059 // This optimization will emit code that assumes 64-bit registers
4060 // so we don't want to run it in 32-bit mode. Also don't run it
4061 // on functions that are not to be optimized.
4062 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4063 return false;
4064
4065 // For POWER10, it is more profitable to use the set boolean extension
4066 // instructions rather than the integer compare elimination codegen.
4067 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4068 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4069 return false;
4070
4071 switch (N->getOpcode()) {
4072 default: break;
4073 case ISD::ZERO_EXTEND:
4074 case ISD::SIGN_EXTEND:
4075 case ISD::AND:
4076 case ISD::OR:
4077 case ISD::XOR: {
4078 IntegerCompareEliminator ICmpElim(CurDAG, this);
4079 if (SDNode *New = ICmpElim.Select(N)) {
4080 ReplaceNode(N, New);
4081 return true;
4082 }
4083 }
4084 }
4085 return false;
4086}
4087
4088bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4089 if (N->getValueType(0) != MVT::i32 &&
4090 N->getValueType(0) != MVT::i64)
4091 return false;
4092
4093 if (!UseBitPermRewriter)
4094 return false;
4095
4096 switch (N->getOpcode()) {
4097 default: break;
4098 case ISD::SRL:
4099 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4100 // uses the BRH instruction.
4101 if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4102 N->getOperand(0).getOpcode() == ISD::BSWAP) {
4103 auto &OpRight = N->getOperand(1);
4104 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
4105 if (SRLConst && SRLConst->getSExtValue() == 16)
4106 return false;
4107 }
4108 [[fallthrough]];
4109 case ISD::ROTL:
4110 case ISD::SHL:
4111 case ISD::AND:
4112 case ISD::OR: {
4113 BitPermutationSelector BPS(CurDAG);
4114 if (SDNode *New = BPS.Select(N)) {
4115 ReplaceNode(N, New);
4116 return true;
4117 }
4118 return false;
4119 }
4120 }
4121
4122 return false;
4123}
4124
4125/// SelectCC - Select a comparison of the specified values with the specified
4126/// condition code, returning the CR# of the expression.
4127SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4128 const SDLoc &dl, SDValue Chain) {
4129 // Always select the LHS.
4130 unsigned Opc;
4131
4132 if (LHS.getValueType() == MVT::i32) {
4133 unsigned Imm;
4134 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4135 if (isInt32Immediate(RHS, Imm)) {
4136 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4137 if (isUInt<16>(Imm))
4138 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4139 getI32Imm(Imm & 0xFFFF, dl)),
4140 0);
4141 // If this is a 16-bit signed immediate, fold it.
4142 if (isInt<16>((int)Imm))
4143 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4144 getI32Imm(Imm & 0xFFFF, dl)),
4145 0);
4146
4147 // For non-equality comparisons, the default code would materialize the
4148 // constant, then compare against it, like this:
4149 // lis r2, 4660
4150 // ori r2, r2, 22136
4151 // cmpw cr0, r3, r2
4152 // Since we are just comparing for equality, we can emit this instead:
4153 // xoris r0,r3,0x1234
4154 // cmplwi cr0,r0,0x5678
4155 // beq cr0,L6
4156 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4157 getI32Imm(Imm >> 16, dl)), 0);
4158 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4159 getI32Imm(Imm & 0xFFFF, dl)), 0);
4160 }
4161 Opc = PPC::CMPLW;
4162 } else if (ISD::isUnsignedIntSetCC(CC)) {
4163 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4164 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4165 getI32Imm(Imm & 0xFFFF, dl)), 0);
4166 Opc = PPC::CMPLW;
4167 } else {
4168 int16_t SImm;
4169 if (isIntS16Immediate(RHS, SImm))
4170 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4171 getI32Imm((int)SImm & 0xFFFF,
4172 dl)),
4173 0);
4174 Opc = PPC::CMPW;
4175 }
4176 } else if (LHS.getValueType() == MVT::i64) {
4177 uint64_t Imm;
4178 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4179 if (isInt64Immediate(RHS.getNode(), Imm)) {
4180 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4181 if (isUInt<16>(Imm))
4182 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4183 getI32Imm(Imm & 0xFFFF, dl)),
4184 0);
4185 // If this is a 16-bit signed immediate, fold it.
4186 if (isInt<16>(Imm))
4187 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4188 getI32Imm(Imm & 0xFFFF, dl)),
4189 0);
4190
4191 // For non-equality comparisons, the default code would materialize the
4192 // constant, then compare against it, like this:
4193 // lis r2, 4660
4194 // ori r2, r2, 22136
4195 // cmpd cr0, r3, r2
4196 // Since we are just comparing for equality, we can emit this instead:
4197 // xoris r0,r3,0x1234
4198 // cmpldi cr0,r0,0x5678
4199 // beq cr0,L6
4200 if (isUInt<32>(Imm)) {
4201 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4202 getI64Imm(Imm >> 16, dl)), 0);
4203 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4204 getI64Imm(Imm & 0xFFFF, dl)),
4205 0);
4206 }
4207 }
4208 Opc = PPC::CMPLD;
4209 } else if (ISD::isUnsignedIntSetCC(CC)) {
4210 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4211 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4212 getI64Imm(Imm & 0xFFFF, dl)), 0);
4213 Opc = PPC::CMPLD;
4214 } else {
4215 int16_t SImm;
4216 if (isIntS16Immediate(RHS, SImm))
4217 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4218 getI64Imm(SImm & 0xFFFF, dl)),
4219 0);
4220 Opc = PPC::CMPD;
4221 }
4222 } else if (LHS.getValueType() == MVT::f32) {
4223 if (Subtarget->hasSPE()) {
4224 switch (CC) {
4225 default:
4226 case ISD::SETEQ:
4227 case ISD::SETNE:
4228 Opc = PPC::EFSCMPEQ;
4229 break;
4230 case ISD::SETLT:
4231 case ISD::SETGE:
4232 case ISD::SETOLT:
4233 case ISD::SETOGE:
4234 case ISD::SETULT:
4235 case ISD::SETUGE:
4236 Opc = PPC::EFSCMPLT;
4237 break;
4238 case ISD::SETGT:
4239 case ISD::SETLE:
4240 case ISD::SETOGT:
4241 case ISD::SETOLE:
4242 case ISD::SETUGT:
4243 case ISD::SETULE:
4244 Opc = PPC::EFSCMPGT;
4245 break;
4246 }
4247 } else
4248 Opc = PPC::FCMPUS;
4249 } else if (LHS.getValueType() == MVT::f64) {
4250 if (Subtarget->hasSPE()) {
4251 switch (CC) {
4252 default:
4253 case ISD::SETEQ:
4254 case ISD::SETNE:
4255 Opc = PPC::EFDCMPEQ;
4256 break;
4257 case ISD::SETLT:
4258 case ISD::SETGE:
4259 case ISD::SETOLT:
4260 case ISD::SETOGE:
4261 case ISD::SETULT:
4262 case ISD::SETUGE:
4263 Opc = PPC::EFDCMPLT;
4264 break;
4265 case ISD::SETGT:
4266 case ISD::SETLE:
4267 case ISD::SETOGT:
4268 case ISD::SETOLE:
4269 case ISD::SETUGT:
4270 case ISD::SETULE:
4271 Opc = PPC::EFDCMPGT;
4272 break;
4273 }
4274 } else
4275 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4276 } else {
4277 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4278 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4279 Opc = PPC::XSCMPUQP;
4280 }
4281 if (Chain)
4282 return SDValue(
4283 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4284 0);
4285 else
4286 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4287}
4288
4290 const PPCSubtarget *Subtarget) {
4291 // For SPE instructions, the result is in GT bit of the CR
4292 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4293
4294 switch (CC) {
4295 case ISD::SETUEQ:
4296 case ISD::SETONE:
4297 case ISD::SETOLE:
4298 case ISD::SETOGE:
4299 llvm_unreachable("Should be lowered by legalize!");
4300 default: llvm_unreachable("Unknown condition!");
4301 case ISD::SETOEQ:
4302 case ISD::SETEQ:
4303 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4304 case ISD::SETUNE:
4305 case ISD::SETNE:
4306 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4307 case ISD::SETOLT:
4308 case ISD::SETLT:
4309 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4310 case ISD::SETULE:
4311 case ISD::SETLE:
4312 return PPC::PRED_LE;
4313 case ISD::SETOGT:
4314 case ISD::SETGT:
4315 return PPC::PRED_GT;
4316 case ISD::SETUGE:
4317 case ISD::SETGE:
4318 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4319 case ISD::SETO: return PPC::PRED_NU;
4320 case ISD::SETUO: return PPC::PRED_UN;
4321 // These two are invalid for floating point. Assume we have int.
4322 case ISD::SETULT: return PPC::PRED_LT;
4323 case ISD::SETUGT: return PPC::PRED_GT;
4324 }
4325}
4326
4327/// getCRIdxForSetCC - Return the index of the condition register field
4328/// associated with the SetCC condition, and whether or not the field is
4329/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4330static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4331 Invert = false;
4332 switch (CC) {
4333 default: llvm_unreachable("Unknown condition!");
4334 case ISD::SETOLT:
4335 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4336 case ISD::SETOGT:
4337 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4338 case ISD::SETOEQ:
4339 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4340 case ISD::SETUO: return 3; // Bit #3 = SETUO
4341 case ISD::SETUGE:
4342 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4343 case ISD::SETULE:
4344 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4345 case ISD::SETUNE:
4346 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4347 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4348 case ISD::SETUEQ:
4349 case ISD::SETOGE:
4350 case ISD::SETOLE:
4351 case ISD::SETONE:
4352 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4353 // These are invalid for floating point. Assume integer.
4354 case ISD::SETULT: return 0;
4355 case ISD::SETUGT: return 1;
4356 }
4357}
4358
4359// getVCmpInst: return the vector compare instruction for the specified
4360// vector type and condition code. Since this is for altivec specific code,
4361// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4362// and v4f32).
4363static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4364 bool HasVSX, bool &Swap, bool &Negate) {
4365 Swap = false;
4366 Negate = false;
4367
4368 if (VecVT.isFloatingPoint()) {
4369 /* Handle some cases by swapping input operands. */
4370 switch (CC) {
4371 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4372 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4373 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4374 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4375 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4376 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4377 default: break;
4378 }
4379 /* Handle some cases by negating the result. */
4380 switch (CC) {
4381 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4382 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4383 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4384 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4385 default: break;
4386 }
4387 /* We have instructions implementing the remaining cases. */
4388 switch (CC) {
4389 case ISD::SETEQ:
4390 case ISD::SETOEQ:
4391 if (VecVT == MVT::v4f32)
4392 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4393 else if (VecVT == MVT::v2f64)
4394 return PPC::XVCMPEQDP;
4395 break;
4396 case ISD::SETGT:
4397 case ISD::SETOGT:
4398 if (VecVT == MVT::v4f32)
4399 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4400 else if (VecVT == MVT::v2f64)
4401 return PPC::XVCMPGTDP;
4402 break;
4403 case ISD::SETGE:
4404 case ISD::SETOGE:
4405 if (VecVT == MVT::v4f32)
4406 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4407 else if (VecVT == MVT::v2f64)
4408 return PPC::XVCMPGEDP;
4409 break;
4410 default:
4411 break;
4412 }
4413 llvm_unreachable("Invalid floating-point vector compare condition");
4414 } else {
4415 /* Handle some cases by swapping input operands. */
4416 switch (CC) {
4417 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4418 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4419 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4420 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4421 default: break;
4422 }
4423 /* Handle some cases by negating the result. */
4424 switch (CC) {
4425 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4426 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4427 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4428 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4429 default: break;
4430 }
4431 /* We have instructions implementing the remaining cases. */
4432 switch (CC) {
4433 case ISD::SETEQ:
4434 case ISD::SETUEQ:
4435 if (VecVT == MVT::v16i8)
4436 return PPC::VCMPEQUB;
4437 else if (VecVT == MVT::v8i16)
4438 return PPC::VCMPEQUH;
4439 else if (VecVT == MVT::v4i32)
4440 return PPC::VCMPEQUW;
4441 else if (VecVT == MVT::v2i64)
4442 return PPC::VCMPEQUD;
4443 else if (VecVT == MVT::v1i128)
4444 return PPC::VCMPEQUQ;
4445 break;
4446 case ISD::SETGT:
4447 if (VecVT == MVT::v16i8)
4448 return PPC::VCMPGTSB;
4449 else if (VecVT == MVT::v8i16)
4450 return PPC::VCMPGTSH;
4451 else if (VecVT == MVT::v4i32)
4452 return PPC::VCMPGTSW;
4453 else if (VecVT == MVT::v2i64)
4454 return PPC::VCMPGTSD;
4455 else if (VecVT == MVT::v1i128)
4456 return PPC::VCMPGTSQ;
4457 break;
4458 case ISD::SETUGT:
4459 if (VecVT == MVT::v16i8)
4460 return PPC::VCMPGTUB;
4461 else if (VecVT == MVT::v8i16)
4462 return PPC::VCMPGTUH;
4463 else if (VecVT == MVT::v4i32)
4464 return PPC::VCMPGTUW;
4465 else if (VecVT == MVT::v2i64)
4466 return PPC::VCMPGTUD;
4467 else if (VecVT == MVT::v1i128)
4468 return PPC::VCMPGTUQ;
4469 break;
4470 default:
4471 break;
4472 }
4473 llvm_unreachable("Invalid integer vector compare condition");
4474 }
4475}
4476
4477bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4478 SDLoc dl(N);
4479 unsigned Imm;
4480 bool IsStrict = N->isStrictFPOpcode();
4481 ISD::CondCode CC =
4482 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
4483 EVT PtrVT =
4485 bool isPPC64 = (PtrVT == MVT::i64);
4486 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4487
4488 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4489 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4490
4491 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4492 // We can codegen setcc op, imm very efficiently compared to a brcond.
4493 // Check for those cases here.
4494 // setcc op, 0
4495 if (Imm == 0) {
4496 SDValue Op = LHS;
4497 switch (CC) {
4498 default: break;
4499 case ISD::SETEQ: {
4500 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4501 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4502 getI32Imm(31, dl) };
4503 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4504 return true;
4505 }
4506 case ISD::SETNE: {
4507 if (isPPC64) break;
4508 SDValue AD =
4509 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4510 Op, getI32Imm(~0U, dl)), 0);
4511 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4512 return true;
4513 }
4514 case ISD::SETLT: {
4515 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4516 getI32Imm(31, dl) };
4517 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4518 return true;
4519 }
4520 case ISD::SETGT: {
4521 SDValue T =
4522 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4523 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4524 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4525 getI32Imm(31, dl) };
4526 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4527 return true;
4528 }
4529 }
4530 } else if (Imm == ~0U) { // setcc op, -1
4531 SDValue Op = LHS;
4532 switch (CC) {
4533 default: break;
4534 case ISD::SETEQ:
4535 if (isPPC64) break;
4536 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4537 Op, getI32Imm(1, dl)), 0);
4538 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4539 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4540 MVT::i32,
4541 getI32Imm(0, dl)),
4542 0), Op.getValue(1));
4543 return true;
4544 case ISD::SETNE: {
4545 if (isPPC64) break;
4546 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4547 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4548 Op, getI32Imm(~0U, dl));
4549 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4550 SDValue(AD, 1));
4551 return true;
4552 }
4553 case ISD::SETLT: {
4554 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4555 getI32Imm(1, dl)), 0);
4556 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4557 Op), 0);
4558 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4559 getI32Imm(31, dl) };
4560 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4561 return true;
4562 }
4563 case ISD::SETGT: {
4564 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4565 getI32Imm(31, dl) };
4566 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4567 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4568 return true;
4569 }
4570 }
4571 }
4572 }
4573
4574 // Altivec Vector compare instructions do not set any CR register by default and
4575 // vector compare operations return the same type as the operands.
4576 if (!IsStrict && LHS.getValueType().isVector()) {
4577 if (Subtarget->hasSPE())
4578 return false;
4579
4580 EVT VecVT = LHS.getValueType();
4581 // Optimize 'Not equal to zero-vector' comparisons to 'Greater than or
4582 // less than' operators.
4583 // Example: Consider k to be any non-zero positive value.
4584 // * for k != 0, change SETNE to SETUGT (k > 0)
4585 // * for 0 != k, change SETNE to SETULT (0 < k)
4586 if (CC == ISD::SETNE) {
4587 // Only optimize for integer types (avoid FP completely)
4588 if (VecVT.getVectorElementType().isInteger()) {
4589 if (ISD::isBuildVectorAllZeros(RHS.getNode()))
4590 CC = ISD::SETUGT;
4591 else if (ISD::isBuildVectorAllZeros(LHS.getNode()))
4592 CC = ISD::SETULT;
4593 }
4594 }
4595 bool Swap, Negate;
4596 unsigned int VCmpInst =
4597 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4598 if (Swap)
4599 std::swap(LHS, RHS);
4600
4601 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4602 if (Negate) {
4603 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4604 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4605 ResVT, VCmp, VCmp);
4606 return true;
4607 }
4608
4609 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4610 return true;
4611 }
4612
4613 if (Subtarget->useCRBits())
4614 return false;
4615
4616 bool Inv;
4617 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4618 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4619 if (IsStrict)
4620 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4621 SDValue IntCR;
4622
4623 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4624 // The correct compare instruction is already set by SelectCC()
4625 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4626 Idx = 1;
4627 }
4628
4629 // Force the ccreg into CR7.
4630 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4631
4632 SDValue InGlue; // Null incoming flag value.
4633 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4634 InGlue).getValue(1);
4635
4636 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4637 CCReg), 0);
4638
4639 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4640 getI32Imm(31, dl), getI32Imm(31, dl) };
4641 if (!Inv) {
4642 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4643 return true;
4644 }
4645
4646 // Get the specified bit.
4647 SDValue Tmp =
4648 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4649 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4650 return true;
4651}
4652
4653/// Does this node represent a load/store node whose address can be represented
4654/// with a register plus an immediate that's a multiple of \p Val:
4655bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4656 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4657 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4658 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4659 SDValue AddrOp;
4660 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4661 AddrOp = N->getOperand(1);
4662 else if (STN)
4663 AddrOp = STN->getOperand(2);
4664
4665 // If the address points a frame object or a frame object with an offset,
4666 // we need to check the object alignment.
4667 short Imm = 0;
4668 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4669 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4670 AddrOp)) {
4671 // If op0 is a frame index that is under aligned, we can't do it either,
4672 // because it is translated to r31 or r1 + slot + offset. We won't know the
4673 // slot number until the stack frame is finalized.
4674 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4675 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4676 if ((SlotAlign % Val) != 0)
4677 return false;
4678
4679 // If we have an offset, we need further check on the offset.
4680 if (AddrOp.getOpcode() != ISD::ADD)
4681 return true;
4682 }
4683
4684 if (AddrOp.getOpcode() == ISD::ADD)
4685 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4686
4687 // If the address comes from the outside, the offset will be zero.
4688 return AddrOp.getOpcode() == ISD::CopyFromReg;
4689}
4690
4691void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4692 // Transfer memoperands.
4693 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4694 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4695}
4696
4697static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG,
4698 bool &NeedSwapOps, bool &IsUnCmp) {
4699
4700 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4701
4702 SDValue LHS = N->getOperand(0);
4703 SDValue RHS = N->getOperand(1);
4704 SDValue TrueRes = N->getOperand(2);
4705 SDValue FalseRes = N->getOperand(3);
4706 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4707 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4708 N->getSimpleValueType(0) != MVT::i32))
4709 return false;
4710
4711 // We are looking for any of:
4712 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4713 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4714 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4715 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4716 int64_t TrueResVal = TrueConst->getSExtValue();
4717 if ((TrueResVal < -1 || TrueResVal > 1) ||
4718 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4719 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4720 (TrueResVal == 0 &&
4721 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4722 return false;
4723
4724 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4725 ? FalseRes
4726 : FalseRes.getOperand(0);
4727 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4728 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4729 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4730 return false;
4731
4732 // Without this setb optimization, the outer SELECT_CC will be manually
4733 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4734 // transforms pseudo instruction to isel instruction. When there are more than
4735 // one use for result like zext/sext, with current optimization we only see
4736 // isel is replaced by setb but can't see any significant gain. Since
4737 // setb has longer latency than original isel, we should avoid this. Another
4738 // point is that setb requires comparison always kept, it can break the
4739 // opportunity to get the comparison away if we have in future.
4740 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4741 return false;
4742
4743 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4744 SDValue InnerRHS = SetOrSelCC.getOperand(1);
4745 ISD::CondCode InnerCC =
4746 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4747 // If the inner comparison is a select_cc, make sure the true/false values are
4748 // 1/-1 and canonicalize it if needed.
4749 if (InnerIsSel) {
4750 ConstantSDNode *SelCCTrueConst =
4751 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4752 ConstantSDNode *SelCCFalseConst =
4753 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4754 if (!SelCCTrueConst || !SelCCFalseConst)
4755 return false;
4756 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4757 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4758 // The values must be -1/1 (requiring a swap) or 1/-1.
4759 if (SelCCTVal == -1 && SelCCFVal == 1) {
4760 std::swap(InnerLHS, InnerRHS);
4761 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4762 return false;
4763 }
4764
4765 // Canonicalize unsigned case
4766 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4767 IsUnCmp = true;
4768 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4769 }
4770
4771 bool InnerSwapped = false;
4772 if (LHS == InnerRHS && RHS == InnerLHS)
4773 InnerSwapped = true;
4774 else if (LHS != InnerLHS || RHS != InnerRHS)
4775 return false;
4776
4777 switch (CC) {
4778 // (select_cc lhs, rhs, 0, \
4779 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4780 case ISD::SETEQ:
4781 if (!InnerIsSel)
4782 return false;
4783 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4784 return false;
4785 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4786 break;
4787
4788 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4789 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4790 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4791 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4792 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4793 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4794 case ISD::SETULT:
4795 if (!IsUnCmp && InnerCC != ISD::SETNE)
4796 return false;
4797 IsUnCmp = true;
4798 [[fallthrough]];
4799 case ISD::SETLT:
4800 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4801 (InnerCC == ISD::SETLT && InnerSwapped))
4802 NeedSwapOps = (TrueResVal == 1);
4803 else
4804 return false;
4805 break;
4806
4807 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4808 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4809 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4810 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4811 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4812 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4813 case ISD::SETUGT:
4814 if (!IsUnCmp && InnerCC != ISD::SETNE)
4815 return false;
4816 IsUnCmp = true;
4817 [[fallthrough]];
4818 case ISD::SETGT:
4819 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4820 (InnerCC == ISD::SETGT && InnerSwapped))
4821 NeedSwapOps = (TrueResVal == -1);
4822 else
4823 return false;
4824 break;
4825
4826 default:
4827 return false;
4828 }
4829
4830 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4831 LLVM_DEBUG(N->dump());
4832
4833 return true;
4834}
4835
4836// Return true if it's a software square-root/divide operand.
4837static bool isSWTestOp(SDValue N) {
4838 if (N.getOpcode() == PPCISD::FTSQRT)
4839 return true;
4840 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4841 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4842 return false;
4843 switch (N.getConstantOperandVal(0)) {
4844 case Intrinsic::ppc_vsx_xvtdivdp:
4845 case Intrinsic::ppc_vsx_xvtdivsp:
4846 case Intrinsic::ppc_vsx_xvtsqrtdp:
4847 case Intrinsic::ppc_vsx_xvtsqrtsp:
4848 return true;
4849 }
4850 return false;
4851}
4852
4853bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4854 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4855 // We are looking for following patterns, where `truncate to i1` actually has
4856 // the same semantic with `and 1`.
4857 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4858 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4859 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4860 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4861 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4862 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4863 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4864 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4865 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4866 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4867 return false;
4868
4869 SDValue CmpRHS = N->getOperand(3);
4870 if (!isNullConstant(CmpRHS))
4871 return false;
4872
4873 SDValue CmpLHS = N->getOperand(2);
4874 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4875 return false;
4876
4877 unsigned PCC = 0;
4878 bool IsCCNE = CC == ISD::SETNE;
4879 if (CmpLHS.getOpcode() == ISD::AND &&
4881 switch (CmpLHS.getConstantOperandVal(1)) {
4882 case 1:
4883 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4884 break;
4885 case 2:
4886 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4887 break;
4888 case 4:
4889 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4890 break;
4891 case 8:
4892 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4893 break;
4894 default:
4895 return false;
4896 }
4897 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4898 CmpLHS.getValueType() == MVT::i1)
4899 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4900
4901 if (PCC) {
4902 SDLoc dl(N);
4903 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4904 N->getOperand(0)};
4905 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4906 return true;
4907 }
4908 return false;
4909}
4910
4911bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4912 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4913 // value, for example when crbits is disabled. If so, select the
4914 // loop_decrement intrinsics now.
4915 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4916 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4917
4918 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4919 isNullConstant(LHS.getOperand(1)))
4920 return false;
4921
4922 if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4923 LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4924 return false;
4925
4927 return false;
4928
4929 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4930 "Counter decrement comparison is not EQ or NE");
4931
4932 SDValue OldDecrement = LHS.getOperand(0);
4933 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4934
4935 SDLoc DecrementLoc(OldDecrement);
4936 SDValue ChainInput = OldDecrement.getOperand(0);
4937 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4938 : getI32Imm(1, DecrementLoc)};
4939 unsigned DecrementOpcode =
4940 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4941 SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4942 MVT::i1, DecrementOps);
4943
4944 unsigned Val = RHS->getAsZExtVal();
4945 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4946 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4947
4948 ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4949 CurDAG->RemoveDeadNode(LHS.getNode());
4950
4951 // Mark the old loop_decrement intrinsic as dead.
4952 ReplaceUses(OldDecrement.getValue(1), ChainInput);
4953 CurDAG->RemoveDeadNode(OldDecrement.getNode());
4954
4955 SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4956 ChainInput, N->getOperand(0));
4957
4958 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4959 N->getOperand(4), Chain);
4960 return true;
4961}
4962
4963bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4964 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4965 unsigned Imm;
4966 if (!isInt32Immediate(N->getOperand(1), Imm))
4967 return false;
4968
4969 SDLoc dl(N);
4970 SDValue Val = N->getOperand(0);
4971 unsigned SH, MB, ME;
4972 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4973 // with a mask, emit rlwinm
4974 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4975 Val = Val.getOperand(0);
4976 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4977 getI32Imm(ME, dl)};
4978 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4979 return true;
4980 }
4981
4982 // If this is just a masked value where the input is not handled, and
4983 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4984 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4985 // The result of LBARX/LHARX do not need to be cleared as the instructions
4986 // implicitly clear the upper bits.
4987 unsigned AlreadyCleared = 0;
4988 if (Val.getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4989 auto IntrinsicID = Val.getConstantOperandVal(1);
4990 if (IntrinsicID == Intrinsic::ppc_lbarx)
4991 AlreadyCleared = 24;
4992 else if (IntrinsicID == Intrinsic::ppc_lharx)
4993 AlreadyCleared = 16;
4994 if (AlreadyCleared != 0 && AlreadyCleared == MB && ME == 31) {
4995 ReplaceUses(SDValue(N, 0), N->getOperand(0));
4996 return true;
4997 }
4998 }
4999
5000 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
5001 getI32Imm(ME, dl)};
5002 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5003 return true;
5004 }
5005
5006 // AND X, 0 -> 0, not "rlwinm 32".
5007 if (Imm == 0) {
5008 ReplaceUses(SDValue(N, 0), N->getOperand(1));
5009 return true;
5010 }
5011
5012 return false;
5013}
5014
5015bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
5016 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5017 uint64_t Imm64;
5018 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5019 return false;
5020
5021 unsigned MB, ME;
5022 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
5023 // MB ME
5024 // +----------------------+
5025 // |xxxxxxxxxxx00011111000|
5026 // +----------------------+
5027 // 0 32 64
5028 // We can only do it if the MB is larger than 32 and MB <= ME
5029 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
5030 // we didn't rotate it.
5031 SDLoc dl(N);
5032 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
5033 getI64Imm(ME - 32, dl)};
5034 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
5035 return true;
5036 }
5037
5038 return false;
5039}
5040
5041bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5042 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5043 uint64_t Imm64;
5044 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5045 return false;
5046
5047 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5048 // it well with "andi.".
5049 if (isUInt<16>(Imm64))
5050 return false;
5051
5052 SDLoc Loc(N);
5053 SDValue Val = N->getOperand(0);
5054
5055 // Optimized with two rldicl's as follows:
5056 // Add missing bits on left to the mask and check that the mask is a
5057 // wrapped run of ones, i.e.
5058 // Change pattern |0001111100000011111111|
5059 // to |1111111100000011111111|.
5060 unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5061 if (NumOfLeadingZeros != 0)
5062 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5063
5064 unsigned MB, ME;
5065 if (!isRunOfOnes64(Imm64, MB, ME))
5066 return false;
5067
5068 // ME MB MB-ME+63
5069 // +----------------------+ +----------------------+
5070 // |1111111100000011111111| -> |0000001111111111111111|
5071 // +----------------------+ +----------------------+
5072 // 0 63 0 63
5073 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5074 unsigned OnesOnLeft = ME + 1;
5075 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5076 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5077 // on the left the bits that are already zeros in the mask.
5078 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5079 getI64Imm(OnesOnLeft, Loc),
5080 getI64Imm(ZerosInBetween, Loc)),
5081 0);
5082 // MB-ME+63 ME MB
5083 // +----------------------+ +----------------------+
5084 // |0000001111111111111111| -> |0001111100000011111111|
5085 // +----------------------+ +----------------------+
5086 // 0 63 0 63
5087 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5088 // left the number of ones we previously added.
5089 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5090 getI64Imm(NumOfLeadingZeros, Loc)};
5091 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5092 return true;
5093}
5094
5095bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5096 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5097 unsigned Imm;
5098 if (!isInt32Immediate(N->getOperand(1), Imm))
5099 return false;
5100
5101 SDValue Val = N->getOperand(0);
5102 unsigned Imm2;
5103 // ISD::OR doesn't get all the bitfield insertion fun.
5104 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5105 // bitfield insert.
5106 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5107 return false;
5108
5109 // The idea here is to check whether this is equivalent to:
5110 // (c1 & m) | (x & ~m)
5111 // where m is a run-of-ones mask. The logic here is that, for each bit in
5112 // c1 and c2:
5113 // - if both are 1, then the output will be 1.
5114 // - if both are 0, then the output will be 0.
5115 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5116 // come from x.
5117 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5118 // be 0.
5119 // If that last condition is never the case, then we can form m from the
5120 // bits that are the same between c1 and c2.
5121 unsigned MB, ME;
5122 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5123 SDLoc dl(N);
5124 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5125 getI32Imm(MB, dl), getI32Imm(ME, dl)};
5126 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5127 return true;
5128 }
5129
5130 return false;
5131}
5132
5133bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5134 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5135
5136 uint64_t Imm64;
5137 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5138 return false;
5139
5140 SDValue Val = N->getOperand(0);
5141
5142 if (Val.getOpcode() != ISD::ROTL)
5143 return false;
5144
5145 // Looking to try to avoid a situation like this one:
5146 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5147 // %and1 = and i64 %2, 9223372036854775807
5148 // In this function we are looking to try to match RLDCL. However, the above
5149 // DAG would better match RLDICL instead which is not what we are looking
5150 // for here.
5151 SDValue RotateAmt = Val.getOperand(1);
5152 if (RotateAmt.getOpcode() == ISD::Constant)
5153 return false;
5154
5155 unsigned MB = 64 - llvm::countr_one(Imm64);
5156 SDLoc dl(N);
5157 SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5158 CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5159 return true;
5160}
5161
5162bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5163 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5164 uint64_t Imm64;
5165 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5166 return false;
5167
5168 // If this is a 64-bit zero-extension mask, emit rldicl.
5169 unsigned MB = 64 - llvm::countr_one(Imm64);
5170 unsigned SH = 0;
5171 unsigned Imm;
5172 SDValue Val = N->getOperand(0);
5173 SDLoc dl(N);
5174
5175 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5176 auto Op0 = Val.getOperand(0);
5177 if (Op0.getOpcode() == ISD::SRL &&
5178 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5179
5180 auto ResultType = Val.getNode()->getValueType(0);
5181 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5182 SDValue IDVal(ImDef, 0);
5183
5184 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5185 IDVal, Op0.getOperand(0),
5186 getI32Imm(1, dl)),
5187 0);
5188 SH = 64 - Imm;
5189 }
5190 }
5191
5192 // If the operand is a logical right shift, we can fold it into this
5193 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5194 // for n <= mb. The right shift is really a left rotate followed by a
5195 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5196 // by the shift.
5197 if (Val.getOpcode() == ISD::SRL &&
5198 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5199 assert(Imm < 64 && "Illegal shift amount");
5200 Val = Val.getOperand(0);
5201 SH = 64 - Imm;
5202 }
5203
5204 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5205 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5206 return true;
5207}
5208
5209bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5210 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5211 uint64_t Imm64;
5212 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5213 !isMask_64(~Imm64))
5214 return false;
5215
5216 // If this is a negated 64-bit zero-extension mask,
5217 // i.e. the immediate is a sequence of ones from most significant side
5218 // and all zero for reminder, we should use rldicr.
5219 unsigned MB = 63 - llvm::countr_one(~Imm64);
5220 unsigned SH = 0;
5221 SDLoc dl(N);
5222 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5223 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5224 return true;
5225}
5226
5227bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5228 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5229 uint64_t Imm64;
5230 unsigned MB, ME;
5231 SDValue N0 = N->getOperand(0);
5232
5233 // We won't get fewer instructions if the imm is 32-bit integer.
5234 // rldimi requires the imm to have consecutive ones with both sides zero.
5235 // Also, make sure the first Op has only one use, otherwise this may increase
5236 // register pressure since rldimi is destructive.
5237 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5238 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5239 return false;
5240
5241 unsigned SH = 63 - ME;
5242 SDLoc Dl(N);
5243 // Use select64Imm for making LI instr instead of directly putting Imm64
5244 SDValue Ops[] = {
5245 N->getOperand(0),
5246 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5247 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5248 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5249 return true;
5250}
5251
5252// Select - Convert the specified operand from a target-independent to a
5253// target-specific node if it hasn't already been changed.
5254void PPCDAGToDAGISel::Select(SDNode *N) {
5255 SDLoc dl(N);
5256 if (N->isMachineOpcode()) {
5257 N->setNodeId(-1);
5258 return; // Already selected.
5259 }
5260
5261 // In case any misguided DAG-level optimizations form an ADD with a
5262 // TargetConstant operand, crash here instead of miscompiling (by selecting
5263 // an r+r add instead of some kind of r+i add).
5264 if (N->getOpcode() == ISD::ADD &&
5265 N->getOperand(1).getOpcode() == ISD::TargetConstant)
5266 llvm_unreachable("Invalid ADD with TargetConstant operand");
5267
5268 // Try matching complex bit permutations before doing anything else.
5269 if (tryBitPermutation(N))
5270 return;
5271
5272 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5273 if (tryIntCompareInGPR(N))
5274 return;
5275
5276 switch (N->getOpcode()) {
5277 default: break;
5278
5279 case ISD::Constant:
5280 if (N->getValueType(0) == MVT::i64) {
5281 ReplaceNode(N, selectI64Imm(CurDAG, N));
5282 return;
5283 }
5284 break;
5285
5286 case ISD::INTRINSIC_VOID: {
5287 auto IntrinsicID = N->getConstantOperandVal(1);
5288 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5289 IntrinsicID != Intrinsic::ppc_trapd &&
5290 IntrinsicID != Intrinsic::ppc_trap)
5291 break;
5292 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5293 IntrinsicID == Intrinsic::ppc_trapd)
5294 ? PPC::TDI
5295 : PPC::TWI;
5296 SmallVector<SDValue, 4> OpsWithMD;
5297 unsigned MDIndex;
5298 if (IntrinsicID == Intrinsic::ppc_tdw ||
5299 IntrinsicID == Intrinsic::ppc_tw) {
5300 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5301 int16_t SImmOperand2;
5302 int16_t SImmOperand3;
5303 int16_t SImmOperand4;
5304 bool isOperand2IntS16Immediate =
5305 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5306 bool isOperand3IntS16Immediate =
5307 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5308 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5309 // reg or imm + imm. The imm + imm form will be optimized to either an
5310 // unconditional trap or a nop in a later pass.
5311 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5312 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5313 else if (isOperand3IntS16Immediate)
5314 // The 2nd and 3rd operands are reg + imm.
5315 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5316 else {
5317 // The 2nd and 3rd operands are imm + reg.
5318 bool isOperand4IntS16Immediate =
5319 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5320 (void)isOperand4IntS16Immediate;
5321 assert(isOperand4IntS16Immediate &&
5322 "The 4th operand is not an Immediate");
5323 // We need to flip the condition immediate TO.
5324 int16_t TO = int(SImmOperand4) & 0x1F;
5325 // We swap the first and second bit of TO if they are not same.
5326 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5327 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5328 // We swap the fourth and fifth bit of TO if they are not same.
5329 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5330 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5331 Ops[0] = getI32Imm(TO, dl);
5332 Ops[1] = N->getOperand(3);
5333 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5334 }
5335 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5336 MDIndex = 5;
5337 } else {
5338 OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5339 MDIndex = 3;
5340 }
5341
5342 if (N->getNumOperands() > MDIndex) {
5343 SDValue MDV = N->getOperand(MDIndex);
5344 const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5345 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5346 assert((isa<MDString>(MD->getOperand(0)) &&
5347 cast<MDString>(MD->getOperand(0))->getString() ==
5348 "ppc-trap-reason") &&
5349 "Unsupported annotation data type!");
5350 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5351 assert(isa<MDString>(MD->getOperand(i)) &&
5352 "Invalid data type for annotation ppc-trap-reason!");
5353 OpsWithMD.push_back(
5354 getI32Imm(std::stoi(cast<MDString>(
5355 MD->getOperand(i))->getString().str()), dl));
5356 }
5357 }
5358 OpsWithMD.push_back(N->getOperand(0)); // chain
5359 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5360 return;
5361 }
5362
5364 // We emit the PPC::FSELS instruction here because of type conflicts with
5365 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5366 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5367 // value for the comparison. When selecting through a .td file, a type
5368 // error is raised. Must check this first so we never break on the
5369 // !Subtarget->isISA3_1() check.
5370 auto IntID = N->getConstantOperandVal(0);
5371 if (IntID == Intrinsic::ppc_fsels) {
5372 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5373 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5374 return;
5375 }
5376
5377 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5378 auto Pred = N->getConstantOperandVal(1);
5379 unsigned Opcode =
5380 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5381 unsigned SubReg = 0;
5382 unsigned ShiftVal = 0;
5383 bool Reverse = false;
5384 switch (Pred) {
5385 case 0:
5386 SubReg = PPC::sub_eq;
5387 ShiftVal = 1;
5388 break;
5389 case 1:
5390 SubReg = PPC::sub_eq;
5391 ShiftVal = 1;
5392 Reverse = true;
5393 break;
5394 case 2:
5395 SubReg = PPC::sub_lt;
5396 ShiftVal = 3;
5397 break;
5398 case 3:
5399 SubReg = PPC::sub_lt;
5400 ShiftVal = 3;
5401 Reverse = true;
5402 break;
5403 case 4:
5404 SubReg = PPC::sub_gt;
5405 ShiftVal = 2;
5406 break;
5407 case 5:
5408 SubReg = PPC::sub_gt;
5409 ShiftVal = 2;
5410 Reverse = true;
5411 break;
5412 case 6:
5413 SubReg = PPC::sub_un;
5414 break;
5415 case 7:
5416 SubReg = PPC::sub_un;
5417 Reverse = true;
5418 break;
5419 }
5420
5421 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5422 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5423 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5424 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5425 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5426 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5427 // MFOCRF and shift/negate the value.
5428 if (Subtarget->isISA3_1()) {
5429 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5430 SDValue CRBit = SDValue(
5431 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5432 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5433 0);
5434 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5435 CRBit);
5436 } else {
5437 SDValue Move =
5438 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5439 BCDOp.getValue(1)),
5440 0);
5441 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5442 getI32Imm(31, dl), getI32Imm(31, dl)};
5443 if (!Reverse)
5444 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5445 else {
5446 SDValue Shift = SDValue(
5447 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5448 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5449 }
5450 }
5451 return;
5452 }
5453
5454 if (!Subtarget->isISA3_1())
5455 break;
5456 unsigned Opcode = 0;
5457 switch (IntID) {
5458 default:
5459 break;
5460 case Intrinsic::ppc_altivec_vstribr_p:
5461 Opcode = PPC::VSTRIBR_rec;
5462 break;
5463 case Intrinsic::ppc_altivec_vstribl_p:
5464 Opcode = PPC::VSTRIBL_rec;
5465 break;
5466 case Intrinsic::ppc_altivec_vstrihr_p:
5467 Opcode = PPC::VSTRIHR_rec;
5468 break;
5469 case Intrinsic::ppc_altivec_vstrihl_p:
5470 Opcode = PPC::VSTRIHL_rec;
5471 break;
5472 }
5473 if (!Opcode)
5474 break;
5475
5476 // Generate the appropriate vector string isolate intrinsic to match.
5477 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5478 SDValue VecStrOp =
5479 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5480 // Vector string isolate instructions update the EQ bit of CR6.
5481 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5482 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5483 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5484 SDValue CRBit = SDValue(
5485 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5486 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5487 0);
5488 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5489 return;
5490 }
5491
5492 case ISD::SETCC:
5493 case ISD::STRICT_FSETCC:
5495 if (trySETCC(N))
5496 return;
5497 break;
5498 // These nodes will be transformed into GETtlsADDR32 node, which
5499 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5500 case PPCISD::ADDI_TLSLD_L_ADDR:
5501 case PPCISD::ADDI_TLSGD_L_ADDR: {
5502 const Module *Mod = MF->getFunction().getParent();
5503 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5504 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5505 Mod->getPICLevel() == PICLevel::SmallPIC)
5506 break;
5507 // Attach global base pointer on GETtlsADDR32 node in order to
5508 // generate secure plt code for TLS symbols.
5509 getGlobalBaseReg();
5510 } break;
5511 case PPCISD::CALL:
5512 case PPCISD::CALL_RM: {
5513 if (Subtarget->isPPC64() || !TM.isPositionIndependent() ||
5514 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF())
5515 break;
5516
5517 SDValue Op = N->getOperand(1);
5518
5519 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5520 if (GA->getTargetFlags() == PPCII::MO_PLT)
5521 getGlobalBaseReg();
5522 }
5523 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5524 if (ES->getTargetFlags() == PPCII::MO_PLT)
5525 getGlobalBaseReg();
5526 }
5527 } break;
5528
5530 ReplaceNode(N, getGlobalBaseReg());
5531 return;
5532
5533 case ISD::FrameIndex:
5534 selectFrameIndex(N, N);
5535 return;
5536
5537 case PPCISD::MFOCRF: {
5538 SDValue InGlue = N->getOperand(1);
5539 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5540 N->getOperand(0), InGlue));
5541 return;
5542 }
5543
5545 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5546 MVT::Other, N->getOperand(0)));
5547 return;
5548
5549 case PPCISD::SRA_ADDZE: {
5550 SDValue N0 = N->getOperand(0);
5551 SDValue ShiftAmt =
5552 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5553 getConstantIntValue(), dl,
5554 N->getValueType(0));
5555 if (N->getValueType(0) == MVT::i64) {
5556 SDNode *Op =
5557 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5558 N0, ShiftAmt);
5559 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5560 SDValue(Op, 1));
5561 return;
5562 } else {
5563 assert(N->getValueType(0) == MVT::i32 &&
5564 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5565 SDNode *Op =
5566 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5567 N0, ShiftAmt);
5568 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5569 SDValue(Op, 1));
5570 return;
5571 }
5572 }
5573
5574 case ISD::STORE: {
5575 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5576 // X-form stores.
5577 StoreSDNode *ST = cast<StoreSDNode>(N);
5578 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5579 ST->getAddressingMode() != ISD::PRE_INC)
5580 if (tryTLSXFormStore(ST))
5581 return;
5582 break;
5583 }
5584 case ISD::LOAD: {
5585 // Handle preincrement loads.
5586 LoadSDNode *LD = cast<LoadSDNode>(N);
5587 EVT LoadedVT = LD->getMemoryVT();
5588
5589 // Normal loads are handled by code generated from the .td file.
5590 if (LD->getAddressingMode() != ISD::PRE_INC) {
5591 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5592 // X-form loads.
5593 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5594 if (tryTLSXFormLoad(LD))
5595 return;
5596 break;
5597 }
5598
5599 SDValue Offset = LD->getOffset();
5600 if (Offset.getOpcode() == ISD::TargetConstant ||
5601 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5602
5603 unsigned Opcode;
5604 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5605 if (LD->getValueType(0) != MVT::i64) {
5606 // Handle PPC32 integer and normal FP loads.
5607 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5608 switch (LoadedVT.getSimpleVT().SimpleTy) {
5609 default: llvm_unreachable("Invalid PPC load type!");
5610 case MVT::f64: Opcode = PPC::LFDU; break;
5611 case MVT::f32: Opcode = PPC::LFSU; break;
5612 case MVT::i32: Opcode = PPC::LWZU; break;
5613 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5614 case MVT::i1:
5615 case MVT::i8: Opcode = PPC::LBZU; break;
5616 }
5617 } else {
5618 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5619 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5620 switch (LoadedVT.getSimpleVT().SimpleTy) {
5621 default: llvm_unreachable("Invalid PPC load type!");
5622 case MVT::i64: Opcode = PPC::LDU; break;
5623 case MVT::i32: Opcode = PPC::LWZU8; break;
5624 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5625 case MVT::i1:
5626 case MVT::i8: Opcode = PPC::LBZU8; break;
5627 }
5628 }
5629
5630 SDValue Chain = LD->getChain();
5631 SDValue Base = LD->getBasePtr();
5632 SDValue Ops[] = { Offset, Base, Chain };
5633 SDNode *MN = CurDAG->getMachineNode(
5634 Opcode, dl, LD->getValueType(0),
5635 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5636 transferMemOperands(N, MN);
5637 ReplaceNode(N, MN);
5638 return;
5639 } else {
5640 unsigned Opcode;
5641 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5642 if (LD->getValueType(0) != MVT::i64) {
5643 // Handle PPC32 integer and normal FP loads.
5644 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5645 switch (LoadedVT.getSimpleVT().SimpleTy) {
5646 default: llvm_unreachable("Invalid PPC load type!");
5647 case MVT::f64: Opcode = PPC::LFDUX; break;
5648 case MVT::f32: Opcode = PPC::LFSUX; break;
5649 case MVT::i32: Opcode = PPC::LWZUX; break;
5650 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5651 case MVT::i1:
5652 case MVT::i8: Opcode = PPC::LBZUX; break;
5653 }
5654 } else {
5655 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5656 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5657 "Invalid sext update load");
5658 switch (LoadedVT.getSimpleVT().SimpleTy) {
5659 default: llvm_unreachable("Invalid PPC load type!");
5660 case MVT::i64: Opcode = PPC::LDUX; break;
5661 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5662 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5663 case MVT::i1:
5664 case MVT::i8: Opcode = PPC::LBZUX8; break;
5665 }
5666 }
5667
5668 SDValue Chain = LD->getChain();
5669 SDValue Base = LD->getBasePtr();
5670 SDValue Ops[] = { Base, Offset, Chain };
5671 SDNode *MN = CurDAG->getMachineNode(
5672 Opcode, dl, LD->getValueType(0),
5673 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5674 transferMemOperands(N, MN);
5675 ReplaceNode(N, MN);
5676 return;
5677 }
5678 }
5679
5680 case ISD::AND:
5681 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5682 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5683 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5684 tryAsPairOfRLDICL(N))
5685 return;
5686
5687 // Other cases are autogenerated.
5688 break;
5689 case ISD::OR: {
5690 if (N->getValueType(0) == MVT::i32)
5691 if (tryBitfieldInsert(N))
5692 return;
5693
5694 int16_t Imm;
5695 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5696 isIntS16Immediate(N->getOperand(1), Imm)) {
5697 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5698
5699 // If this is equivalent to an add, then we can fold it with the
5700 // FrameIndex calculation.
5701 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5702 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5703 return;
5704 }
5705 }
5706
5707 // If this is 'or' against an imm with consecutive ones and both sides zero,
5708 // try to emit rldimi
5709 if (tryAsSingleRLDIMI(N))
5710 return;
5711
5712 // OR with a 32-bit immediate can be handled by ori + oris
5713 // without creating an immediate in a GPR.
5714 uint64_t Imm64 = 0;
5715 bool IsPPC64 = Subtarget->isPPC64();
5716 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5717 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5718 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
5719 uint64_t ImmHi = Imm64 >> 16;
5720 uint64_t ImmLo = Imm64 & 0xFFFF;
5721 if (ImmHi != 0 && ImmLo != 0) {
5722 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5723 N->getOperand(0),
5724 getI16Imm(ImmLo, dl));
5725 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5726 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5727 return;
5728 }
5729 }
5730
5731 // Other cases are autogenerated.
5732 break;
5733 }
5734 case ISD::XOR: {
5735 // XOR with a 32-bit immediate can be handled by xori + xoris
5736 // without creating an immediate in a GPR.
5737 uint64_t Imm64 = 0;
5738 bool IsPPC64 = Subtarget->isPPC64();
5739 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5740 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5741 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
5742 uint64_t ImmHi = Imm64 >> 16;
5743 uint64_t ImmLo = Imm64 & 0xFFFF;
5744 if (ImmHi != 0 && ImmLo != 0) {
5745 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5746 N->getOperand(0),
5747 getI16Imm(ImmLo, dl));
5748 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5749 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5750 return;
5751 }
5752 }
5753
5754 break;
5755 }
5756 case ISD::ADD: {
5757 int16_t Imm;
5758 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5759 isIntS16Immediate(N->getOperand(1), Imm)) {
5760 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5761 return;
5762 }
5763
5764 break;
5765 }
5766 case ISD::SHL: {
5767 unsigned Imm, SH, MB, ME;
5768 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5769 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5770 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5771 getI32Imm(SH, dl), getI32Imm(MB, dl),
5772 getI32Imm(ME, dl) };
5773 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5774 return;
5775 }
5776
5777 // Other cases are autogenerated.
5778 break;
5779 }
5780 case ISD::SRL: {
5781 unsigned Imm, SH, MB, ME;
5782 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5783 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5784 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5785 getI32Imm(SH, dl), getI32Imm(MB, dl),
5786 getI32Imm(ME, dl) };
5787 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5788 return;
5789 }
5790
5791 // Other cases are autogenerated.
5792 break;
5793 }
5794 case ISD::MUL: {
5795 SDValue Op1 = N->getOperand(1);
5796 if (Op1.getOpcode() != ISD::Constant ||
5797 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5798 break;
5799
5800 // If the multiplier fits int16, we can handle it with mulli.
5801 int64_t Imm = Op1->getAsZExtVal();
5802 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5803 if (isInt<16>(Imm) || !Shift)
5804 break;
5805
5806 // If the shifted value fits int16, we can do this transformation:
5807 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5808 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5809 uint64_t ImmSh = Imm >> Shift;
5810 if (!isInt<16>(ImmSh))
5811 break;
5812
5813 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5814 if (Op1.getValueType() == MVT::i64) {
5815 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5816 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5817 N->getOperand(0), SDImm);
5818
5819 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5820 getI32Imm(63 - Shift, dl)};
5821 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5822 return;
5823 } else {
5824 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5825 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5826 N->getOperand(0), SDImm);
5827
5828 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5829 getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5830 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5831 return;
5832 }
5833 break;
5834 }
5835 // FIXME: Remove this once the ANDI glue bug is fixed:
5838 if (!ANDIGlueBug)
5839 break;
5840
5841 EVT InVT = N->getOperand(0).getValueType();
5842 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5843 "Invalid input type for ANDI_rec_1_EQ_BIT");
5844
5845 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5846 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5847 N->getOperand(0),
5848 CurDAG->getTargetConstant(1, dl, InVT)),
5849 0);
5850 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5851 SDValue SRIdxVal = CurDAG->getTargetConstant(
5852 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5853 dl, MVT::i32);
5854
5855 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5856 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5857 return;
5858 }
5859 case ISD::SELECT_CC: {
5860 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5861 EVT PtrVT =
5863 bool isPPC64 = (PtrVT == MVT::i64);
5864
5865 // If this is a select of i1 operands, we'll pattern match it.
5866 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5867 break;
5868
5869 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5870 bool NeedSwapOps = false;
5871 bool IsUnCmp = false;
5872 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5873 SDValue LHS = N->getOperand(0);
5874 SDValue RHS = N->getOperand(1);
5875 if (NeedSwapOps)
5876 std::swap(LHS, RHS);
5877
5878 // Make use of SelectCC to generate the comparison to set CR bits, for
5879 // equality comparisons having one literal operand, SelectCC probably
5880 // doesn't need to materialize the whole literal and just use xoris to
5881 // check it first, it leads the following comparison result can't
5882 // exactly represent GT/LT relationship. So to avoid this we specify
5883 // SETGT/SETUGT here instead of SETEQ.
5884 SDValue GenCC =
5885 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5886 CurDAG->SelectNodeTo(
5887 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5888 N->getValueType(0), GenCC);
5889 NumP9Setb++;
5890 return;
5891 }
5892 }
5893
5894 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5895 if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5896 isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5897 CC == ISD::SETNE &&
5898 // FIXME: Implement this optzn for PPC64.
5899 N->getValueType(0) == MVT::i32) {
5900 SDNode *Tmp =
5901 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5902 N->getOperand(0), getI32Imm(~0U, dl));
5903 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5904 N->getOperand(0), SDValue(Tmp, 1));
5905 return;
5906 }
5907
5908 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5909
5910 if (N->getValueType(0) == MVT::i1) {
5911 // An i1 select is: (c & t) | (!c & f).
5912 bool Inv;
5913 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5914
5915 unsigned SRI;
5916 switch (Idx) {
5917 default: llvm_unreachable("Invalid CC index");
5918 case 0: SRI = PPC::sub_lt; break;
5919 case 1: SRI = PPC::sub_gt; break;
5920 case 2: SRI = PPC::sub_eq; break;
5921 case 3: SRI = PPC::sub_un; break;
5922 }
5923
5924 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5925
5926 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5927 CCBit, CCBit), 0);
5928 SDValue C = Inv ? NotCCBit : CCBit,
5929 NotC = Inv ? CCBit : NotCCBit;
5930
5931 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5932 C, N->getOperand(2)), 0);
5933 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5934 NotC, N->getOperand(3)), 0);
5935
5936 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5937 return;
5938 }
5939
5940 unsigned BROpc =
5941 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5942
5943 unsigned SelectCCOp;
5944 if (N->getValueType(0) == MVT::i32)
5945 SelectCCOp = PPC::SELECT_CC_I4;
5946 else if (N->getValueType(0) == MVT::i64)
5947 SelectCCOp = PPC::SELECT_CC_I8;
5948 else if (N->getValueType(0) == MVT::f32) {
5949 if (Subtarget->hasP8Vector())
5950 SelectCCOp = PPC::SELECT_CC_VSSRC;
5951 else if (Subtarget->hasSPE())
5952 SelectCCOp = PPC::SELECT_CC_SPE4;
5953 else
5954 SelectCCOp = PPC::SELECT_CC_F4;
5955 } else if (N->getValueType(0) == MVT::f64) {
5956 if (Subtarget->hasVSX())
5957 SelectCCOp = PPC::SELECT_CC_VSFRC;
5958 else if (Subtarget->hasSPE())
5959 SelectCCOp = PPC::SELECT_CC_SPE;
5960 else
5961 SelectCCOp = PPC::SELECT_CC_F8;
5962 } else if (N->getValueType(0) == MVT::f128)
5963 SelectCCOp = PPC::SELECT_CC_F16;
5964 else if (Subtarget->hasSPE())
5965 SelectCCOp = PPC::SELECT_CC_SPE;
5966 else if (N->getValueType(0) == MVT::v2f64 ||
5967 N->getValueType(0) == MVT::v2i64)
5968 SelectCCOp = PPC::SELECT_CC_VSRC;
5969 else
5970 SelectCCOp = PPC::SELECT_CC_VRRC;
5971
5972 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5973 getI32Imm(BROpc, dl) };
5974 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5975 return;
5976 }
5978 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5979 N->getValueType(0) == MVT::v2i64)) {
5980 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5981
5982 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5983 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5984 unsigned DM[2];
5985
5986 for (int i = 0; i < 2; ++i)
5987 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5988 DM[i] = 0;
5989 else
5990 DM[i] = 1;
5991
5992 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5994 isa<LoadSDNode>(Op1.getOperand(0))) {
5995 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5997
5998 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5999 (LD->getMemoryVT() == MVT::f64 ||
6000 LD->getMemoryVT() == MVT::i64) &&
6001 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
6002 SDValue Chain = LD->getChain();
6003 SDValue Ops[] = { Base, Offset, Chain };
6004 MachineMemOperand *MemOp = LD->getMemOperand();
6005 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
6006 N->getValueType(0), Ops);
6007 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
6008 return;
6009 }
6010 }
6011
6012 // For little endian, we must swap the input operands and adjust
6013 // the mask elements (reverse and invert them).
6014 if (Subtarget->isLittleEndian()) {
6015 std::swap(Op1, Op2);
6016 unsigned tmp = DM[0];
6017 DM[0] = 1 - DM[1];
6018 DM[1] = 1 - tmp;
6019 }
6020
6021 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
6022 MVT::i32);
6023 SDValue Ops[] = { Op1, Op2, DMV };
6024 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
6025 return;
6026 }
6027
6028 break;
6029 case PPCISD::BDNZ:
6030 case PPCISD::BDZ: {
6031 bool IsPPC64 = Subtarget->isPPC64();
6032 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
6033 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
6034 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
6035 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
6036 MVT::Other, Ops);
6037 return;
6038 }
6039 case PPCISD::COND_BRANCH: {
6040 // Op #0 is the Chain.
6041 // Op #1 is the PPC::PRED_* number.
6042 // Op #2 is the CR#
6043 // Op #3 is the Dest MBB
6044 // Op #4 is the Flag.
6045 // Prevent PPC::PRED_* from being selected into LI.
6046 unsigned PCC = N->getConstantOperandVal(1);
6047 if (EnableBranchHint)
6048 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6049
6050 SDValue Pred = getI32Imm(PCC, dl);
6051 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6052 N->getOperand(0), N->getOperand(4) };
6053 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6054 return;
6055 }
6056 case ISD::BR_CC: {
6057 if (tryFoldSWTestBRCC(N))
6058 return;
6059 if (trySelectLoopCountIntrinsic(N))
6060 return;
6061 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6062 unsigned PCC =
6063 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6064
6065 if (N->getOperand(2).getValueType() == MVT::i1) {
6066 unsigned Opc;
6067 bool Swap;
6068 switch (PCC) {
6069 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6070 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6071 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6072 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6073 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6074 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6075 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6076 }
6077
6078 // A signed comparison of i1 values produces the opposite result to an
6079 // unsigned one if the condition code includes less-than or greater-than.
6080 // This is because 1 is the most negative signed i1 number and the most
6081 // positive unsigned i1 number. The CR-logical operations used for such
6082 // comparisons are non-commutative so for signed comparisons vs. unsigned
6083 // ones, the input operands just need to be swapped.
6084 if (ISD::isSignedIntSetCC(CC))
6085 Swap = !Swap;
6086
6087 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6088 N->getOperand(Swap ? 3 : 2),
6089 N->getOperand(Swap ? 2 : 3)), 0);
6090 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6091 N->getOperand(0));
6092 return;
6093 }
6094
6095 if (EnableBranchHint)
6096 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6097
6098 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6099 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6100 N->getOperand(4), N->getOperand(0) };
6101 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6102 return;
6103 }
6104 case ISD::BRIND: {
6105 // FIXME: Should custom lower this.
6106 SDValue Chain = N->getOperand(0);
6107 SDValue Target = N->getOperand(1);
6108 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6109 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6110 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6111 Chain), 0);
6112 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6113 return;
6114 }
6115 case PPCISD::TOC_ENTRY: {
6116 const bool isPPC64 = Subtarget->isPPC64();
6117 const bool isELFABI = Subtarget->isSVR4ABI();
6118 const bool isAIXABI = Subtarget->isAIXABI();
6119
6120 // PowerPC only support small, medium and large code model.
6121 const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6122
6123 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6124 "PowerPC doesn't support tiny or kernel code models.");
6125
6126 if (isAIXABI && CModel == CodeModel::Medium)
6127 report_fatal_error("Medium code model is not supported on AIX.");
6128
6129 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6130 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6131 // small code model, we need to check for a toc-data attribute.
6132 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6133 break;
6134
6135 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6136 EVT OperandTy) {
6137 SDValue GA = TocEntry->getOperand(0);
6138 SDValue TocBase = TocEntry->getOperand(1);
6139 SDNode *MN = nullptr;
6140 if (OpCode == PPC::ADDItoc || OpCode == PPC::ADDItoc8)
6141 // toc-data access doesn't involve in loading from got, no need to
6142 // keep memory operands.
6143 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, TocBase, GA);
6144 else {
6145 MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6146 transferMemOperands(TocEntry, MN);
6147 }
6148 ReplaceNode(TocEntry, MN);
6149 };
6150
6151 // Handle 32-bit small code model.
6152 if (!isPPC64 && CModel == CodeModel::Small) {
6153 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
6154 // PPC::ADDItoc, or PPC::LWZtoc
6155 if (isELFABI) {
6157 "32-bit ELF can only have TOC entries in position independent"
6158 " code.");
6159 // 32-bit ELF always uses a small code model toc access.
6160 replaceWith(PPC::LWZtoc, N, MVT::i32);
6161 return;
6162 }
6163
6164 assert(isAIXABI && "ELF ABI already handled");
6165
6166 if (hasTocDataAttr(N->getOperand(0))) {
6167 replaceWith(PPC::ADDItoc, N, MVT::i32);
6168 return;
6169 }
6170
6171 replaceWith(PPC::LWZtoc, N, MVT::i32);
6172 return;
6173 }
6174
6175 if (isPPC64 && CModel == CodeModel::Small) {
6176 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6177
6178 if (hasTocDataAttr(N->getOperand(0))) {
6179 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6180 return;
6181 }
6182 // Break if it doesn't have toc data attribute. Proceed with common
6183 // SelectCode.
6184 break;
6185 }
6186
6187 assert(CModel != CodeModel::Small && "All small code models handled.");
6188
6189 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6190 " ELF/AIX or 32-bit AIX in the following.");
6191
6192 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode,
6193 // 64-bit medium (ELF-only), or 64-bit large (ELF and AIX) code model code
6194 // that does not contain TOC data symbols. We generate two instructions as
6195 // described below. The first source operand is a symbol reference. If it
6196 // must be referenced via the TOC according to Subtarget, we generate:
6197 // [32-bit AIX]
6198 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6199 // [64-bit ELF/AIX]
6200 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6201 // Otherwise for medium code model ELF we generate:
6202 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6203
6204 // And finally for AIX with toc-data we generate:
6205 // [32-bit AIX]
6206 // ADDItocL(ADDIStocHA(%x2, @sym), @sym)
6207 // [64-bit AIX]
6208 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6209
6210 SDValue GA = N->getOperand(0);
6211 SDValue TOCbase = N->getOperand(1);
6212
6213 EVT VT = Subtarget->getScalarIntVT();
6214 SDNode *Tmp = CurDAG->getMachineNode(
6215 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6216
6217 // On AIX, if the symbol has the toc-data attribute it will be defined
6218 // in the TOC entry, so we use an ADDItocL/ADDItocL8.
6219 if (isAIXABI && hasTocDataAttr(GA)) {
6220 ReplaceNode(
6221 N, CurDAG->getMachineNode(isPPC64 ? PPC::ADDItocL8 : PPC::ADDItocL,
6222 dl, VT, SDValue(Tmp, 0), GA));
6223 return;
6224 }
6225
6226 if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6227 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6228 // the address.
6229 SDNode *MN = CurDAG->getMachineNode(
6230 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6231
6232 transferMemOperands(N, MN);
6233 ReplaceNode(N, MN);
6234 return;
6235 }
6236
6237 assert(isPPC64 && "TOC_ENTRY already handled for 32-bit.");
6238 // Build the address relative to the TOC-pointer.
6239 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6240 SDValue(Tmp, 0), GA));
6241 return;
6242 }
6244 // Generate a PIC-safe GOT reference.
6245 assert(Subtarget->is32BitELFABI() &&
6246 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6247 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6248 PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6249 MVT::i32);
6250 return;
6251
6252 case PPCISD::VADD_SPLAT: {
6253 // This expands into one of three sequences, depending on whether
6254 // the first operand is odd or even, positive or negative.
6255 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6256 isa<ConstantSDNode>(N->getOperand(1)) &&
6257 "Invalid operand on VADD_SPLAT!");
6258
6259 int Elt = N->getConstantOperandVal(0);
6260 int EltSize = N->getConstantOperandVal(1);
6261 unsigned Opc1, Opc2, Opc3;
6262 EVT VT;
6263
6264 if (EltSize == 1) {
6265 Opc1 = PPC::VSPLTISB;
6266 Opc2 = PPC::VADDUBM;
6267 Opc3 = PPC::VSUBUBM;
6268 VT = MVT::v16i8;
6269 } else if (EltSize == 2) {
6270 Opc1 = PPC::VSPLTISH;
6271 Opc2 = PPC::VADDUHM;
6272 Opc3 = PPC::VSUBUHM;
6273 VT = MVT::v8i16;
6274 } else {
6275 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6276 Opc1 = PPC::VSPLTISW;
6277 Opc2 = PPC::VADDUWM;
6278 Opc3 = PPC::VSUBUWM;
6279 VT = MVT::v4i32;
6280 }
6281
6282 if ((Elt & 1) == 0) {
6283 // Elt is even, in the range [-32,-18] + [16,30].
6284 //
6285 // Convert: VADD_SPLAT elt, size
6286 // Into: tmp = VSPLTIS[BHW] elt
6287 // VADDU[BHW]M tmp, tmp
6288 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6289 SDValue EltVal = getI32Imm(Elt >> 1, dl);
6290 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6291 SDValue TmpVal = SDValue(Tmp, 0);
6292 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6293 return;
6294 } else if (Elt > 0) {
6295 // Elt is odd and positive, in the range [17,31].
6296 //
6297 // Convert: VADD_SPLAT elt, size
6298 // Into: tmp1 = VSPLTIS[BHW] elt-16
6299 // tmp2 = VSPLTIS[BHW] -16
6300 // VSUBU[BHW]M tmp1, tmp2
6301 SDValue EltVal = getI32Imm(Elt - 16, dl);
6302 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6303 EltVal = getI32Imm(-16, dl);
6304 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6305 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6306 SDValue(Tmp2, 0)));
6307 return;
6308 } else {
6309 // Elt is odd and negative, in the range [-31,-17].
6310 //
6311 // Convert: VADD_SPLAT elt, size
6312 // Into: tmp1 = VSPLTIS[BHW] elt+16
6313 // tmp2 = VSPLTIS[BHW] -16
6314 // VADDU[BHW]M tmp1, tmp2
6315 SDValue EltVal = getI32Imm(Elt + 16, dl);
6316 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6317 EltVal = getI32Imm(-16, dl);
6318 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6319 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6320 SDValue(Tmp2, 0)));
6321 return;
6322 }
6323 }
6324 case PPCISD::LD_SPLAT: {
6325 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6326 // no direct move, we don't need to use stack for this case. If target has
6327 // direct move, we should be able to get the best selection in the .td file.
6328 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6329 break;
6330
6331 EVT Type = N->getValueType(0);
6332 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6333 break;
6334
6335 // If the alignment for the load is 16 or bigger, we don't need the
6336 // permutated mask to get the required value. The value must be the 0
6337 // element in big endian target or 7/15 in little endian target in the
6338 // result vsx register of lvx instruction.
6339 // Select the instruction in the .td file.
6340 if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6341 isOffsetMultipleOf(N, 16))
6342 break;
6343
6344 SDValue ZeroReg =
6345 CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6346 Subtarget->getScalarIntVT());
6347 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6348 // v16i8 LD_SPLAT addr
6349 // ======>
6350 // Mask = LVSR/LVSL 0, addr
6351 // LoadLow = LVX 0, addr
6352 // Perm = VPERM LoadLow, LoadLow, Mask
6353 // Splat = VSPLTB 15/0, Perm
6354 //
6355 // v8i16 LD_SPLAT addr
6356 // ======>
6357 // Mask = LVSR/LVSL 0, addr
6358 // LoadLow = LVX 0, addr
6359 // LoadHigh = LVX (LI, 1), addr
6360 // Perm = VPERM LoadLow, LoadHigh, Mask
6361 // Splat = VSPLTH 7/0, Perm
6362 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6363 unsigned SplatElemIndex =
6364 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6365
6366 SDNode *Mask = CurDAG->getMachineNode(
6367 Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6368 N->getOperand(1));
6369
6370 SDNode *LoadLow =
6371 CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6372 {ZeroReg, N->getOperand(1), N->getOperand(0)});
6373
6374 SDNode *LoadHigh = LoadLow;
6375 if (Type == MVT::v8i16) {
6376 LoadHigh = CurDAG->getMachineNode(
6377 PPC::LVX, dl, MVT::v16i8, MVT::Other,
6378 {SDValue(CurDAG->getMachineNode(
6379 LIOpcode, dl, MVT::i32,
6380 CurDAG->getTargetConstant(1, dl, MVT::i8)),
6381 0),
6382 N->getOperand(1), SDValue(LoadLow, 1)});
6383 }
6384
6385 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6386 transferMemOperands(N, LoadHigh);
6387
6388 SDNode *Perm =
6389 CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6390 SDValue(LoadHigh, 0), SDValue(Mask, 0));
6391 CurDAG->SelectNodeTo(N, SplatOp, Type,
6392 CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6393 SDValue(Perm, 0));
6394 return;
6395 }
6396 }
6397
6398 SelectCode(N);
6399}
6400
6401// If the target supports the cmpb instruction, do the idiom recognition here.
6402// We don't do this as a DAG combine because we don't want to do it as nodes
6403// are being combined (because we might miss part of the eventual idiom). We
6404// don't want to do it during instruction selection because we want to reuse
6405// the logic for lowering the masking operations already part of the
6406// instruction selector.
6407SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6408 SDLoc dl(N);
6409
6410 assert(N->getOpcode() == ISD::OR &&
6411 "Only OR nodes are supported for CMPB");
6412
6413 SDValue Res;
6414 if (!Subtarget->hasCMPB())
6415 return Res;
6416
6417 if (N->getValueType(0) != MVT::i32 &&
6418 N->getValueType(0) != MVT::i64)
6419 return Res;
6420
6421 EVT VT = N->getValueType(0);
6422
6423 SDValue RHS, LHS;
6424 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6425 uint64_t Mask = 0, Alt = 0;
6426
6427 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6428 uint64_t &Mask, uint64_t &Alt,
6429 SDValue &LHS, SDValue &RHS) {
6430 if (O.getOpcode() != ISD::SELECT_CC)
6431 return false;
6432 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6433
6434 if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6435 !isa<ConstantSDNode>(O.getOperand(3)))
6436 return false;
6437
6438 uint64_t PM = O.getConstantOperandVal(2);
6439 uint64_t PAlt = O.getConstantOperandVal(3);
6440 for (b = 0; b < 8; ++b) {
6441 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6442 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6443 break;
6444 }
6445
6446 if (b == 8)
6447 return false;
6448 Mask |= PM;
6449 Alt |= PAlt;
6450
6451 if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6452 O.getConstantOperandVal(1) != 0) {
6453 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6454 if (Op0.getOpcode() == ISD::TRUNCATE)
6455 Op0 = Op0.getOperand(0);
6456 if (Op1.getOpcode() == ISD::TRUNCATE)
6457 Op1 = Op1.getOperand(0);
6458
6459 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6460 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6462
6463 unsigned Bits = Op0.getValueSizeInBits();
6464 if (b != Bits/8-1)
6465 return false;
6466 if (Op0.getConstantOperandVal(1) != Bits-8)
6467 return false;
6468
6469 LHS = Op0.getOperand(0);
6470 RHS = Op1.getOperand(0);
6471 return true;
6472 }
6473
6474 // When we have small integers (i16 to be specific), the form present
6475 // post-legalization uses SETULT in the SELECT_CC for the
6476 // higher-order byte, depending on the fact that the
6477 // even-higher-order bytes are known to all be zero, for example:
6478 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6479 // (so when the second byte is the same, because all higher-order
6480 // bits from bytes 3 and 4 are known to be zero, the result of the
6481 // xor can be at most 255)
6482 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6483 isa<ConstantSDNode>(O.getOperand(1))) {
6484
6485 uint64_t ULim = O.getConstantOperandVal(1);
6486 if (ULim != (UINT64_C(1) << b*8))
6487 return false;
6488
6489 // Now we need to make sure that the upper bytes are known to be
6490 // zero.
6491 unsigned Bits = Op0.getValueSizeInBits();
6492 if (!CurDAG->MaskedValueIsZero(
6493 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6494 return false;
6495
6496 LHS = Op0.getOperand(0);
6497 RHS = Op0.getOperand(1);
6498 return true;
6499 }
6500
6501 return false;
6502 }
6503
6504 if (CC != ISD::SETEQ)
6505 return false;
6506
6507 SDValue Op = O.getOperand(0);
6508 if (Op.getOpcode() == ISD::AND) {
6509 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6510 return false;
6511 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6512 return false;
6513
6514 SDValue XOR = Op.getOperand(0);
6515 if (XOR.getOpcode() == ISD::TRUNCATE)
6516 XOR = XOR.getOperand(0);
6517 if (XOR.getOpcode() != ISD::XOR)
6518 return false;
6519
6520 LHS = XOR.getOperand(0);
6521 RHS = XOR.getOperand(1);
6522 return true;
6523 } else if (Op.getOpcode() == ISD::SRL) {
6524 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6525 return false;
6526 unsigned Bits = Op.getValueSizeInBits();
6527 if (b != Bits/8-1)
6528 return false;
6529 if (Op.getConstantOperandVal(1) != Bits-8)
6530 return false;
6531
6532 SDValue XOR = Op.getOperand(0);
6533 if (XOR.getOpcode() == ISD::TRUNCATE)
6534 XOR = XOR.getOperand(0);
6535 if (XOR.getOpcode() != ISD::XOR)
6536 return false;
6537
6538 LHS = XOR.getOperand(0);
6539 RHS = XOR.getOperand(1);
6540 return true;
6541 }
6542
6543 return false;
6544 };
6545
6547 while (!Queue.empty()) {
6548 SDValue V = Queue.pop_back_val();
6549
6550 for (const SDValue &O : V.getNode()->ops()) {
6551 unsigned b = 0;
6552 uint64_t M = 0, A = 0;
6553 SDValue OLHS, ORHS;
6554 if (O.getOpcode() == ISD::OR) {
6555 Queue.push_back(O);
6556 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6557 if (!LHS) {
6558 LHS = OLHS;
6559 RHS = ORHS;
6560 BytesFound[b] = true;
6561 Mask |= M;
6562 Alt |= A;
6563 } else if ((LHS == ORHS && RHS == OLHS) ||
6564 (RHS == ORHS && LHS == OLHS)) {
6565 BytesFound[b] = true;
6566 Mask |= M;
6567 Alt |= A;
6568 } else {
6569 return Res;
6570 }
6571 } else {
6572 return Res;
6573 }
6574 }
6575 }
6576
6577 unsigned LastB = 0, BCnt = 0;
6578 for (unsigned i = 0; i < 8; ++i)
6579 if (BytesFound[LastB]) {
6580 ++BCnt;
6581 LastB = i;
6582 }
6583
6584 if (!LastB || BCnt < 2)
6585 return Res;
6586
6587 // Because we'll be zero-extending the output anyway if don't have a specific
6588 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6589 if (LHS.getValueType() != VT) {
6590 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6591 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6592 }
6593
6594 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6595
6596 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6597 if (NonTrivialMask && !Alt) {
6598 // Res = Mask & CMPB
6599 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6600 CurDAG->getConstant(Mask, dl, VT));
6601 } else if (Alt) {
6602 // Res = (CMPB & Mask) | (~CMPB & Alt)
6603 // Which, as suggested here:
6604 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6605 // can be written as:
6606 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6607 // useful because the (Alt ^ Mask) can be pre-computed.
6608 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6609 CurDAG->getConstant(Mask ^ Alt, dl, VT));
6610 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6611 CurDAG->getConstant(Alt, dl, VT));
6612 }
6613
6614 return Res;
6615}
6616
6617// When CR bit registers are enabled, an extension of an i1 variable to a i32
6618// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6619// involves constant materialization of a 0 or a 1 or both. If the result of
6620// the extension is then operated upon by some operator that can be constant
6621// folded with a constant 0 or 1, and that constant can be materialized using
6622// only one instruction (like a zero or one), then we should fold in those
6623// operations with the select.
6624void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6625 if (!Subtarget->useCRBits())
6626 return;
6627
6628 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6629 N->getOpcode() != ISD::SIGN_EXTEND &&
6630 N->getOpcode() != ISD::ANY_EXTEND)
6631 return;
6632
6633 if (N->getOperand(0).getValueType() != MVT::i1)
6634 return;
6635
6636 if (!N->hasOneUse())
6637 return;
6638
6639 SDLoc dl(N);
6640 EVT VT = N->getValueType(0);
6641 SDValue Cond = N->getOperand(0);
6642 SDValue ConstTrue = CurDAG->getSignedConstant(
6643 N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6644 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6645
6646 do {
6647 SDNode *User = *N->user_begin();
6648 if (User->getNumOperands() != 2)
6649 break;
6650
6651 auto TryFold = [this, N, User, dl](SDValue Val) {
6652 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6653 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6654 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6655
6656 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6657 User->getValueType(0), {O0, O1});
6658 };
6659
6660 // FIXME: When the semantics of the interaction between select and undef
6661 // are clearly defined, it may turn out to be unnecessary to break here.
6662 SDValue TrueRes = TryFold(ConstTrue);
6663 if (!TrueRes || TrueRes.isUndef())
6664 break;
6665 SDValue FalseRes = TryFold(ConstFalse);
6666 if (!FalseRes || FalseRes.isUndef())
6667 break;
6668
6669 // For us to materialize these using one instruction, we must be able to
6670 // represent them as signed 16-bit integers.
6671 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6672 if (!isInt<16>(True) || !isInt<16>(False))
6673 break;
6674
6675 // We can replace User with a new SELECT node, and try again to see if we
6676 // can fold the select with its user.
6677 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6678 N = User;
6679 ConstTrue = TrueRes;
6680 ConstFalse = FalseRes;
6681 } while (N->hasOneUse());
6682}
6683
6684void PPCDAGToDAGISel::PreprocessISelDAG() {
6685 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6686
6687 bool MadeChange = false;
6688 while (Position != CurDAG->allnodes_begin()) {
6689 SDNode *N = &*--Position;
6690 if (N->use_empty())
6691 continue;
6692
6693 SDValue Res;
6694 switch (N->getOpcode()) {
6695 default: break;
6696 case ISD::OR:
6697 Res = combineToCMPB(N);
6698 break;
6699 }
6700
6701 if (!Res)
6702 foldBoolExts(Res, N);
6703
6704 if (Res) {
6705 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6706 LLVM_DEBUG(N->dump(CurDAG));
6707 LLVM_DEBUG(dbgs() << "\nNew: ");
6708 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6709 LLVM_DEBUG(dbgs() << "\n");
6710
6711 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6712 MadeChange = true;
6713 }
6714 }
6715
6716 if (MadeChange)
6717 CurDAG->RemoveDeadNodes();
6718}
6719
6720/// PostprocessISelDAG - Perform some late peephole optimizations
6721/// on the DAG representation.
6722void PPCDAGToDAGISel::PostprocessISelDAG() {
6723 // Skip peepholes at -O0.
6724 if (TM.getOptLevel() == CodeGenOptLevel::None)
6725 return;
6726
6727 PeepholePPC64();
6728 PeepholeCROps();
6729 PeepholePPC64ZExt();
6730}
6731
6732// Check if all users of this node will become isel where the second operand
6733// is the constant zero. If this is so, and if we can negate the condition,
6734// then we can flip the true and false operands. This will allow the zero to
6735// be folded with the isel so that we don't need to materialize a register
6736// containing zero.
6737bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6738 for (const SDNode *User : N->users()) {
6739 if (!User->isMachineOpcode())
6740 return false;
6741 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6742 User->getMachineOpcode() != PPC::SELECT_I8)
6743 return false;
6744
6745 SDNode *Op1 = User->getOperand(1).getNode();
6746 SDNode *Op2 = User->getOperand(2).getNode();
6747 // If we have a degenerate select with two equal operands, swapping will
6748 // not do anything, and we may run into an infinite loop.
6749 if (Op1 == Op2)
6750 return false;
6751
6752 if (!Op2->isMachineOpcode())
6753 return false;
6754
6755 if (Op2->getMachineOpcode() != PPC::LI &&
6756 Op2->getMachineOpcode() != PPC::LI8)
6757 return false;
6758
6759 if (!isNullConstant(Op2->getOperand(0)))
6760 return false;
6761 }
6762
6763 return true;
6764}
6765
6766void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6767 SmallVector<SDNode *, 4> ToReplace;
6768 for (SDNode *User : N->users()) {
6769 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6770 User->getMachineOpcode() == PPC::SELECT_I8) &&
6771 "Must have all select users");
6772 ToReplace.push_back(User);
6773 }
6774
6775 for (SDNode *User : ToReplace) {
6776 SDNode *ResNode =
6777 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6778 User->getValueType(0), User->getOperand(0),
6779 User->getOperand(2),
6780 User->getOperand(1));
6781
6782 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6783 LLVM_DEBUG(User->dump(CurDAG));
6784 LLVM_DEBUG(dbgs() << "\nNew: ");
6785 LLVM_DEBUG(ResNode->dump(CurDAG));
6786 LLVM_DEBUG(dbgs() << "\n");
6787
6788 ReplaceUses(User, ResNode);
6789 }
6790}
6791
6792void PPCDAGToDAGISel::PeepholeCROps() {
6793 bool IsModified;
6794 do {
6795 IsModified = false;
6796 for (SDNode &Node : CurDAG->allnodes()) {
6797 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6798 if (!MachineNode || MachineNode->use_empty())
6799 continue;
6800 SDNode *ResNode = MachineNode;
6801
6802 bool Op1Set = false, Op1Unset = false,
6803 Op1Not = false,
6804 Op2Set = false, Op2Unset = false,
6805 Op2Not = false;
6806
6807 unsigned Opcode = MachineNode->getMachineOpcode();
6808 switch (Opcode) {
6809 default: break;
6810 case PPC::CRAND:
6811 case PPC::CRNAND:
6812 case PPC::CROR:
6813 case PPC::CRXOR:
6814 case PPC::CRNOR:
6815 case PPC::CREQV:
6816 case PPC::CRANDC:
6817 case PPC::CRORC: {
6818 SDValue Op = MachineNode->getOperand(1);
6819 if (Op.isMachineOpcode()) {
6820 if (Op.getMachineOpcode() == PPC::CRSET)
6821 Op2Set = true;
6822 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6823 Op2Unset = true;
6824 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6825 Op.getOperand(0) == Op.getOperand(1)) ||
6826 Op.getMachineOpcode() == PPC::CRNOT)
6827 Op2Not = true;
6828 }
6829 [[fallthrough]];
6830 }
6831 case PPC::BC:
6832 case PPC::BCn:
6833 case PPC::SELECT_I4:
6834 case PPC::SELECT_I8:
6835 case PPC::SELECT_F4:
6836 case PPC::SELECT_F8:
6837 case PPC::SELECT_SPE:
6838 case PPC::SELECT_SPE4:
6839 case PPC::SELECT_VRRC:
6840 case PPC::SELECT_VSFRC:
6841 case PPC::SELECT_VSSRC:
6842 case PPC::SELECT_VSRC: {
6843 SDValue Op = MachineNode->getOperand(0);
6844 if (Op.isMachineOpcode()) {
6845 if (Op.getMachineOpcode() == PPC::CRSET)
6846 Op1Set = true;
6847 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6848 Op1Unset = true;
6849 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6850 Op.getOperand(0) == Op.getOperand(1)) ||
6851 Op.getMachineOpcode() == PPC::CRNOT)
6852 Op1Not = true;
6853 }
6854 }
6855 break;
6856 }
6857
6858 bool SelectSwap = false;
6859 switch (Opcode) {
6860 default: break;
6861 case PPC::CRAND:
6862 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6863 // x & x = x
6864 ResNode = MachineNode->getOperand(0).getNode();
6865 else if (Op1Set)
6866 // 1 & y = y
6867 ResNode = MachineNode->getOperand(1).getNode();
6868 else if (Op2Set)
6869 // x & 1 = x
6870 ResNode = MachineNode->getOperand(0).getNode();
6871 else if (Op1Unset || Op2Unset)
6872 // x & 0 = 0 & y = 0
6873 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6874 MVT::i1);
6875 else if (Op1Not)
6876 // ~x & y = andc(y, x)
6877 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6878 MVT::i1, MachineNode->getOperand(1),
6879 MachineNode->getOperand(0).
6880 getOperand(0));
6881 else if (Op2Not)
6882 // x & ~y = andc(x, y)
6883 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6884 MVT::i1, MachineNode->getOperand(0),
6885 MachineNode->getOperand(1).
6886 getOperand(0));
6887 else if (AllUsersSelectZero(MachineNode)) {
6888 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6889 MVT::i1, MachineNode->getOperand(0),
6890 MachineNode->getOperand(1));
6891 SelectSwap = true;
6892 }
6893 break;
6894 case PPC::CRNAND:
6895 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6896 // nand(x, x) -> nor(x, x)
6897 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6898 MVT::i1, MachineNode->getOperand(0),
6899 MachineNode->getOperand(0));
6900 else if (Op1Set)
6901 // nand(1, y) -> nor(y, y)
6902 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6903 MVT::i1, MachineNode->getOperand(1),
6904 MachineNode->getOperand(1));
6905 else if (Op2Set)
6906 // nand(x, 1) -> nor(x, x)
6907 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6908 MVT::i1, MachineNode->getOperand(0),
6909 MachineNode->getOperand(0));
6910 else if (Op1Unset || Op2Unset)
6911 // nand(x, 0) = nand(0, y) = 1
6912 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6913 MVT::i1);
6914 else if (Op1Not)
6915 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6916 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6917 MVT::i1, MachineNode->getOperand(0).
6918 getOperand(0),
6919 MachineNode->getOperand(1));
6920 else if (Op2Not)
6921 // nand(x, ~y) = ~x | y = orc(y, x)
6922 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6923 MVT::i1, MachineNode->getOperand(1).
6924 getOperand(0),
6925 MachineNode->getOperand(0));
6926 else if (AllUsersSelectZero(MachineNode)) {
6927 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6928 MVT::i1, MachineNode->getOperand(0),
6929 MachineNode->getOperand(1));
6930 SelectSwap = true;
6931 }
6932 break;
6933 case PPC::CROR:
6934 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6935 // x | x = x
6936 ResNode = MachineNode->getOperand(0).getNode();
6937 else if (Op1Set || Op2Set)
6938 // x | 1 = 1 | y = 1
6939 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6940 MVT::i1);
6941 else if (Op1Unset)
6942 // 0 | y = y
6943 ResNode = MachineNode->getOperand(1).getNode();
6944 else if (Op2Unset)
6945 // x | 0 = x
6946 ResNode = MachineNode->getOperand(0).getNode();
6947 else if (Op1Not)
6948 // ~x | y = orc(y, x)
6949 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6950 MVT::i1, MachineNode->getOperand(1),
6951 MachineNode->getOperand(0).
6952 getOperand(0));
6953 else if (Op2Not)
6954 // x | ~y = orc(x, y)
6955 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6956 MVT::i1, MachineNode->getOperand(0),
6957 MachineNode->getOperand(1).
6958 getOperand(0));
6959 else if (AllUsersSelectZero(MachineNode)) {
6960 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6961 MVT::i1, MachineNode->getOperand(0),
6962 MachineNode->getOperand(1));
6963 SelectSwap = true;
6964 }
6965 break;
6966 case PPC::CRXOR:
6967 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6968 // xor(x, x) = 0
6969 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6970 MVT::i1);
6971 else if (Op1Set)
6972 // xor(1, y) -> nor(y, y)
6973 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6974 MVT::i1, MachineNode->getOperand(1),
6975 MachineNode->getOperand(1));
6976 else if (Op2Set)
6977 // xor(x, 1) -> nor(x, x)
6978 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6979 MVT::i1, MachineNode->getOperand(0),
6980 MachineNode->getOperand(0));
6981 else if (Op1Unset)
6982 // xor(0, y) = y
6983 ResNode = MachineNode->getOperand(1).getNode();
6984 else if (Op2Unset)
6985 // xor(x, 0) = x
6986 ResNode = MachineNode->getOperand(0).getNode();
6987 else if (Op1Not)
6988 // xor(~x, y) = eqv(x, y)
6989 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6990 MVT::i1, MachineNode->getOperand(0).
6991 getOperand(0),
6992 MachineNode->getOperand(1));
6993 else if (Op2Not)
6994 // xor(x, ~y) = eqv(x, y)
6995 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6996 MVT::i1, MachineNode->getOperand(0),
6997 MachineNode->getOperand(1).
6998 getOperand(0));
6999 else if (AllUsersSelectZero(MachineNode)) {
7000 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
7001 MVT::i1, MachineNode->getOperand(0),
7002 MachineNode->getOperand(1));
7003 SelectSwap = true;
7004 }
7005 break;
7006 case PPC::CRNOR:
7007 if (Op1Set || Op2Set)
7008 // nor(1, y) -> 0
7009 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7010 MVT::i1);
7011 else if (Op1Unset)
7012 // nor(0, y) = ~y -> nor(y, y)
7013 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7014 MVT::i1, MachineNode->getOperand(1),
7015 MachineNode->getOperand(1));
7016 else if (Op2Unset)
7017 // nor(x, 0) = ~x
7018 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7019 MVT::i1, MachineNode->getOperand(0),
7020 MachineNode->getOperand(0));
7021 else if (Op1Not)
7022 // nor(~x, y) = andc(x, y)
7023 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7024 MVT::i1, MachineNode->getOperand(0).
7025 getOperand(0),
7026 MachineNode->getOperand(1));
7027 else if (Op2Not)
7028 // nor(x, ~y) = andc(y, x)
7029 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7030 MVT::i1, MachineNode->getOperand(1).
7031 getOperand(0),
7032 MachineNode->getOperand(0));
7033 else if (AllUsersSelectZero(MachineNode)) {
7034 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7035 MVT::i1, MachineNode->getOperand(0),
7036 MachineNode->getOperand(1));
7037 SelectSwap = true;
7038 }
7039 break;
7040 case PPC::CREQV:
7041 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7042 // eqv(x, x) = 1
7043 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7044 MVT::i1);
7045 else if (Op1Set)
7046 // eqv(1, y) = y
7047 ResNode = MachineNode->getOperand(1).getNode();
7048 else if (Op2Set)
7049 // eqv(x, 1) = x
7050 ResNode = MachineNode->getOperand(0).getNode();
7051 else if (Op1Unset)
7052 // eqv(0, y) = ~y -> nor(y, y)
7053 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7054 MVT::i1, MachineNode->getOperand(1),
7055 MachineNode->getOperand(1));
7056 else if (Op2Unset)
7057 // eqv(x, 0) = ~x
7058 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7059 MVT::i1, MachineNode->getOperand(0),
7060 MachineNode->getOperand(0));
7061 else if (Op1Not)
7062 // eqv(~x, y) = xor(x, y)
7063 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7064 MVT::i1, MachineNode->getOperand(0).
7065 getOperand(0),
7066 MachineNode->getOperand(1));
7067 else if (Op2Not)
7068 // eqv(x, ~y) = xor(x, y)
7069 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7070 MVT::i1, MachineNode->getOperand(0),
7071 MachineNode->getOperand(1).
7072 getOperand(0));
7073 else if (AllUsersSelectZero(MachineNode)) {
7074 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7075 MVT::i1, MachineNode->getOperand(0),
7076 MachineNode->getOperand(1));
7077 SelectSwap = true;
7078 }
7079 break;
7080 case PPC::CRANDC:
7081 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7082 // andc(x, x) = 0
7083 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7084 MVT::i1);
7085 else if (Op1Set)
7086 // andc(1, y) = ~y
7087 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7088 MVT::i1, MachineNode->getOperand(1),
7089 MachineNode->getOperand(1));
7090 else if (Op1Unset || Op2Set)
7091 // andc(0, y) = andc(x, 1) = 0
7092 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7093 MVT::i1);
7094 else if (Op2Unset)
7095 // andc(x, 0) = x
7096 ResNode = MachineNode->getOperand(0).getNode();
7097 else if (Op1Not)
7098 // andc(~x, y) = ~(x | y) = nor(x, y)
7099 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7100 MVT::i1, MachineNode->getOperand(0).
7101 getOperand(0),
7102 MachineNode->getOperand(1));
7103 else if (Op2Not)
7104 // andc(x, ~y) = x & y
7105 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7106 MVT::i1, MachineNode->getOperand(0),
7107 MachineNode->getOperand(1).
7108 getOperand(0));
7109 else if (AllUsersSelectZero(MachineNode)) {
7110 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7111 MVT::i1, MachineNode->getOperand(1),
7112 MachineNode->getOperand(0));
7113 SelectSwap = true;
7114 }
7115 break;
7116 case PPC::CRORC:
7117 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7118 // orc(x, x) = 1
7119 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7120 MVT::i1);
7121 else if (Op1Set || Op2Unset)
7122 // orc(1, y) = orc(x, 0) = 1
7123 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7124 MVT::i1);
7125 else if (Op2Set)
7126 // orc(x, 1) = x
7127 ResNode = MachineNode->getOperand(0).getNode();
7128 else if (Op1Unset)
7129 // orc(0, y) = ~y
7130 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7131 MVT::i1, MachineNode->getOperand(1),
7132 MachineNode->getOperand(1));
7133 else if (Op1Not)
7134 // orc(~x, y) = ~(x & y) = nand(x, y)
7135 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7136 MVT::i1, MachineNode->getOperand(0).
7137 getOperand(0),
7138 MachineNode->getOperand(1));
7139 else if (Op2Not)
7140 // orc(x, ~y) = x | y
7141 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7142 MVT::i1, MachineNode->getOperand(0),
7143 MachineNode->getOperand(1).
7144 getOperand(0));
7145 else if (AllUsersSelectZero(MachineNode)) {
7146 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7147 MVT::i1, MachineNode->getOperand(1),
7148 MachineNode->getOperand(0));
7149 SelectSwap = true;
7150 }
7151 break;
7152 case PPC::SELECT_I4:
7153 case PPC::SELECT_I8:
7154 case PPC::SELECT_F4:
7155 case PPC::SELECT_F8:
7156 case PPC::SELECT_SPE:
7157 case PPC::SELECT_SPE4:
7158 case PPC::SELECT_VRRC:
7159 case PPC::SELECT_VSFRC:
7160 case PPC::SELECT_VSSRC:
7161 case PPC::SELECT_VSRC:
7162 if (Op1Set)
7163 ResNode = MachineNode->getOperand(1).getNode();
7164 else if (Op1Unset)
7165 ResNode = MachineNode->getOperand(2).getNode();
7166 else if (Op1Not)
7167 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7168 SDLoc(MachineNode),
7169 MachineNode->getValueType(0),
7170 MachineNode->getOperand(0).
7171 getOperand(0),
7172 MachineNode->getOperand(2),
7173 MachineNode->getOperand(1));
7174 break;
7175 case PPC::BC:
7176 case PPC::BCn:
7177 if (Op1Not)
7178 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7179 PPC::BC,
7180 SDLoc(MachineNode),
7181 MVT::Other,
7182 MachineNode->getOperand(0).
7183 getOperand(0),
7184 MachineNode->getOperand(1),
7185 MachineNode->getOperand(2));
7186 // FIXME: Handle Op1Set, Op1Unset here too.
7187 break;
7188 }
7189
7190 // If we're inverting this node because it is used only by selects that
7191 // we'd like to swap, then swap the selects before the node replacement.
7192 if (SelectSwap)
7193 SwapAllSelectUsers(MachineNode);
7194
7195 if (ResNode != MachineNode) {
7196 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7197 LLVM_DEBUG(MachineNode->dump(CurDAG));
7198 LLVM_DEBUG(dbgs() << "\nNew: ");
7199 LLVM_DEBUG(ResNode->dump(CurDAG));
7200 LLVM_DEBUG(dbgs() << "\n");
7201
7202 ReplaceUses(MachineNode, ResNode);
7203 IsModified = true;
7204 }
7205 }
7206 if (IsModified)
7207 CurDAG->RemoveDeadNodes();
7208 } while (IsModified);
7209}
7210
7211// Gather the set of 32-bit operations that are known to have their
7212// higher-order 32 bits zero, where ToPromote contains all such operations.
7214 SmallPtrSetImpl<SDNode *> &ToPromote) {
7215 if (!Op32.isMachineOpcode())
7216 return false;
7217
7218 // First, check for the "frontier" instructions (those that will clear the
7219 // higher-order 32 bits.
7220
7221 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7222 // around. If it does not, then these instructions will clear the
7223 // higher-order bits.
7224 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7225 Op32.getMachineOpcode() == PPC::RLWNM) &&
7226 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7227 ToPromote.insert(Op32.getNode());
7228 return true;
7229 }
7230
7231 // SLW and SRW always clear the higher-order bits.
7232 if (Op32.getMachineOpcode() == PPC::SLW ||
7233 Op32.getMachineOpcode() == PPC::SRW) {
7234 ToPromote.insert(Op32.getNode());
7235 return true;
7236 }
7237
7238 // For LI and LIS, we need the immediate to be positive (so that it is not
7239 // sign extended).
7240 if (Op32.getMachineOpcode() == PPC::LI ||
7241 Op32.getMachineOpcode() == PPC::LIS) {
7242 if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7243 return false;
7244
7245 ToPromote.insert(Op32.getNode());
7246 return true;
7247 }
7248
7249 // LHBRX and LWBRX always clear the higher-order bits.
7250 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7251 Op32.getMachineOpcode() == PPC::LWBRX) {
7252 ToPromote.insert(Op32.getNode());
7253 return true;
7254 }
7255
7256 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7257 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7258 Op32.getMachineOpcode() == PPC::CNTTZW) {
7259 ToPromote.insert(Op32.getNode());
7260 return true;
7261 }
7262
7263 // Next, check for those instructions we can look through.
7264
7265 // Assuming the mask does not wrap around, then the higher-order bits are
7266 // taken directly from the first operand.
7267 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7268 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7269 SmallPtrSet<SDNode *, 16> ToPromote1;
7270 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7271 return false;
7272
7273 ToPromote.insert(Op32.getNode());
7274 ToPromote.insert_range(ToPromote1);
7275 return true;
7276 }
7277
7278 // For OR, the higher-order bits are zero if that is true for both operands.
7279 // For SELECT_I4, the same is true (but the relevant operand numbers are
7280 // shifted by 1).
7281 if (Op32.getMachineOpcode() == PPC::OR ||
7282 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7283 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7284 SmallPtrSet<SDNode *, 16> ToPromote1;
7285 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7286 return false;
7287 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7288 return false;
7289
7290 ToPromote.insert(Op32.getNode());
7291 ToPromote.insert_range(ToPromote1);
7292 return true;
7293 }
7294
7295 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7296 // zero, and also for the constant to be positive (so that it is not sign
7297 // extended).
7298 if (Op32.getMachineOpcode() == PPC::ORI ||
7299 Op32.getMachineOpcode() == PPC::ORIS) {
7300 SmallPtrSet<SDNode *, 16> ToPromote1;
7301 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7302 return false;
7303 if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7304 return false;
7305
7306 ToPromote.insert(Op32.getNode());
7307 ToPromote.insert_range(ToPromote1);
7308 return true;
7309 }
7310
7311 // The higher-order bits of AND are zero if that is true for at least one of
7312 // the operands.
7313 if (Op32.getMachineOpcode() == PPC::AND) {
7314 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7315 bool Op0OK =
7316 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7317 bool Op1OK =
7318 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7319 if (!Op0OK && !Op1OK)
7320 return false;
7321
7322 ToPromote.insert(Op32.getNode());
7323
7324 if (Op0OK)
7325 ToPromote.insert_range(ToPromote1);
7326
7327 if (Op1OK)
7328 ToPromote.insert_range(ToPromote2);
7329
7330 return true;
7331 }
7332
7333 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7334 // of the first operand, or if the second operand is positive (so that it is
7335 // not sign extended).
7336 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7337 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7338 SmallPtrSet<SDNode *, 16> ToPromote1;
7339 bool Op0OK =
7340 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7341 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7342 if (!Op0OK && !Op1OK)
7343 return false;
7344
7345 ToPromote.insert(Op32.getNode());
7346
7347 if (Op0OK)
7348 ToPromote.insert_range(ToPromote1);
7349
7350 return true;
7351 }
7352
7353 return false;
7354}
7355
7356void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7357 if (!Subtarget->isPPC64())
7358 return;
7359
7360 // When we zero-extend from i32 to i64, we use a pattern like this:
7361 // def : Pat<(i64 (zext i32:$in)),
7362 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7363 // 0, 32)>;
7364 // There are several 32-bit shift/rotate instructions, however, that will
7365 // clear the higher-order bits of their output, rendering the RLDICL
7366 // unnecessary. When that happens, we remove it here, and redefine the
7367 // relevant 32-bit operation to be a 64-bit operation.
7368
7369 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7370
7371 bool MadeChange = false;
7372 while (Position != CurDAG->allnodes_begin()) {
7373 SDNode *N = &*--Position;
7374 // Skip dead nodes and any non-machine opcodes.
7375 if (N->use_empty() || !N->isMachineOpcode())
7376 continue;
7377
7378 if (N->getMachineOpcode() != PPC::RLDICL)
7379 continue;
7380
7381 if (N->getConstantOperandVal(1) != 0 ||
7382 N->getConstantOperandVal(2) != 32)
7383 continue;
7384
7385 SDValue ISR = N->getOperand(0);
7386 if (!ISR.isMachineOpcode() ||
7387 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7388 continue;
7389
7390 if (!ISR.hasOneUse())
7391 continue;
7392
7393 if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7394 continue;
7395
7396 SDValue IDef = ISR.getOperand(0);
7397 if (!IDef.isMachineOpcode() ||
7398 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7399 continue;
7400
7401 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7402 // can get rid of it.
7403
7404 SDValue Op32 = ISR->getOperand(1);
7405 if (!Op32.isMachineOpcode())
7406 continue;
7407
7408 // There are some 32-bit instructions that always clear the high-order 32
7409 // bits, there are also some instructions (like AND) that we can look
7410 // through.
7411 SmallPtrSet<SDNode *, 16> ToPromote;
7412 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7413 continue;
7414
7415 // If the ToPromote set contains nodes that have uses outside of the set
7416 // (except for the original INSERT_SUBREG), then abort the transformation.
7417 bool OutsideUse = false;
7418 for (SDNode *PN : ToPromote) {
7419 for (SDNode *UN : PN->users()) {
7420 if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7421 OutsideUse = true;
7422 break;
7423 }
7424 }
7425
7426 if (OutsideUse)
7427 break;
7428 }
7429 if (OutsideUse)
7430 continue;
7431
7432 MadeChange = true;
7433
7434 // We now know that this zero extension can be removed by promoting to
7435 // nodes in ToPromote to 64-bit operations, where for operations in the
7436 // frontier of the set, we need to insert INSERT_SUBREGs for their
7437 // operands.
7438 for (SDNode *PN : ToPromote) {
7439 unsigned NewOpcode;
7440 switch (PN->getMachineOpcode()) {
7441 default:
7442 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7443 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7444 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7445 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7446 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7447 case PPC::LI: NewOpcode = PPC::LI8; break;
7448 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7449 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7450 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7451 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7452 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7453 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7454 case PPC::OR: NewOpcode = PPC::OR8; break;
7455 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7456 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7457 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7458 case PPC::AND: NewOpcode = PPC::AND8; break;
7459 case PPC::ANDI_rec:
7460 NewOpcode = PPC::ANDI8_rec;
7461 break;
7462 case PPC::ANDIS_rec:
7463 NewOpcode = PPC::ANDIS8_rec;
7464 break;
7465 }
7466
7467 // Note: During the replacement process, the nodes will be in an
7468 // inconsistent state (some instructions will have operands with values
7469 // of the wrong type). Once done, however, everything should be right
7470 // again.
7471
7473 for (const SDValue &V : PN->ops()) {
7474 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7475 !isa<ConstantSDNode>(V)) {
7476 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7477 SDNode *ReplOp =
7478 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7479 ISR.getNode()->getVTList(), ReplOpOps);
7480 Ops.push_back(SDValue(ReplOp, 0));
7481 } else {
7482 Ops.push_back(V);
7483 }
7484 }
7485
7486 // Because all to-be-promoted nodes only have users that are other
7487 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7488 // the i32 result value type with i64.
7489
7490 SmallVector<EVT, 2> NewVTs;
7491 SDVTList VTs = PN->getVTList();
7492 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7493 if (VTs.VTs[i] == MVT::i32)
7494 NewVTs.push_back(MVT::i64);
7495 else
7496 NewVTs.push_back(VTs.VTs[i]);
7497
7498 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7499 LLVM_DEBUG(PN->dump(CurDAG));
7500
7501 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7502
7503 LLVM_DEBUG(dbgs() << "\nNew: ");
7504 LLVM_DEBUG(PN->dump(CurDAG));
7505 LLVM_DEBUG(dbgs() << "\n");
7506 }
7507
7508 // Now we replace the original zero extend and its associated INSERT_SUBREG
7509 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7510 // return an i64).
7511
7512 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7513 LLVM_DEBUG(N->dump(CurDAG));
7514 LLVM_DEBUG(dbgs() << "\nNew: ");
7515 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7516 LLVM_DEBUG(dbgs() << "\n");
7517
7518 ReplaceUses(N, Op32.getNode());
7519 }
7520
7521 if (MadeChange)
7522 CurDAG->RemoveDeadNodes();
7523}
7524
7525static bool isVSXSwap(SDValue N) {
7526 if (!N->isMachineOpcode())
7527 return false;
7528 unsigned Opc = N->getMachineOpcode();
7529
7530 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7531 // operand is 2.
7532 if (Opc == PPC::XXPERMDIs) {
7533 return isa<ConstantSDNode>(N->getOperand(1)) &&
7534 N->getConstantOperandVal(1) == 2;
7535 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7536 return N->getOperand(0) == N->getOperand(1) &&
7537 isa<ConstantSDNode>(N->getOperand(2)) &&
7538 N->getConstantOperandVal(2) == 2;
7539 }
7540
7541 return false;
7542}
7543
7544// TODO: Make this complete and replace with a table-gen bit.
7546 if (!N->isMachineOpcode())
7547 return false;
7548 unsigned Opc = N->getMachineOpcode();
7549
7550 switch (Opc) {
7551 default:
7552 return false;
7553 case PPC::VAVGSB:
7554 case PPC::VAVGUB:
7555 case PPC::VAVGSH:
7556 case PPC::VAVGUH:
7557 case PPC::VAVGSW:
7558 case PPC::VAVGUW:
7559 case PPC::VMAXFP:
7560 case PPC::VMAXSB:
7561 case PPC::VMAXUB:
7562 case PPC::VMAXSH:
7563 case PPC::VMAXUH:
7564 case PPC::VMAXSW:
7565 case PPC::VMAXUW:
7566 case PPC::VMINFP:
7567 case PPC::VMINSB:
7568 case PPC::VMINUB:
7569 case PPC::VMINSH:
7570 case PPC::VMINUH:
7571 case PPC::VMINSW:
7572 case PPC::VMINUW:
7573 case PPC::VADDFP:
7574 case PPC::VADDUBM:
7575 case PPC::VADDUHM:
7576 case PPC::VADDUWM:
7577 case PPC::VSUBFP:
7578 case PPC::VSUBUBM:
7579 case PPC::VSUBUHM:
7580 case PPC::VSUBUWM:
7581 case PPC::VAND:
7582 case PPC::VANDC:
7583 case PPC::VOR:
7584 case PPC::VORC:
7585 case PPC::VXOR:
7586 case PPC::VNOR:
7587 case PPC::VMULUWM:
7588 return true;
7589 }
7590}
7591
7592// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7593// lane-insensitive.
7594static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7595 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7596 // TODO: Can we put this a common method for DAG?
7597 auto SkipRCCopy = [](SDValue V) {
7598 while (V->isMachineOpcode() &&
7599 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7600 // All values in the chain should have single use.
7601 if (V->use_empty() || !V->user_begin()->isOnlyUserOf(V.getNode()))
7602 return SDValue();
7603 V = V->getOperand(0);
7604 }
7605 return V.hasOneUse() ? V : SDValue();
7606 };
7607
7608 SDValue VecOp = SkipRCCopy(N->getOperand(0));
7609 if (!VecOp || !isLaneInsensitive(VecOp))
7610 return;
7611
7612 SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7613 RHS = SkipRCCopy(VecOp.getOperand(1));
7614 if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7615 return;
7616
7617 // These swaps may still have chain-uses here, count on dead code elimination
7618 // in following passes to remove them.
7619 DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7620 DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7621 DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7622}
7623
7624// Check if an SDValue has the 'aix-small-tls' global variable attribute.
7625static bool hasAIXSmallTLSAttr(SDValue Val) {
7627 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(GA->getGlobal()))
7628 if (GV->hasAttribute("aix-small-tls"))
7629 return true;
7630
7631 return false;
7632}
7633
7634// Is an ADDI eligible for folding for non-TOC-based local-[exec|dynamic]
7635// accesses?
7637 SDValue ADDIToFold) {
7638 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7639 // accesses), is truly an ADDI.
7640 if (!ADDIToFold.isMachineOpcode() ||
7641 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7642 return false;
7643
7644 // Folding is only allowed for the AIX small-local-[exec|dynamic] TLS target
7645 // attribute or when the 'aix-small-tls' global variable attribute is present.
7646 const PPCSubtarget &Subtarget =
7648 SDValue TLSVarNode = ADDIToFold.getOperand(1);
7649 if (!(Subtarget.hasAIXSmallLocalDynamicTLS() ||
7650 Subtarget.hasAIXSmallLocalExecTLS() || hasAIXSmallTLSAttr(TLSVarNode)))
7651 return false;
7652
7653 // The second operand of the ADDIToFold should be the global TLS address
7654 // (the local-exec TLS variable). We only perform the folding if the TLS
7655 // variable is the second operand.
7657 if (!GA)
7658 return false;
7659
7660 if (DAG->getTarget().getTLSModel(GA->getGlobal()) == TLSModel::LocalExec) {
7661 // The first operand of the ADDIToFold should be the thread pointer.
7662 // This transformation is only performed if the first operand of the
7663 // addi is the thread pointer.
7664 SDValue TPRegNode = ADDIToFold.getOperand(0);
7665 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7666 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7667 return false;
7668 }
7669
7670 // The local-[exec|dynamic] TLS variable should only have the
7671 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flags, so this optimization is not
7672 // performed otherwise if the flag is not set.
7673 unsigned TargetFlags = GA->getTargetFlags();
7674 if (!(TargetFlags == PPCII::MO_TPREL_FLAG ||
7675 TargetFlags == PPCII::MO_TLSLD_FLAG))
7676 return false;
7677
7678 // If all conditions are satisfied, the ADDI is valid for folding.
7679 return true;
7680}
7681
7682// For non-TOC-based local-[exec|dynamic] access where an addi is feeding into
7683// another addi, fold this sequence into a single addi if possible. Before this
7684// optimization, the sequence appears as:
7685// addi rN, r13, sym@[le|ld]
7686// addi rM, rN, imm
7687// After this optimization, we can fold the two addi into a single one:
7688// addi rM, r13, sym@[le|ld] + imm
7690 if (N->getMachineOpcode() != PPC::ADDI8)
7691 return;
7692
7693 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7694 // we want optimized out.
7695 SDValue InitialADDI = N->getOperand(0);
7696
7697 if (!isEligibleToFoldADDIForFasterLocalAccesses(DAG, InitialADDI))
7698 return;
7699
7700 // The second operand of the InitialADDI should be the global TLS address
7701 // (the local-[exec|dynamic] TLS variable), with the
7702 // [MO_TPREL_FLAG|MO_TLSLD_FLAG] target flag. This has been checked in
7703 // isEligibleToFoldADDIForFasterLocalAccesses().
7704 SDValue TLSVarNode = InitialADDI.getOperand(1);
7706 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7707 "local-[exec|dynamic] accesses!");
7708 unsigned TargetFlags = GA->getTargetFlags();
7709
7710 // The second operand of the addi that we want to preserve will be an
7711 // immediate. We add this immediate, together with the address of the TLS
7712 // variable found in InitialADDI, in order to preserve the correct TLS address
7713 // information during assembly printing. The offset is likely to be non-zero
7714 // when we end up in this case.
7715 int Offset = N->getConstantOperandVal(1);
7716 TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7717 Offset, TargetFlags);
7718
7719 (void)DAG->UpdateNodeOperands(N, InitialADDI.getOperand(0), TLSVarNode);
7720 if (InitialADDI.getNode()->use_empty())
7721 DAG->RemoveDeadNode(InitialADDI.getNode());
7722}
7723
7724void PPCDAGToDAGISel::PeepholePPC64() {
7725 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7726
7727 while (Position != CurDAG->allnodes_begin()) {
7728 SDNode *N = &*--Position;
7729 // Skip dead nodes and any non-machine opcodes.
7730 if (N->use_empty() || !N->isMachineOpcode())
7731 continue;
7732
7733 if (isVSXSwap(SDValue(N, 0)))
7734 reduceVSXSwap(N, CurDAG);
7735
7736 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7737 // accesses.
7739
7740 unsigned FirstOp;
7741 unsigned StorageOpcode = N->getMachineOpcode();
7742 bool RequiresMod4Offset = false;
7743
7744 switch (StorageOpcode) {
7745 default: continue;
7746
7747 case PPC::LWA:
7748 case PPC::LD:
7749 case PPC::DFLOADf64:
7750 case PPC::DFLOADf32:
7751 RequiresMod4Offset = true;
7752 [[fallthrough]];
7753 case PPC::LBZ:
7754 case PPC::LBZ8:
7755 case PPC::LFD:
7756 case PPC::LFS:
7757 case PPC::LHA:
7758 case PPC::LHA8:
7759 case PPC::LHZ:
7760 case PPC::LHZ8:
7761 case PPC::LWZ:
7762 case PPC::LWZ8:
7763 FirstOp = 0;
7764 break;
7765
7766 case PPC::STD:
7767 case PPC::DFSTOREf64:
7768 case PPC::DFSTOREf32:
7769 RequiresMod4Offset = true;
7770 [[fallthrough]];
7771 case PPC::STB:
7772 case PPC::STB8:
7773 case PPC::STFD:
7774 case PPC::STFS:
7775 case PPC::STH:
7776 case PPC::STH8:
7777 case PPC::STW:
7778 case PPC::STW8:
7779 FirstOp = 1;
7780 break;
7781 }
7782
7783 // If this is a load or store with a zero offset, or within the alignment,
7784 // we may be able to fold an add-immediate into the memory operation.
7785 // The check against alignment is below, as it can't occur until we check
7786 // the arguments to N
7787 if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
7788 continue;
7789
7790 SDValue Base = N->getOperand(FirstOp + 1);
7791 if (!Base.isMachineOpcode())
7792 continue;
7793
7794 unsigned Flags = 0;
7795 bool ReplaceFlags = true;
7796
7797 // When the feeding operation is an add-immediate of some sort,
7798 // determine whether we need to add relocation information to the
7799 // target flags on the immediate operand when we fold it into the
7800 // load instruction.
7801 //
7802 // For something like ADDItocL8, the relocation information is
7803 // inferred from the opcode; when we process it in the AsmPrinter,
7804 // we add the necessary relocation there. A load, though, can receive
7805 // relocation from various flavors of ADDIxxx, so we need to carry
7806 // the relocation information in the target flags.
7807 switch (Base.getMachineOpcode()) {
7808 default: continue;
7809
7810 case PPC::ADDI8:
7811 case PPC::ADDI:
7812 // In some cases (such as TLS) the relocation information
7813 // is already in place on the operand, so copying the operand
7814 // is sufficient.
7815 ReplaceFlags = false;
7816 break;
7817 case PPC::ADDIdtprelL:
7819 break;
7820 case PPC::ADDItlsldL:
7822 break;
7823 case PPC::ADDItocL8:
7824 // Skip the following peephole optimizations for ADDItocL8 on AIX which
7825 // is used for toc-data access.
7826 if (Subtarget->isAIXABI())
7827 continue;
7829 break;
7830 }
7831
7832 SDValue ImmOpnd = Base.getOperand(1);
7833
7834 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7835 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7836 // we might have needed different @ha relocation values for the offset
7837 // pointers).
7838 int MaxDisplacement = 7;
7839 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7840 const GlobalValue *GV = GA->getGlobal();
7841 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7842 MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
7843 }
7844
7845 bool UpdateHBase = false;
7846 SDValue HBase = Base.getOperand(0);
7847
7848 int Offset = N->getConstantOperandVal(FirstOp);
7849 if (ReplaceFlags) {
7850 if (Offset < 0 || Offset > MaxDisplacement) {
7851 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7852 // one use, then we can do this for any offset, we just need to also
7853 // update the offset (i.e. the symbol addend) on the addis also.
7854 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7855 continue;
7856
7857 if (!HBase.isMachineOpcode() ||
7858 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7859 continue;
7860
7861 if (!Base.hasOneUse() || !HBase.hasOneUse())
7862 continue;
7863
7864 SDValue HImmOpnd = HBase.getOperand(1);
7865 if (HImmOpnd != ImmOpnd)
7866 continue;
7867
7868 UpdateHBase = true;
7869 }
7870 } else {
7871 // Global addresses can be folded, but only if they are sufficiently
7872 // aligned.
7873 if (RequiresMod4Offset) {
7874 if (GlobalAddressSDNode *GA =
7876 const GlobalValue *GV = GA->getGlobal();
7877 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7878 if (Alignment < 4)
7879 continue;
7880 }
7881 }
7882
7883 // If we're directly folding the addend from an addi instruction, then:
7884 // 1. In general, the offset on the memory access must be zero.
7885 // 2. If the addend is a constant, then it can be combined with a
7886 // non-zero offset, but only if the result meets the encoding
7887 // requirements.
7888 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
7889 Offset += C->getSExtValue();
7890
7891 if (RequiresMod4Offset && (Offset % 4) != 0)
7892 continue;
7893
7894 if (!isInt<16>(Offset))
7895 continue;
7896
7897 ImmOpnd = CurDAG->getSignedTargetConstant(Offset, SDLoc(ImmOpnd),
7898 ImmOpnd.getValueType());
7899 } else if (Offset != 0) {
7900 // This optimization is performed for non-TOC-based local-[exec|dynamic]
7901 // accesses.
7903 // Add the non-zero offset information into the load or store
7904 // instruction to be used for non-TOC-based local-[exec|dynamic]
7905 // accesses.
7906 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7907 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7908 "addi into local-[exec|dynamic] accesses!");
7909 ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7910 MVT::i64, Offset,
7911 GA->getTargetFlags());
7912 } else
7913 continue;
7914 }
7915 }
7916
7917 // We found an opportunity. Reverse the operands from the add
7918 // immediate and substitute them into the load or store. If
7919 // needed, update the target flags for the immediate operand to
7920 // reflect the necessary relocation information.
7921 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7922 LLVM_DEBUG(Base->dump(CurDAG));
7923 LLVM_DEBUG(dbgs() << "\nN: ");
7924 LLVM_DEBUG(N->dump(CurDAG));
7925 LLVM_DEBUG(dbgs() << "\n");
7926
7927 // If the relocation information isn't already present on the
7928 // immediate operand, add it now.
7929 if (ReplaceFlags) {
7930 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7931 SDLoc dl(GA);
7932 const GlobalValue *GV = GA->getGlobal();
7933 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7934 // We can't perform this optimization for data whose alignment
7935 // is insufficient for the instruction encoding.
7936 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7937 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7938 continue;
7939 }
7940 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7941 } else if (ConstantPoolSDNode *CP =
7943 const Constant *C = CP->getConstVal();
7944 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7945 Offset, Flags);
7946 }
7947 }
7948
7949 if (FirstOp == 1) // Store
7950 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7951 Base.getOperand(0), N->getOperand(3));
7952 else // Load
7953 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7954 N->getOperand(2));
7955
7956 if (UpdateHBase)
7957 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
7958 ImmOpnd);
7959
7960 // The add-immediate may now be dead, in which case remove it.
7961 if (Base.getNode()->use_empty())
7962 CurDAG->RemoveDeadNode(Base.getNode());
7963 }
7964}
7965
7966/// createPPCISelDag - This pass converts a legalized DAG into a
7967/// PowerPC-specific DAG, ready for instruction scheduling.
7968///
7970 CodeGenOptLevel OptLevel) {
7971 return new PPCDAGToDAGISelLegacy(TM, OptLevel);
7972}
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, uint64_t &Imm)
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
MachineBasicBlock MachineBasicBlock::iterator MBBI
Function Alias Analysis false
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
#define T
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
static bool isEligibleToFoldADDIForFasterLocalAccesses(SelectionDAG *DAG, SDValue ADDIToFold)
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base)
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static bool hasTocDataAttr(SDValue Val)
static void foldADDIForFasterLocalAccesses(SDNode *N, SelectionDAG *DAG)
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG)
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
static bool isLaneInsensitive(SDValue N)
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget, const TargetMachine &TM, const SDNode *Node)
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget)
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB)
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
static SDNode * selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static bool hasAIXSmallTLSAttr(SDValue Val)
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
static bool isSWTestOp(SDValue N)
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt=nullptr)
ICmpInGPRType
@ ICGPR_ZextI32
@ ICGPR_I64
@ ICGPR_All
@ ICGPR_None
@ ICGPR_NonExtIn
@ ICGPR_Sext
@ ICGPR_I32
@ ICGPR_SextI64
@ ICGPR_ZextI64
@ ICGPR_SextI32
@ ICGPR_Zext
static bool isVSXSwap(SDValue N)
static uint32_t findContiguousZerosAtLeast(uint64_t Imm, unsigned Num)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
#define PASS_NAME
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
LLVM_ABI APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition APInt.cpp:1185
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:1016
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
MachineBasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
LLVM_ABI BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
int64_t getSExtValue() const
A debug info location.
Definition DebugLoc.h:123
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
BranchProbabilityInfo * BPI
MachineBasicBlock * MBB
MBB - The current block.
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists.
static StringRef getMemConstraintName(ConstraintCode C)
Definition InlineAsm.h:475
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setROPProtectionHashSaveIndex(int Idx)
static int getRecordFormOpcode(unsigned Opcode)
bool is32BitELFABI() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCInstrInfo * getInstrInfo() const override
MCRegister getThreadPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
bool isTargetELF() const
CodeModel::Model getCodeModel(const TargetMachine &TM, const GlobalValue *GV) const
Calculates the effective code model for argument GV.
bool isELFv2ABI() const
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
LLVM_ABI void dump() const
Dump this node, for debugging.
bool hasOneUse() const
Return true if there is exactly one use of this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool runOnMachineFunction(MachineFunction &mf)
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
const TargetLowering & getTargetLoweringInfo() const
allnodes_const_iterator allnodes_begin() const
allnodes_const_iterator allnodes_end() const
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
LLVM_ABI void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops, SDNodeFlags Flags=SDNodeFlags())
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
iterator_range< allnodes_iterator > allnodes()
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
ilist< SDNode >::iterator allnodes_iterator
int getMaskElt(unsigned Idx) const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Primary interface to the complete machine description for the target machine.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool isPositionIndependent() const
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned getID() const
Return the register class ID number.
virtual const TargetLowering * getTargetLowering() const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:964
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
#define INT64_MAX
Definition DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:185
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
@ MO_TLSLD_LO
Definition PPC.h:186
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:152
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:115
@ MO_DTPREL_LO
These values identify relocations on immediates folded into memory operations.
Definition PPC.h:185
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:142
@ MO_TOC_LO
Definition PPC.h:187
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Define
Register definition.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:313
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:261
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition MathExtras.h:261
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
int countl_one(T Value)
Count the number of ones from the most significant bit to the first zero bit.
Definition bit.h:300
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Other
Any other memory.
Definition ModRef.h:68
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
unsigned int NumVTs