LLVM 19.0.0git
PPCISelDAGToDAG.cpp
Go to the documentation of this file.
1//===-- PPCISelDAGToDAG.cpp - PPC --pattern matching inst selector --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines a pattern matching instruction selector for PowerPC,
10// converting from a legalized dag to a PPC dag.
11//
12//===----------------------------------------------------------------------===//
13
16#include "PPC.h"
17#include "PPCISelLowering.h"
19#include "PPCSubtarget.h"
20#include "PPCTargetMachine.h"
21#include "llvm/ADT/APInt.h"
22#include "llvm/ADT/APSInt.h"
23#include "llvm/ADT/DenseMap.h"
24#include "llvm/ADT/STLExtras.h"
27#include "llvm/ADT/Statistic.h"
43#include "llvm/IR/BasicBlock.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
47#include "llvm/IR/InlineAsm.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/IntrinsicsPowerPC.h"
50#include "llvm/IR/Module.h"
55#include "llvm/Support/Debug.h"
60#include <algorithm>
61#include <cassert>
62#include <cstdint>
63#include <iterator>
64#include <limits>
65#include <memory>
66#include <new>
67#include <tuple>
68#include <utility>
69
70using namespace llvm;
71
72#define DEBUG_TYPE "ppc-isel"
73#define PASS_NAME "PowerPC DAG->DAG Pattern Instruction Selection"
74
75STATISTIC(NumSextSetcc,
76 "Number of (sext(setcc)) nodes expanded into GPR sequence.");
77STATISTIC(NumZextSetcc,
78 "Number of (zext(setcc)) nodes expanded into GPR sequence.");
79STATISTIC(SignExtensionsAdded,
80 "Number of sign extensions for compare inputs added.");
81STATISTIC(ZeroExtensionsAdded,
82 "Number of zero extensions for compare inputs added.");
83STATISTIC(NumLogicOpsOnComparison,
84 "Number of logical ops on i1 values calculated in GPR.");
85STATISTIC(OmittedForNonExtendUses,
86 "Number of compares not eliminated as they have non-extending uses.");
87STATISTIC(NumP9Setb,
88 "Number of compares lowered to setb.");
89
90// FIXME: Remove this once the bug has been fixed!
91cl::opt<bool> ANDIGlueBug("expose-ppc-andi-glue-bug",
92cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden);
93
94static cl::opt<bool>
95 UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true),
96 cl::desc("use aggressive ppc isel for bit permutations"),
99 "ppc-bit-perm-rewriter-stress-rotates",
100 cl::desc("stress rotate selection in aggressive ppc isel for "
101 "bit permutations"),
102 cl::Hidden);
103
105 "ppc-use-branch-hint", cl::init(true),
106 cl::desc("Enable static hinting of branches on ppc"),
107 cl::Hidden);
108
110 "ppc-tls-opt", cl::init(true),
111 cl::desc("Enable tls optimization peephole"),
112 cl::Hidden);
113
117
119 "ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All),
120 cl::desc("Specify the types of comparisons to emit GPR-only code for."),
121 cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."),
122 clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."),
123 clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."),
124 clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."),
125 clEnumValN(ICGPR_NonExtIn, "nonextin",
126 "Only comparisons where inputs don't need [sz]ext."),
127 clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."),
128 clEnumValN(ICGPR_ZextI32, "zexti32",
129 "Only i32 comparisons with zext result."),
130 clEnumValN(ICGPR_ZextI64, "zexti64",
131 "Only i64 comparisons with zext result."),
132 clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."),
133 clEnumValN(ICGPR_SextI32, "sexti32",
134 "Only i32 comparisons with sext result."),
135 clEnumValN(ICGPR_SextI64, "sexti64",
136 "Only i64 comparisons with sext result.")));
137namespace {
138
139 //===--------------------------------------------------------------------===//
140 /// PPCDAGToDAGISel - PPC specific code to select PPC machine
141 /// instructions for SelectionDAG operations.
142 ///
143 class PPCDAGToDAGISel : public SelectionDAGISel {
 // Target machine reference bound in the constructor.
144 const PPCTargetMachine &TM;
 // Per-function state: both pointers are (re)assigned at the start of
 // runOnMachineFunction.
145 const PPCSubtarget *Subtarget = nullptr;
146 const PPCTargetLowering *PPCLowering = nullptr;
 // Register holding the global base address. Zero means "not materialized
 // yet"; getGlobalBaseReg() fills it in lazily and runOnMachineFunction()
 // resets it.
147 unsigned GlobalBaseReg = 0;
148
149 public:
150 static char ID;
151
152 PPCDAGToDAGISel() = delete;
153
154 explicit PPCDAGToDAGISel(PPCTargetMachine &tm, CodeGenOptLevel OptLevel)
155 : SelectionDAGISel(ID, tm, OptLevel), TM(tm) {}
156
 /// Per-function entry point: clears the cached global base register and
 /// caches the subtarget and its lowering object for \p MF. When the
 /// subtarget reports ROP protection, an 8-byte, 8-byte-aligned stack object
 /// is created for the hash.
 // NOTE(review): `Result` is not used in the visible lines and no call into
 // the base-class implementation is visible; some lines of this body appear
 // to be absent from this listing -- confirm against the full file.
157 bool runOnMachineFunction(MachineFunction &MF) override {
158 // Make sure we re-emit a set of the global base reg if necessary
159 GlobalBaseReg = 0;
160 Subtarget = &MF.getSubtarget<PPCSubtarget>();
161 PPCLowering = Subtarget->getTargetLowering();
162 if (Subtarget->hasROPProtect()) {
163 // Create a place on the stack for the ROP Protection Hash.
164 // The ROP Protection Hash will always be 8 bytes and aligned to 8
165 // bytes.
166 MachineFrameInfo &MFI = MF.getFrameInfo();
168 const int Result = MFI.CreateStackObject(8, Align(8), false);
170 }
172
173 return true;
174 }
175
176 void PreprocessISelDAG() override;
177 void PostprocessISelDAG() override;
178
179 /// getI16Imm - Return a target constant with the specified value, of type
180 /// i16.
181 inline SDValue getI16Imm(unsigned Imm, const SDLoc &dl) {
182 return CurDAG->getTargetConstant(Imm, dl, MVT::i16);
183 }
184
185 /// getI32Imm - Return a target constant with the specified value, of type
186 /// i32.
187 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
188 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
189 }
190
191 /// getI64Imm - Return a target constant with the specified value, of type
192 /// i64.
193 inline SDValue getI64Imm(uint64_t Imm, const SDLoc &dl) {
194 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
195 }
196
197 /// getSmallIPtrImm - Return a target constant of pointer type.
 /// The pointer type is the one PPCLowering reports for the current DAG's
 /// data layout.
198 inline SDValue getSmallIPtrImm(uint64_t Imm, const SDLoc &dl) {
199 return CurDAG->getTargetConstant(
200 Imm, dl, PPCLowering->getPointerTy(CurDAG->getDataLayout()));
201 }
202
203 /// isRotateAndMask - Returns true if Mask and Shift can be folded into a
204 /// rotate and mask opcode and mask operation.
205 static bool isRotateAndMask(SDNode *N, unsigned Mask, bool isShiftMask,
206 unsigned &SH, unsigned &MB, unsigned &ME);
207
208 /// getGlobalBaseReg - insert code into the entry mbb to materialize the PIC
209 /// base register. Return the virtual register that holds this value.
210 SDNode *getGlobalBaseReg();
211
212 void selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset = 0);
213
214 // Select - Convert the specified operand from a target-independent to a
215 // target-specific node if it hasn't already been changed.
216 void Select(SDNode *N) override;
217
218 bool tryBitfieldInsert(SDNode *N);
219 bool tryBitPermutation(SDNode *N);
220 bool tryIntCompareInGPR(SDNode *N);
221
222 // tryTLSXFormLoad - Convert an ISD::LOAD fed by a PPCISD::ADD_TLS into
223 // an X-Form load instruction with the offset being a relocation coming from
224 // the PPCISD::ADD_TLS.
225 bool tryTLSXFormLoad(LoadSDNode *N);
226 // tryTLSXFormStore - Convert an ISD::STORE fed by a PPCISD::ADD_TLS into
227 // an X-Form store instruction with the offset being a relocation coming from
228 // the PPCISD::ADD_TLS.
229 bool tryTLSXFormStore(StoreSDNode *N);
230 /// SelectCC - Select a comparison of the specified values with the
231 /// specified condition code, returning the CR# of the expression.
232 SDValue SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
233 const SDLoc &dl, SDValue Chain = SDValue());
234
235 /// SelectAddrImmOffs - Return true if the operand is valid for a preinc
236 /// immediate field. Note that the operand at this point is already the
237 /// result of a prior SelectAddressRegImm call.
 /// Only TargetConstant and TargetGlobalAddress operands are accepted; on
 /// success \p Out is set to \p N unchanged.
238 bool SelectAddrImmOffs(SDValue N, SDValue &Out) const {
239 if (N.getOpcode() == ISD::TargetConstant ||
240 N.getOpcode() == ISD::TargetGlobalAddress) {
241 Out = N;
242 return true;
243 }
244
245 return false;
246 }
247
248 /// SelectDSForm - Returns true if address N can be represented by the
249 /// addressing mode of DSForm instructions (a base register, plus a signed
250 /// 16-bit displacement that is a multiple of 4).
251 bool SelectDSForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
252 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
253 Align(4)) == PPC::AM_DSForm;
254 }
255
256 /// SelectDQForm - Returns true if address N can be represented by the
257 /// addressing mode of DQForm instructions (a base register, plus a signed
258 /// 16-bit displacement that is a multiple of 16).
259 bool SelectDQForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
260 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
261 Align(16)) == PPC::AM_DQForm;
262 }
263
264 /// SelectDForm - Returns true if address N can be represented by
265 /// the addressing mode of DForm instructions (a base register, plus a
266 /// signed 16-bit immediate).
267 bool SelectDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
268 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
269 std::nullopt) == PPC::AM_DForm;
270 }
271
272 /// SelectPCRelForm - Returns true if address N can be represented by
273 /// PC-Relative addressing mode.
274 bool SelectPCRelForm(SDNode *Parent, SDValue N, SDValue &Disp,
275 SDValue &Base) {
276 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
277 std::nullopt) == PPC::AM_PCRel;
278 }
279
280 /// SelectPDForm - Returns true if address N can be represented by Prefixed
281 /// DForm addressing mode (a base register, plus a signed 34-bit immediate).
 // NOTE(review): the right-hand side of the comparison below is not among
 // the visible lines -- confirm against the full file.
282 bool SelectPDForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
283 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
284 std::nullopt) ==
286 }
287
288 /// SelectXForm - Returns true if address N can be represented by the
289 /// addressing mode of XForm instructions (an indexed [r+r] operation).
290 bool SelectXForm(SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base) {
291 return PPCLowering->SelectOptimalAddrMode(Parent, N, Disp, Base, *CurDAG,
292 std::nullopt) == PPC::AM_XForm;
293 }
294
295 /// SelectForceXForm - Given the specified address, force it to be
296 /// represented as an indexed [r+r] operation (an XForm instruction).
 // NOTE(review): the right-hand side of the comparison below is not among
 // the visible lines -- confirm against the full file. \p Parent is not
 // forwarded to SelectForceXFormMode.
297 bool SelectForceXForm(SDNode *Parent, SDValue N, SDValue &Disp,
298 SDValue &Base) {
299 return PPCLowering->SelectForceXFormMode(N, Disp, Base, *CurDAG) ==
301 }
302
303 /// SelectAddrIdx - Given the specified address, check to see if it can be
304 /// represented as an indexed [r+r] operation.
305 /// This is for xform instructions whose associated displacement form is D.
306 /// The last parameter \p 0 means associated D form has no requirement for 16
307 /// bit signed displacement.
308 /// Returns false if it can be represented by [r+imm], which are preferred.
309 bool SelectAddrIdx(SDValue N, SDValue &Base, SDValue &Index) {
310 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
311 std::nullopt);
312 }
313
314 /// SelectAddrIdxX4 - Given the specified address, check to see if it can be
315 /// represented as an indexed [r+r] operation.
316 /// This is for xform instructions whose associated displacement form is DS.
317 /// The last parameter \p 4 means associated DS form 16 bit signed
318 /// displacement must be a multiple of 4.
319 /// Returns false if it can be represented by [r+imm], which are preferred.
320 bool SelectAddrIdxX4(SDValue N, SDValue &Base, SDValue &Index) {
321 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
322 Align(4));
323 }
324
325 /// SelectAddrIdxX16 - Given the specified address, check to see if it can be
326 /// represented as an indexed [r+r] operation.
327 /// This is for xform instructions whose associated displacement form is DQ.
328 /// The last parameter \p 16 means associated DQ form 16 bit signed
329 /// displacement must be a multiple of 16.
330 /// Returns false if it can be represented by [r+imm], which are preferred.
331 bool SelectAddrIdxX16(SDValue N, SDValue &Base, SDValue &Index) {
332 return PPCLowering->SelectAddressRegReg(N, Base, Index, *CurDAG,
333 Align(16));
334 }
335
336 /// SelectAddrIdxOnly - Given the specified address, force it to be
337 /// represented as an indexed [r+r] operation.
338 bool SelectAddrIdxOnly(SDValue N, SDValue &Base, SDValue &Index) {
339 return PPCLowering->SelectAddressRegRegOnly(N, Base, Index, *CurDAG);
340 }
341
342 /// SelectAddrImm - Returns true if the address N can be represented by
343 /// a base register plus a signed 16-bit displacement [r+imm].
344 /// The last parameter \p 0 means D form has no requirement for 16 bit signed
345 /// displacement.
346 bool SelectAddrImm(SDValue N, SDValue &Disp,
347 SDValue &Base) {
348 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
349 std::nullopt);
350 }
351
352 /// SelectAddrImmX4 - Returns true if the address N can be represented by
353 /// a base register plus a signed 16-bit displacement that is a multiple of
354 /// 4 (last parameter). Suitable for use by STD and friends.
355 bool SelectAddrImmX4(SDValue N, SDValue &Disp, SDValue &Base) {
356 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG, Align(4));
357 }
358
359 /// SelectAddrImmX16 - Returns true if the address N can be represented by
360 /// a base register plus a signed 16-bit displacement that is a multiple of
361 /// 16 (last parameter). Suitable for use by STXV and friends.
362 bool SelectAddrImmX16(SDValue N, SDValue &Disp, SDValue &Base) {
363 return PPCLowering->SelectAddressRegImm(N, Disp, Base, *CurDAG,
364 Align(16));
365 }
366
367 /// SelectAddrImmX34 - Returns true if the address N can be represented by
368 /// a base register plus a signed 34-bit displacement. Suitable for use by
369 /// PSTXVP and friends.
370 bool SelectAddrImmX34(SDValue N, SDValue &Disp, SDValue &Base) {
371 return PPCLowering->SelectAddressRegImm34(N, Disp, Base, *CurDAG);
372 }
373
374 // Select an address into a single register.
 // Always succeeds: the address value itself becomes the base.
375 bool SelectAddr(SDValue N, SDValue &Base) {
376 Base = N;
377 return true;
378 }
379
 // Delegates PC-relative address matching to the target lowering object.
380 bool SelectAddrPCRel(SDValue N, SDValue &Base) {
381 return PPCLowering->SelectAddressPCRel(N, Base);
382 }
383
384 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
385 /// inline asm expressions. It is always correct to compute the value into
386 /// a register. The case of adding a (possibly relocatable) constant to a
387 /// register can be improved, but it is wrong to substitute Reg+Reg for
388 /// Reg in an asm, because the load or store opcode would have to change.
 // NOTE(review): the first line of this signature and the case labels that
 // precede the register-class copy below are not among the visible lines --
 // confirm against the full file. In the visible code, the handled path
 // pushes the copied operand onto OutOps and returns false; falling out of
 // the switch returns true.
390 InlineAsm::ConstraintCode ConstraintID,
391 std::vector<SDValue> &OutOps) override {
392 switch(ConstraintID) {
393 default:
394 errs() << "ConstraintID: "
395 << InlineAsm::getMemConstraintName(ConstraintID) << "\n";
396 llvm_unreachable("Unexpected asm memory constraint");
403 // We need to make sure that this one operand does not end up in r0
404 // (because we might end up lowering this as 0(%op)).
405 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
406 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF, /*Kind=*/1);
407 SDLoc dl(Op);
408 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
409 SDValue NewOp =
410 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
411 dl, Op.getValueType(),
412 Op, RC), 0);
413
414 OutOps.push_back(NewOp);
415 return false;
416 }
417 return true;
418 }
419
420// Include the pieces autogenerated from the target description.
421#include "PPCGenDAGISel.inc"
422
423private:
 // Selection helpers that report via their bool result; they are defined
 // outside this class body.
424 bool trySETCC(SDNode *N);
425 bool tryFoldSWTestBRCC(SDNode *N);
426 bool trySelectLoopCountIntrinsic(SDNode *N);
427 bool tryAsSingleRLDICL(SDNode *N);
428 bool tryAsSingleRLDCL(SDNode *N);
429 bool tryAsSingleRLDICR(SDNode *N);
430 bool tryAsSingleRLWINM(SDNode *N);
431 bool tryAsSingleRLWINM8(SDNode *N);
432 bool tryAsSingleRLWIMI(SDNode *N);
433 bool tryAsPairOfRLDICL(SDNode *N);
434 bool tryAsSingleRLDIMI(SDNode *N);
435
436 void PeepholePPC64();
437 void PeepholePPC64ZExt();
438 void PeepholeCROps();
439
440 SDValue combineToCMPB(SDNode *N);
441 void foldBoolExts(SDValue &Res, SDNode *&N);
442
443 bool AllUsersSelectZero(SDNode *N);
444 void SwapAllSelectUsers(SDNode *N);
445
446 bool isOffsetMultipleOf(SDNode *N, unsigned Val) const;
447 void transferMemOperands(SDNode *N, SDNode *Result);
448 };
449
450} // end anonymous namespace
451
452char PPCDAGToDAGISel::ID = 0;
453
454INITIALIZE_PASS(PPCDAGToDAGISel, DEBUG_TYPE, PASS_NAME, false, false)
455
456/// getGlobalBaseReg - Output the instructions required to put the
457/// base address to use for accessing globals into a register.
458///
/// The instruction sequence is only built while GlobalBaseReg is still zero;
/// later calls reuse the cached register. The result is a register node of
/// the target's pointer type.
459SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
460 if (!GlobalBaseReg) {
461 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
462 // Insert the set of GlobalBaseReg into the first MBB of the function
463 MachineBasicBlock &FirstMBB = MF->front();
 // NOTE(review): MBBI is used as the insertion point below, but its
 // declaration is not among the visible lines -- confirm against the full
 // file.
465 const Module *M = MF->getFunction().getParent();
466 DebugLoc dl;
467
 // 32-bit pointers.
468 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) == MVT::i32) {
469 if (Subtarget->isTargetELF()) {
 // ELF uses the fixed physical register R30 as the base.
470 GlobalBaseReg = PPC::R30;
471 if (!Subtarget->isSecurePlt() &&
472 M->getPICLevel() == PICLevel::SmallPIC) {
 // Non-secure-PLT, small PIC: MoveGOTtoLR followed by MFLR into the
 // base register.
473 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MoveGOTtoLR));
474 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
475 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
476 } else {
 // Otherwise: MovePCtoLR + MFLR, then UpdateGBR, which also defines a
 // fresh GPRC temporary.
477 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
478 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
479 Register TempReg = RegInfo->createVirtualRegister(&PPC::GPRCRegClass);
480 BuildMI(FirstMBB, MBBI, dl,
481 TII.get(PPC::UpdateGBR), GlobalBaseReg)
482 .addReg(TempReg, RegState::Define).addReg(GlobalBaseReg);
483 MF->getInfo<PPCFunctionInfo>()->setUsesPICBase(true);
484 }
485 } else {
 // NOTE(review): the left-hand side of the assignment that receives this
 // virtual register is not among the visible lines -- presumably
 // GlobalBaseReg; confirm against the full file.
487 RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
488 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
489 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
490 }
491 } else {
492 // We must ensure that this sequence is dominated by the prologue.
493 // FIXME: This is a bit of a big hammer since we don't get the benefits
494 // of shrink-wrapping whenever we emit this instruction. Considering
495 // this is used in any function where we emit a jump table, this may be
496 // a significant limitation. We should consider inserting this in the
497 // block where it is used and then commoning this sequence up if it
498 // appears in multiple places.
499 // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
500 // MovePCtoLR8.
501 MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
502 GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
503 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
504 BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
505 }
506 }
 // Hand back the (possibly just created) base register as a DAG register
 // node of pointer type.
507 return CurDAG->getRegister(GlobalBaseReg,
508 PPCLowering->getPointerTy(CurDAG->getDataLayout()))
509 .getNode();
510}
511
512// Check if a SDValue has the toc-data attribute.
513static bool hasTocDataAttr(SDValue Val, unsigned PointerSize) {
514 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val);
515 if (!GA)
516 return false;
517
518 const GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(GA->getGlobal());
519 if (!GV)
520 return false;
521
522 if (!GV->hasAttribute("toc-data"))
523 return false;
524 return true;
525}
526
528 const TargetMachine &TM,
529 const SDNode *Node) {
 // NOTE(review): the first line of this signature is not among the visible
 // lines; the body below reads a `Subtarget` that is presumably its first
 // parameter -- confirm against the full file.
530 // If there isn't an attribute to override the module code model
531 // this will be the effective code model.
532 CodeModel::Model ModuleModel = TM.getCodeModel();
533
 // Only operand 0 of Node is inspected; anything that is not a global
 // address falls back to the module-wide model.
534 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Node->getOperand(0));
535 if (!GA)
536 return ModuleModel;
537
538 const GlobalValue *GV = GA->getGlobal();
539 if (!GV)
540 return ModuleModel;
541
 // Let the subtarget pick the code model for this particular global.
542 return Subtarget.getCodeModel(TM, GV);
543}
544
545/// isInt32Immediate - This method tests to see if the node is a 32-bit constant
546/// operand. If so Imm will receive the 32-bit value.
547static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
548 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
549 Imm = N->getAsZExtVal();
550 return true;
551 }
552 return false;
553}
554
555/// isInt64Immediate - This method tests to see if the node is a 64-bit constant
556/// operand. If so Imm will receive the 64-bit value.
557static bool isInt64Immediate(SDNode *N, uint64_t &Imm) {
558 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i64) {
559 Imm = N->getAsZExtVal();
560 return true;
561 }
562 return false;
563}
564
565// isInt32Immediate - This method tests to see if a constant operand.
566// If so Imm will receive the 32 bit value.
567static bool isInt32Immediate(SDValue N, unsigned &Imm) {
568 return isInt32Immediate(N.getNode(), Imm);
569}
570
571/// isInt64Immediate - This method tests to see if the value is a 64-bit
572/// constant operand. If so Imm will receive the 64-bit value.
573static bool isInt64Immediate(SDValue N, uint64_t &Imm) {
574 return isInt64Immediate(N.getNode(), Imm);
575}
576
577static unsigned getBranchHint(unsigned PCC,
578 const FunctionLoweringInfo &FuncInfo,
579 const SDValue &DestMBB) {
580 assert(isa<BasicBlockSDNode>(DestMBB));
581
582 if (!FuncInfo.BPI) return PPC::BR_NO_HINT;
583
584 const BasicBlock *BB = FuncInfo.MBB->getBasicBlock();
585 const Instruction *BBTerm = BB->getTerminator();
586
587 if (BBTerm->getNumSuccessors() != 2) return PPC::BR_NO_HINT;
588
589 const BasicBlock *TBB = BBTerm->getSuccessor(0);
590 const BasicBlock *FBB = BBTerm->getSuccessor(1);
591
592 auto TProb = FuncInfo.BPI->getEdgeProbability(BB, TBB);
593 auto FProb = FuncInfo.BPI->getEdgeProbability(BB, FBB);
594
595 // We only want to handle cases which are easy to predict at static time, e.g.
596 // C++ throw statement, that is very likely not taken, or calling never
597 // returned function, e.g. stdlib exit(). So we set Threshold to filter
598 // unwanted cases.
599 //
600 // Below is LLVM branch weight table, we only want to handle case 1, 2
601 //
602 // Case Taken:Nontaken Example
603 // 1. Unreachable 1048575:1 C++ throw, stdlib exit(),
604 // 2. Invoke-terminating 1:1048575
605 // 3. Coldblock 4:64 __builtin_expect
606 // 4. Loop Branch 124:4 For loop
607 // 5. PH/ZH/FPH 20:12
608 const uint32_t Threshold = 10000;
609
610 if (std::max(TProb, FProb) / Threshold < std::min(TProb, FProb))
611 return PPC::BR_NO_HINT;
612
613 LLVM_DEBUG(dbgs() << "Use branch hint for '" << FuncInfo.Fn->getName()
614 << "::" << BB->getName() << "'\n"
615 << " -> " << TBB->getName() << ": " << TProb << "\n"
616 << " -> " << FBB->getName() << ": " << FProb << "\n");
617
618 const BasicBlockSDNode *BBDN = cast<BasicBlockSDNode>(DestMBB);
619
620 // If Dest BasicBlock is False-BasicBlock (FBB), swap branch probabilities,
621 // because we want 'TProb' stands for 'branch probability' to Dest BasicBlock
622 if (BBDN->getBasicBlock()->getBasicBlock() != TBB)
623 std::swap(TProb, FProb);
624
625 return (TProb > FProb) ? PPC::BR_TAKEN_HINT : PPC::BR_NONTAKEN_HINT;
626}
627
628// isOpcWithIntImmediate - This method tests to see if the node is a specific
629// opcode and that it has a immediate integer right operand.
630// If so Imm will receive the 32 bit value.
631static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
632 return N->getOpcode() == Opc
633 && isInt32Immediate(N->getOperand(1).getNode(), Imm);
634}
635
636void PPCDAGToDAGISel::selectFrameIndex(SDNode *SN, SDNode *N, uint64_t Offset) {
637 SDLoc dl(SN);
638 int FI = cast<FrameIndexSDNode>(N)->getIndex();
639 SDValue TFI = CurDAG->getTargetFrameIndex(FI, N->getValueType(0));
640 unsigned Opc = N->getValueType(0) == MVT::i32 ? PPC::ADDI : PPC::ADDI8;
641 if (SN->hasOneUse())
642 CurDAG->SelectNodeTo(SN, Opc, N->getValueType(0), TFI,
643 getSmallIPtrImm(Offset, dl));
644 else
645 ReplaceNode(SN, CurDAG->getMachineNode(Opc, dl, N->getValueType(0), TFI,
646 getSmallIPtrImm(Offset, dl)));
647}
648
649bool PPCDAGToDAGISel::isRotateAndMask(SDNode *N, unsigned Mask,
650 bool isShiftMask, unsigned &SH,
651 unsigned &MB, unsigned &ME) {
652 // Don't even go down this path for i64, since different logic will be
653 // necessary for rldicl/rldicr/rldimi.
654 if (N->getValueType(0) != MVT::i32)
655 return false;
656
657 unsigned Shift = 32;
658 unsigned Indeterminant = ~0; // bit mask marking indeterminant results
659 unsigned Opcode = N->getOpcode();
660 if (N->getNumOperands() != 2 ||
661 !isInt32Immediate(N->getOperand(1).getNode(), Shift) || (Shift > 31))
662 return false;
663
664 if (Opcode == ISD::SHL) {
665 // apply shift left to mask if it comes first
666 if (isShiftMask) Mask = Mask << Shift;
667 // determine which bits are made indeterminant by shift
668 Indeterminant = ~(0xFFFFFFFFu << Shift);
669 } else if (Opcode == ISD::SRL) {
670 // apply shift right to mask if it comes first
671 if (isShiftMask) Mask = Mask >> Shift;
672 // determine which bits are made indeterminant by shift
673 Indeterminant = ~(0xFFFFFFFFu >> Shift);
674 // adjust for the left rotate
675 Shift = 32 - Shift;
676 } else if (Opcode == ISD::ROTL) {
677 Indeterminant = 0;
678 } else {
679 return false;
680 }
681
682 // if the mask doesn't intersect any Indeterminant bits
683 if (Mask && !(Mask & Indeterminant)) {
684 SH = Shift & 31;
685 // make sure the mask is still a mask (wrap arounds may not be)
686 return isRunOfOnes(Mask, MB, ME);
687 }
688 return false;
689}
690
691// isThreadPointerAcquisitionNode - Check if the operands of an ADD_TLS
692// instruction use the thread pointer.
// Only operand 0 of the ADD_TLS node is examined. Returns true for any of the
// recognized forms below and false otherwise.
// NOTE(review): the signature line and the initializer of `Subtarget` are not
// among the visible lines -- confirm against the full file.
694 assert(
695 Base.getOpcode() == PPCISD::ADD_TLS &&
696 "Only expecting the ADD_TLS instruction to acquire the thread pointer!");
697 const PPCSubtarget &Subtarget =
699 SDValue ADDTLSOp1 = Base.getOperand(0);
700 unsigned ADDTLSOp1Opcode = ADDTLSOp1.getOpcode();
701
702 // Account for when ADD_TLS is used for the initial-exec TLS model on Linux.
703 //
704 // Although ADD_TLS does not explicitly use the thread pointer
705 // register when LD_GOT_TPREL_L is one of its operands, the LD_GOT_TPREL_L
706 // instruction will have a relocation specifier, @got@tprel, that is used to
707 // generate a GOT entry. The linker replaces this entry with an offset
708 // for a thread local variable, which will be relative to the thread pointer.
709 if (ADDTLSOp1Opcode == PPCISD::LD_GOT_TPREL_L)
710 return true;
711 // When using PC-Relative instructions for initial-exec, a MAT_PCREL_ADDR
712 // node is produced instead to represent the aforementioned situation.
713 LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSOp1);
714 if (LD && LD->getBasePtr().getOpcode() == PPCISD::MAT_PCREL_ADDR)
715 return true;
716
717 // A GET_TPOINTER PPCISD node (only produced on AIX 32-bit mode) as an operand
718 // to ADD_TLS represents a call to .__get_tpointer to get the thread pointer,
719 // later returning it into R3.
720 if (ADDTLSOp1Opcode == PPCISD::GET_TPOINTER)
721 return true;
722
723 // The ADD_TLS node is explicitly acquiring the thread pointer (X13/R13).
724 RegisterSDNode *AddFirstOpReg =
725 dyn_cast_or_null<RegisterSDNode>(ADDTLSOp1.getNode());
726 if (AddFirstOpReg &&
727 AddFirstOpReg->getReg() == Subtarget.getThreadPointerRegister())
728 return true;
729
730 return false;
731}
732
733// canOptimizeTLSDFormToXForm - Optimize TLS accesses when an ADD_TLS
734// instruction is present. An ADD_TLS instruction, followed by a D-Form memory
735// operation, can be optimized to use an X-Form load or store, allowing the
736// ADD_TLS node to be removed completely.
// Returns true only when every check below passes; any failed check returns
// false and leaves the DAG untouched.
// NOTE(review): the signature line is not among the visible lines -- confirm
// against the full file.
738
739 // Do not do this transformation at -O0.
740 if (CurDAG->getTarget().getOptLevel() == CodeGenOptLevel::None)
741 return false;
742
743 // In order to perform this optimization inside tryTLSXForm[Load|Store],
744 // Base is expected to be an ADD_TLS node.
745 if (Base.getOpcode() != PPCISD::ADD_TLS)
746 return false;
 // Every user of the ADD_TLS node must qualify, not just the one being
 // selected.
747 for (auto *ADDTLSUse : Base.getNode()->uses()) {
748 // The optimization to convert the D-Form load/store into its X-Form
749 // counterpart should only occur if the source value offset of the load/
750 // store is 0. This also means that the offset should always be undefined.
751 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(ADDTLSUse)) {
752 if (LD->getSrcValueOffset() != 0 || !LD->getOffset().isUndef())
753 return false;
754 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(ADDTLSUse)) {
755 if (ST->getSrcValueOffset() != 0 || !ST->getOffset().isUndef())
756 return false;
757 } else // Don't optimize if there are ADD_TLS users that aren't load/stores.
758 return false;
759 }
760
 // An ADD_TLS whose second operand is a TLS_LOCAL_EXEC_MAT_ADDR node is
 // excluded from the transformation.
761 if (Base.getOperand(1).getOpcode() == PPCISD::TLS_LOCAL_EXEC_MAT_ADDR)
762 return false;
763
764 // Does the ADD_TLS node of the load/store use the thread pointer?
765 // If the thread pointer is not used as one of the operands of ADD_TLS,
766 // then this optimization is not valid.
767 return isThreadPointerAcquisitionNode(Base, CurDAG);
768}
769
770bool PPCDAGToDAGISel::tryTLSXFormStore(StoreSDNode *ST) {
771 SDValue Base = ST->getBasePtr();
772 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
773 return false;
774
775 SDLoc dl(ST);
776 EVT MemVT = ST->getMemoryVT();
777 EVT RegVT = ST->getValue().getValueType();
778
779 unsigned Opcode;
780 switch (MemVT.getSimpleVT().SimpleTy) {
781 default:
782 return false;
783 case MVT::i8: {
784 Opcode = (RegVT == MVT::i32) ? PPC::STBXTLS_32 : PPC::STBXTLS;
785 break;
786 }
787 case MVT::i16: {
788 Opcode = (RegVT == MVT::i32) ? PPC::STHXTLS_32 : PPC::STHXTLS;
789 break;
790 }
791 case MVT::i32: {
792 Opcode = (RegVT == MVT::i32) ? PPC::STWXTLS_32 : PPC::STWXTLS;
793 break;
794 }
795 case MVT::i64: {
796 Opcode = PPC::STDXTLS;
797 break;
798 }
799 case MVT::f32: {
800 Opcode = PPC::STFSXTLS;
801 break;
802 }
803 case MVT::f64: {
804 Opcode = PPC::STFDXTLS;
805 break;
806 }
807 }
808 SDValue Chain = ST->getChain();
809 SDVTList VTs = ST->getVTList();
810 SDValue Ops[] = {ST->getValue(), Base.getOperand(0), Base.getOperand(1),
811 Chain};
812 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
813 transferMemOperands(ST, MN);
814 ReplaceNode(ST, MN);
815 return true;
816}
817
818bool PPCDAGToDAGISel::tryTLSXFormLoad(LoadSDNode *LD) {
819 SDValue Base = LD->getBasePtr();
820 if (!canOptimizeTLSDFormToXForm(CurDAG, Base))
821 return false;
822
823 SDLoc dl(LD);
824 EVT MemVT = LD->getMemoryVT();
825 EVT RegVT = LD->getValueType(0);
826 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
827 unsigned Opcode;
828 switch (MemVT.getSimpleVT().SimpleTy) {
829 default:
830 return false;
831 case MVT::i8: {
832 Opcode = (RegVT == MVT::i32) ? PPC::LBZXTLS_32 : PPC::LBZXTLS;
833 break;
834 }
835 case MVT::i16: {
836 if (RegVT == MVT::i32)
837 Opcode = isSExt ? PPC::LHAXTLS_32 : PPC::LHZXTLS_32;
838 else
839 Opcode = isSExt ? PPC::LHAXTLS : PPC::LHZXTLS;
840 break;
841 }
842 case MVT::i32: {
843 if (RegVT == MVT::i32)
844 Opcode = isSExt ? PPC::LWAXTLS_32 : PPC::LWZXTLS_32;
845 else
846 Opcode = isSExt ? PPC::LWAXTLS : PPC::LWZXTLS;
847 break;
848 }
849 case MVT::i64: {
850 Opcode = PPC::LDXTLS;
851 break;
852 }
853 case MVT::f32: {
854 Opcode = PPC::LFSXTLS;
855 break;
856 }
857 case MVT::f64: {
858 Opcode = PPC::LFDXTLS;
859 break;
860 }
861 }
862 SDValue Chain = LD->getChain();
863 SDVTList VTs = LD->getVTList();
864 SDValue Ops[] = {Base.getOperand(0), Base.getOperand(1), Chain};
865 SDNode *MN = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
866 transferMemOperands(LD, MN);
867 ReplaceNode(LD, MN);
868 return true;
869}
870
871/// Turn an or of two masked values into the rotate left word immediate then
872/// mask insert (rlwimi) instruction.
873bool PPCDAGToDAGISel::tryBitfieldInsert(SDNode *N) {
874 SDValue Op0 = N->getOperand(0);
875 SDValue Op1 = N->getOperand(1);
876 SDLoc dl(N);
877
878 KnownBits LKnown = CurDAG->computeKnownBits(Op0);
879 KnownBits RKnown = CurDAG->computeKnownBits(Op1);
880
881 unsigned TargetMask = LKnown.Zero.getZExtValue();
882 unsigned InsertMask = RKnown.Zero.getZExtValue();
883
884 if ((TargetMask | InsertMask) == 0xFFFFFFFF) {
885 unsigned Op0Opc = Op0.getOpcode();
886 unsigned Op1Opc = Op1.getOpcode();
887 unsigned Value, SH = 0;
888 TargetMask = ~TargetMask;
889 InsertMask = ~InsertMask;
890
891 // If the LHS has a foldable shift and the RHS does not, then swap it to the
892 // RHS so that we can fold the shift into the insert.
893 if (Op0Opc == ISD::AND && Op1Opc == ISD::AND) {
894 if (Op0.getOperand(0).getOpcode() == ISD::SHL ||
895 Op0.getOperand(0).getOpcode() == ISD::SRL) {
896 if (Op1.getOperand(0).getOpcode() != ISD::SHL &&
897 Op1.getOperand(0).getOpcode() != ISD::SRL) {
898 std::swap(Op0, Op1);
899 std::swap(Op0Opc, Op1Opc);
900 std::swap(TargetMask, InsertMask);
901 }
902 }
903 } else if (Op0Opc == ISD::SHL || Op0Opc == ISD::SRL) {
904 if (Op1Opc == ISD::AND && Op1.getOperand(0).getOpcode() != ISD::SHL &&
905 Op1.getOperand(0).getOpcode() != ISD::SRL) {
906 std::swap(Op0, Op1);
907 std::swap(Op0Opc, Op1Opc);
908 std::swap(TargetMask, InsertMask);
909 }
910 }
911
912 unsigned MB, ME;
913 if (isRunOfOnes(InsertMask, MB, ME)) {
914 if ((Op1Opc == ISD::SHL || Op1Opc == ISD::SRL) &&
916 Op1 = Op1.getOperand(0);
917 SH = (Op1Opc == ISD::SHL) ? Value : 32 - Value;
918 }
919 if (Op1Opc == ISD::AND) {
920 // The AND mask might not be a constant, and we need to make sure that
921 // if we're going to fold the masking with the insert, all bits not
922 // know to be zero in the mask are known to be one.
923 KnownBits MKnown = CurDAG->computeKnownBits(Op1.getOperand(1));
924 bool CanFoldMask = InsertMask == MKnown.One.getZExtValue();
925
926 unsigned SHOpc = Op1.getOperand(0).getOpcode();
927 if ((SHOpc == ISD::SHL || SHOpc == ISD::SRL) && CanFoldMask &&
929 // Note that Value must be in range here (less than 32) because
930 // otherwise there would not be any bits set in InsertMask.
931 Op1 = Op1.getOperand(0).getOperand(0);
932 SH = (SHOpc == ISD::SHL) ? Value : 32 - Value;
933 }
934 }
935
936 SH &= 31;
937 SDValue Ops[] = { Op0, Op1, getI32Imm(SH, dl), getI32Imm(MB, dl),
938 getI32Imm(ME, dl) };
939 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
940 return true;
941 }
942 }
943 return false;
944}
945
946static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N) {
947 unsigned MaxTruncation = 0;
948 // Cannot use range-based for loop here as we need the actual use (i.e. we
949 // need the operand number corresponding to the use). A range-based for
950 // will unbox the use and provide an SDNode*.
951 for (SDNode::use_iterator Use = N->use_begin(), UseEnd = N->use_end();
952 Use != UseEnd; ++Use) {
953 unsigned Opc =
954 Use->isMachineOpcode() ? Use->getMachineOpcode() : Use->getOpcode();
955 switch (Opc) {
956 default: return 0;
957 case ISD::TRUNCATE:
958 if (Use->isMachineOpcode())
959 return 0;
960 MaxTruncation =
961 std::max(MaxTruncation, (unsigned)Use->getValueType(0).getSizeInBits());
962 continue;
963 case ISD::STORE: {
964 if (Use->isMachineOpcode())
965 return 0;
966 StoreSDNode *STN = cast<StoreSDNode>(*Use);
967 unsigned MemVTSize = STN->getMemoryVT().getSizeInBits();
968 if (MemVTSize == 64 || Use.getOperandNo() != 0)
969 return 0;
970 MaxTruncation = std::max(MaxTruncation, MemVTSize);
971 continue;
972 }
973 case PPC::STW8:
974 case PPC::STWX8:
975 case PPC::STWU8:
976 case PPC::STWUX8:
977 if (Use.getOperandNo() != 0)
978 return 0;
979 MaxTruncation = std::max(MaxTruncation, 32u);
980 continue;
981 case PPC::STH8:
982 case PPC::STHX8:
983 case PPC::STHU8:
984 case PPC::STHUX8:
985 if (Use.getOperandNo() != 0)
986 return 0;
987 MaxTruncation = std::max(MaxTruncation, 16u);
988 continue;
989 case PPC::STB8:
990 case PPC::STBX8:
991 case PPC::STBU8:
992 case PPC::STBUX8:
993 if (Use.getOperandNo() != 0)
994 return 0;
995 MaxTruncation = std::max(MaxTruncation, 8u);
996 continue;
997 }
998 }
999 return MaxTruncation;
1000}
1001
1002// For any 32 < Num < 64, check if the Imm contains at least Num consecutive
1003// zeros and return the number of bits by the left of these consecutive zeros.
1004static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num) {
1005 unsigned HiTZ = llvm::countr_zero<uint32_t>(Hi_32(Imm));
1006 unsigned LoLZ = llvm::countl_zero<uint32_t>(Lo_32(Imm));
1007 if ((HiTZ + LoLZ) >= Num)
1008 return (32 + HiTZ);
1009 return 0;
1010}
1011
1012// Direct materialization of 64-bit constants by enumerated patterns.
//
// The patterns are tried in order of increasing instruction count, and later
// patterns rely on earlier ones having already returned (see the asserts).
// On success the last machine node of the sequence is returned and InstCnt is
// set to the number of instructions created (1, 2 or 3). If no pattern
// matches, InstCnt is set to 0 and nullptr is returned.
1013static SDNode *selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl,
1014 uint64_t Imm, unsigned &InstCnt) {
1015 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1016 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1017 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1018 unsigned LO = llvm::countl_one<uint64_t>(Imm);
1019 unsigned Hi32 = Hi_32(Imm);
1020 unsigned Lo32 = Lo_32(Imm);
1021 SDNode *Result = nullptr;
1022 unsigned Shift = 0;
1023
1024 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1025 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1026 };
1027
1028 // Following patterns use 1 instruction to materialize the Imm.
1029 InstCnt = 1;
1030 // 1-1) Patterns : {zeros}{15-bit value}
1031 // {ones}{15-bit value}
1032 if (isInt<16>(Imm)) {
1033 SDValue SDImm = CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1034 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1035 }
1036 // 1-2) Patterns : {zeros}{15-bit value}{16 zeros}
1037 // {ones}{15-bit value}{16 zeros}
1038 if (TZ > 15 && (LZ > 32 || LO > 32))
1039 return CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1040 getI32Imm((Imm >> 16) & 0xffff));
1041
1042 // Following patterns use 2 instructions to materialize the Imm.
1043 InstCnt = 2;
1044 assert(LZ < 64 && "Unexpected leading zeros here.");
1045 // Count of ones following the leading zeros.
1046 unsigned FO = llvm::countl_one<uint64_t>(Imm << LZ);
1047 // 2-1) Patterns : {zeros}{31-bit value}
1048 // {ones}{31-bit value}
1049 if (isInt<32>(Imm)) {
1050 uint64_t ImmHi16 = (Imm >> 16) & 0xffff;
1051 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1052 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1053 return CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1054 getI32Imm(Imm & 0xffff));
1055 }
1056 // 2-2) Patterns : {zeros}{ones}{15-bit value}{zeros}
1057 // {zeros}{15-bit value}{zeros}
1058 // {zeros}{ones}{15-bit value}
1059 // {ones}{15-bit value}{zeros}
1060 // We can take advantage of LI's sign-extension semantics to generate leading
1061 // ones, and then use RLDIC to mask off the ones in both sides after rotation.
1062 if ((LZ + FO + TZ) > 48) {
1063 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1064 getI32Imm((Imm >> TZ) & 0xffff));
1065 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1066 getI32Imm(TZ), getI32Imm(LZ));
1067 }
1068 // 2-3) Pattern : {zeros}{15-bit value}{ones}
1069 // Shift right the Imm by (48 - LZ) bits to construct a negative 16-bit value,
1070 // therefore we can take advantage of LI's sign-extension semantics, and then
1071 // mask them off after rotation.
1072 //
1073 // +--LZ--||-15-bit-||--TO--+ +-------------|--16-bit--+
1074 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1075 // +------------------------+ +------------------------+
1076 // 63 0 63 0
1077 // Imm (Imm >> (48 - LZ) & 0xffff)
1078 // +----sext-----|--16-bit--+ +clear-|-----------------+
1079 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1080 // +------------------------+ +------------------------+
1081 // 63 0 63 0
1082 // LI8: sext many leading zeros RLDICL: rotate left (48 - LZ), clear left LZ
1083 if ((LZ + TO) > 48) {
1084 // Since the immediates with (LZ > 32) have been handled by previous
1085 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1086 // the Imm by a negative value.
1087 assert(LZ <= 32 && "Unexpected shift value.");
1088 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1089 getI32Imm((Imm >> (48 - LZ) & 0xffff)));
1090 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1091 getI32Imm(48 - LZ), getI32Imm(LZ));
1092 }
1093 // 2-4) Patterns : {zeros}{ones}{15-bit value}{ones}
1094 // {ones}{15-bit value}{ones}
1095 // We can take advantage of LI's sign-extension semantics to generate leading
1096 // ones, and then use RLDICL to mask off the ones in left sides (if required)
1097 // after rotation.
1098 //
1099 // +-LZ-FO||-15-bit-||--TO--+ +-------------|--16-bit--+
1100 // |00011110bbbbbbbbb1111111| -> |000000000011110bbbbbbbbb|
1101 // +------------------------+ +------------------------+
1102 // 63 0 63 0
1103 // Imm (Imm >> TO) & 0xffff
1104 // +----sext-----|--16-bit--+ +LZ|---------------------+
1105 // |111111111111110bbbbbbbbb| -> |00011110bbbbbbbbb1111111|
1106 // +------------------------+ +------------------------+
1107 // 63 0 63 0
1108 // LI8: sext many leading zeros RLDICL: rotate left TO, clear left LZ
1109 if ((LZ + FO + TO) > 48) {
1110 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1111 getI32Imm((Imm >> TO) & 0xffff));
1112 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1113 getI32Imm(TO), getI32Imm(LZ));
1114 }
1115 // 2-5) Pattern : {32 zeros}{****}{0}{15-bit value}
1116 // If Hi32 is zero and the Lo16(in Lo32) can be presented as a positive 16 bit
1117 // value, we can use LI for Lo16 without generating leading ones then add the
1118 // Hi16(in Lo32).
1119 if (LZ == 32 && ((Lo32 & 0x8000) == 0)) {
1120 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1121 getI32Imm(Lo32 & 0xffff));
1122 return CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64, SDValue(Result, 0),
1123 getI32Imm(Lo32 >> 16));
1124 }
1125 // 2-6) Patterns : {******}{49 zeros}{******}
1126 // {******}{49 ones}{******}
1127 // If the Imm contains 49 consecutive zeros/ones, it means that a total of 15
1128 // bits remain on both sides. Rotate right the Imm to construct an int<16>
1129 // value, use LI for int<16> value and then use RLDICL without mask to rotate
1130 // it back.
1131 //
1132 // 1) findContiguousZerosAtLeast(Imm, 49)
1133 // +------|--zeros-|------+ +---ones--||---15 bit--+
1134 // |bbbbbb0000000000aaaaaa| -> |0000000000aaaaaabbbbbb|
1135 // +----------------------+ +----------------------+
1136 // 63 0 63 0
1137 //
1138 // 2) findContiguousZerosAtLeast(~Imm, 49)
1139 // +------|--ones--|------+ +---ones--||---15 bit--+
1140 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1141 // +----------------------+ +----------------------+
1142 // 63 0 63 0
1143 if ((Shift = findContiguousZerosAtLeast(Imm, 49)) ||
1144 (Shift = findContiguousZerosAtLeast(~Imm, 49))) {
1145 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1146 Result = CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64,
1147 getI32Imm(RotImm & 0xffff));
1148 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1149 getI32Imm(Shift), getI32Imm(0));
1150 }
1151 // 2-7) Patterns : High word == Low word
1152 // This may require 2 to 3 instructions, depending on whether Lo32 can be
1153 // materialized in 1 instruction.
1154 if (Hi32 == Lo32) {
1155 // Handle the first 32 bits.
1156 uint64_t ImmHi16 = (Lo32 >> 16) & 0xffff;
1157 uint64_t ImmLo16 = Lo32 & 0xffff;
1158 if (isInt<16>(Lo32))
1159 Result =
1160 CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, getI32Imm(ImmLo16));
1161 else if (!ImmLo16)
1162 Result =
1163 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1164 else {
1165 InstCnt = 3;
1166 Result =
1167 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(ImmHi16));
1168 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1169 SDValue(Result, 0), getI32Imm(ImmLo16));
1170 }
1171 // Use rldimi to insert the Low word into High word.
1172 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1173 getI32Imm(0)};
1174 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1175 }
1176
1177 // Following patterns use 3 instructions to materialize the Imm.
1178 InstCnt = 3;
1179 // 3-1) Patterns : {zeros}{ones}{31-bit value}{zeros}
1180 // {zeros}{31-bit value}{zeros}
1181 // {zeros}{ones}{31-bit value}
1182 // {ones}{31-bit value}{zeros}
1183 // We can take advantage of LIS's sign-extension semantics to generate leading
1184 // ones, add the remaining bits with ORI, and then use RLDIC to mask off the
1185 // ones in both sides after rotation.
1186 if ((LZ + FO + TZ) > 32) {
1187 uint64_t ImmHi16 = (Imm >> (TZ + 16)) & 0xffff;
1188 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1189 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1190 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1191 getI32Imm((Imm >> TZ) & 0xffff));
1192 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1193 getI32Imm(TZ), getI32Imm(LZ));
1194 }
1195 // 3-2) Pattern : {zeros}{31-bit value}{ones}
1196 // Shift right the Imm by (32 - LZ) bits to construct a negative 32 bits
1197 // value, therefore we can take advantage of LIS's sign-extension semantics,
1198 // add the remaining bits with ORI, and then mask them off after rotation.
1199 // This is similar to Pattern 2-3, please refer to the diagram there.
1200 if ((LZ + TO) > 32) {
1201 // Since the immediates with (LZ > 32) have been handled by previous
1202 // patterns, here we have (LZ <= 32) to make sure we will not shift right
1203 // the Imm by a negative value.
1204 assert(LZ <= 32 && "Unexpected shift value.");
1205 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1206 getI32Imm((Imm >> (48 - LZ)) & 0xffff));
1207 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1208 getI32Imm((Imm >> (32 - LZ)) & 0xffff));
1209 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1210 getI32Imm(32 - LZ), getI32Imm(LZ));
1211 }
1212 // 3-3) Patterns : {zeros}{ones}{31-bit value}{ones}
1213 // {ones}{31-bit value}{ones}
1214 // We can take advantage of LIS's sign-extension semantics to generate leading
1215 // ones, add the remaining bits with ORI, and then use RLDICL to mask off the
1216 // ones in left sides (if required) after rotation.
1217 // This is similar to Pattern 2-4, please refer to the diagram there.
1218 if ((LZ + FO + TO) > 32) {
1219 Result = CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64,
1220 getI32Imm((Imm >> (TO + 16)) & 0xffff));
1221 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1222 getI32Imm((Imm >> TO) & 0xffff));
1223 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1224 getI32Imm(TO), getI32Imm(LZ));
1225 }
1226 // 3-4) Patterns : {******}{33 zeros}{******}
1227 // {******}{33 ones}{******}
1228 // If the Imm contains 33 consecutive zeros/ones, it means that a total of 31
1229 // bits remain on both sides. Rotate right the Imm to construct an int<32>
1230 // value, use LIS + ORI for int<32> value and then use RLDICL without mask to
1231 // rotate it back.
1232 // This is similar to Pattern 2-6, please refer to the diagram there.
1233 if ((Shift = findContiguousZerosAtLeast(Imm, 33)) ||
1234 (Shift = findContiguousZerosAtLeast(~Imm, 33))) {
1235 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1236 uint64_t ImmHi16 = (RotImm >> 16) & 0xffff;
1237 unsigned Opcode = ImmHi16 ? PPC::LIS8 : PPC::LI8;
1238 Result = CurDAG->getMachineNode(Opcode, dl, MVT::i64, getI32Imm(ImmHi16));
1239 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1240 getI32Imm(RotImm & 0xffff));
1241 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1242 getI32Imm(Shift), getI32Imm(0));
1243 }
1244
// No enumerated pattern matched; report failure to the caller.
1245 InstCnt = 0;
1246 return nullptr;
1247}
1248
1249// Try to select instructions to generate a 64 bit immediate using prefix as
1250// well as non prefix instructions. The function will return the SDNode
1251// to materialize that constant or it will return nullptr if it does not
1252// find one. The variable InstCnt is set to the number of instructions that
1253// were selected.
1255 uint64_t Imm, unsigned &InstCnt) {
1256 unsigned TZ = llvm::countr_zero<uint64_t>(Imm);
1257 unsigned LZ = llvm::countl_zero<uint64_t>(Imm);
1258 unsigned TO = llvm::countr_one<uint64_t>(Imm);
1259 unsigned FO = llvm::countl_one<uint64_t>(LZ == 64 ? 0 : (Imm << LZ));
1260 unsigned Hi32 = Hi_32(Imm);
1261 unsigned Lo32 = Lo_32(Imm);
1262
1263 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1264 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1265 };
1266
1267 auto getI64Imm = [CurDAG, dl](uint64_t Imm) {
1268 return CurDAG->getTargetConstant(Imm, dl, MVT::i64);
1269 };
1270
1271 // Following patterns use 1 instruction to materialize Imm.
1272 InstCnt = 1;
1273
1274 // The pli instruction can materialize up to 34 bits directly.
1275 // If a constant fits within 34-bits, emit the pli instruction here directly.
1276 if (isInt<34>(Imm))
1277 return CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1278 CurDAG->getTargetConstant(Imm, dl, MVT::i64));
1279
1280 // Require at least two instructions.
1281 InstCnt = 2;
1282 SDNode *Result = nullptr;
1283 // Patterns : {zeros}{ones}{33-bit value}{zeros}
1284 // {zeros}{33-bit value}{zeros}
1285 // {zeros}{ones}{33-bit value}
1286 // {ones}{33-bit value}{zeros}
1287 // We can take advantage of PLI's sign-extension semantics to generate leading
1288 // ones, and then use RLDIC to mask off the ones on both sides after rotation.
1289 if ((LZ + FO + TZ) > 30) {
1290 APInt SignedInt34 = APInt(34, (Imm >> TZ) & 0x3ffffffff);
1291 APInt Extended = SignedInt34.sext(64);
1292 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1293 getI64Imm(*Extended.getRawData()));
1294 return CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, SDValue(Result, 0),
1295 getI32Imm(TZ), getI32Imm(LZ));
1296 }
1297 // Pattern : {zeros}{33-bit value}{ones}
1298 // Shift right the Imm by (30 - LZ) bits to construct a negative 34 bit value,
1299 // therefore we can take advantage of PLI's sign-extension semantics, and then
1300 // mask them off after rotation.
1301 //
1302 // +--LZ--||-33-bit-||--TO--+ +-------------|--34-bit--+
1303 // |00000001bbbbbbbbb1111111| -> |00000000000001bbbbbbbbb1|
1304 // +------------------------+ +------------------------+
1305 // 63 0 63 0
1306 //
1307 // +----sext-----|--34-bit--+ +clear-|-----------------+
1308 // |11111111111111bbbbbbbbb1| -> |00000001bbbbbbbbb1111111|
1309 // +------------------------+ +------------------------+
1310 // 63 0 63 0
1311 if ((LZ + TO) > 30) {
1312 APInt SignedInt34 = APInt(34, (Imm >> (30 - LZ)) & 0x3ffffffff);
1313 APInt Extended = SignedInt34.sext(64);
1314 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1315 getI64Imm(*Extended.getRawData()));
1316 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1317 getI32Imm(30 - LZ), getI32Imm(LZ));
1318 }
1319 // Patterns : {zeros}{ones}{33-bit value}{ones}
1320 // {ones}{33-bit value}{ones}
1321 // Similar to LI we can take advantage of PLI's sign-extension semantics to
1322 // generate leading ones, and then use RLDICL to mask off the ones in left
1323 // sides (if required) after rotation.
1324 if ((LZ + FO + TO) > 30) {
1325 APInt SignedInt34 = APInt(34, (Imm >> TO) & 0x3ffffffff);
1326 APInt Extended = SignedInt34.sext(64);
1327 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64,
1328 getI64Imm(*Extended.getRawData()));
1329 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, SDValue(Result, 0),
1330 getI32Imm(TO), getI32Imm(LZ));
1331 }
1332 // Patterns : {******}{31 zeros}{******}
1333 // : {******}{31 ones}{******}
1334 // If Imm contains 31 consecutive zeros/ones then the remaining bit count
1335 // is 33. Rotate right the Imm to construct a int<33> value, we can use PLI
1336 // for the int<33> value and then use RLDICL without a mask to rotate it back.
1337 //
1338 // +------|--ones--|------+ +---ones--||---33 bit--+
1339 // |bbbbbb1111111111aaaaaa| -> |1111111111aaaaaabbbbbb|
1340 // +----------------------+ +----------------------+
1341 // 63 0 63 0
1342 for (unsigned Shift = 0; Shift < 63; ++Shift) {
1343 uint64_t RotImm = APInt(64, Imm).rotr(Shift).getZExtValue();
1344 if (isInt<34>(RotImm)) {
1345 Result =
1346 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(RotImm));
1347 return CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
1348 SDValue(Result, 0), getI32Imm(Shift),
1349 getI32Imm(0));
1350 }
1351 }
1352
1353 // Patterns : High word == Low word
1354 // This is basically a splat of a 32 bit immediate.
1355 if (Hi32 == Lo32) {
1356 Result = CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1357 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1358 getI32Imm(0)};
1359 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1360 }
1361
1362 InstCnt = 3;
1363 // Catch-all
1364 // This pattern can form any 64 bit immediate in 3 instructions.
1365 SDNode *ResultHi =
1366 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Hi32));
1367 SDNode *ResultLo =
1368 CurDAG->getMachineNode(PPC::PLI8, dl, MVT::i64, getI64Imm(Lo32));
1369 SDValue Ops[] = {SDValue(ResultLo, 0), SDValue(ResultHi, 0), getI32Imm(32),
1370 getI32Imm(0)};
1371 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1372}
1373
1374static SDNode *selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm,
1375 unsigned *InstCnt = nullptr) {
1376 unsigned InstCntDirect = 0;
1377 // No more than 3 instructions are used if we can select the i64 immediate
1378 // directly.
1379 SDNode *Result = selectI64ImmDirect(CurDAG, dl, Imm, InstCntDirect);
1380
1381 const PPCSubtarget &Subtarget =
1383
1384 // If we have prefixed instructions and there is a chance we can
1385 // materialize the constant with fewer prefixed instructions than
1386 // non-prefixed, try that.
1387 if (Subtarget.hasPrefixInstrs() && InstCntDirect != 1) {
1388 unsigned InstCntDirectP = 0;
1389 SDNode *ResultP = selectI64ImmDirectPrefix(CurDAG, dl, Imm, InstCntDirectP);
1390 // Use the prefix case in either of two cases:
1391 // 1) We have no result from the non-prefix case to use.
1392 // 2) The non-prefix case uses more instructions than the prefix case.
1393 // If the prefix and non-prefix cases use the same number of instructions
1394 // we will prefer the non-prefix case.
1395 if (ResultP && (!Result || InstCntDirectP < InstCntDirect)) {
1396 if (InstCnt)
1397 *InstCnt = InstCntDirectP;
1398 return ResultP;
1399 }
1400 }
1401
1402 if (Result) {
1403 if (InstCnt)
1404 *InstCnt = InstCntDirect;
1405 return Result;
1406 }
1407 auto getI32Imm = [CurDAG, dl](unsigned Imm) {
1408 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
1409 };
1410
1411 uint32_t Hi16OfLo32 = (Lo_32(Imm) >> 16) & 0xffff;
1412 uint32_t Lo16OfLo32 = Lo_32(Imm) & 0xffff;
1413
1414 // Try to use 4 instructions to materialize the immediate which is "almost" a
1415 // splat of a 32 bit immediate.
1416 if (Hi16OfLo32 && Lo16OfLo32) {
1417 uint32_t Hi16OfHi32 = (Hi_32(Imm) >> 16) & 0xffff;
1418 uint32_t Lo16OfHi32 = Hi_32(Imm) & 0xffff;
1419 bool IsSelected = false;
1420
1421 auto getSplat = [CurDAG, dl, getI32Imm](uint32_t Hi16, uint32_t Lo16) {
1422 SDNode *Result =
1423 CurDAG->getMachineNode(PPC::LIS8, dl, MVT::i64, getI32Imm(Hi16));
1424 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
1425 SDValue(Result, 0), getI32Imm(Lo16));
1426 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(32),
1427 getI32Imm(0)};
1428 return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1429 };
1430
1431 if (Hi16OfHi32 == Lo16OfHi32 && Lo16OfHi32 == Lo16OfLo32) {
1432 IsSelected = true;
1433 Result = getSplat(Hi16OfLo32, Lo16OfLo32);
1434 // Modify Hi16OfHi32.
1435 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(48),
1436 getI32Imm(0)};
1437 Result = CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
1438 } else if (Hi16OfHi32 == Hi16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1439 IsSelected = true;
1440 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1441 // Modify Lo16OfLo32.
1442 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1443 getI32Imm(16), getI32Imm(31)};
1444 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1445 } else if (Lo16OfHi32 == Lo16OfLo32 && Hi16OfLo32 == Lo16OfLo32) {
1446 IsSelected = true;
1447 Result = getSplat(Hi16OfHi32, Lo16OfHi32);
1448 // Modify Hi16OfLo32.
1449 SDValue Ops[] = {SDValue(Result, 0), SDValue(Result, 0), getI32Imm(16),
1450 getI32Imm(0), getI32Imm(15)};
1451 Result = CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64, Ops);
1452 }
1453 if (IsSelected == true) {
1454 if (InstCnt)
1455 *InstCnt = 4;
1456 return Result;
1457 }
1458 }
1459
1460 // Handle the upper 32 bit value.
1461 Result =
1462 selectI64ImmDirect(CurDAG, dl, Imm & 0xffffffff00000000, InstCntDirect);
1463 // Add in the last bits as required.
1464 if (Hi16OfLo32) {
1465 Result = CurDAG->getMachineNode(PPC::ORIS8, dl, MVT::i64,
1466 SDValue(Result, 0), getI32Imm(Hi16OfLo32));
1467 ++InstCntDirect;
1468 }
1469 if (Lo16OfLo32) {
1470 Result = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64, SDValue(Result, 0),
1471 getI32Imm(Lo16OfLo32));
1472 ++InstCntDirect;
1473 }
1474 if (InstCnt)
1475 *InstCnt = InstCntDirect;
1476 return Result;
1477}
1478
1479// Select a 64-bit constant.
1481 SDLoc dl(N);
1482
1483 // Get 64 bit value.
1484 int64_t Imm = N->getAsZExtVal();
1485 if (unsigned MinSize = allUsesTruncate(CurDAG, N)) {
1486 uint64_t SextImm = SignExtend64(Imm, MinSize);
1487 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
1488 if (isInt<16>(SextImm))
1489 return CurDAG->getMachineNode(PPC::LI8, dl, MVT::i64, SDImm);
1490 }
1491 return selectI64Imm(CurDAG, dl, Imm);
1492}
1493
1494namespace {
1495
1496class BitPermutationSelector {
1497 struct ValueBit {
1498 SDValue V;
1499
1500 // The bit number in the value, using a convention where bit 0 is the
1501 // lowest-order bit.
1502 unsigned Idx;
1503
1504 // ConstZero means a bit we need to mask off.
1505 // Variable is a bit comes from an input variable.
1506 // VariableKnownToBeZero is also a bit comes from an input variable,
1507 // but it is known to be already zero. So we do not need to mask them.
1508 enum Kind {
1509 ConstZero,
1510 Variable,
1511 VariableKnownToBeZero
1512 } K;
1513
1514 ValueBit(SDValue V, unsigned I, Kind K = Variable)
1515 : V(V), Idx(I), K(K) {}
1516 ValueBit(Kind K = Variable) : Idx(UINT32_MAX), K(K) {}
1517
1518 bool isZero() const {
1519 return K == ConstZero || K == VariableKnownToBeZero;
1520 }
1521
1522 bool hasValue() const {
1523 return K == Variable || K == VariableKnownToBeZero;
1524 }
1525
1526 SDValue getValue() const {
1527 assert(hasValue() && "Cannot get the value of a constant bit");
1528 return V;
1529 }
1530
1531 unsigned getValueBitIndex() const {
1532 assert(hasValue() && "Cannot get the value bit index of a constant bit");
1533 return Idx;
1534 }
1535 };
1536
1537 // A bit group has the same underlying value and the same rotate factor.
1538 struct BitGroup {
1539 SDValue V;
1540 unsigned RLAmt;
1541 unsigned StartIdx, EndIdx;
1542
1543 // This rotation amount assumes that the lower 32 bits of the quantity are
1544 // replicated in the high 32 bits by the rotation operator (which is done
1545 // by rlwinm and friends in 64-bit mode).
1546 bool Repl32;
1547 // Did converting to Repl32 == true change the rotation factor? If it did,
1548 // it decreased it by 32.
1549 bool Repl32CR;
1550 // Was this group coalesced after setting Repl32 to true?
1551 bool Repl32Coalesced;
1552
1553 BitGroup(SDValue V, unsigned R, unsigned S, unsigned E)
1554 : V(V), RLAmt(R), StartIdx(S), EndIdx(E), Repl32(false), Repl32CR(false),
1555 Repl32Coalesced(false) {
1556 LLVM_DEBUG(dbgs() << "\tbit group for " << V.getNode() << " RLAmt = " << R
1557 << " [" << S << ", " << E << "]\n");
1558 }
1559 };
1560
1561 // Information on each (Value, RLAmt) pair (like the number of groups
1562 // associated with each) used to choose the lowering method.
1563 struct ValueRotInfo {
1564 SDValue V;
1565 unsigned RLAmt = std::numeric_limits<unsigned>::max();
1566 unsigned NumGroups = 0;
1567 unsigned FirstGroupStartIdx = std::numeric_limits<unsigned>::max();
1568 bool Repl32 = false;
1569
1570 ValueRotInfo() = default;
1571
1572 // For sorting (in reverse order) by NumGroups, and then by
1573 // FirstGroupStartIdx.
1574 bool operator < (const ValueRotInfo &Other) const {
1575 // We need to sort so that the non-Repl32 come first because, when we're
1576 // doing masking, the Repl32 bit groups might be subsumed into the 64-bit
1577 // masking operation.
1578 if (Repl32 < Other.Repl32)
1579 return true;
1580 else if (Repl32 > Other.Repl32)
1581 return false;
1582 else if (NumGroups > Other.NumGroups)
1583 return true;
1584 else if (NumGroups < Other.NumGroups)
1585 return false;
1586 else if (RLAmt == 0 && Other.RLAmt != 0)
1587 return true;
1588 else if (RLAmt != 0 && Other.RLAmt == 0)
1589 return false;
1590 else if (FirstGroupStartIdx < Other.FirstGroupStartIdx)
1591 return true;
1592 return false;
1593 }
1594 };
1595
1596 using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
1597 using ValueBitsMemoizer =
1599 ValueBitsMemoizer Memoizer;
1600
1601 // Return a pair of bool and a SmallVector pointer to a memoization entry.
1602 // The bool is true if something interesting was deduced, otherwise if we're
1603 // providing only a generic representation of V (or something else likewise
1604 // uninteresting for instruction selection) through the SmallVector.
1605 std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
1606 unsigned NumBits) {
1607 auto &ValueEntry = Memoizer[V];
1608 if (ValueEntry)
1609 return std::make_pair(ValueEntry->first, &ValueEntry->second);
1610 ValueEntry.reset(new ValueBitsMemoizedValue());
1611 bool &Interesting = ValueEntry->first;
1612 SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
1613 Bits.resize(NumBits);
1614
1615 switch (V.getOpcode()) {
1616 default: break;
1617 case ISD::ROTL:
1618 if (isa<ConstantSDNode>(V.getOperand(1))) {
1619 assert(isPowerOf2_32(NumBits) && "rotl bits should be power of 2!");
1620 unsigned RotAmt = V.getConstantOperandVal(1) & (NumBits - 1);
1621
1622 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1623
1624 for (unsigned i = 0; i < NumBits; ++i)
1625 Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
1626
1627 return std::make_pair(Interesting = true, &Bits);
1628 }
1629 break;
1630 case ISD::SHL:
1631 case PPCISD::SHL:
1632 if (isa<ConstantSDNode>(V.getOperand(1))) {
1633 // sld takes 7 bits, slw takes 6.
1634 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1635
1636 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1637
1638 if (ShiftAmt >= NumBits) {
1639 for (unsigned i = 0; i < NumBits; ++i)
1640 Bits[i] = ValueBit(ValueBit::ConstZero);
1641 } else {
1642 for (unsigned i = ShiftAmt; i < NumBits; ++i)
1643 Bits[i] = LHSBits[i - ShiftAmt];
1644 for (unsigned i = 0; i < ShiftAmt; ++i)
1645 Bits[i] = ValueBit(ValueBit::ConstZero);
1646 }
1647
1648 return std::make_pair(Interesting = true, &Bits);
1649 }
1650 break;
1651 case ISD::SRL:
1652 case PPCISD::SRL:
1653 if (isa<ConstantSDNode>(V.getOperand(1))) {
1654 // srd takes lowest 7 bits, srw takes 6.
1655 unsigned ShiftAmt = V.getConstantOperandVal(1) & ((NumBits << 1) - 1);
1656
1657 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1658
1659 if (ShiftAmt >= NumBits) {
1660 for (unsigned i = 0; i < NumBits; ++i)
1661 Bits[i] = ValueBit(ValueBit::ConstZero);
1662 } else {
1663 for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
1664 Bits[i] = LHSBits[i + ShiftAmt];
1665 for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
1666 Bits[i] = ValueBit(ValueBit::ConstZero);
1667 }
1668
1669 return std::make_pair(Interesting = true, &Bits);
1670 }
1671 break;
1672 case ISD::AND:
1673 if (isa<ConstantSDNode>(V.getOperand(1))) {
1674 uint64_t Mask = V.getConstantOperandVal(1);
1675
1676 const SmallVector<ValueBit, 64> *LHSBits;
1677 // Mark this as interesting, only if the LHS was also interesting. This
1678 // prevents the overall procedure from matching a single immediate 'and'
1679 // (which is non-optimal because such an and might be folded with other
1680 // things if we don't select it here).
1681 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
1682
1683 for (unsigned i = 0; i < NumBits; ++i)
1684 if (((Mask >> i) & 1) == 1)
1685 Bits[i] = (*LHSBits)[i];
1686 else {
1687 // AND instruction masks this bit. If the input is already zero,
1688 // we have nothing to do here. Otherwise, make the bit ConstZero.
1689 if ((*LHSBits)[i].isZero())
1690 Bits[i] = (*LHSBits)[i];
1691 else
1692 Bits[i] = ValueBit(ValueBit::ConstZero);
1693 }
1694
1695 return std::make_pair(Interesting, &Bits);
1696 }
1697 break;
1698 case ISD::OR: {
1699 const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
1700 const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
1701
1702 bool AllDisjoint = true;
1703 SDValue LastVal = SDValue();
1704 unsigned LastIdx = 0;
1705 for (unsigned i = 0; i < NumBits; ++i) {
1706 if (LHSBits[i].isZero() && RHSBits[i].isZero()) {
1707 // If both inputs are known to be zero and one is ConstZero and
1708 // another is VariableKnownToBeZero, we can select whichever
1709 // we like. To minimize the number of bit groups, we select
1710 // VariableKnownToBeZero if this bit is the next bit of the same
1711 // input variable from the previous bit. Otherwise, we select
1712 // ConstZero.
1713 if (LHSBits[i].hasValue() && LHSBits[i].getValue() == LastVal &&
1714 LHSBits[i].getValueBitIndex() == LastIdx + 1)
1715 Bits[i] = LHSBits[i];
1716 else if (RHSBits[i].hasValue() && RHSBits[i].getValue() == LastVal &&
1717 RHSBits[i].getValueBitIndex() == LastIdx + 1)
1718 Bits[i] = RHSBits[i];
1719 else
1720 Bits[i] = ValueBit(ValueBit::ConstZero);
1721 }
1722 else if (LHSBits[i].isZero())
1723 Bits[i] = RHSBits[i];
1724 else if (RHSBits[i].isZero())
1725 Bits[i] = LHSBits[i];
1726 else {
1727 AllDisjoint = false;
1728 break;
1729 }
1730 // We remember the value and bit index of this bit.
1731 if (Bits[i].hasValue()) {
1732 LastVal = Bits[i].getValue();
1733 LastIdx = Bits[i].getValueBitIndex();
1734 }
1735 else {
1736 if (LastVal) LastVal = SDValue();
1737 LastIdx = 0;
1738 }
1739 }
1740
1741 if (!AllDisjoint)
1742 break;
1743
1744 return std::make_pair(Interesting = true, &Bits);
1745 }
1746 case ISD::ZERO_EXTEND: {
1747 // We support only the case with zero extension from i32 to i64 so far.
1748 if (V.getValueType() != MVT::i64 ||
1749 V.getOperand(0).getValueType() != MVT::i32)
1750 break;
1751
1752 const SmallVector<ValueBit, 64> *LHSBits;
1753 const unsigned NumOperandBits = 32;
1754 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1755 NumOperandBits);
1756
1757 for (unsigned i = 0; i < NumOperandBits; ++i)
1758 Bits[i] = (*LHSBits)[i];
1759
1760 for (unsigned i = NumOperandBits; i < NumBits; ++i)
1761 Bits[i] = ValueBit(ValueBit::ConstZero);
1762
1763 return std::make_pair(Interesting, &Bits);
1764 }
1765 case ISD::TRUNCATE: {
1766 EVT FromType = V.getOperand(0).getValueType();
1767 EVT ToType = V.getValueType();
1768 // We support only the case with truncate from i64 to i32.
1769 if (FromType != MVT::i64 || ToType != MVT::i32)
1770 break;
1771 const unsigned NumAllBits = FromType.getSizeInBits();
1773 std::tie(Interesting, InBits) = getValueBits(V.getOperand(0),
1774 NumAllBits);
1775 const unsigned NumValidBits = ToType.getSizeInBits();
1776
1777 // A 32-bit instruction cannot touch upper 32-bit part of 64-bit value.
1778 // So, we cannot include this truncate.
1779 bool UseUpper32bit = false;
1780 for (unsigned i = 0; i < NumValidBits; ++i)
1781 if ((*InBits)[i].hasValue() && (*InBits)[i].getValueBitIndex() >= 32) {
1782 UseUpper32bit = true;
1783 break;
1784 }
1785 if (UseUpper32bit)
1786 break;
1787
1788 for (unsigned i = 0; i < NumValidBits; ++i)
1789 Bits[i] = (*InBits)[i];
1790
1791 return std::make_pair(Interesting, &Bits);
1792 }
1793 case ISD::AssertZext: {
1794 // For AssertZext, we look through the operand and
1795 // mark the bits known to be zero.
1796 const SmallVector<ValueBit, 64> *LHSBits;
1797 std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0),
1798 NumBits);
1799
1800 EVT FromType = cast<VTSDNode>(V.getOperand(1))->getVT();
1801 const unsigned NumValidBits = FromType.getSizeInBits();
1802 for (unsigned i = 0; i < NumValidBits; ++i)
1803 Bits[i] = (*LHSBits)[i];
1804
1805 // These bits are known to be zero but the AssertZext may be from a value
1806 // that already has some constant zero bits (i.e. from a masking and).
1807 for (unsigned i = NumValidBits; i < NumBits; ++i)
1808 Bits[i] = (*LHSBits)[i].hasValue()
1809 ? ValueBit((*LHSBits)[i].getValue(),
1810 (*LHSBits)[i].getValueBitIndex(),
1811 ValueBit::VariableKnownToBeZero)
1812 : ValueBit(ValueBit::ConstZero);
1813
1814 return std::make_pair(Interesting, &Bits);
1815 }
1816 case ISD::LOAD:
1817 LoadSDNode *LD = cast<LoadSDNode>(V);
1818 if (ISD::isZEXTLoad(V.getNode()) && V.getResNo() == 0) {
1819 EVT VT = LD->getMemoryVT();
1820 const unsigned NumValidBits = VT.getSizeInBits();
1821
1822 for (unsigned i = 0; i < NumValidBits; ++i)
1823 Bits[i] = ValueBit(V, i);
1824
1825 // These bits are known to be zero.
1826 for (unsigned i = NumValidBits; i < NumBits; ++i)
1827 Bits[i] = ValueBit(V, i, ValueBit::VariableKnownToBeZero);
1828
1829 // Zero-extending load itself cannot be optimized. So, it is not
1830 // interesting by itself though it gives useful information.
1831 return std::make_pair(Interesting = false, &Bits);
1832 }
1833 break;
1834 }
1835
1836 for (unsigned i = 0; i < NumBits; ++i)
1837 Bits[i] = ValueBit(V, i);
1838
1839 return std::make_pair(Interesting = false, &Bits);
1840 }
1841
1842 // For each value (except the constant ones), compute the left-rotate amount
1843 // to get it from its original to final position.
1844 void computeRotationAmounts() {
1845 NeedMask = false;
1846 RLAmt.resize(Bits.size());
1847 for (unsigned i = 0; i < Bits.size(); ++i)
1848 if (Bits[i].hasValue()) {
1849 unsigned VBI = Bits[i].getValueBitIndex();
1850 if (i >= VBI)
1851 RLAmt[i] = i - VBI;
1852 else
1853 RLAmt[i] = Bits.size() - (VBI - i);
1854 } else if (Bits[i].isZero()) {
1855 NeedMask = true;
1856 RLAmt[i] = UINT32_MAX;
1857 } else {
1858 llvm_unreachable("Unknown value bit type");
1859 }
1860 }
1861
1862 // Collect groups of consecutive bits with the same underlying value and
1863 // rotation factor. If we're doing late masking, we ignore zeros, otherwise
1864 // they break up groups.
1865 void collectBitGroups(bool LateMask) {
1866 BitGroups.clear();
1867
1868 unsigned LastRLAmt = RLAmt[0];
1869 SDValue LastValue = Bits[0].hasValue() ? Bits[0].getValue() : SDValue();
1870 unsigned LastGroupStartIdx = 0;
1871 bool IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1872 for (unsigned i = 1; i < Bits.size(); ++i) {
1873 unsigned ThisRLAmt = RLAmt[i];
1874 SDValue ThisValue = Bits[i].hasValue() ? Bits[i].getValue() : SDValue();
1875 if (LateMask && !ThisValue) {
1876 ThisValue = LastValue;
1877 ThisRLAmt = LastRLAmt;
1878 // If we're doing late masking, then the first bit group always starts
1879 // at zero (even if the first bits were zero).
1880 if (BitGroups.empty())
1881 LastGroupStartIdx = 0;
1882 }
1883
1884 // If this bit is known to be zero and the current group is a bit group
1885 // of zeros, we do not need to terminate the current bit group even the
1886 // Value or RLAmt does not match here. Instead, we terminate this group
1887 // when the first non-zero bit appears later.
1888 if (IsGroupOfZeros && Bits[i].isZero())
1889 continue;
1890
1891 // If this bit has the same underlying value and the same rotate factor as
1892 // the last one, then they're part of the same group.
1893 if (ThisRLAmt == LastRLAmt && ThisValue == LastValue)
1894 // We cannot continue the current group if this bits is not known to
1895 // be zero in a bit group of zeros.
1896 if (!(IsGroupOfZeros && ThisValue && !Bits[i].isZero()))
1897 continue;
1898
1899 if (LastValue.getNode())
1900 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1901 i-1));
1902 LastRLAmt = ThisRLAmt;
1903 LastValue = ThisValue;
1904 LastGroupStartIdx = i;
1905 IsGroupOfZeros = !Bits[LastGroupStartIdx].hasValue();
1906 }
1907 if (LastValue.getNode())
1908 BitGroups.push_back(BitGroup(LastValue, LastRLAmt, LastGroupStartIdx,
1909 Bits.size()-1));
1910
1911 if (BitGroups.empty())
1912 return;
1913
1914 // We might be able to combine the first and last groups.
1915 if (BitGroups.size() > 1) {
1916 // If the first and last groups are the same, then remove the first group
1917 // in favor of the last group, making the ending index of the last group
1918 // equal to the ending index of the to-be-removed first group.
1919 if (BitGroups[0].StartIdx == 0 &&
1920 BitGroups[BitGroups.size()-1].EndIdx == Bits.size()-1 &&
1921 BitGroups[0].V == BitGroups[BitGroups.size()-1].V &&
1922 BitGroups[0].RLAmt == BitGroups[BitGroups.size()-1].RLAmt) {
1923 LLVM_DEBUG(dbgs() << "\tcombining final bit group with initial one\n");
1924 BitGroups[BitGroups.size()-1].EndIdx = BitGroups[0].EndIdx;
1925 BitGroups.erase(BitGroups.begin());
1926 }
1927 }
1928 }
1929
1930 // Take all (SDValue, RLAmt) pairs and sort them by the number of groups
1931 // associated with each. If the number of groups are same, we prefer a group
1932 // which does not require rotate, i.e. RLAmt is 0, to avoid the first rotate
1933 // instruction. If there is a degeneracy, pick the one that occurs
1934 // first (in the final value).
1935 void collectValueRotInfo() {
1936 ValueRots.clear();
1937
1938 for (auto &BG : BitGroups) {
1939 unsigned RLAmtKey = BG.RLAmt + (BG.Repl32 ? 64 : 0);
1940 ValueRotInfo &VRI = ValueRots[std::make_pair(BG.V, RLAmtKey)];
1941 VRI.V = BG.V;
1942 VRI.RLAmt = BG.RLAmt;
1943 VRI.Repl32 = BG.Repl32;
1944 VRI.NumGroups += 1;
1945 VRI.FirstGroupStartIdx = std::min(VRI.FirstGroupStartIdx, BG.StartIdx);
1946 }
1947
1948 // Now that we've collected the various ValueRotInfo instances, we need to
1949 // sort them.
1950 ValueRotsVec.clear();
1951 for (auto &I : ValueRots) {
1952 ValueRotsVec.push_back(I.second);
1953 }
1954 llvm::sort(ValueRotsVec);
1955 }
1956
1957 // In 64-bit mode, rlwinm and friends have a rotation operator that
1958 // replicates the low-order 32 bits into the high-order 32-bits. The mask
1959 // indices of these instructions can only be in the lower 32 bits, so they
1960 // can only represent some 64-bit bit groups. However, when they can be used,
1961 // the 32-bit replication can be used to represent, as a single bit group,
1962 // otherwise separate bit groups. We'll convert to replicated-32-bit bit
1963 // groups when possible. Returns true if any of the bit groups were
1964 // converted.
1965 void assignRepl32BitGroups() {
1966 // If we have bits like this:
1967 //
1968 // Indices: 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
1969 // V bits: ... 7 6 5 4 3 2 1 0 31 30 29 28 27 26 25 24
1970 // Groups: | RLAmt = 8 | RLAmt = 40 |
1971 //
1972 // But, making use of a 32-bit operation that replicates the low-order 32
1973 // bits into the high-order 32 bits, this can be one bit group with a RLAmt
1974 // of 8.
1975
1976 auto IsAllLow32 = [this](BitGroup & BG) {
1977 if (BG.StartIdx <= BG.EndIdx) {
1978 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i) {
1979 if (!Bits[i].hasValue())
1980 continue;
1981 if (Bits[i].getValueBitIndex() >= 32)
1982 return false;
1983 }
1984 } else {
1985 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i) {
1986 if (!Bits[i].hasValue())
1987 continue;
1988 if (Bits[i].getValueBitIndex() >= 32)
1989 return false;
1990 }
1991 for (unsigned i = 0; i <= BG.EndIdx; ++i) {
1992 if (!Bits[i].hasValue())
1993 continue;
1994 if (Bits[i].getValueBitIndex() >= 32)
1995 return false;
1996 }
1997 }
1998
1999 return true;
2000 };
2001
2002 for (auto &BG : BitGroups) {
2003 // If this bit group has RLAmt of 0 and will not be merged with
2004 // another bit group, we don't benefit from Repl32. We don't mark
2005 // such group to give more freedom for later instruction selection.
2006 if (BG.RLAmt == 0) {
2007 auto PotentiallyMerged = [this](BitGroup & BG) {
2008 for (auto &BG2 : BitGroups)
2009 if (&BG != &BG2 && BG.V == BG2.V &&
2010 (BG2.RLAmt == 0 || BG2.RLAmt == 32))
2011 return true;
2012 return false;
2013 };
2014 if (!PotentiallyMerged(BG))
2015 continue;
2016 }
2017 if (BG.StartIdx < 32 && BG.EndIdx < 32) {
2018 if (IsAllLow32(BG)) {
2019 if (BG.RLAmt >= 32) {
2020 BG.RLAmt -= 32;
2021 BG.Repl32CR = true;
2022 }
2023
2024 BG.Repl32 = true;
2025
2026 LLVM_DEBUG(dbgs() << "\t32-bit replicated bit group for "
2027 << BG.V.getNode() << " RLAmt = " << BG.RLAmt << " ["
2028 << BG.StartIdx << ", " << BG.EndIdx << "]\n");
2029 }
2030 }
2031 }
2032
2033 // Now walk through the bit groups, consolidating where possible.
2034 for (auto I = BitGroups.begin(); I != BitGroups.end();) {
2035 // We might want to remove this bit group by merging it with the previous
2036 // group (which might be the ending group).
2037 auto IP = (I == BitGroups.begin()) ?
2038 std::prev(BitGroups.end()) : std::prev(I);
2039 if (I->Repl32 && IP->Repl32 && I->V == IP->V && I->RLAmt == IP->RLAmt &&
2040 I->StartIdx == (IP->EndIdx + 1) % 64 && I != IP) {
2041
2042 LLVM_DEBUG(dbgs() << "\tcombining 32-bit replicated bit group for "
2043 << I->V.getNode() << " RLAmt = " << I->RLAmt << " ["
2044 << I->StartIdx << ", " << I->EndIdx
2045 << "] with group with range [" << IP->StartIdx << ", "
2046 << IP->EndIdx << "]\n");
2047
2048 IP->EndIdx = I->EndIdx;
2049 IP->Repl32CR = IP->Repl32CR || I->Repl32CR;
2050 IP->Repl32Coalesced = true;
2051 I = BitGroups.erase(I);
2052 continue;
2053 } else {
2054 // There is a special case worth handling: If there is a single group
2055 // covering the entire upper 32 bits, and it can be merged with both
2056 // the next and previous groups (which might be the same group), then
2057 // do so. If it is the same group (so there will be only one group in
2058 // total), then we need to reverse the order of the range so that it
2059 // covers the entire 64 bits.
2060 if (I->StartIdx == 32 && I->EndIdx == 63) {
2061 assert(std::next(I) == BitGroups.end() &&
2062 "bit group ends at index 63 but there is another?");
2063 auto IN = BitGroups.begin();
2064
2065 if (IP->Repl32 && IN->Repl32 && I->V == IP->V && I->V == IN->V &&
2066 (I->RLAmt % 32) == IP->RLAmt && (I->RLAmt % 32) == IN->RLAmt &&
2067 IP->EndIdx == 31 && IN->StartIdx == 0 && I != IP &&
2068 IsAllLow32(*I)) {
2069
2070 LLVM_DEBUG(dbgs() << "\tcombining bit group for " << I->V.getNode()
2071 << " RLAmt = " << I->RLAmt << " [" << I->StartIdx
2072 << ", " << I->EndIdx
2073 << "] with 32-bit replicated groups with ranges ["
2074 << IP->StartIdx << ", " << IP->EndIdx << "] and ["
2075 << IN->StartIdx << ", " << IN->EndIdx << "]\n");
2076
2077 if (IP == IN) {
2078 // There is only one other group; change it to cover the whole
2079 // range (backward, so that it can still be Repl32 but cover the
2080 // whole 64-bit range).
2081 IP->StartIdx = 31;
2082 IP->EndIdx = 30;
2083 IP->Repl32CR = IP->Repl32CR || I->RLAmt >= 32;
2084 IP->Repl32Coalesced = true;
2085 I = BitGroups.erase(I);
2086 } else {
2087 // There are two separate groups, one before this group and one
2088 // after us (at the beginning). We're going to remove this group,
2089 // but also the group at the very beginning.
2090 IP->EndIdx = IN->EndIdx;
2091 IP->Repl32CR = IP->Repl32CR || IN->Repl32CR || I->RLAmt >= 32;
2092 IP->Repl32Coalesced = true;
2093 I = BitGroups.erase(I);
2094 BitGroups.erase(BitGroups.begin());
2095 }
2096
2097 // This must be the last group in the vector (and we might have
2098 // just invalidated the iterator above), so break here.
2099 break;
2100 }
2101 }
2102 }
2103
2104 ++I;
2105 }
2106 }
2107
2108 SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
2109 return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
2110 }
2111
2112 uint64_t getZerosMask() {
2113 uint64_t Mask = 0;
2114 for (unsigned i = 0; i < Bits.size(); ++i) {
2115 if (Bits[i].hasValue())
2116 continue;
2117 Mask |= (UINT64_C(1) << i);
2118 }
2119
2120 return ~Mask;
2121 }
2122
2123 // This method extends an input value to 64 bit if input is 32-bit integer.
2124 // While selecting instructions in BitPermutationSelector in 64-bit mode,
2125 // an input value can be a 32-bit integer if a ZERO_EXTEND node is included.
2126 // In such case, we extend it to 64 bit to be consistent with other values.
2127 SDValue ExtendToInt64(SDValue V, const SDLoc &dl) {
2128 if (V.getValueSizeInBits() == 64)
2129 return V;
2130
2131 assert(V.getValueSizeInBits() == 32);
2132 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2133 SDValue ImDef = SDValue(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
2134 MVT::i64), 0);
2135 SDValue ExtVal = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
2136 MVT::i64, ImDef, V,
2137 SubRegIdx), 0);
2138 return ExtVal;
2139 }
2140
2141 SDValue TruncateToInt32(SDValue V, const SDLoc &dl) {
2142 if (V.getValueSizeInBits() == 32)
2143 return V;
2144
2145 assert(V.getValueSizeInBits() == 64);
2146 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
2147 SDValue SubVal = SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl,
2148 MVT::i32, V, SubRegIdx), 0);
2149 return SubVal;
2150 }
2151
2152 // Depending on the number of groups for a particular value, it might be
2153 // better to rotate, mask explicitly (using andi/andis), and then or the
2154 // result. Select this part of the result first.
2155 void SelectAndParts32(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2157 return;
2158
2159 for (ValueRotInfo &VRI : ValueRotsVec) {
2160 unsigned Mask = 0;
2161 for (unsigned i = 0; i < Bits.size(); ++i) {
2162 if (!Bits[i].hasValue() || Bits[i].getValue() != VRI.V)
2163 continue;
2164 if (RLAmt[i] != VRI.RLAmt)
2165 continue;
2166 Mask |= (1u << i);
2167 }
2168
2169 // Compute the masks for andi/andis that would be necessary.
2170 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2171 assert((ANDIMask != 0 || ANDISMask != 0) &&
2172 "No set bits in mask for value bit groups");
2173 bool NeedsRotate = VRI.RLAmt != 0;
2174
2175 // We're trying to minimize the number of instructions. If we have one
2176 // group, using one of andi/andis can break even. If we have three
2177 // groups, we can use both andi and andis and break even (to use both
2178 // andi and andis we also need to or the results together). We need four
2179 // groups if we also need to rotate. To use andi/andis we need to do more
2180 // than break even because rotate-and-mask instructions tend to be easier
2181 // to schedule.
2182
2183 // FIXME: We've biased here against using andi/andis, which is right for
2184 // POWER cores, but not optimal everywhere. For example, on the A2,
2185 // andi/andis have single-cycle latency whereas the rotate-and-mask
2186 // instructions take two cycles, and it would be better to bias toward
2187 // andi/andis in break-even cases.
2188
2189 unsigned NumAndInsts = (unsigned) NeedsRotate +
2190 (unsigned) (ANDIMask != 0) +
2191 (unsigned) (ANDISMask != 0) +
2192 (unsigned) (ANDIMask != 0 && ANDISMask != 0) +
2193 (unsigned) (bool) Res;
2194
2195 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2196 << " RL: " << VRI.RLAmt << ":"
2197 << "\n\t\t\tisel using masking: " << NumAndInsts
2198 << " using rotates: " << VRI.NumGroups << "\n");
2199
2200 if (NumAndInsts >= VRI.NumGroups)
2201 continue;
2202
2203 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2204
2205 if (InstCnt) *InstCnt += NumAndInsts;
2206
2207 SDValue VRot;
2208 if (VRI.RLAmt) {
2209 SDValue Ops[] =
2210 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2211 getI32Imm(0, dl), getI32Imm(31, dl) };
2212 VRot = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
2213 Ops), 0);
2214 } else {
2215 VRot = TruncateToInt32(VRI.V, dl);
2216 }
2217
2218 SDValue ANDIVal, ANDISVal;
2219 if (ANDIMask != 0)
2220 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2221 VRot, getI32Imm(ANDIMask, dl)),
2222 0);
2223 if (ANDISMask != 0)
2224 ANDISVal =
2225 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, VRot,
2226 getI32Imm(ANDISMask, dl)),
2227 0);
2228
2229 SDValue TotalVal;
2230 if (!ANDIVal)
2231 TotalVal = ANDISVal;
2232 else if (!ANDISVal)
2233 TotalVal = ANDIVal;
2234 else
2235 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2236 ANDIVal, ANDISVal), 0);
2237
2238 if (!Res)
2239 Res = TotalVal;
2240 else
2241 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2242 Res, TotalVal), 0);
2243
2244 // Now, remove all groups with this underlying value and rotation
2245 // factor.
2246 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2247 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2248 });
2249 }
2250 }
2251
2252 // Instruction selection for the 32-bit case.
2253 SDNode *Select32(SDNode *N, bool LateMask, unsigned *InstCnt) {
2254 SDLoc dl(N);
2255 SDValue Res;
2256
2257 if (InstCnt) *InstCnt = 0;
2258
2259 // Take care of cases that should use andi/andis first.
2260 SelectAndParts32(dl, Res, InstCnt);
2261
2262 // If we've not yet selected a 'starting' instruction, and we have no zeros
2263 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2264 // number of groups), and start with this rotated value.
2265 if ((!NeedMask || LateMask) && !Res) {
2266 ValueRotInfo &VRI = ValueRotsVec[0];
2267 if (VRI.RLAmt) {
2268 if (InstCnt) *InstCnt += 1;
2269 SDValue Ops[] =
2270 { TruncateToInt32(VRI.V, dl), getI32Imm(VRI.RLAmt, dl),
2271 getI32Imm(0, dl), getI32Imm(31, dl) };
2272 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops),
2273 0);
2274 } else {
2275 Res = TruncateToInt32(VRI.V, dl);
2276 }
2277
2278 // Now, remove all groups with this underlying value and rotation factor.
2279 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2280 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt;
2281 });
2282 }
2283
2284 if (InstCnt) *InstCnt += BitGroups.size();
2285
2286 // Insert the other groups (one at a time).
2287 for (auto &BG : BitGroups) {
2288 if (!Res) {
2289 SDValue Ops[] =
2290 { TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2291 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2292 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2293 Res = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
2294 } else {
2295 SDValue Ops[] =
2296 { Res, TruncateToInt32(BG.V, dl), getI32Imm(BG.RLAmt, dl),
2297 getI32Imm(Bits.size() - BG.EndIdx - 1, dl),
2298 getI32Imm(Bits.size() - BG.StartIdx - 1, dl) };
2299 Res = SDValue(CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops), 0);
2300 }
2301 }
2302
2303 if (LateMask) {
2304 unsigned Mask = (unsigned) getZerosMask();
2305
2306 unsigned ANDIMask = (Mask & UINT16_MAX), ANDISMask = Mask >> 16;
2307 assert((ANDIMask != 0 || ANDISMask != 0) &&
2308 "No set bits in zeros mask?");
2309
2310 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2311 (unsigned) (ANDISMask != 0) +
2312 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2313
2314 SDValue ANDIVal, ANDISVal;
2315 if (ANDIMask != 0)
2316 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI_rec, dl, MVT::i32,
2317 Res, getI32Imm(ANDIMask, dl)),
2318 0);
2319 if (ANDISMask != 0)
2320 ANDISVal =
2321 SDValue(CurDAG->getMachineNode(PPC::ANDIS_rec, dl, MVT::i32, Res,
2322 getI32Imm(ANDISMask, dl)),
2323 0);
2324
2325 if (!ANDIVal)
2326 Res = ANDISVal;
2327 else if (!ANDISVal)
2328 Res = ANDIVal;
2329 else
2330 Res = SDValue(CurDAG->getMachineNode(PPC::OR, dl, MVT::i32,
2331 ANDIVal, ANDISVal), 0);
2332 }
2333
2334 return Res.getNode();
2335 }
2336
// Return how many instructions (1 or 2) a 64-bit rotate-and-mask takes for
// the given rotation amount and mask range [MaskStart, MaskEnd]; IsIns selects
// the rotate-mask-and-insert variant. This mirrors the cases handled by
// SelectRotMask64 and SelectRotMaskIns64 without emitting anything.
unsigned SelectRotMask64Count(unsigned RLAmt, bool Repl32,
                              unsigned MaskStart, unsigned MaskEnd,
                              bool IsIns) {
  // The 32-bit replicating forms are always a single instruction.
  if (Repl32)
    return 1;

  // In the notation used by the instructions, 'start' and 'end' are reversed
  // because bits are counted from high to low order.
  const unsigned InstMaskStart = 63 - MaskEnd;
  const unsigned InstMaskEnd = 63 - MaskStart;

  // A mask that ends exactly where the rotation puts the low-order bit works
  // both with and without insertion.
  const bool EndsAtRotation = InstMaskEnd == 63 - RLAmt;
  // Without insertion, a mask anchored at either end of the register is also
  // a single instruction.
  const bool AnchoredAtAnEnd =
      !IsIns && (InstMaskEnd == 63 || InstMaskStart == 0);

  return (EndsAtRotation || AnchoredAtAnEnd) ? 1 : 2;
}
2354
2355 // For 64-bit values, not all combinations of rotates and masks are
2356 // available. Produce one if it is available.
2357 SDValue SelectRotMask64(SDValue V, const SDLoc &dl, unsigned RLAmt,
2358 bool Repl32, unsigned MaskStart, unsigned MaskEnd,
2359 unsigned *InstCnt = nullptr) {
2360 // In the notation used by the instructions, 'start' and 'end' are reversed
2361 // because bits are counted from high to low order.
2362 unsigned InstMaskStart = 64 - MaskEnd - 1,
2363 InstMaskEnd = 64 - MaskStart - 1;
2364
2365 if (InstCnt) *InstCnt += 1;
2366
2367 if (Repl32) {
2368 // This rotation amount assumes that the lower 32 bits of the quantity
2369 // are replicated in the high 32 bits by the rotation operator (which is
2370 // done by rlwinm and friends).
2371 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2372 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2373 SDValue Ops[] =
2374 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2375 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2376 return SDValue(CurDAG->getMachineNode(PPC::RLWINM8, dl, MVT::i64,
2377 Ops), 0);
2378 }
2379
2380 if (InstMaskEnd == 63) {
2381 SDValue Ops[] =
2382 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2383 getI32Imm(InstMaskStart, dl) };
2384 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Ops), 0);
2385 }
2386
2387 if (InstMaskStart == 0) {
2388 SDValue Ops[] =
2389 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2390 getI32Imm(InstMaskEnd, dl) };
2391 return SDValue(CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64, Ops), 0);
2392 }
2393
2394 if (InstMaskEnd == 63 - RLAmt) {
2395 SDValue Ops[] =
2396 { ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2397 getI32Imm(InstMaskStart, dl) };
2398 return SDValue(CurDAG->getMachineNode(PPC::RLDIC, dl, MVT::i64, Ops), 0);
2399 }
2400
2401 // We cannot do this with a single instruction, so we'll use two. The
2402 // problem is that we're not free to choose both a rotation amount and mask
2403 // start and end independently. We can choose an arbitrary mask start and
2404 // end, but then the rotation amount is fixed. Rotation, however, can be
2405 // inverted, and so by applying an "inverse" rotation first, we can get the
2406 // desired result.
2407 if (InstCnt) *InstCnt += 1;
2408
2409 // The rotation mask for the second instruction must be MaskStart.
2410 unsigned RLAmt2 = MaskStart;
2411 // The first instruction must rotate V so that the overall rotation amount
2412 // is RLAmt.
2413 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2414 if (RLAmt1)
2415 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2416 return SelectRotMask64(V, dl, RLAmt2, false, MaskStart, MaskEnd);
2417 }
2418
2419 // For 64-bit values, not all combinations of rotates and masks are
2420 // available. Produce a rotate-mask-and-insert if one is available.
2421 SDValue SelectRotMaskIns64(SDValue Base, SDValue V, const SDLoc &dl,
2422 unsigned RLAmt, bool Repl32, unsigned MaskStart,
2423 unsigned MaskEnd, unsigned *InstCnt = nullptr) {
2424 // In the notation used by the instructions, 'start' and 'end' are reversed
2425 // because bits are counted from high to low order.
2426 unsigned InstMaskStart = 64 - MaskEnd - 1,
2427 InstMaskEnd = 64 - MaskStart - 1;
2428
2429 if (InstCnt) *InstCnt += 1;
2430
2431 if (Repl32) {
2432 // This rotation amount assumes that the lower 32 bits of the quantity
2433 // are replicated in the high 32 bits by the rotation operator (which is
2434 // done by rlwinm and friends).
2435 assert(InstMaskStart >= 32 && "Mask cannot start out of range");
2436 assert(InstMaskEnd >= 32 && "Mask cannot end out of range");
2437 SDValue Ops[] =
2438 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2439 getI32Imm(InstMaskStart - 32, dl), getI32Imm(InstMaskEnd - 32, dl) };
2440 return SDValue(CurDAG->getMachineNode(PPC::RLWIMI8, dl, MVT::i64,
2441 Ops), 0);
2442 }
2443
2444 if (InstMaskEnd == 63 - RLAmt) {
2445 SDValue Ops[] =
2446 { ExtendToInt64(Base, dl), ExtendToInt64(V, dl), getI32Imm(RLAmt, dl),
2447 getI32Imm(InstMaskStart, dl) };
2448 return SDValue(CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops), 0);
2449 }
2450
2451 // We cannot do this with a single instruction, so we'll use two. The
2452 // problem is that we're not free to choose both a rotation amount and mask
2453 // start and end independently. We can choose an arbitrary mask start and
2454 // end, but then the rotation amount is fixed. Rotation, however, can be
2455 // inverted, and so by applying an "inverse" rotation first, we can get the
2456 // desired result.
2457 if (InstCnt) *InstCnt += 1;
2458
2459 // The rotation mask for the second instruction must be MaskStart.
2460 unsigned RLAmt2 = MaskStart;
2461 // The first instruction must rotate V so that the overall rotation amount
2462 // is RLAmt.
2463 unsigned RLAmt1 = (64 + RLAmt - RLAmt2) % 64;
2464 if (RLAmt1)
2465 V = SelectRotMask64(V, dl, RLAmt1, false, 0, 63);
2466 return SelectRotMaskIns64(Base, V, dl, RLAmt2, false, MaskStart, MaskEnd);
2467 }
2468
2469 void SelectAndParts64(const SDLoc &dl, SDValue &Res, unsigned *InstCnt) {
2471 return;
2472
2473 // The idea here is the same as in the 32-bit version, but with additional
2474 // complications from the fact that Repl32 might be true. Because we
2475 // aggressively convert bit groups to Repl32 form (which, for small
2476 // rotation factors, involves no other change), and then coalesce, it might
2477 // be the case that a single 64-bit masking operation could handle both
2478 // some Repl32 groups and some non-Repl32 groups. If converting to Repl32
2479 // form allowed coalescing, then we must use a 32-bit rotaton in order to
2480 // completely capture the new combined bit group.
2481
2482 for (ValueRotInfo &VRI : ValueRotsVec) {
2483 uint64_t Mask = 0;
2484
2485 // We need to add to the mask all bits from the associated bit groups.
2486 // If Repl32 is false, we need to add bits from bit groups that have
2487 // Repl32 true, but are trivially convertable to Repl32 false. Such a
2488 // group is trivially convertable if it overlaps only with the lower 32
2489 // bits, and the group has not been coalesced.
2490 auto MatchingBG = [VRI](const BitGroup &BG) {
2491 if (VRI.V != BG.V)
2492 return false;
2493
2494 unsigned EffRLAmt = BG.RLAmt;
2495 if (!VRI.Repl32 && BG.Repl32) {
2496 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx <= BG.EndIdx &&
2497 !BG.Repl32Coalesced) {
2498 if (BG.Repl32CR)
2499 EffRLAmt += 32;
2500 } else {
2501 return false;
2502 }
2503 } else if (VRI.Repl32 != BG.Repl32) {
2504 return false;
2505 }
2506
2507 return VRI.RLAmt == EffRLAmt;
2508 };
2509
2510 for (auto &BG : BitGroups) {
2511 if (!MatchingBG(BG))
2512 continue;
2513
2514 if (BG.StartIdx <= BG.EndIdx) {
2515 for (unsigned i = BG.StartIdx; i <= BG.EndIdx; ++i)
2516 Mask |= (UINT64_C(1) << i);
2517 } else {
2518 for (unsigned i = BG.StartIdx; i < Bits.size(); ++i)
2519 Mask |= (UINT64_C(1) << i);
2520 for (unsigned i = 0; i <= BG.EndIdx; ++i)
2521 Mask |= (UINT64_C(1) << i);
2522 }
2523 }
2524
2525 // We can use the 32-bit andi/andis technique if the mask does not
2526 // require any higher-order bits. This can save an instruction compared
2527 // to always using the general 64-bit technique.
2528 bool Use32BitInsts = isUInt<32>(Mask);
2529 // Compute the masks for andi/andis that would be necessary.
2530 unsigned ANDIMask = (Mask & UINT16_MAX),
2531 ANDISMask = (Mask >> 16) & UINT16_MAX;
2532
2533 bool NeedsRotate = VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask));
2534
2535 unsigned NumAndInsts = (unsigned) NeedsRotate +
2536 (unsigned) (bool) Res;
2537 unsigned NumOfSelectInsts = 0;
2538 selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts);
2539 assert(NumOfSelectInsts > 0 && "Failed to select an i64 constant.");
2540 if (Use32BitInsts)
2541 NumAndInsts += (unsigned) (ANDIMask != 0) + (unsigned) (ANDISMask != 0) +
2542 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2543 else
2544 NumAndInsts += NumOfSelectInsts + /* and */ 1;
2545
2546 unsigned NumRLInsts = 0;
2547 bool FirstBG = true;
2548 bool MoreBG = false;
2549 for (auto &BG : BitGroups) {
2550 if (!MatchingBG(BG)) {
2551 MoreBG = true;
2552 continue;
2553 }
2554 NumRLInsts +=
2555 SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
2556 !FirstBG);
2557 FirstBG = false;
2558 }
2559
2560 LLVM_DEBUG(dbgs() << "\t\trotation groups for " << VRI.V.getNode()
2561 << " RL: " << VRI.RLAmt << (VRI.Repl32 ? " (32):" : ":")
2562 << "\n\t\t\tisel using masking: " << NumAndInsts
2563 << " using rotates: " << NumRLInsts << "\n");
2564
2565 // When we'd use andi/andis, we bias toward using the rotates (andi only
2566 // has a record form, and is cracked on POWER cores). However, when using
2567 // general 64-bit constant formation, bias toward the constant form,
2568 // because that exposes more opportunities for CSE.
2569 if (NumAndInsts > NumRLInsts)
2570 continue;
2571 // When merging multiple bit groups, instruction or is used.
2572 // But when rotate is used, rldimi can insert the rotated value into any
2573 // register, so instruction or can be avoided.
2574 if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
2575 continue;
2576
2577 LLVM_DEBUG(dbgs() << "\t\t\t\tusing masking\n");
2578
2579 if (InstCnt) *InstCnt += NumAndInsts;
2580
2581 SDValue VRot;
2582 // We actually need to generate a rotation if we have a non-zero rotation
2583 // factor or, in the Repl32 case, if we care about any of the
2584 // higher-order replicated bits. In the latter case, we generate a mask
2585 // backward so that it actually includes the entire 64 bits.
2586 if (VRI.RLAmt || (VRI.Repl32 && !isUInt<32>(Mask)))
2587 VRot = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2588 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63);
2589 else
2590 VRot = VRI.V;
2591
2592 SDValue TotalVal;
2593 if (Use32BitInsts) {
2594 assert((ANDIMask != 0 || ANDISMask != 0) &&
2595 "No set bits in mask when using 32-bit ands for 64-bit value");
2596
2597 SDValue ANDIVal, ANDISVal;
2598 if (ANDIMask != 0)
2599 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2600 ExtendToInt64(VRot, dl),
2601 getI32Imm(ANDIMask, dl)),
2602 0);
2603 if (ANDISMask != 0)
2604 ANDISVal =
2605 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2606 ExtendToInt64(VRot, dl),
2607 getI32Imm(ANDISMask, dl)),
2608 0);
2609
2610 if (!ANDIVal)
2611 TotalVal = ANDISVal;
2612 else if (!ANDISVal)
2613 TotalVal = ANDIVal;
2614 else
2615 TotalVal = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2616 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2617 } else {
2618 TotalVal = SDValue(selectI64Imm(CurDAG, dl, Mask), 0);
2619 TotalVal =
2620 SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2621 ExtendToInt64(VRot, dl), TotalVal),
2622 0);
2623 }
2624
2625 if (!Res)
2626 Res = TotalVal;
2627 else
2628 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2629 ExtendToInt64(Res, dl), TotalVal),
2630 0);
2631
2632 // Now, remove all groups with this underlying value and rotation
2633 // factor.
2634 eraseMatchingBitGroups(MatchingBG);
2635 }
2636 }
2637
2638 // Instruction selection for the 64-bit case.
     // Builds the machine-node sequence for the value described by BitGroups and
     // ValueRotsVec and returns the node of the final result.
     //   N        - only used here to derive the SDLoc of the new nodes.
     //   LateMask - when true, a starting value may be chosen even if NeedMask is
     //              set, and the result is ANDed with getZerosMask() at the end.
     //   InstCnt  - optional; when non-null it is reset to 0, handed to the
     //              helpers called below and incremented by the late-mask code.
2639 SDNode *Select64(SDNode *N, bool LateMask, unsigned *InstCnt) {
2640 SDLoc dl(N);
2641 SDValue Res;
2642
2643 if (InstCnt) *InstCnt = 0;
2644
2645 // Take care of cases that should use andi/andis first.
2646 SelectAndParts64(dl, Res, InstCnt);
2647
2648 // If we've not yet selected a 'starting' instruction, and we have no zeros
2649 // to fill in, select the (Value, RLAmt) with the highest priority (largest
2650 // number of groups), and start with this rotated value.
2651 if ((!NeedMask || LateMask) && !Res) {
2652 // If we have both Repl32 groups and non-Repl32 groups, the non-Repl32
2653 // groups will come first, and so the VRI representing the largest number
2654 // of groups might not be first (it might be the first Repl32 groups).
2655 unsigned MaxGroupsIdx = 0;
2656 if (!ValueRotsVec[0].Repl32) {
     // Only the first Repl32 entry is compared against entry 0; the loop
     // stops at that entry whether or not it wins.
2657 for (unsigned i = 0, ie = ValueRotsVec.size(); i < ie; ++i)
2658 if (ValueRotsVec[i].Repl32) {
2659 if (ValueRotsVec[i].NumGroups > ValueRotsVec[0].NumGroups)
2660 MaxGroupsIdx = i;
2661 break;
2662 }
2663 }
2664
2665 ValueRotInfo &VRI = ValueRotsVec[MaxGroupsIdx];
     // A rotate is needed for a non-zero rotation amount, or for a Repl32
     // value with at least one matching group that is not confined to the
     // lower 32 bits.
2666 bool NeedsRotate = false;
2667 if (VRI.RLAmt) {
2668 NeedsRotate = true;
2669 } else if (VRI.Repl32) {
2670 for (auto &BG : BitGroups) {
2671 if (BG.V != VRI.V || BG.RLAmt != VRI.RLAmt ||
2672 BG.Repl32 != VRI.Repl32)
2673 continue;
2674
2675 // We don't need a rotate if the bit group is confined to the lower
2676 // 32 bits.
     // NOTE(review): the comparison is strict (StartIdx < EndIdx), so a
     // one-bit group (StartIdx == EndIdx) in the lower half still requests
     // a rotate; the similar test in the andi/andis matching code uses
     // StartIdx <= EndIdx - confirm the difference is intended.
2677 if (BG.StartIdx < 32 && BG.EndIdx < 32 && BG.StartIdx < BG.EndIdx)
2678 continue;
2679
2680 NeedsRotate = true;
2681 break;
2682 }
2683 }
2684
     // For Repl32 the mask bounds 31..30 are passed (start above end);
     // otherwise the full range 0..63.
2685 if (NeedsRotate)
2686 Res = SelectRotMask64(VRI.V, dl, VRI.RLAmt, VRI.Repl32,
2687 VRI.Repl32 ? 31 : 0, VRI.Repl32 ? 30 : 63,
2688 InstCnt);
2689 else
2690 Res = VRI.V;
2691
2692 // Now, remove all groups with this underlying value and rotation factor.
2693 if (Res)
2694 eraseMatchingBitGroups([VRI](const BitGroup &BG) {
2695 return BG.V == VRI.V && BG.RLAmt == VRI.RLAmt &&
2696 BG.Repl32 == VRI.Repl32;
2697 });
2698 }
2699
2700 // Because 64-bit rotates are more flexible than inserts, we might have a
2701 // preference regarding which one we do first (to save one instruction).
     // The first group that is cheaper as a plain rotate-and-mask (last argument
     // false) than with the last argument true is moved to the front, so that
     // the loop below emits it first.
2702 if (!Res)
2703 for (auto I = BitGroups.begin(), IE = BitGroups.end(); I != IE; ++I) {
2704 if (SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2705 false) <
2706 SelectRotMask64Count(I->RLAmt, I->Repl32, I->StartIdx, I->EndIdx,
2707 true)) {
2708 if (I != BitGroups.begin()) {
     // Copy the group first: erase() invalidates I. The loop is left
     // immediately afterwards, so I is not used again.
2709 BitGroup BG = *I;
2710 BitGroups.erase(I);
2711 BitGroups.insert(BitGroups.begin(), BG);
2712 }
2713
2714 break;
2715 }
2716 }
2717
2718 // Insert the other groups (one at a time).
     // The first group creates the value when nothing has been selected yet;
     // every later group is inserted into the accumulated result.
2719 for (auto &BG : BitGroups) {
2720 if (!Res)
2721 Res = SelectRotMask64(BG.V, dl, BG.RLAmt, BG.Repl32, BG.StartIdx,
2722 BG.EndIdx, InstCnt);
2723 else
2724 Res = SelectRotMaskIns64(Res, BG.V, dl, BG.RLAmt, BG.Repl32,
2725 BG.StartIdx, BG.EndIdx, InstCnt);
2726 }
2727
2728 if (LateMask) {
     // NOTE(review): getZerosMask() is defined outside this excerpt;
     // presumably it has a 1 for every result bit that is not a known zero.
2729 uint64_t Mask = getZerosMask();
2730
2731 // We can use the 32-bit andi/andis technique if the mask does not
2732 // require any higher-order bits. This can save an instruction compared
2733 // to always using the general 64-bit technique.
2734 bool Use32BitInsts = isUInt<32>(Mask);
2735 // Compute the masks for andi/andis that would be necessary.
2736 unsigned ANDIMask = (Mask & UINT16_MAX),
2737 ANDISMask = (Mask >> 16) & UINT16_MAX;
2738
2739 if (Use32BitInsts) {
2740 assert((ANDIMask != 0 || ANDISMask != 0) &&
2741 "No set bits in mask when using 32-bit ands for 64-bit value");
2742
     // One instruction per non-zero 16-bit half, plus one OR to combine
     // them when both halves are non-zero.
2743 if (InstCnt) *InstCnt += (unsigned) (ANDIMask != 0) +
2744 (unsigned) (ANDISMask != 0) +
2745 (unsigned) (ANDIMask != 0 && ANDISMask != 0);
2746
2747 SDValue ANDIVal, ANDISVal;
2748 if (ANDIMask != 0)
2749 ANDIVal = SDValue(CurDAG->getMachineNode(PPC::ANDI8_rec, dl, MVT::i64,
2750 ExtendToInt64(Res, dl),
2751 getI32Imm(ANDIMask, dl)),
2752 0);
2753 if (ANDISMask != 0)
2754 ANDISVal =
2755 SDValue(CurDAG->getMachineNode(PPC::ANDIS8_rec, dl, MVT::i64,
2756 ExtendToInt64(Res, dl),
2757 getI32Imm(ANDISMask, dl)),
2758 0);
2759
2760 if (!ANDIVal)
2761 Res = ANDISVal;
2762 else if (!ANDISVal)
2763 Res = ANDIVal;
2764 else
2765 Res = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
2766 ExtendToInt64(ANDIVal, dl), ANDISVal), 0);
2767 } else {
     // General case: materialize the 64-bit mask and AND it in; the cost is
     // the constant's instruction count plus one for the AND itself.
2768 unsigned NumOfSelectInsts = 0;
2769 SDValue MaskVal =
2770 SDValue(selectI64Imm(CurDAG, dl, Mask, &NumOfSelectInsts), 0);
2771 Res = SDValue(CurDAG->getMachineNode(PPC::AND8, dl, MVT::i64,
2772 ExtendToInt64(Res, dl), MaskVal),
2773 0);
2774 if (InstCnt)
2775 *InstCnt += NumOfSelectInsts + /* and */ 1;
2776 }
2777 }
2778
2779 return Res.getNode();
2780 }
2781
2782 SDNode *Select(SDNode *N, bool LateMask, unsigned *InstCnt = nullptr) {
2783 // Fill in BitGroups.
2784 collectBitGroups(LateMask);
2785 if (BitGroups.empty())
2786 return nullptr;
2787
2788 // For 64-bit values, figure out when we can use 32-bit instructions.
2789 if (Bits.size() == 64)
2790 assignRepl32BitGroups();
2791
2792 // Fill in ValueRotsVec.
2793 collectValueRotInfo();
2794
2795 if (Bits.size() == 32) {
2796 return Select32(N, LateMask, InstCnt);
2797 } else {
2798 assert(Bits.size() == 64 && "Not 64 bits here?");
2799 return Select64(N, LateMask, InstCnt);
2800 }
2801
2802 return nullptr;
2803 }
2804
2805 void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
2806 erase_if(BitGroups, F);
2807 }
2808
2810
     // Set by computeRotationAmounts(). When false, the public Select() runs a
     // single selection attempt instead of comparing early and late masking.
2811 bool NeedMask = false;
2813
     // Filled in by collectBitGroups(); entries are removed again through
     // eraseMatchingBitGroups() once they have been handled.
2814 SmallVector<BitGroup, 16> BitGroups;
2815
     // NOTE(review): not referenced in this part of the file; presumably the
     // keyed working set from which ValueRotsVec is built - confirm.
2816 DenseMap<std::pair<SDValue, unsigned>, ValueRotInfo> ValueRots;
     // Filled in by collectValueRotInfo().
2817 SmallVector<ValueRotInfo, 16> ValueRotsVec;
2818
     // DAG in which the replacement nodes are created; set by the constructor.
2819 SelectionDAG *CurDAG = nullptr;
2820
2821public:
2822 BitPermutationSelector(SelectionDAG *DAG)
2823 : CurDAG(DAG) {}
2824
2825 // Here we try to match complex bit permutations into a set of
2826 // rotate-and-shift/shift/and/or instructions, using a set of heuristics
2827 // known to produce optimal code for common cases (like i32 byte swapping).
2828 SDNode *Select(SDNode *N) {
2829 Memoizer.clear();
2830 auto Result =
2831 getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
2832 if (!Result.first)
2833 return nullptr;
2834 Bits = std::move(*Result.second);
2835
2836 LLVM_DEBUG(dbgs() << "Considering bit-permutation-based instruction"
2837 " selection for: ");
2838 LLVM_DEBUG(N->dump(CurDAG));
2839
2840 // Fill it RLAmt and set NeedMask.
2841 computeRotationAmounts();
2842
2843 if (!NeedMask)
2844 return Select(N, false);
2845
2846 // We currently have two techniques for handling results with zeros: early
2847 // masking (the default) and late masking. Late masking is sometimes more
2848 // efficient, but because the structure of the bit groups is different, it
2849 // is hard to tell without generating both and comparing the results. With
2850 // late masking, we ignore zeros in the resulting value when inserting each
2851 // set of bit groups, and then mask in the zeros at the end. With early
2852 // masking, we only insert the non-zero parts of the result at every step.
2853
2854 unsigned InstCnt = 0, InstCntLateMask = 0;
2855 LLVM_DEBUG(dbgs() << "\tEarly masking:\n");
2856 SDNode *RN = Select(N, false, &InstCnt);
2857 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCnt << " instructions\n");
2858
2859 LLVM_DEBUG(dbgs() << "\tLate masking:\n");
2860 SDNode *RNLM = Select(N, true, &InstCntLateMask);
2861 LLVM_DEBUG(dbgs() << "\t\tisel would use " << InstCntLateMask
2862 << " instructions\n");
2863
2864 if (InstCnt <= InstCntLateMask) {
2865 LLVM_DEBUG(dbgs() << "\tUsing early-masking for isel\n");
2866 return RN;
2867 }
2868
2869 LLVM_DEBUG(dbgs() << "\tUsing late-masking for isel\n");
2870 return RNLM;
2871 }
2872};
2873
    // Helper that selects extensions of comparisons and logic operations on i1
    // comparison results as GPR instruction sequences. Select() is the only
    // public entry point besides the constructor.
2874class IntegerCompareEliminator {
     // DAG in which replacement nodes are created.
2875 SelectionDAG *CurDAG;
     // Owning instruction selector; used in this file for its immediate helpers
     // (getI32Imm/getI64Imm).
2876 PPCDAGToDAGISel *S;
2877 // Conversion type for interpreting results of a 32-bit instruction as
2878 // a 64-bit value or vice versa.
2879 enum ExtOrTruncConversion { Ext, Trunc };
2880
2881 // Modifiers to guide how an ISD::SETCC node's result is to be computed
2882 // in a GPR.
2883 // ZExtOrig - use the original condition code, zero-extend value
2884 // ZExtInvert - invert the condition code, zero-extend value
2885 // SExtOrig - use the original condition code, sign-extend value
2886 // SExtInvert - invert the condition code, sign-extend value
2887 enum SetccInGPROpts { ZExtOrig, ZExtInvert, SExtOrig, SExtInvert };
2888
2889 // Comparisons against zero to emit GPR code sequences for. Each of these
2890 // sequences may need to be emitted for two or more equivalent patterns.
2891 // For example (a >= 0) == (a > -1). The direction of the comparison (</>)
2892 // matters as well as the extension type: sext (-1/0), zext (1/0).
2893 // GEZExt - (zext (LHS >= 0))
2894 // GESExt - (sext (LHS >= 0))
2895 // LEZExt - (zext (LHS <= 0))
2896 // LESExt - (sext (LHS <= 0))
2897 enum ZeroCompare { GEZExt, GESExt, LEZExt, LESExt };
2898
     // Implementation helpers, defined out of line.
2899 SDNode *tryEXTEND(SDNode *N);
2900 SDNode *tryLogicOpOfCompares(SDNode *N);
2901 SDValue computeLogicOpInGPR(SDValue LogicOp);
2902 SDValue signExtendInputIfNeeded(SDValue Input);
2903 SDValue zeroExtendInputIfNeeded(SDValue Input);
2904 SDValue addExtOrTrunc(SDValue NatWidthRes, ExtOrTruncConversion Conv);
2905 SDValue getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
2906 ZeroCompare CmpTy);
2907 SDValue get32BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2908 int64_t RHSValue, SDLoc dl);
2909 SDValue get32BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2910 int64_t RHSValue, SDLoc dl);
2911 SDValue get64BitZExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2912 int64_t RHSValue, SDLoc dl);
2913 SDValue get64BitSExtCompare(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2914 int64_t RHSValue, SDLoc dl);
2915 SDValue getSETCCInGPR(SDValue Compare, SetccInGPROpts ConvOpts);
2916
2917public:
     // Stores the DAG and the selector. The body checks that the pointer type
     // is 64 bits wide ("Only expecting to use this on 64 bit targets.").
     // NOTE(review): listing line 2920, which opens that check, is missing
     // from this extract.
2918 IntegerCompareEliminator(SelectionDAG *DAG,
2919 PPCDAGToDAGISel *Sel) : CurDAG(DAG), S(Sel) {
2921 .getPointerTy(CurDAG->getDataLayout()).getSizeInBits() == 64 &&
2922 "Only expecting to use this on 64 bit targets.");
2923 }
     // Dispatches on the opcode of N: zero/sign extends go to tryEXTEND(),
     // AND/OR/XOR go to tryLogicOpOfCompares(). Returns nullptr right away when
     // CmpInGPR is ICGPR_None, and nullptr for every other opcode.
2924 SDNode *Select(SDNode *N) {
2925 if (CmpInGPR == ICGPR_None)
2926 return nullptr;
2927 switch (N->getOpcode()) {
2928 default: break;
     // NOTE(review): the conditions guarding the two early returns below
     // (listing lines 2930-2931 and 2935-2936) are missing from this extract.
2929 case ISD::ZERO_EXTEND:
2932 return nullptr;
2933 [[fallthrough]];
2934 case ISD::SIGN_EXTEND:
2937 return nullptr;
2938 return tryEXTEND(N);
2939 case ISD::AND:
2940 case ISD::OR:
2941 case ISD::XOR:
2942 return tryLogicOpOfCompares(N);
2943 }
2944 return nullptr;
2945 }
2946};
2947
2948// The obvious case for wanting to keep the value in a GPR. Namely, the
2949// result of the comparison is actually needed in a GPR.
2950SDNode *IntegerCompareEliminator::tryEXTEND(SDNode *N) {
2951 assert((N->getOpcode() == ISD::ZERO_EXTEND ||
2952 N->getOpcode() == ISD::SIGN_EXTEND) &&
2953 "Expecting a zero/sign extend node!");
2954 SDValue WideRes;
2955 // If we are zero-extending the result of a logical operation on i1
2956 // values, we can keep the values in GPRs.
2957 if (ISD::isBitwiseLogicOp(N->getOperand(0).getOpcode()) &&
2958 N->getOperand(0).getValueType() == MVT::i1 &&
2959 N->getOpcode() == ISD::ZERO_EXTEND)
2960 WideRes = computeLogicOpInGPR(N->getOperand(0));
2961 else if (N->getOperand(0).getOpcode() != ISD::SETCC)
2962 return nullptr;
2963 else
2964 WideRes =
2965 getSETCCInGPR(N->getOperand(0),
2966 N->getOpcode() == ISD::SIGN_EXTEND ?
2967 SetccInGPROpts::SExtOrig : SetccInGPROpts::ZExtOrig);
2968
2969 if (!WideRes)
2970 return nullptr;
2971
2972 SDLoc dl(N);
2973 bool Input32Bit = WideRes.getValueType() == MVT::i32;
2974 bool Output32Bit = N->getValueType(0) == MVT::i32;
2975
2976 NumSextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 1 : 0;
2977 NumZextSetcc += N->getOpcode() == ISD::SIGN_EXTEND ? 0 : 1;
2978
2979 SDValue ConvOp = WideRes;
2980 if (Input32Bit != Output32Bit)
2981 ConvOp = addExtOrTrunc(WideRes, Input32Bit ? ExtOrTruncConversion::Ext :
2982 ExtOrTruncConversion::Trunc);
2983 return ConvOp.getNode();
2984}
2985
2986// Attempt to perform logical operations on the results of comparisons while
2987// keeping the values in GPRs. Without doing so, these would end up being
2988// lowered to CR-logical operations which suffer from significant latency and
2989// low ILP.
    // N is an AND/OR/XOR node. Returns nullptr unless N produces an i1 and
    // computeLogicOpInGPR() succeeds; otherwise returns an EXTRACT_SUBREG node
    // that reads one bit of CR0 set by a record-form instruction.
2990SDNode *IntegerCompareEliminator::tryLogicOpOfCompares(SDNode *N) {
2991 if (N->getValueType(0) != MVT::i1)
2992 return nullptr;
2993 assert(ISD::isBitwiseLogicOp(N->getOpcode()) &&
2994 "Expected a logic operation on setcc results.");
2995 SDValue LoweredLogical = computeLogicOpInGPR(SDValue(N, 0));
2996 if (!LoweredLogical)
2997 return nullptr;
2998
2999 SDLoc dl(N);
     // computeLogicOpInGPR() emits XORI8 only for a bitwise negation, so the
     // opcode of its result identifies that case.
3000 bool IsBitwiseNegate = LoweredLogical.getMachineOpcode() == PPC::XORI8;
3001 unsigned SubRegToExtract = IsBitwiseNegate ? PPC::sub_eq : PPC::sub_gt;
3002 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
3003 SDValue LHS = LoweredLogical.getOperand(0);
3004 SDValue RHS = LoweredLogical.getOperand(1);
3005 SDValue WideOp;
3006 SDValue OpToConvToRecForm;
3007
3008 // Look through any 32-bit to 64-bit implicit extend nodes to find the
3009 // opcode that is input to the XORI.
3010 if (IsBitwiseNegate &&
3011 LoweredLogical.getOperand(0).getMachineOpcode() == PPC::INSERT_SUBREG)
3012 OpToConvToRecForm = LoweredLogical.getOperand(0).getOperand(1);
3013 else if (IsBitwiseNegate)
3014 // If the input to the XORI isn't an extension, that's what we're after.
3015 OpToConvToRecForm = LoweredLogical.getOperand(0);
3016 else
3017 // If this is not an XORI, it is a reg-reg logical op and we can convert
3018 // it to record-form.
3019 OpToConvToRecForm = LoweredLogical;
3020
3021 // Get the record-form version of the node we're looking to use to get the
3022 // CR result from.
     // NewOpc is compared against -1 below to detect a missing record form.
3023 uint16_t NonRecOpc = OpToConvToRecForm.getMachineOpcode();
3024 int NewOpc = PPCInstrInfo::getRecordFormOpcode(NonRecOpc);
3025
3026 // Convert the right node to record-form. This is either the logical we're
3027 // looking at or it is the input node to the negation (if we're looking at
3028 // a bitwise negation).
3029 if (NewOpc != -1 && IsBitwiseNegate) {
3030 // The input to the XORI has a record-form. Use it.
3031 assert(LoweredLogical.getConstantOperandVal(1) == 1 &&
3032 "Expected a PPC::XORI8 only for bitwise negation.");
3033 // Emit the record-form instruction.
     // The record-form node reuses all operands of the non-record node and
     // additionally produces a glue result.
3034 std::vector<SDValue> Ops;
3035 for (int i = 0, e = OpToConvToRecForm.getNumOperands(); i < e; i++)
3036 Ops.push_back(OpToConvToRecForm.getOperand(i));
3037
3038 WideOp =
3039 SDValue(CurDAG->getMachineNode(NewOpc, dl,
3040 OpToConvToRecForm.getValueType(),
3041 MVT::Glue, Ops), 0);
3042 } else {
     // NOTE(review): this branch is entered when NewOpc == -1 or when this is
     // not a negation. The assert rejects "negation without a record form",
     // which is the one situation where the ANDI8_rec fallback below (with
     // RHS being the XORI8 immediate) looks meaningful - confirm which of the
     // two expresses the intent.
3043 assert((NewOpc != -1 || !IsBitwiseNegate) &&
3044 "No record form available for AND8/OR8/XOR8?");
3045 WideOp =
3046 SDValue(CurDAG->getMachineNode(NewOpc == -1 ? PPC::ANDI8_rec : NewOpc,
3047 dl, MVT::i64, MVT::Glue, LHS, RHS),
3048 0);
3049 }
3050
3051 // Select this node to a single bit from CR0 set by the record-form node
3052 // just created. For bitwise negation, use the EQ bit which is the equivalent
3053 // of negating the result (i.e. it is a bit set when the result of the
3054 // operation is zero).
     // The glue result (value 1) of the record-form node is passed as the last
     // operand of the extract.
3055 SDValue SRIdxVal =
3056 CurDAG->getTargetConstant(SubRegToExtract, dl, MVT::i32);
3057 SDValue CRBit =
3058 SDValue(CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
3059 MVT::i1, CR0Reg, SRIdxVal,
3060 WideOp.getValue(1)), 0);
3061 return CRBit.getNode();
3062}
3063
3064// Lower a logical operation on i1 values into a GPR sequence if possible.
3065// The result can be kept in a GPR if requested.
3066// Three types of inputs can be handled:
3067// - SETCC
3068// - TRUNCATE
3069// - Logical operation (AND/OR/XOR)
3070// There is also a special case that is handled (namely a complement operation
3071// achieved with xor %a, -1).
    // Returns an i64 machine node (AND8/OR8/XOR8, or XORI8 with immediate 1 for
    // the complement case), or an empty SDValue when a required operand cannot
    // be produced in a GPR. Operand sequences are generated before that check,
    // so nodes may already have been created when an empty value is returned.
    // NOTE(review): listing line 3073, which opens the first assertion, is
    // missing from this extract.
3072SDValue IntegerCompareEliminator::computeLogicOpInGPR(SDValue LogicOp) {
3074 "Can only handle logic operations here.");
3075 assert(LogicOp.getValueType() == MVT::i1 &&
3076 "Can only handle logic operations on i1 values here.");
3077 SDLoc dl(LogicOp);
3078 SDValue LHS, RHS;
3079
3080 // Special case: xor %a, -1
3081 bool IsBitwiseNegation = isBitwiseNot(LogicOp);
3082
3083 // Produces a GPR sequence for each operand of the binary logic operation.
3084 // For SETCC, it produces the respective comparison, for TRUNCATE it truncates
3085 // the value in a GPR and for logic operations, it will recursively produce
3086 // a GPR sequence for the operation.
     // Any other operand kind yields an empty SDValue.
3087 auto getLogicOperand = [&] (SDValue Operand) -> SDValue {
3088 unsigned OperandOpcode = Operand.getOpcode();
3089 if (OperandOpcode == ISD::SETCC)
3090 return getSETCCInGPR(Operand, SetccInGPROpts::ZExtOrig);
3091 else if (OperandOpcode == ISD::TRUNCATE) {
     // Rotate by 0 with mask begin 63: clears everything except the
     // least-significant bit of the truncated value.
3092 SDValue InputOp = Operand.getOperand(0);
3093 EVT InVT = InputOp.getValueType();
3094 return SDValue(CurDAG->getMachineNode(InVT == MVT::i32 ? PPC::RLDICL_32 :
3095 PPC::RLDICL, dl, InVT, InputOp,
3096 S->getI64Imm(0, dl),
3097 S->getI64Imm(63, dl)), 0);
3098 } else if (ISD::isBitwiseLogicOp(OperandOpcode))
3099 return computeLogicOpInGPR(Operand);
3100 return SDValue();
3101 };
3102 LHS = getLogicOperand(LogicOp.getOperand(0));
3103 RHS = getLogicOperand(LogicOp.getOperand(1));
3104
3105 // If a GPR sequence can't be produced for the LHS we can't proceed.
3106 // Not producing a GPR sequence for the RHS is only a problem if this isn't
3107 // a bitwise negation operation.
3108 if (!LHS || (!RHS && !IsBitwiseNegation))
3109 return SDValue();
3110
3111 NumLogicOpsOnComparison++;
3112
3113 // We will use the inputs as 64-bit values.
3114 if (LHS.getValueType() == MVT::i32)
3115 LHS = addExtOrTrunc(LHS, ExtOrTruncConversion::Ext);
3116 if (!IsBitwiseNegation && RHS.getValueType() == MVT::i32)
3117 RHS = addExtOrTrunc(RHS, ExtOrTruncConversion::Ext);
3118
3119 unsigned NewOpc;
3120 switch (LogicOp.getOpcode()) {
3121 default: llvm_unreachable("Unknown logic operation.");
3122 case ISD::AND: NewOpc = PPC::AND8; break;
3123 case ISD::OR: NewOpc = PPC::OR8; break;
3124 case ISD::XOR: NewOpc = PPC::XOR8; break;
3125 }
3126
     // For the complement case the RHS is replaced by the immediate 1 and the
     // opcode by XORI8, i.e. only the lowest bit of the LHS is flipped.
3127 if (IsBitwiseNegation) {
3128 RHS = S->getI64Imm(1, dl);
3129 NewOpc = PPC::XORI8;
3130 }
3131
3132 return SDValue(CurDAG->getMachineNode(NewOpc, dl, MVT::i64, LHS, RHS), 0);
3133
3134}
3135
3136/// If the value isn't guaranteed to be sign-extended to 64-bits, extend it.
3137/// Otherwise just reinterpret it as a 64-bit value.
3138/// Useful when emitting comparison code for 32-bit values without using
3139/// the compare instruction (which only considers the lower 32-bits).
3140SDValue IntegerCompareEliminator::signExtendInputIfNeeded(SDValue Input) {
3141 assert(Input.getValueType() == MVT::i32 &&
3142 "Can only sign-extend 32-bit values here.");
3143 unsigned Opc = Input.getOpcode();
3144
3145 // The value was sign extended and then truncated to 32-bits. No need to
3146 // sign extend it again.
3147 if (Opc == ISD::TRUNCATE &&
3148 (Input.getOperand(0).getOpcode() == ISD::AssertSext ||
3149 Input.getOperand(0).getOpcode() == ISD::SIGN_EXTEND))
3150 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3151
3152 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3153 // The input is a sign-extending load. All ppc sign-extending loads
3154 // sign-extend to the full 64-bits.
3155 if (InputLoad && InputLoad->getExtensionType() == ISD::SEXTLOAD)
3156 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3157
3158 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3159 // We don't sign-extend constants.
3160 if (InputConst)
3161 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3162
3163 SDLoc dl(Input);
3164 SignExtensionsAdded++;
3165 return SDValue(CurDAG->getMachineNode(PPC::EXTSW_32_64, dl,
3166 MVT::i64, Input), 0);
3167}
3168
3169/// If the value isn't guaranteed to be zero-extended to 64-bits, extend it.
3170/// Otherwise just reinterpret it as a 64-bit value.
3171/// Useful when emitting comparison code for 32-bit values without using
3172/// the compare instruction (which only considers the lower 32-bits).
3173SDValue IntegerCompareEliminator::zeroExtendInputIfNeeded(SDValue Input) {
3174 assert(Input.getValueType() == MVT::i32 &&
3175 "Can only zero-extend 32-bit values here.");
3176 unsigned Opc = Input.getOpcode();
3177
3178 // The only condition under which we can omit the actual extend instruction:
3179 // - The value is a positive constant
3180 // - The value comes from a load that isn't a sign-extending load
3181 // An ISD::TRUNCATE needs to be zero-extended unless it is fed by a zext.
3182 bool IsTruncateOfZExt = Opc == ISD::TRUNCATE &&
3183 (Input.getOperand(0).getOpcode() == ISD::AssertZext ||
3184 Input.getOperand(0).getOpcode() == ISD::ZERO_EXTEND);
3185 if (IsTruncateOfZExt)
3186 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3187
3188 ConstantSDNode *InputConst = dyn_cast<ConstantSDNode>(Input);
3189 if (InputConst && InputConst->getSExtValue() >= 0)
3190 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3191
3192 LoadSDNode *InputLoad = dyn_cast<LoadSDNode>(Input);
3193 // The input is a load that doesn't sign-extend (it will be zero-extended).
3194 if (InputLoad && InputLoad->getExtensionType() != ISD::SEXTLOAD)
3195 return addExtOrTrunc(Input, ExtOrTruncConversion::Ext);
3196
3197 // None of the above, need to zero-extend.
3198 SDLoc dl(Input);
3199 ZeroExtensionsAdded++;
3200 return SDValue(CurDAG->getMachineNode(PPC::RLDICL_32_64, dl, MVT::i64, Input,
3201 S->getI64Imm(0, dl),
3202 S->getI64Imm(32, dl)), 0);
3203}
3204
3205// Handle a 32-bit value in a 64-bit register and vice-versa. These are of
3206// course not actual zero/sign extensions that will generate machine code,
3207// they're just a way to reinterpret a 32 bit value in a register as a
3208// 64 bit value and vice-versa.
3209SDValue IntegerCompareEliminator::addExtOrTrunc(SDValue NatWidthRes,
3210 ExtOrTruncConversion Conv) {
3211 SDLoc dl(NatWidthRes);
3212
3213 // For reinterpreting 32-bit values as 64 bit values, we generate
3214 // INSERT_SUBREG IMPLICIT_DEF:i64, <input>, TargetConstant:i32<1>
3215 if (Conv == ExtOrTruncConversion::Ext) {
3216 SDValue ImDef(CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, MVT::i64), 0);
3217 SDValue SubRegIdx =
3218 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3219 return SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, MVT::i64,
3220 ImDef, NatWidthRes, SubRegIdx), 0);
3221 }
3222
3223 assert(Conv == ExtOrTruncConversion::Trunc &&
3224 "Unknown convertion between 32 and 64 bit values.");
3225 // For reinterpreting 64-bit values as 32-bit values, we just need to
3226 // EXTRACT_SUBREG (i.e. extract the low word).
3227 SDValue SubRegIdx =
3228 CurDAG->getTargetConstant(PPC::sub_32, dl, MVT::i32);
3229 return SDValue(CurDAG->getMachineNode(PPC::EXTRACT_SUBREG, dl, MVT::i32,
3230 NatWidthRes, SubRegIdx), 0);
3231}
3232
3233// Produce a GPR sequence for compound comparisons (<=, >=) against zero.
3234// Handle both zero-extensions and sign-extensions.
    // LHS is the value compared against zero, dl the location used for the new
    // nodes, and CmpTy selects both the comparison (>= 0 or <= 0) and the
    // extension (zext gives 1/0, sext gives -1/0).
    // For a 32-bit LHS the GE variants return an i32 node, while the LE
    // variants return an i64 node because their sequence is computed on the
    // sign-extended 64-bit input.
3235SDValue
3236IntegerCompareEliminator::getCompoundZeroComparisonInGPR(SDValue LHS, SDLoc dl,
3237 ZeroCompare CmpTy) {
3238 EVT InVT = LHS.getValueType();
3239 bool Is32Bit = InVT == MVT::i32;
3240 SDValue ToExtend;
3241
3242 // Produce the value that needs to be either zero or sign extended.
3243 switch (CmpTy) {
3244 case ZeroCompare::GEZExt:
3245 case ZeroCompare::GESExt:
     // nor LHS, LHS computes ~LHS, whose sign bit is set exactly when
     // LHS >= 0.
3246 ToExtend = SDValue(CurDAG->getMachineNode(Is32Bit ? PPC::NOR : PPC::NOR8,
3247 dl, InVT, LHS, LHS), 0);
3248 break;
3249 case ZeroCompare::LEZExt:
3250 case ZeroCompare::LESExt: {
3251 if (Is32Bit) {
3252 // Upper 32 bits cannot be undefined for this sequence.
3253 LHS = signExtendInputIfNeeded(LHS);
     // Logical shift right by 63 of (-LHS): 1 when LHS > 0, otherwise 0.
     // The final step below turns this into the LHS <= 0 result.
3254 SDValue Neg =
3255 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3256 ToExtend =
3257 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3258 Neg, S->getI64Imm(1, dl),
3259 S->getI64Imm(63, dl)), 0);
3260 } else {
     // (LHS - 1) | LHS has its sign bit set exactly when LHS <= 0.
3261 SDValue Addi =
3262 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3263 S->getI64Imm(~0ULL, dl)), 0);
3264 ToExtend = SDValue(CurDAG->getMachineNode(PPC::OR8, dl, MVT::i64,
3265 Addi, LHS), 0);
3266 }
3267 break;
3268 }
3269 }
3270
3271 // For 64-bit sequences, the extensions are the same for the GE/LE cases.
     // Zero-extended result: move the sign bit of ToExtend to bit 0 (1/0).
3272 if (!Is32Bit &&
3273 (CmpTy == ZeroCompare::GEZExt || CmpTy == ZeroCompare::LEZExt))
3274 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3275 ToExtend, S->getI64Imm(1, dl),
3276 S->getI64Imm(63, dl)), 0);
     // Sign-extended result: arithmetic shift right by 63 replicates the sign
     // bit of ToExtend (-1/0).
3277 if (!Is32Bit &&
3278 (CmpTy == ZeroCompare::GESExt || CmpTy == ZeroCompare::LESExt))
3279 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, ToExtend,
3280 S->getI64Imm(63, dl)), 0);
3281
3282 assert(Is32Bit && "Should have handled the 32-bit sequences above.");
3283 // For 32-bit sequences, the extensions differ between GE/LE cases.
3284 switch (CmpTy) {
3285 case ZeroCompare::GEZExt: {
     // Rotate left by 1 and keep only bit 31, i.e. a logical shift right of
     // ~LHS by 31.
3286 SDValue ShiftOps[] = { ToExtend, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3287 S->getI32Imm(31, dl) };
3288 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3289 ShiftOps), 0);
3290 }
3291 case ZeroCompare::GESExt:
3292 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, ToExtend,
3293 S->getI32Imm(31, dl)), 0);
3294 case ZeroCompare::LEZExt:
     // ToExtend is the "LHS > 0" bit here; flipping it gives LHS <= 0.
3295 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, ToExtend,
3296 S->getI32Imm(1, dl)), 0);
3297 case ZeroCompare::LESExt:
     // Adding -1 to the "LHS > 0" bit maps 1 to 0 and 0 to -1.
3298 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, ToExtend,
3299 S->getI32Imm(-1, dl)), 0);
3300 }
3301
3302 // The above case covers all the enumerators so it can't have a default clause
3303 // to avoid compiler warnings.
3304 llvm_unreachable("Unknown zero-comparison type.");
3305}
3306
3307/// Produces a zero-extended result of comparing two 32-bit values according to
3308/// the passed condition code.
3309SDValue
3310IntegerCompareEliminator::get32BitZExtCompare(SDValue LHS, SDValue RHS,
3312 int64_t RHSValue, SDLoc dl) {
3315 return SDValue();
3316 bool IsRHSZero = RHSValue == 0;
3317 bool IsRHSOne = RHSValue == 1;
3318 bool IsRHSNegOne = RHSValue == -1LL;
3319 switch (CC) {
3320 default: return SDValue();
3321 case ISD::SETEQ: {
3322 // (zext (setcc %a, %b, seteq)) -> (lshr (cntlzw (xor %a, %b)), 5)
3323 // (zext (setcc %a, 0, seteq)) -> (lshr (cntlzw %a), 5)
3324 SDValue Xor = IsRHSZero ? LHS :
3325 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3326 SDValue Clz =
3327 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3328 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3329 S->getI32Imm(31, dl) };
3330 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3331 ShiftOps), 0);
3332 }
3333 case ISD::SETNE: {
3334 // (zext (setcc %a, %b, setne)) -> (xor (lshr (cntlzw (xor %a, %b)), 5), 1)
3335 // (zext (setcc %a, 0, setne)) -> (xor (lshr (cntlzw %a), 5), 1)
3336 SDValue Xor = IsRHSZero ? LHS :
3337 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3338 SDValue Clz =
3339 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3340 SDValue ShiftOps[] = { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl),
3341 S->getI32Imm(31, dl) };
3342 SDValue Shift =
3343 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3344 return SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3345 S->getI32Imm(1, dl)), 0);
3346 }
3347 case ISD::SETGE: {
3348 // (zext (setcc %a, %b, setge)) -> (xor (lshr (sub %a, %b), 63), 1)
3349 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 31)
3350 if(IsRHSZero)
3351 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3352
3353 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3354 // by swapping inputs and falling through.
3355 std::swap(LHS, RHS);
3356 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3357 IsRHSZero = RHSConst && RHSConst->isZero();
3358 [[fallthrough]];
3359 }
3360 case ISD::SETLE: {
3361 if (CmpInGPR == ICGPR_NonExtIn)
3362 return SDValue();
3363 // (zext (setcc %a, %b, setle)) -> (xor (lshr (sub %b, %a), 63), 1)
3364 // (zext (setcc %a, 0, setle)) -> (xor (lshr (- %a), 63), 1)
3365 if(IsRHSZero) {
3366 if (CmpInGPR == ICGPR_NonExtIn)
3367 return SDValue();
3368 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3369 }
3370
3371 // The upper 32-bits of the register can't be undefined for this sequence.
3372 LHS = signExtendInputIfNeeded(LHS);
3373 RHS = signExtendInputIfNeeded(RHS);
3374 SDValue Sub =
3375 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3376 SDValue Shift =
3377 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Sub,
3378 S->getI64Imm(1, dl), S->getI64Imm(63, dl)),
3379 0);
3380 return
3381 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl,
3382 MVT::i64, Shift, S->getI32Imm(1, dl)), 0);
3383 }
3384 case ISD::SETGT: {
3385 // (zext (setcc %a, %b, setgt)) -> (lshr (sub %b, %a), 63)
3386 // (zext (setcc %a, -1, setgt)) -> (lshr (~ %a), 31)
3387 // (zext (setcc %a, 0, setgt)) -> (lshr (- %a), 63)
3388 // Handle SETGT -1 (which is equivalent to SETGE 0).
3389 if (IsRHSNegOne)
3390 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3391
3392 if (IsRHSZero) {
3393 if (CmpInGPR == ICGPR_NonExtIn)
3394 return SDValue();
3395 // The upper 32-bits of the register can't be undefined for this sequence.
3396 LHS = signExtendInputIfNeeded(LHS);
3397 RHS = signExtendInputIfNeeded(RHS);
3398 SDValue Neg =
3399 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3400 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3401 Neg, S->getI32Imm(1, dl), S->getI32Imm(63, dl)), 0);
3402 }
3403 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3404 // (%b < %a) by swapping inputs and falling through.
3405 std::swap(LHS, RHS);
3406 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3407 IsRHSZero = RHSConst && RHSConst->isZero();
3408 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3409 [[fallthrough]];
3410 }
3411 case ISD::SETLT: {
3412 // (zext (setcc %a, %b, setlt)) -> (lshr (sub %a, %b), 63)
3413 // (zext (setcc %a, 1, setlt)) -> (xor (lshr (- %a), 63), 1)
3414 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 31)
3415 // Handle SETLT 1 (which is equivalent to SETLE 0).
3416 if (IsRHSOne) {
3417 if (CmpInGPR == ICGPR_NonExtIn)
3418 return SDValue();
3419 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3420 }
3421
3422 if (IsRHSZero) {
3423 SDValue ShiftOps[] = { LHS, S->getI32Imm(1, dl), S->getI32Imm(31, dl),
3424 S->getI32Imm(31, dl) };
3425 return SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32,
3426 ShiftOps), 0);
3427 }
3428
3429 if (CmpInGPR == ICGPR_NonExtIn)
3430 return SDValue();
3431 // The upper 32-bits of the register can't be undefined for this sequence.
3432 LHS = signExtendInputIfNeeded(LHS);
3433 RHS = signExtendInputIfNeeded(RHS);
3434 SDValue SUBFNode =
3435 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3436 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3437 SUBFNode, S->getI64Imm(1, dl),
3438 S->getI64Imm(63, dl)), 0);
3439 }
3440 case ISD::SETUGE:
3441 // (zext (setcc %a, %b, setuge)) -> (xor (lshr (sub %b, %a), 63), 1)
3442 // (zext (setcc %a, %b, setule)) -> (xor (lshr (sub %a, %b), 63), 1)
3443 std::swap(LHS, RHS);
3444 [[fallthrough]];
3445 case ISD::SETULE: {
3446 if (CmpInGPR == ICGPR_NonExtIn)
3447 return SDValue();
3448 // The upper 32-bits of the register can't be undefined for this sequence.
3449 LHS = zeroExtendInputIfNeeded(LHS);
3450 RHS = zeroExtendInputIfNeeded(RHS);
3451 SDValue Subtract =
3452 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3453 SDValue SrdiNode =
3454 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3455 Subtract, S->getI64Imm(1, dl),
3456 S->getI64Imm(63, dl)), 0);
3457 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64, SrdiNode,
3458 S->getI32Imm(1, dl)), 0);
3459 }
3460 case ISD::SETUGT:
3461 // (zext (setcc %a, %b, setugt)) -> (lshr (sub %b, %a), 63)
3462 // (zext (setcc %a, %b, setult)) -> (lshr (sub %a, %b), 63)
3463 std::swap(LHS, RHS);
3464 [[fallthrough]];
3465 case ISD::SETULT: {
3466 if (CmpInGPR == ICGPR_NonExtIn)
3467 return SDValue();
3468 // The upper 32-bits of the register can't be undefined for this sequence.
3469 LHS = zeroExtendInputIfNeeded(LHS);
3470 RHS = zeroExtendInputIfNeeded(RHS);
3471 SDValue Subtract =
3472 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3473 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3474 Subtract, S->getI64Imm(1, dl),
3475 S->getI64Imm(63, dl)), 0);
3476 }
3477 }
3478}
3479
3480 /// Produces a sign-extended result of comparing two 32-bit values according to
3481 /// the passed condition code.
///
/// Each handled condition code builds a short GPR instruction sequence with
/// CurDAG->getMachineNode and returns its value 0. An empty SDValue() is
/// returned for condition codes not listed in the switch and on the early-outs
/// that test CmpInGPR == ICGPR_NonExtIn. SETGE, SETGT, SETUGE and SETUGT swap
/// LHS and RHS and fall through to the SETLE, SETLT, SETULE and SETULT cases.
/// RHSValue is only used to derive the IsRHSZero / IsRHSOne / IsRHSNegOne
/// flags.
///
/// NOTE(review): this listing skips source lines 3484 and 3486-3487, so the
/// declaration of `CC` and the condition guarding the first `return SDValue()`
/// are not visible here -- confirm against the full file.
3482 SDValue
3483 IntegerCompareEliminator::get32BitSExtCompare(SDValue LHS, SDValue RHS,
3485 int64_t RHSValue, SDLoc dl) {
3488 return SDValue();
3489 bool IsRHSZero = RHSValue == 0;
3490 bool IsRHSOne = RHSValue == 1;
3491 bool IsRHSNegOne = RHSValue == -1LL;
3492
3493 switch (CC) {
3494 default: return SDValue();
3495 case ISD::SETEQ: {
3496 // (sext (setcc %a, %b, seteq)) ->
3497 // (neg (lshr (ctlz (xor %a, %b)), 5))
3498 // (sext (setcc %a, 0, seteq)) ->
3499 // (neg (lshr (ctlz %a), 5))
// The RLWINM operands (27, 5, 31) are the same ones the SETNE case below uses
// for its "lshr ..., 5" step, so despite the SHLOps/Slwi names this is a
// right shift of the count by 5; the final NEG takes the 2's complement.
3500 SDValue CountInput = IsRHSZero ? LHS :
3501 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3502 SDValue Cntlzw =
3503 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, CountInput), 0);
3504 SDValue SHLOps[] = { Cntlzw, S->getI32Imm(27, dl),
3505 S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3506 SDValue Slwi =
3507 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, SHLOps), 0);
3508 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Slwi), 0);
3509 }
3510 case ISD::SETNE: {
3511 // Bitwise xor the operands, count leading zeros, shift right by 5 bits and
3512 // flip the bit, finally take 2's complement.
3513 // (sext (setcc %a, %b, setne)) ->
3514 // (neg (xor (lshr (ctlz (xor %a, %b)), 5), 1))
3515 // Same as above, but the first xor is not needed.
3516 // (sext (setcc %a, 0, setne)) ->
3517 // (neg (xor (lshr (ctlz %a), 5), 1))
3518 SDValue Xor = IsRHSZero ? LHS :
3519 SDValue(CurDAG->getMachineNode(PPC::XOR, dl, MVT::i32, LHS, RHS), 0);
3520 SDValue Clz =
3521 SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Xor), 0);
3522 SDValue ShiftOps[] =
3523 { Clz, S->getI32Imm(27, dl), S->getI32Imm(5, dl), S->getI32Imm(31, dl) };
3524 SDValue Shift =
3525 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, ShiftOps), 0);
3526 SDValue Xori =
3527 SDValue(CurDAG->getMachineNode(PPC::XORI, dl, MVT::i32, Shift,
3528 S->getI32Imm(1, dl)), 0);
3529 return SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Xori), 0);
3530 }
3531 case ISD::SETGE: {
3532 // (sext (setcc %a, %b, setge)) -> (add (lshr (sub %a, %b), 63), -1)
3533 // (sext (setcc %a, 0, setge)) -> (ashr (~ %a), 31)
3534 if (IsRHSZero)
3535 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3536
3537 // Not a special case (i.e. RHS == 0). Handle (%a >= %b) as (%b <= %a)
3538 // by swapping inputs and falling through.
3539 std::swap(LHS, RHS);
// IsRHSZero was computed from RHSValue (the pre-swap RHS), so recompute it
// for the operand that is now on the right.
3540 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3541 IsRHSZero = RHSConst && RHSConst->isZero();
3542 [[fallthrough]];
3543 }
3544 case ISD::SETLE: {
3545 if (CmpInGPR == ICGPR_NonExtIn)
3546 return SDValue();
3547 // (sext (setcc %a, %b, setle)) -> (add (lshr (sub %b, %a), 63), -1)
3548 // (sext (setcc %a, 0, setle)) -> (add (lshr (- %a), 63), -1)
3549 if (IsRHSZero)
3550 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3551
3552 // The upper 32-bits of the register can't be undefined for this sequence.
3553 LHS = signExtendInputIfNeeded(LHS);
3554 RHS = signExtendInputIfNeeded(RHS);
// NOTE(review): this SUBF8 requests an extra MVT::Glue result, but only
// result 0 is used below; the SUBF8 in the SETLT case is created without it.
3555 SDValue SUBFNode =
3556 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, MVT::Glue,
3557 LHS, RHS), 0);
3558 SDValue Srdi =
3559 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3560 SUBFNode, S->getI64Imm(1, dl),
3561 S->getI64Imm(63, dl)), 0);
3562 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Srdi,
3563 S->getI32Imm(-1, dl)), 0);
3564 }
3565 case ISD::SETGT: {
3566 // (sext (setcc %a, %b, setgt)) -> (ashr (sub %b, %a), 63)
3567 // (sext (setcc %a, -1, setgt)) -> (ashr (~ %a), 31)
3568 // (sext (setcc %a, 0, setgt)) -> (ashr (- %a), 63)
// (%a > -1) is handled as (%a >= 0).
3569 if (IsRHSNegOne)
3570 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3571 if (IsRHSZero) {
3572 if (CmpInGPR == ICGPR_NonExtIn)
3573 return SDValue();
3574 // The upper 32-bits of the register can't be undefined for this sequence.
3575 LHS = signExtendInputIfNeeded(LHS);
// NOTE(review): RHS is extended here although only LHS feeds the NEG8 below.
3576 RHS = signExtendInputIfNeeded(RHS);
3577 SDValue Neg =
3578 SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, LHS), 0);
3579 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Neg,
3580 S->getI64Imm(63, dl)), 0);
3581 }
3582 // Not a special case (i.e. RHS == 0 or RHS == -1). Handle (%a > %b) as
3583 // (%b < %a) by swapping inputs and falling through.
3584 std::swap(LHS, RHS);
// Recompute the constant flags for the operand that is now on the right.
3585 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3586 IsRHSZero = RHSConst && RHSConst->isZero();
3587 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3588 [[fallthrough]];
3589 }
3590 case ISD::SETLT: {
3591 // (sext (setcc %a, %b, setlt)) -> (ashr (sub %a, %b), 63)
3592 // (sext (setcc %a, 1, setlt)) -> (add (lshr (- %a), 63), -1)
3593 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 31)
// (%a < 1) is handled as (%a <= 0).
3594 if (IsRHSOne) {
3595 if (CmpInGPR == ICGPR_NonExtIn)
3596 return SDValue();
3597 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3598 }
3599 if (IsRHSZero)
3600 return SDValue(CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, LHS,
3601 S->getI32Imm(31, dl)), 0);
3602
3603 if (CmpInGPR == ICGPR_NonExtIn)
3604 return SDValue();
3605 // The upper 32-bits of the register can't be undefined for this sequence.
3606 LHS = signExtendInputIfNeeded(LHS);
3607 RHS = signExtendInputIfNeeded(RHS);
3608 SDValue SUBFNode =
3609 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3610 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3611 SUBFNode, S->getI64Imm(63, dl)), 0);
3612 }
3613 case ISD::SETUGE:
3614 // (sext (setcc %a, %b, setuge)) -> (add (lshr (sub %a, %b), 63), -1)
3615 // (sext (setcc %a, %b, setule)) -> (add (lshr (sub %b, %a), 63), -1)
3616 std::swap(LHS, RHS);
3617 [[fallthrough]];
3618 case ISD::SETULE: {
3619 if (CmpInGPR == ICGPR_NonExtIn)
3620 return SDValue();
3621 // The upper 32-bits of the register can't be undefined for this sequence.
3622 LHS = zeroExtendInputIfNeeded(LHS);
3623 RHS = zeroExtendInputIfNeeded(RHS);
3624 SDValue Subtract =
3625 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, LHS, RHS), 0);
3626 SDValue Shift =
3627 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Subtract,
3628 S->getI32Imm(1, dl), S->getI32Imm(63,dl)),
3629 0);
3630 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, Shift,
3631 S->getI32Imm(-1, dl)), 0);
3632 }
3633 case ISD::SETUGT:
3634 // (sext (setcc %a, %b, setugt)) -> (ashr (sub %b, %a), 63)
3635 // (sext (setcc %a, %b, setult)) -> (ashr (sub %a, %b), 63)
3636 std::swap(LHS, RHS);
3637 [[fallthrough]];
3638 case ISD::SETULT: {
3639 if (CmpInGPR == ICGPR_NonExtIn)
3640 return SDValue();
3641 // The upper 32-bits of the register can't be undefined for this sequence.
3642 LHS = zeroExtendInputIfNeeded(LHS);
3643 RHS = zeroExtendInputIfNeeded(RHS);
3644 SDValue Subtract =
3645 SDValue(CurDAG->getMachineNode(PPC::SUBF8, dl, MVT::i64, RHS, LHS), 0);
3646 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3647 Subtract, S->getI64Imm(63, dl)), 0);
3648 }
3649 }
3650 }
3651
3652 /// Produces a zero-extended result of comparing two 64-bit values according to
3653 /// the passed condition code.
///
/// Each handled condition code builds a GPR instruction sequence with
/// CurDAG->getMachineNode and returns its value 0; condition codes not listed
/// in the switch return an empty SDValue(). SETGE, SETGT, SETUGE and SETUGT
/// swap LHS and RHS and fall through to the SETLE, SETLT, SETULE and SETULT
/// cases. RHSValue is only used to derive the IsRHSZero / IsRHSOne /
/// IsRHSNegOne flags.
///
/// NOTE(review): this listing skips source lines 3656 and 3658-3659, so the
/// declaration of `CC` and the condition guarding the first `return SDValue()`
/// are not visible here -- confirm against the full file.
3654 SDValue
3655 IntegerCompareEliminator::get64BitZExtCompare(SDValue LHS, SDValue RHS,
3657 int64_t RHSValue, SDLoc dl) {
3660 return SDValue();
3661 bool IsRHSZero = RHSValue == 0;
3662 bool IsRHSOne = RHSValue == 1;
3663 bool IsRHSNegOne = RHSValue == -1LL;
3664 switch (CC) {
3665 default: return SDValue();
3666 case ISD::SETEQ: {
3667 // (zext (setcc %a, %b, seteq)) -> (lshr (ctlz (xor %a, %b)), 6)
3668 // (zext (setcc %a, 0, seteq)) -> (lshr (ctlz %a), 6)
3669 SDValue Xor = IsRHSZero ? LHS :
3670 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3671 SDValue Clz =
3672 SDValue(CurDAG->getMachineNode(PPC::CNTLZD, dl, MVT::i64, Xor), 0);
3673 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Clz,
3674 S->getI64Imm(58, dl),
3675 S->getI64Imm(63, dl)), 0);
3676 }
3677 case ISD::SETNE: {
3678 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3679 // (zext (setcc %a, %b, setne)) -> (sube addc.reg, addc.reg, addc.CA)
3680 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3681 // (zext (setcc %a, 0, setne)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3682 SDValue Xor = IsRHSZero ? LHS :
3683 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
// ADDIC8 is created with two results (i64, Glue); AC.getValue(1) below is the
// Glue result, passed as the last operand of SUBFE8.
3684 SDValue AC =
3685 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3686 Xor, S->getI32Imm(~0U, dl)), 0);
3687 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, AC,
3688 Xor, AC.getValue(1)), 0);
3689 }
3690 case ISD::SETGE: {
3691 // {subc.reg, subc.CA} = (subcarry %a, %b)
3692 // (zext (setcc %a, %b, setge)) ->
3693 // (adde (lshr %b, 63), (ashr %a, 63), subc.CA)
3694 // (zext (setcc %a, 0, setge)) -> (lshr (~ %a), 63)
3695 if (IsRHSZero)
3696 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
// Handle (%a >= %b) as (%b <= %a): swap, recompute IsRHSZero for the new RHS
// and fall through.
3697 std::swap(LHS, RHS);
3698 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3699 IsRHSZero = RHSConst && RHSConst->isZero();
3700 [[fallthrough]];
3701 }
3702 case ISD::SETLE: {
3703 // {subc.reg, subc.CA} = (subcarry %b, %a)
3704 // (zext (setcc %a, %b, setle)) ->
3705 // (adde (lshr %a, 63), (ashr %b, 63), subc.CA)
3706 // (zext (setcc %a, 0, setle)) -> (lshr (or %a, (add %a, -1)), 63)
3707 if (IsRHSZero)
3708 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3709 SDValue ShiftL =
3710 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3711 S->getI64Imm(1, dl),
3712 S->getI64Imm(63, dl)), 0);
3713 SDValue ShiftR =
3714 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3715 S->getI64Imm(63, dl)), 0);
// Only result 1 (the Glue value) of SUBFC8 is kept; it is the last operand of
// the ADDE8 below.
3716 SDValue SubtractCarry =
3717 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3718 LHS, RHS), 1);
3719 return SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3720 ShiftR, ShiftL, SubtractCarry), 0);
3721 }
3722 case ISD::SETGT: {
3723 // {subc.reg, subc.CA} = (subcarry %b, %a)
3724 // (zext (setcc %a, %b, setgt)) ->
3725 // (xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3726 // (zext (setcc %a, 0, setgt)) -> (lshr (nor (add %a, -1), %a), 63)
// (%a > -1) is handled as (%a >= 0).
3727 if (IsRHSNegOne)
3728 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GEZExt);
3729 if (IsRHSZero) {
3730 SDValue Addi =
3731 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3732 S->getI64Imm(~0ULL, dl)), 0);
3733 SDValue Nor =
3734 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Addi, LHS), 0);
3735 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, Nor,
3736 S->getI64Imm(1, dl),
3737 S->getI64Imm(63, dl)), 0);
3738 }
// Handle (%a > %b) as (%b < %a): swap, recompute the constant flags for the
// new RHS and fall through.
3739 std::swap(LHS, RHS);
3740 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3741 IsRHSZero = RHSConst && RHSConst->isZero();
3742 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3743 [[fallthrough]];
3744 }
3745 case ISD::SETLT: {
3746 // {subc.reg, subc.CA} = (subcarry %a, %b)
3747 // (zext (setcc %a, %b, setlt)) ->
3748 // (xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3749 // (zext (setcc %a, 0, setlt)) -> (lshr %a, 63)
// (%a < 1) is handled as (%a <= 0).
3750 if (IsRHSOne)
3751 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LEZExt);
3752 if (IsRHSZero)
3753 return SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3754 S->getI64Imm(1, dl),
3755 S->getI64Imm(63, dl)), 0);
3756 SDValue SRADINode =
3757 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3758 LHS, S->getI64Imm(63, dl)), 0);
3759 SDValue SRDINode =
3760 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3761 RHS, S->getI64Imm(1, dl),
3762 S->getI64Imm(63, dl)), 0);
// Only result 1 (the Glue value) of SUBFC8 is kept; it is the last operand of
// the ADDE8 below.
3763 SDValue SUBFC8Carry =
3764 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3765 RHS, LHS), 1);
3766 SDValue ADDE8Node =
3767 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3768 SRDINode, SRADINode, SUBFC8Carry), 0);
3769 return SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3770 ADDE8Node, S->getI64Imm(1, dl)), 0);
3771 }
3772 case ISD::SETUGE:
3773 // {subc.reg, subc.CA} = (subcarry %a, %b)
3774 // (zext (setcc %a, %b, setuge)) -> (add (sube %b, %b, subc.CA), 1)
3775 std::swap(LHS, RHS);
3776 [[fallthrough]];
3777 case ISD::SETULE: {
3778 // {subc.reg, subc.CA} = (subcarry %b, %a)
3779 // (zext (setcc %a, %b, setule)) -> (add (sube %a, %a, subc.CA), 1)
3780 SDValue SUBFC8Carry =
3781 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3782 LHS, RHS), 1);
3783 SDValue SUBFE8Node =
3784 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue,
3785 LHS, LHS, SUBFC8Carry), 0);
3786 return SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64,
3787 SUBFE8Node, S->getI64Imm(1, dl)), 0);
3788 }
3789 case ISD::SETUGT:
3790 // {subc.reg, subc.CA} = (subcarry %b, %a)
3791 // (zext (setcc %a, %b, setugt)) -> -(sube %b, %b, subc.CA)
3792 std::swap(LHS, RHS);
3793 [[fallthrough]];
3794 case ISD::SETULT: {
3795 // {subc.reg, subc.CA} = (subcarry %a, %b)
3796 // (zext (setcc %a, %b, setult)) -> -(sube %a, %a, subc.CA)
3797 SDValue SubtractCarry =
3798 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3799 RHS, LHS), 1);
3800 SDValue ExtSub =
3801 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3802 LHS, LHS, SubtractCarry), 0);
3803 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3804 ExtSub), 0);
3805 }
3806 }
3807 }
3808
3809 /// Produces a sign-extended result of comparing two 64-bit values according to
3810 /// the passed condition code.
///
/// Each handled condition code builds a GPR instruction sequence with
/// CurDAG->getMachineNode and returns its value 0; condition codes not listed
/// in the switch return an empty SDValue(). SETGE, SETGT, SETUGE and SETUGT
/// swap LHS and RHS and fall through to the SETLE, SETLT, SETULE and SETULT
/// cases. RHSValue is only used to derive the IsRHSZero / IsRHSOne /
/// IsRHSNegOne flags.
///
/// NOTE(review): this listing skips source lines 3813 and 3815-3816, so the
/// declaration of `CC` and the condition guarding the first `return SDValue()`
/// are not visible here -- confirm against the full file.
3811 SDValue
3812 IntegerCompareEliminator::get64BitSExtCompare(SDValue LHS, SDValue RHS,
3814 int64_t RHSValue, SDLoc dl) {
3817 return SDValue();
3818 bool IsRHSZero = RHSValue == 0;
3819 bool IsRHSOne = RHSValue == 1;
3820 bool IsRHSNegOne = RHSValue == -1LL;
3821 switch (CC) {
3822 default: return SDValue();
3823 case ISD::SETEQ: {
3824 // {addc.reg, addc.CA} = (addcarry (xor %a, %b), -1)
3825 // (sext (setcc %a, %b, seteq)) -> (sube addc.reg, addc.reg, addc.CA)
3826 // {addcz.reg, addcz.CA} = (addcarry %a, -1)
3827 // (sext (setcc %a, 0, seteq)) -> (sube addcz.reg, addcz.reg, addcz.CA)
3828 SDValue AddInput = IsRHSZero ? LHS :
3829 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
// ADDIC8 is created with two results (i64, Glue); Addic.getValue(1) below is
// the Glue result, passed as the last operand of SUBFE8.
3830 SDValue Addic =
3831 SDValue(CurDAG->getMachineNode(PPC::ADDIC8, dl, MVT::i64, MVT::Glue,
3832 AddInput, S->getI32Imm(~0U, dl)), 0);
3833 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, Addic,
3834 Addic, Addic.getValue(1)), 0);
3835 }
3836 case ISD::SETNE: {
3837 // {subfc.reg, subfc.CA} = (subcarry 0, (xor %a, %b))
3838 // (sext (setcc %a, %b, setne)) -> (sube subfc.reg, subfc.reg, subfc.CA)
3839 // {subfcz.reg, subfcz.CA} = (subcarry 0, %a)
3840 // (sext (setcc %a, 0, setne)) -> (sube subfcz.reg, subfcz.reg, subfcz.CA)
3841 SDValue Xor = IsRHSZero ? LHS :
3842 SDValue(CurDAG->getMachineNode(PPC::XOR8, dl, MVT::i64, LHS, RHS), 0);
3843 SDValue SC =
3844 SDValue(CurDAG->getMachineNode(PPC::SUBFIC8, dl, MVT::i64, MVT::Glue,
3845 Xor, S->getI32Imm(0, dl)), 0);
3846 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, SC,
3847 SC, SC.getValue(1)), 0);
3848 }
3849 case ISD::SETGE: {
3850 // {subc.reg, subc.CA} = (subcarry %a, %b)
3851 // (sext (setcc %a, %b, setge)) ->
3852 // (- (adde (lshr %b, 63), (ashr %a, 63), subc.CA))
3853 // (sext (setcc %a, 0, setge)) -> (~ (ashr %a, 63))
3854 if (IsRHSZero)
3855 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
// Handle (%a >= %b) as (%b <= %a): swap, recompute IsRHSZero for the new RHS
// and fall through.
3856 std::swap(LHS, RHS);
3857 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3858 IsRHSZero = RHSConst && RHSConst->isZero();
3859 [[fallthrough]];
3860 }
3861 case ISD::SETLE: {
3862 // {subc.reg, subc.CA} = (subcarry %b, %a)
3863 // (sext (setcc %a, %b, setle)) ->
3864 // (- (adde (lshr %a, 63), (ashr %b, 63), subc.CA))
3865 // (sext (setcc %a, 0, setle)) -> (ashr (or %a, (add %a, -1)), 63)
3866 if (IsRHSZero)
3867 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3868 SDValue ShiftR =
3869 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, RHS,
3870 S->getI64Imm(63, dl)), 0);
3871 SDValue ShiftL =
3872 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64, LHS,
3873 S->getI64Imm(1, dl),
3874 S->getI64Imm(63, dl)), 0);
// Only result 1 (the Glue value) of SUBFC8 is kept; it is the last operand of
// the ADDE8 below.
3875 SDValue SubtractCarry =
3876 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3877 LHS, RHS), 1);
3878 SDValue Adde =
3879 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64, MVT::Glue,
3880 ShiftR, ShiftL, SubtractCarry), 0);
3881 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64, Adde), 0);
3882 }
3883 case ISD::SETGT: {
3884 // {subc.reg, subc.CA} = (subcarry %b, %a)
3885 // (sext (setcc %a, %b, setgt)) ->
3886 // -(xor (adde (lshr %a, 63), (ashr %b, 63), subc.CA), 1)
3887 // (sext (setcc %a, 0, setgt)) -> (ashr (nor (add %a, -1), %a), 63)
// (%a > -1) is handled as (%a >= 0).
3888 if (IsRHSNegOne)
3889 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::GESExt);
3890 if (IsRHSZero) {
3891 SDValue Add =
3892 SDValue(CurDAG->getMachineNode(PPC::ADDI8, dl, MVT::i64, LHS,
3893 S->getI64Imm(-1, dl)), 0);
3894 SDValue Nor =
3895 SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64, Add, LHS), 0);
3896 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, Nor,
3897 S->getI64Imm(63, dl)), 0);
3898 }
// Handle (%a > %b) as (%b < %a): swap, recompute the constant flags for the
// new RHS and fall through.
3899 std::swap(LHS, RHS);
3900 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
3901 IsRHSZero = RHSConst && RHSConst->isZero();
3902 IsRHSOne = RHSConst && RHSConst->getSExtValue() == 1;
3903 [[fallthrough]];
3904 }
3905 case ISD::SETLT: {
3906 // {subc.reg, subc.CA} = (subcarry %a, %b)
3907 // (sext (setcc %a, %b, setlt)) ->
3908 // -(xor (adde (lshr %b, 63), (ashr %a, 63), subc.CA), 1)
3909 // (sext (setcc %a, 0, setlt)) -> (ashr %a, 63)
// (%a < 1) is handled as (%a <= 0).
3910 if (IsRHSOne)
3911 return getCompoundZeroComparisonInGPR(LHS, dl, ZeroCompare::LESExt);
3912 if (IsRHSZero) {
3913 return SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, LHS,
3914 S->getI64Imm(63, dl)), 0);
3915 }
3916 SDValue SRADINode =
3917 SDValue(CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64,
3918 LHS, S->getI64Imm(63, dl)), 0);
3919 SDValue SRDINode =
3920 SDValue(CurDAG->getMachineNode(PPC::RLDICL, dl, MVT::i64,
3921 RHS, S->getI64Imm(1, dl),
3922 S->getI64Imm(63, dl)), 0);
// Only result 1 (the Glue value) of SUBFC8 is kept; it is the last operand of
// the ADDE8 below.
3923 SDValue SUBFC8Carry =
3924 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3925 RHS, LHS), 1);
3926 SDValue ADDE8Node =
3927 SDValue(CurDAG->getMachineNode(PPC::ADDE8, dl, MVT::i64,
3928 SRDINode, SRADINode, SUBFC8Carry), 0);
3929 SDValue XORI8Node =
3930 SDValue(CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
3931 ADDE8Node, S->getI64Imm(1, dl)), 0);
3932 return SDValue(CurDAG->getMachineNode(PPC::NEG8, dl, MVT::i64,
3933 XORI8Node), 0);
3934 }
3935 case ISD::SETUGE:
3936 // {subc.reg, subc.CA} = (subcarry %a, %b)
3937 // (sext (setcc %a, %b, setuge)) -> ~(sube %b, %b, subc.CA)
3938 std::swap(LHS, RHS);
3939 [[fallthrough]];
3940 case ISD::SETULE: {
3941 // {subc.reg, subc.CA} = (subcarry %b, %a)
3942 // (sext (setcc %a, %b, setule)) -> ~(sube %a, %a, subc.CA)
3943 SDValue SubtractCarry =
3944 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3945 LHS, RHS), 1);
3946 SDValue ExtSub =
3947 SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64, MVT::Glue, LHS,
3948 LHS, SubtractCarry), 0);
// NOR8 of a value with itself is the "~" from the comment above.
3949 return SDValue(CurDAG->getMachineNode(PPC::NOR8, dl, MVT::i64,
3950 ExtSub, ExtSub), 0);
3951 }
3952 case ISD::SETUGT:
3953 // {subc.reg, subc.CA} = (subcarry %b, %a)
3954 // (sext (setcc %a, %b, setugt)) -> (sube %b, %b, subc.CA)
3955 std::swap(LHS, RHS);
3956 [[fallthrough]];
3957 case ISD::SETULT: {
3958 // {subc.reg, subc.CA} = (subcarry %a, %b)
3959 // (sext (setcc %a, %b, setult)) -> (sube %a, %a, subc.CA)
3960 SDValue SubCarry =
3961 SDValue(CurDAG->getMachineNode(PPC::SUBFC8, dl, MVT::i64, MVT::Glue,
3962 RHS, LHS), 1);
3963 return SDValue(CurDAG->getMachineNode(PPC::SUBFE8, dl, MVT::i64,
3964 LHS, LHS, SubCarry), 0);
3965 }
3966 }
3967 }
3968
3969 /// Do all uses of this SDValue need the result in a GPR?
3970 /// This is meant to be used on values that have type i1 since
3971 /// it is somewhat meaningless to ask if values of other types
3972 /// should be kept in GPR's.
///
/// \param Compare  the value to inspect; asserted to be an ISD::SETCC node.
/// \param CurDAG   not referenced in this function's body.
/// \returns true when Compare has exactly one use, or when every use is a
/// SIGN_EXTEND, ZERO_EXTEND, SELECT or bitwise logic op. Returns false (and
/// increments the OmittedForNonExtendUses counter) at the first use of any
/// other kind.
3973 static bool allUsesExtend(SDValue Compare, SelectionDAG *CurDAG) {
3974 assert(Compare.getOpcode() == ISD::SETCC &&
3975 "An ISD::SETCC node required here.");
3976
3977 // For values that have a single use, the caller should obviously already have
3978 // checked if that use is an extending use. We check the other uses here.
3979 if (Compare.hasOneUse())
3980 return true;
3981 // We want the value in a GPR if it is being extended, used for a select, or
3982 // used in logical operations.
3983 for (auto *CompareUse : Compare.getNode()->uses())
3984 if (CompareUse->getOpcode() != ISD::SIGN_EXTEND &&
3985 CompareUse->getOpcode() != ISD::ZERO_EXTEND &&
3986 CompareUse->getOpcode() != ISD::SELECT &&
3987 !ISD::isBitwiseLogicOp(CompareUse->getOpcode())) {
// Count the comparison that is being left alone because of this use.
3988 OmittedForNonExtendUses++;
3989 return false;
3990 }
3991 return true;
3992 }
3993
3994 /// Returns an equivalent of a SETCC node but with the result the same width as
3995 /// the inputs. This can also be used for SELECT_CC if either the true or false
3996 /// values is a power of two while the other is zero.
///
/// \param Compare   an ISD::SETCC or ISD::SELECT_CC node (asserted).
/// \param ConvOpts  selects sign- vs zero-extension and whether the condition
///                  code is inverted first (ZExtInvert / SExtInvert).
/// \returns the value produced by one of get32BitSExtCompare,
/// get32BitZExtCompare, get64BitSExtCompare or get64BitZExtCompare, chosen by
/// input width and extension kind. Returns an empty SDValue() when a SETCC has
/// uses rejected by allUsesExtend or when the input type is neither i32 nor
/// i64.
3997 SDValue IntegerCompareEliminator::getSETCCInGPR(SDValue Compare,
3998 SetccInGPROpts ConvOpts) {
3999 assert((Compare.getOpcode() == ISD::SETCC ||
4000 Compare.getOpcode() == ISD::SELECT_CC) &&
4001 "An ISD::SETCC node required here.");
4002
4003 // Don't convert this comparison to a GPR sequence because there are uses
4004 // of the i1 result (i.e. uses that require the result in the CR).
4005 if ((Compare.getOpcode() == ISD::SETCC) && !allUsesExtend(Compare, CurDAG))
4006 return SDValue();
4007
4008 SDValue LHS = Compare.getOperand(0);
4009 SDValue RHS = Compare.getOperand(1);
4010
4011 // The condition code is operand 2 for SETCC and operand 4 for SELECT_CC.
4012 int CCOpNum = Compare.getOpcode() == ISD::SELECT_CC ? 4 : 2;
// NOTE(review): source line 4013 is absent from this listing; presumably it
// declares `CC` and the expression below is its initializer -- confirm
// against the full file.
4014 cast<CondCodeSDNode>(Compare.getOperand(CCOpNum))->get();
4015 EVT InputVT = LHS.getValueType();
4016 if (InputVT != MVT::i32 && InputVT != MVT::i64)
4017 return SDValue();
4018
// The *Invert options compute the opposite comparison, so flip the condition
// code before dispatching.
4019 if (ConvOpts == SetccInGPROpts::ZExtInvert ||
4020 ConvOpts == SetccInGPROpts::SExtInvert)
4021 CC = ISD::getSetCCInverse(CC, InputVT);
4022
4023 bool Inputs32Bit = InputVT == MVT::i32;
4024
4025 SDLoc dl(Compare);
// RHSValue is the sign-extended constant when RHS is a ConstantSDNode and
// INT64_MAX otherwise; the helpers compare it against 0, 1 and -1 only.
4026 ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
4027 int64_t RHSValue = RHSConst ? RHSConst->getSExtValue() : INT64_MAX;
4028 bool IsSext = ConvOpts == SetccInGPROpts::SExtOrig ||
4029 ConvOpts == SetccInGPROpts::SExtInvert;
4030
4031 if (IsSext && Inputs32Bit)
4032 return get32BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4033 else if (Inputs32Bit)
4034 return get32BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4035 else if (IsSext)
4036 return get64BitSExtCompare(LHS, RHS, CC, RHSValue, dl);
4037 return get64BitZExtCompare(LHS, RHS, CC, RHSValue, dl);
4038 }
4039
4040} // end anonymous namespace
4041
// Tries to select N through IntegerCompareEliminator.
// Returns true only when ICmpElim.Select(N) produced a node and N was replaced
// with it. Returns false when N's result type is not i32/i64, when the
// optimization level is None or the target is not PPC64, when the subtarget is
// ISA 3.1 and the CmpInGPR option was not given on the command line, when N's
// opcode is not one of ZERO_EXTEND / SIGN_EXTEND / AND / OR / XOR, or when
// Select returns null.
4042 bool PPCDAGToDAGISel::tryIntCompareInGPR(SDNode *N) {
4043 if (N->getValueType(0) != MVT::i32 &&
4044 N->getValueType(0) != MVT::i64)
4045 return false;
4046
4047 // This optimization will emit code that assumes 64-bit registers
4048 // so we don't want to run it in 32-bit mode. Also don't run it
4049 // on functions that are not to be optimized.
4050 if (TM.getOptLevel() == CodeGenOptLevel::None || !TM.isPPC64())
4051 return false;
4052
4053 // For POWER10, it is more profitable to use the set boolean extension
4054 // instructions rather than the integer compare elimination codegen.
4055 // Users can override this via the command line option, `--ppc-gpr-icmps`.
4056 if (!(CmpInGPR.getNumOccurrences() > 0) && Subtarget->isISA3_1())
4057 return false;
4058
4059 switch (N->getOpcode()) {
4060 default: break;
4061 case ISD::ZERO_EXTEND:
4062 case ISD::SIGN_EXTEND:
4063 case ISD::AND:
4064 case ISD::OR:
4065 case ISD::XOR: {
4066 IntegerCompareEliminator ICmpElim(CurDAG, this);
4067 if (SDNode *New = ICmpElim.Select(N)) {
4068 ReplaceNode(N, New);
4069 return true;
4070 }
// No replacement was produced: control leaves the switch and reaches the
// `return false` below.
4071 }
4072 }
4073 return false;
4074 }
4075
// Tries to select N through BitPermutationSelector.
// Returns true only when BPS.Select(N) produced a node and N was replaced with
// it. Returns false when N's result type is not i32/i64, when
// UseBitPermRewriter is off, when N's opcode is not one of SRL / ROTL / SHL /
// AND / OR, for the ISA 3.1 (srl (bswap r), 16) special case below, or when
// Select returns null.
4076 bool PPCDAGToDAGISel::tryBitPermutation(SDNode *N) {
4077 if (N->getValueType(0) != MVT::i32 &&
4078 N->getValueType(0) != MVT::i64)
4079 return false;
4080
4081 if (!UseBitPermRewriter)
4082 return false;
4083
4084 switch (N->getOpcode()) {
4085 default: break;
4086 case ISD::SRL:
4087 // If we are on P10, we have a pattern for 32-bit (srl (bswap r), 16) that
4088 // uses the BRH instruction.
4089 if (Subtarget->isISA3_1() && N->getValueType(0) == MVT::i32 &&
4090 N->getOperand(0).getOpcode() == ISD::BSWAP) {
4091 auto &OpRight = N->getOperand(1);
4092 ConstantSDNode *SRLConst = dyn_cast<ConstantSDNode>(OpRight);
// A constant shift amount of exactly 16 returns false here without invoking
// the BitPermutationSelector.
4093 if (SRLConst && SRLConst->getSExtValue() == 16)
4094 return false;
4095 }
4096 [[fallthrough]];
4097 case ISD::ROTL:
4098 case ISD::SHL:
4099 case ISD::AND:
4100 case ISD::OR: {
4101 BitPermutationSelector BPS(CurDAG);
4102 if (SDNode *New = BPS.Select(N)) {
4103 ReplaceNode(N, New);
4104 return true;
4105 }
4106 return false;
4107 }
4108 }
4109
4110 return false;
4111 }
4112
4113/// SelectCC - Select a comparison of the specified values with the specified
4114/// condition code, returning the CR# of the expression.
4115SDValue PPCDAGToDAGISel::SelectCC(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4116 const SDLoc &dl, SDValue Chain) {
4117 // Always select the LHS.
4118 unsigned Opc;
4119
4120 if (LHS.getValueType() == MVT::i32) {
4121 unsigned Imm;
4122 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4123 if (isInt32Immediate(RHS, Imm)) {
4124 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4125 if (isUInt<16>(Imm))
4126 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4127 getI32Imm(Imm & 0xFFFF, dl)),
4128 0);
4129 // If this is a 16-bit signed immediate, fold it.
4130 if (isInt<16>((int)Imm))
4131 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4132 getI32Imm(Imm & 0xFFFF, dl)),
4133 0);
4134
4135 // For non-equality comparisons, the default code would materialize the
4136 // constant, then compare against it, like this:
4137 // lis r2, 4660
4138 // ori r2, r2, 22136
4139 // cmpw cr0, r3, r2
4140 // Since we are just comparing for equality, we can emit this instead:
4141 // xoris r0,r3,0x1234
4142 // cmplwi cr0,r0,0x5678
4143 // beq cr0,L6
4144 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS, dl, MVT::i32, LHS,
4145 getI32Imm(Imm >> 16, dl)), 0);
4146 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, Xor,
4147 getI32Imm(Imm & 0xFFFF, dl)), 0);
4148 }
4149 Opc = PPC::CMPLW;
4150 } else if (ISD::isUnsignedIntSetCC(CC)) {
4151 if (isInt32Immediate(RHS, Imm) && isUInt<16>(Imm))
4152 return SDValue(CurDAG->getMachineNode(PPC::CMPLWI, dl, MVT::i32, LHS,
4153 getI32Imm(Imm & 0xFFFF, dl)), 0);
4154 Opc = PPC::CMPLW;
4155 } else {
4156 int16_t SImm;
4157 if (isIntS16Immediate(RHS, SImm))
4158 return SDValue(CurDAG->getMachineNode(PPC::CMPWI, dl, MVT::i32, LHS,
4159 getI32Imm((int)SImm & 0xFFFF,
4160 dl)),
4161 0);
4162 Opc = PPC::CMPW;
4163 }
4164 } else if (LHS.getValueType() == MVT::i64) {
4165 uint64_t Imm;
4166 if (CC == ISD::SETEQ || CC == ISD::SETNE) {
4167 if (isInt64Immediate(RHS.getNode(), Imm)) {
4168 // SETEQ/SETNE comparison with 16-bit immediate, fold it.
4169 if (isUInt<16>(Imm))
4170 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4171 getI32Imm(Imm & 0xFFFF, dl)),
4172 0);
4173 // If this is a 16-bit signed immediate, fold it.
4174 if (isInt<16>(Imm))
4175 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4176 getI32Imm(Imm & 0xFFFF, dl)),
4177 0);
4178
4179 // For non-equality comparisons, the default code would materialize the
4180 // constant, then compare against it, like this:
4181 // lis r2, 4660
4182 // ori r2, r2, 22136
4183 // cmpd cr0, r3, r2
4184 // Since we are just comparing for equality, we can emit this instead:
4185 // xoris r0,r3,0x1234
4186 // cmpldi cr0,r0,0x5678
4187 // beq cr0,L6
4188 if (isUInt<32>(Imm)) {
4189 SDValue Xor(CurDAG->getMachineNode(PPC::XORIS8, dl, MVT::i64, LHS,
4190 getI64Imm(Imm >> 16, dl)), 0);
4191 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, Xor,
4192 getI64Imm(Imm & 0xFFFF, dl)),
4193 0);
4194 }
4195 }
4196 Opc = PPC::CMPLD;
4197 } else if (ISD::isUnsignedIntSetCC(CC)) {
4198 if (isInt64Immediate(RHS.getNode(), Imm) && isUInt<16>(Imm))
4199 return SDValue(CurDAG->getMachineNode(PPC::CMPLDI, dl, MVT::i64, LHS,
4200 getI64Imm(Imm & 0xFFFF, dl)), 0);
4201 Opc = PPC::CMPLD;
4202 } else {
4203 int16_t SImm;
4204 if (isIntS16Immediate(RHS, SImm))
4205 return SDValue(CurDAG->getMachineNode(PPC::CMPDI, dl, MVT::i64, LHS,
4206 getI64Imm(SImm & 0xFFFF, dl)),
4207 0);
4208 Opc = PPC::CMPD;
4209 }
4210 } else if (LHS.getValueType() == MVT::f32) {
4211 if (Subtarget->hasSPE()) {
4212 switch (CC) {
4213 default:
4214 case ISD::SETEQ:
4215 case ISD::SETNE:
4216 Opc = PPC::EFSCMPEQ;
4217 break;
4218 case ISD::SETLT:
4219 case ISD::SETGE:
4220 case ISD::SETOLT:
4221 case ISD::SETOGE:
4222 case ISD::SETULT:
4223 case ISD::SETUGE:
4224 Opc = PPC::EFSCMPLT;
4225 break;
4226 case ISD::SETGT:
4227 case ISD::SETLE:
4228 case ISD::SETOGT:
4229 case ISD::SETOLE:
4230 case ISD::SETUGT:
4231 case ISD::SETULE:
4232 Opc = PPC::EFSCMPGT;
4233 break;
4234 }
4235 } else
4236 Opc = PPC::FCMPUS;
4237 } else if (LHS.getValueType() == MVT::f64) {
4238 if (Subtarget->hasSPE()) {
4239 switch (CC) {
4240 default:
4241 case ISD::SETEQ:
4242 case ISD::SETNE:
4243 Opc = PPC::EFDCMPEQ;
4244 break;
4245 case ISD::SETLT:
4246 case ISD::SETGE:
4247 case ISD::SETOLT:
4248 case ISD::SETOGE:
4249 case ISD::SETULT:
4250 case ISD::SETUGE:
4251 Opc = PPC::EFDCMPLT;
4252 break;
4253 case ISD::SETGT:
4254 case ISD::SETLE:
4255 case ISD::SETOGT:
4256 case ISD::SETOLE:
4257 case ISD::SETUGT:
4258 case ISD::SETULE:
4259 Opc = PPC::EFDCMPGT;
4260 break;
4261 }
4262 } else
4263 Opc = Subtarget->hasVSX() ? PPC::XSCMPUDP : PPC::FCMPUD;
4264 } else {
4265 assert(LHS.getValueType() == MVT::f128 && "Unknown vt!");
4266 assert(Subtarget->hasP9Vector() && "XSCMPUQP requires Power9 Vector");
4267 Opc = PPC::XSCMPUQP;
4268 }
4269 if (Chain)
4270 return SDValue(
4271 CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::Other, LHS, RHS, Chain),
4272 0);
4273 else
4274 return SDValue(CurDAG->getMachineNode(Opc, dl, MVT::i32, LHS, RHS), 0);
4275}
4276
4278 const PPCSubtarget *Subtarget) {
4279 // For SPE instructions, the result is in GT bit of the CR
4280 bool UseSPE = Subtarget->hasSPE() && VT.isFloatingPoint();
4281
4282 switch (CC) {
4283 case ISD::SETUEQ:
4284 case ISD::SETONE:
4285 case ISD::SETOLE:
4286 case ISD::SETOGE:
4287 llvm_unreachable("Should be lowered by legalize!");
4288 default: llvm_unreachable("Unknown condition!");
4289 case ISD::SETOEQ:
4290 case ISD::SETEQ:
4291 return UseSPE ? PPC::PRED_GT : PPC::PRED_EQ;
4292 case ISD::SETUNE:
4293 case ISD::SETNE:
4294 return UseSPE ? PPC::PRED_LE : PPC::PRED_NE;
4295 case ISD::SETOLT:
4296 case ISD::SETLT:
4297 return UseSPE ? PPC::PRED_GT : PPC::PRED_LT;
4298 case ISD::SETULE:
4299 case ISD::SETLE:
4300 return PPC::PRED_LE;
4301 case ISD::SETOGT:
4302 case ISD::SETGT:
4303 return PPC::PRED_GT;
4304 case ISD::SETUGE:
4305 case ISD::SETGE:
4306 return UseSPE ? PPC::PRED_LE : PPC::PRED_GE;
4307 case ISD::SETO: return PPC::PRED_NU;
4308 case ISD::SETUO: return PPC::PRED_UN;
4309 // These two are invalid for floating point. Assume we have int.
4310 case ISD::SETULT: return PPC::PRED_LT;
4311 case ISD::SETUGT: return PPC::PRED_GT;
4312 }
4313}
4314
4315/// getCRIdxForSetCC - Return the index of the condition register field
4316/// associated with the SetCC condition, and whether or not the field is
4317/// treated as inverted. That is, lt = 0; ge = 0 inverted.
4318static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert) {
4319 Invert = false;
4320 switch (CC) {
4321 default: llvm_unreachable("Unknown condition!");
4322 case ISD::SETOLT:
4323 case ISD::SETLT: return 0; // Bit #0 = SETOLT
4324 case ISD::SETOGT:
4325 case ISD::SETGT: return 1; // Bit #1 = SETOGT
4326 case ISD::SETOEQ:
4327 case ISD::SETEQ: return 2; // Bit #2 = SETOEQ
4328 case ISD::SETUO: return 3; // Bit #3 = SETUO
4329 case ISD::SETUGE:
4330 case ISD::SETGE: Invert = true; return 0; // !Bit #0 = SETUGE
4331 case ISD::SETULE:
4332 case ISD::SETLE: Invert = true; return 1; // !Bit #1 = SETULE
4333 case ISD::SETUNE:
4334 case ISD::SETNE: Invert = true; return 2; // !Bit #2 = SETUNE
4335 case ISD::SETO: Invert = true; return 3; // !Bit #3 = SETO
4336 case ISD::SETUEQ:
4337 case ISD::SETOGE:
4338 case ISD::SETOLE:
4339 case ISD::SETONE:
4340 llvm_unreachable("Invalid branch code: should be expanded by legalize");
4341 // These are invalid for floating point. Assume integer.
4342 case ISD::SETULT: return 0;
4343 case ISD::SETUGT: return 1;
4344 }
4345}
4346
4347// getVCmpInst: return the vector compare instruction for the specified
4348// vector type and condition code. Since this is for altivec specific code,
4349// only support the altivec types (v16i8, v8i16, v4i32, v2i64, v1i128,
4350// and v4f32).
4351static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC,
4352 bool HasVSX, bool &Swap, bool &Negate) {
4353 Swap = false;
4354 Negate = false;
4355
4356 if (VecVT.isFloatingPoint()) {
4357 /* Handle some cases by swapping input operands. */
4358 switch (CC) {
4359 case ISD::SETLE: CC = ISD::SETGE; Swap = true; break;
4360 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4361 case ISD::SETOLE: CC = ISD::SETOGE; Swap = true; break;
4362 case ISD::SETOLT: CC = ISD::SETOGT; Swap = true; break;
4363 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4364 case ISD::SETUGT: CC = ISD::SETULT; Swap = true; break;
4365 default: break;
4366 }
4367 /* Handle some cases by negating the result. */
4368 switch (CC) {
4369 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4370 case ISD::SETUNE: CC = ISD::SETOEQ; Negate = true; break;
4371 case ISD::SETULE: CC = ISD::SETOGT; Negate = true; break;
4372 case ISD::SETULT: CC = ISD::SETOGE; Negate = true; break;
4373 default: break;
4374 }
4375 /* We have instructions implementing the remaining cases. */
4376 switch (CC) {
4377 case ISD::SETEQ:
4378 case ISD::SETOEQ:
4379 if (VecVT == MVT::v4f32)
4380 return HasVSX ? PPC::XVCMPEQSP : PPC::VCMPEQFP;
4381 else if (VecVT == MVT::v2f64)
4382 return PPC::XVCMPEQDP;
4383 break;
4384 case ISD::SETGT:
4385 case ISD::SETOGT:
4386 if (VecVT == MVT::v4f32)
4387 return HasVSX ? PPC::XVCMPGTSP : PPC::VCMPGTFP;
4388 else if (VecVT == MVT::v2f64)
4389 return PPC::XVCMPGTDP;
4390 break;
4391 case ISD::SETGE:
4392 case ISD::SETOGE:
4393 if (VecVT == MVT::v4f32)
4394 return HasVSX ? PPC::XVCMPGESP : PPC::VCMPGEFP;
4395 else if (VecVT == MVT::v2f64)
4396 return PPC::XVCMPGEDP;
4397 break;
4398 default:
4399 break;
4400 }
4401 llvm_unreachable("Invalid floating-point vector compare condition");
4402 } else {
4403 /* Handle some cases by swapping input operands. */
4404 switch (CC) {
4405 case ISD::SETGE: CC = ISD::SETLE; Swap = true; break;
4406 case ISD::SETLT: CC = ISD::SETGT; Swap = true; break;
4407 case ISD::SETUGE: CC = ISD::SETULE; Swap = true; break;
4408 case ISD::SETULT: CC = ISD::SETUGT; Swap = true; break;
4409 default: break;
4410 }
4411 /* Handle some cases by negating the result. */
4412 switch (CC) {
4413 case ISD::SETNE: CC = ISD::SETEQ; Negate = true; break;
4414 case ISD::SETUNE: CC = ISD::SETUEQ; Negate = true; break;
4415 case ISD::SETLE: CC = ISD::SETGT; Negate = true; break;
4416 case ISD::SETULE: CC = ISD::SETUGT; Negate = true; break;
4417 default: break;
4418 }
4419 /* We have instructions implementing the remaining cases. */
4420 switch (CC) {
4421 case ISD::SETEQ:
4422 case ISD::SETUEQ:
4423 if (VecVT == MVT::v16i8)
4424 return PPC::VCMPEQUB;
4425 else if (VecVT == MVT::v8i16)
4426 return PPC::VCMPEQUH;
4427 else if (VecVT == MVT::v4i32)
4428 return PPC::VCMPEQUW;
4429 else if (VecVT == MVT::v2i64)
4430 return PPC::VCMPEQUD;
4431 else if (VecVT == MVT::v1i128)
4432 return PPC::VCMPEQUQ;
4433 break;
4434 case ISD::SETGT:
4435 if (VecVT == MVT::v16i8)
4436 return PPC::VCMPGTSB;
4437 else if (VecVT == MVT::v8i16)
4438 return PPC::VCMPGTSH;
4439 else if (VecVT == MVT::v4i32)
4440 return PPC::VCMPGTSW;
4441 else if (VecVT == MVT::v2i64)
4442 return PPC::VCMPGTSD;
4443 else if (VecVT == MVT::v1i128)
4444 return PPC::VCMPGTSQ;
4445 break;
4446 case ISD::SETUGT:
4447 if (VecVT == MVT::v16i8)
4448 return PPC::VCMPGTUB;
4449 else if (VecVT == MVT::v8i16)
4450 return PPC::VCMPGTUH;
4451 else if (VecVT == MVT::v4i32)
4452 return PPC::VCMPGTUW;
4453 else if (VecVT == MVT::v2i64)
4454 return PPC::VCMPGTUD;
4455 else if (VecVT == MVT::v1i128)
4456 return PPC::VCMPGTUQ;
4457 break;
4458 default:
4459 break;
4460 }
4461 llvm_unreachable("Invalid integer vector compare condition");
4462 }
4463}
4464
4465bool PPCDAGToDAGISel::trySETCC(SDNode *N) {
4466 SDLoc dl(N);
4467 unsigned Imm;
4468 bool IsStrict = N->isStrictFPOpcode();
4470 cast<CondCodeSDNode>(N->getOperand(IsStrict ? 3 : 2))->get();
4471 EVT PtrVT =
4473 bool isPPC64 = (PtrVT == MVT::i64);
4474 SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
4475
4476 SDValue LHS = N->getOperand(IsStrict ? 1 : 0);
4477 SDValue RHS = N->getOperand(IsStrict ? 2 : 1);
4478
4479 if (!IsStrict && !Subtarget->useCRBits() && isInt32Immediate(RHS, Imm)) {
4480 // We can codegen setcc op, imm very efficiently compared to a brcond.
4481 // Check for those cases here.
4482 // setcc op, 0
4483 if (Imm == 0) {
4484 SDValue Op = LHS;
4485 switch (CC) {
4486 default: break;
4487 case ISD::SETEQ: {
4488 Op = SDValue(CurDAG->getMachineNode(PPC::CNTLZW, dl, MVT::i32, Op), 0);
4489 SDValue Ops[] = { Op, getI32Imm(27, dl), getI32Imm(5, dl),
4490 getI32Imm(31, dl) };
4491 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4492 return true;
4493 }
4494 case ISD::SETNE: {
4495 if (isPPC64) break;
4496 SDValue AD =
4497 SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4498 Op, getI32Imm(~0U, dl)), 0);
4499 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, AD, Op, AD.getValue(1));
4500 return true;
4501 }
4502 case ISD::SETLT: {
4503 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4504 getI32Imm(31, dl) };
4505 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4506 return true;
4507 }
4508 case ISD::SETGT: {
4509 SDValue T =
4510 SDValue(CurDAG->getMachineNode(PPC::NEG, dl, MVT::i32, Op), 0);
4511 T = SDValue(CurDAG->getMachineNode(PPC::ANDC, dl, MVT::i32, T, Op), 0);
4512 SDValue Ops[] = { T, getI32Imm(1, dl), getI32Imm(31, dl),
4513 getI32Imm(31, dl) };
4514 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4515 return true;
4516 }
4517 }
4518 } else if (Imm == ~0U) { // setcc op, -1
4519 SDValue Op = LHS;
4520 switch (CC) {
4521 default: break;
4522 case ISD::SETEQ:
4523 if (isPPC64) break;
4524 Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4525 Op, getI32Imm(1, dl)), 0);
4526 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
4527 SDValue(CurDAG->getMachineNode(PPC::LI, dl,
4528 MVT::i32,
4529 getI32Imm(0, dl)),
4530 0), Op.getValue(1));
4531 return true;
4532 case ISD::SETNE: {
4533 if (isPPC64) break;
4534 Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
4535 SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
4536 Op, getI32Imm(~0U, dl));
4537 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(AD, 0), Op,
4538 SDValue(AD, 1));
4539 return true;
4540 }
4541 case ISD::SETLT: {
4542 SDValue AD = SDValue(CurDAG->getMachineNode(PPC::ADDI, dl, MVT::i32, Op,
4543 getI32Imm(1, dl)), 0);
4544 SDValue AN = SDValue(CurDAG->getMachineNode(PPC::AND, dl, MVT::i32, AD,
4545 Op), 0);
4546 SDValue Ops[] = { AN, getI32Imm(1, dl), getI32Imm(31, dl),
4547 getI32Imm(31, dl) };
4548 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4549 return true;
4550 }
4551 case ISD::SETGT: {
4552 SDValue Ops[] = { Op, getI32Imm(1, dl), getI32Imm(31, dl),
4553 getI32Imm(31, dl) };
4554 Op = SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4555 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Op, getI32Imm(1, dl));
4556 return true;
4557 }
4558 }
4559 }
4560 }
4561
4562 // Altivec Vector compare instructions do not set any CR register by default and
4563 // vector compare operations return the same type as the operands.
4564 if (!IsStrict && LHS.getValueType().isVector()) {
4565 if (Subtarget->hasSPE())
4566 return false;
4567
4568 EVT VecVT = LHS.getValueType();
4569 bool Swap, Negate;
4570 unsigned int VCmpInst =
4571 getVCmpInst(VecVT.getSimpleVT(), CC, Subtarget->hasVSX(), Swap, Negate);
4572 if (Swap)
4573 std::swap(LHS, RHS);
4574
4575 EVT ResVT = VecVT.changeVectorElementTypeToInteger();
4576 if (Negate) {
4577 SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, ResVT, LHS, RHS), 0);
4578 CurDAG->SelectNodeTo(N, Subtarget->hasVSX() ? PPC::XXLNOR : PPC::VNOR,
4579 ResVT, VCmp, VCmp);
4580 return true;
4581 }
4582
4583 CurDAG->SelectNodeTo(N, VCmpInst, ResVT, LHS, RHS);
4584 return true;
4585 }
4586
4587 if (Subtarget->useCRBits())
4588 return false;
4589
4590 bool Inv;
4591 unsigned Idx = getCRIdxForSetCC(CC, Inv);
4592 SDValue CCReg = SelectCC(LHS, RHS, CC, dl, Chain);
4593 if (IsStrict)
4594 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), CCReg.getValue(1));
4595 SDValue IntCR;
4596
4597 // SPE e*cmp* instructions only set the 'gt' bit, so hard-code that
4598 // The correct compare instruction is already set by SelectCC()
4599 if (Subtarget->hasSPE() && LHS.getValueType().isFloatingPoint()) {
4600 Idx = 1;
4601 }
4602
4603 // Force the ccreg into CR7.
4604 SDValue CR7Reg = CurDAG->getRegister(PPC::CR7, MVT::i32);
4605
4606 SDValue InGlue; // Null incoming flag value.
4607 CCReg = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, CR7Reg, CCReg,
4608 InGlue).getValue(1);
4609
4610 IntCR = SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR7Reg,
4611 CCReg), 0);
4612
4613 SDValue Ops[] = { IntCR, getI32Imm((32 - (3 - Idx)) & 31, dl),
4614 getI32Imm(31, dl), getI32Imm(31, dl) };
4615 if (!Inv) {
4616 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4617 return true;
4618 }
4619
4620 // Get the specified bit.
4621 SDValue Tmp =
4622 SDValue(CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
4623 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Tmp, getI32Imm(1, dl));
4624 return true;
4625}
4626
4627/// Does this node represent a load/store node whose address can be represented
4628/// with a register plus an immediate that's a multiple of \p Val:
4629bool PPCDAGToDAGISel::isOffsetMultipleOf(SDNode *N, unsigned Val) const {
4630 LoadSDNode *LDN = dyn_cast<LoadSDNode>(N);
4631 StoreSDNode *STN = dyn_cast<StoreSDNode>(N);
4632 MemIntrinsicSDNode *MIN = dyn_cast<MemIntrinsicSDNode>(N);
4633 SDValue AddrOp;
4634 if (LDN || (MIN && MIN->getOpcode() == PPCISD::LD_SPLAT))
4635 AddrOp = N->getOperand(1);
4636 else if (STN)
4637 AddrOp = STN->getOperand(2);
4638
4639 // If the address points a frame object or a frame object with an offset,
4640 // we need to check the object alignment.
4641 short Imm = 0;
4642 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(
4643 AddrOp.getOpcode() == ISD::ADD ? AddrOp.getOperand(0) :
4644 AddrOp)) {
4645 // If op0 is a frame index that is under aligned, we can't do it either,
4646 // because it is translated to r31 or r1 + slot + offset. We won't know the
4647 // slot number until the stack frame is finalized.
4648 const MachineFrameInfo &MFI = CurDAG->getMachineFunction().getFrameInfo();
4649 unsigned SlotAlign = MFI.getObjectAlign(FI->getIndex()).value();
4650 if ((SlotAlign % Val) != 0)
4651 return false;
4652
4653 // If we have an offset, we need further check on the offset.
4654 if (AddrOp.getOpcode() != ISD::ADD)
4655 return true;
4656 }
4657
4658 if (AddrOp.getOpcode() == ISD::ADD)
4659 return isIntS16Immediate(AddrOp.getOperand(1), Imm) && !(Imm % Val);
4660
4661 // If the address comes from the outside, the offset will be zero.
4662 return AddrOp.getOpcode() == ISD::CopyFromReg;
4663}
4664
4665void PPCDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
4666 // Transfer memoperands.
4667 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
4668 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
4669}
4670
4672 bool &NeedSwapOps, bool &IsUnCmp) {
4673
4674 assert(N->getOpcode() == ISD::SELECT_CC && "Expecting a SELECT_CC here.");
4675
4676 SDValue LHS = N->getOperand(0);
4677 SDValue RHS = N->getOperand(1);
4678 SDValue TrueRes = N->getOperand(2);
4679 SDValue FalseRes = N->getOperand(3);
4680 ConstantSDNode *TrueConst = dyn_cast<ConstantSDNode>(TrueRes);
4681 if (!TrueConst || (N->getSimpleValueType(0) != MVT::i64 &&
4682 N->getSimpleValueType(0) != MVT::i32))
4683 return false;
4684
4685 // We are looking for any of:
4686 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4687 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, cc2)), cc1)
4688 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, 1, -1, cc2), seteq)
4689 // (select_cc lhs, rhs, 0, (select_cc [lr]hs, [lr]hs, -1, 1, cc2), seteq)
4690 int64_t TrueResVal = TrueConst->getSExtValue();
4691 if ((TrueResVal < -1 || TrueResVal > 1) ||
4692 (TrueResVal == -1 && FalseRes.getOpcode() != ISD::ZERO_EXTEND) ||
4693 (TrueResVal == 1 && FalseRes.getOpcode() != ISD::SIGN_EXTEND) ||
4694 (TrueResVal == 0 &&
4695 (FalseRes.getOpcode() != ISD::SELECT_CC || CC != ISD::SETEQ)))
4696 return false;
4697
4698 SDValue SetOrSelCC = FalseRes.getOpcode() == ISD::SELECT_CC
4699 ? FalseRes
4700 : FalseRes.getOperand(0);
4701 bool InnerIsSel = SetOrSelCC.getOpcode() == ISD::SELECT_CC;
4702 if (SetOrSelCC.getOpcode() != ISD::SETCC &&
4703 SetOrSelCC.getOpcode() != ISD::SELECT_CC)
4704 return false;
4705
4706 // Without this setb optimization, the outer SELECT_CC will be manually
4707 // selected to SELECT_CC_I4/SELECT_CC_I8 Pseudo, then expand-isel-pseudos pass
4708 // transforms pseudo instruction to isel instruction. When there are more than
4709 // one use for result like zext/sext, with current optimization we only see
4710 // isel is replaced by setb but can't see any significant gain. Since
4711 // setb has longer latency than original isel, we should avoid this. Another
4712 // point is that setb requires comparison always kept, it can break the
4713 // opportunity to get the comparison away if we have in future.
4714 if (!SetOrSelCC.hasOneUse() || (!InnerIsSel && !FalseRes.hasOneUse()))
4715 return false;
4716
4717 SDValue InnerLHS = SetOrSelCC.getOperand(0);
4718 SDValue InnerRHS = SetOrSelCC.getOperand(1);
4719 ISD::CondCode InnerCC =
4720 cast<CondCodeSDNode>(SetOrSelCC.getOperand(InnerIsSel ? 4 : 2))->get();
4721 // If the inner comparison is a select_cc, make sure the true/false values are
4722 // 1/-1 and canonicalize it if needed.
4723 if (InnerIsSel) {
4724 ConstantSDNode *SelCCTrueConst =
4725 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(2));
4726 ConstantSDNode *SelCCFalseConst =
4727 dyn_cast<ConstantSDNode>(SetOrSelCC.getOperand(3));
4728 if (!SelCCTrueConst || !SelCCFalseConst)
4729 return false;
4730 int64_t SelCCTVal = SelCCTrueConst->getSExtValue();
4731 int64_t SelCCFVal = SelCCFalseConst->getSExtValue();
4732 // The values must be -1/1 (requiring a swap) or 1/-1.
4733 if (SelCCTVal == -1 && SelCCFVal == 1) {
4734 std::swap(InnerLHS, InnerRHS);
4735 } else if (SelCCTVal != 1 || SelCCFVal != -1)
4736 return false;
4737 }
4738
4739 // Canonicalize unsigned case
4740 if (InnerCC == ISD::SETULT || InnerCC == ISD::SETUGT) {
4741 IsUnCmp = true;
4742 InnerCC = (InnerCC == ISD::SETULT) ? ISD::SETLT : ISD::SETGT;
4743 }
4744
4745 bool InnerSwapped = false;
4746 if (LHS == InnerRHS && RHS == InnerLHS)
4747 InnerSwapped = true;
4748 else if (LHS != InnerLHS || RHS != InnerRHS)
4749 return false;
4750
4751 switch (CC) {
4752 // (select_cc lhs, rhs, 0, \
4753 // (select_cc [lr]hs, [lr]hs, 1, -1, setlt/setgt), seteq)
4754 case ISD::SETEQ:
4755 if (!InnerIsSel)
4756 return false;
4757 if (InnerCC != ISD::SETLT && InnerCC != ISD::SETGT)
4758 return false;
4759 NeedSwapOps = (InnerCC == ISD::SETGT) ? InnerSwapped : !InnerSwapped;
4760 break;
4761
4762 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4763 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setgt)), setu?lt)
4764 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setlt)), setu?lt)
4765 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?lt)
4766 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setgt)), setu?lt)
4767 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setlt)), setu?lt)
4768 case ISD::SETULT:
4769 if (!IsUnCmp && InnerCC != ISD::SETNE)
4770 return false;
4771 IsUnCmp = true;
4772 [[fallthrough]];
4773 case ISD::SETLT:
4774 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETGT && !InnerSwapped) ||
4775 (InnerCC == ISD::SETLT && InnerSwapped))
4776 NeedSwapOps = (TrueResVal == 1);
4777 else
4778 return false;
4779 break;
4780
4781 // (select_cc lhs, rhs, 1, (sext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4782 // (select_cc lhs, rhs, 1, (sext (setcc lhs, rhs, setlt)), setu?gt)
4783 // (select_cc lhs, rhs, 1, (sext (setcc rhs, lhs, setgt)), setu?gt)
4784 // (select_cc lhs, rhs, -1, (zext (setcc [lr]hs, [lr]hs, setne)), setu?gt)
4785 // (select_cc lhs, rhs, -1, (zext (setcc lhs, rhs, setlt)), setu?gt)
4786 // (select_cc lhs, rhs, -1, (zext (setcc rhs, lhs, setgt)), setu?gt)
4787 case ISD::SETUGT:
4788 if (!IsUnCmp && InnerCC != ISD::SETNE)
4789 return false;
4790 IsUnCmp = true;
4791 [[fallthrough]];
4792 case ISD::SETGT:
4793 if (InnerCC == ISD::SETNE || (InnerCC == ISD::SETLT && !InnerSwapped) ||
4794 (InnerCC == ISD::SETGT && InnerSwapped))
4795 NeedSwapOps = (TrueResVal == -1);
4796 else
4797 return false;
4798 break;
4799
4800 default:
4801 return false;
4802 }
4803
4804 LLVM_DEBUG(dbgs() << "Found a node that can be lowered to a SETB: ");
4805 LLVM_DEBUG(N->dump());
4806
4807 return true;
4808}
4809
4810// Return true if it's a software square-root/divide operand.
4811static bool isSWTestOp(SDValue N) {
4812 if (N.getOpcode() == PPCISD::FTSQRT)
4813 return true;
4814 if (N.getNumOperands() < 1 || !isa<ConstantSDNode>(N.getOperand(0)) ||
4815 N.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
4816 return false;
4817 switch (N.getConstantOperandVal(0)) {
4818 case Intrinsic::ppc_vsx_xvtdivdp:
4819 case Intrinsic::ppc_vsx_xvtdivsp:
4820 case Intrinsic::ppc_vsx_xvtsqrtdp:
4821 case Intrinsic::ppc_vsx_xvtsqrtsp:
4822 return true;
4823 }
4824 return false;
4825}
4826
4827bool PPCDAGToDAGISel::tryFoldSWTestBRCC(SDNode *N) {
4828 assert(N->getOpcode() == ISD::BR_CC && "ISD::BR_CC is expected.");
4829 // We are looking for following patterns, where `truncate to i1` actually has
4830 // the same semantic with `and 1`.
4831 // (br_cc seteq, (truncateToi1 SWTestOp), 0) -> (BCC PRED_NU, SWTestOp)
4832 // (br_cc seteq, (and SWTestOp, 2), 0) -> (BCC PRED_NE, SWTestOp)
4833 // (br_cc seteq, (and SWTestOp, 4), 0) -> (BCC PRED_LE, SWTestOp)
4834 // (br_cc seteq, (and SWTestOp, 8), 0) -> (BCC PRED_GE, SWTestOp)
4835 // (br_cc setne, (truncateToi1 SWTestOp), 0) -> (BCC PRED_UN, SWTestOp)
4836 // (br_cc setne, (and SWTestOp, 2), 0) -> (BCC PRED_EQ, SWTestOp)
4837 // (br_cc setne, (and SWTestOp, 4), 0) -> (BCC PRED_GT, SWTestOp)
4838 // (br_cc setne, (and SWTestOp, 8), 0) -> (BCC PRED_LT, SWTestOp)
4839 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4840 if (CC != ISD::SETEQ && CC != ISD::SETNE)
4841 return false;
4842
4843 SDValue CmpRHS = N->getOperand(3);
4844 if (!isNullConstant(CmpRHS))
4845 return false;
4846
4847 SDValue CmpLHS = N->getOperand(2);
4848 if (CmpLHS.getNumOperands() < 1 || !isSWTestOp(CmpLHS.getOperand(0)))
4849 return false;
4850
4851 unsigned PCC = 0;
4852 bool IsCCNE = CC == ISD::SETNE;
4853 if (CmpLHS.getOpcode() == ISD::AND &&
4854 isa<ConstantSDNode>(CmpLHS.getOperand(1)))
4855 switch (CmpLHS.getConstantOperandVal(1)) {
4856 case 1:
4857 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4858 break;
4859 case 2:
4860 PCC = IsCCNE ? PPC::PRED_EQ : PPC::PRED_NE;
4861 break;
4862 case 4:
4863 PCC = IsCCNE ? PPC::PRED_GT : PPC::PRED_LE;
4864 break;
4865 case 8:
4866 PCC = IsCCNE ? PPC::PRED_LT : PPC::PRED_GE;
4867 break;
4868 default:
4869 return false;
4870 }
4871 else if (CmpLHS.getOpcode() == ISD::TRUNCATE &&
4872 CmpLHS.getValueType() == MVT::i1)
4873 PCC = IsCCNE ? PPC::PRED_UN : PPC::PRED_NU;
4874
4875 if (PCC) {
4876 SDLoc dl(N);
4877 SDValue Ops[] = {getI32Imm(PCC, dl), CmpLHS.getOperand(0), N->getOperand(4),
4878 N->getOperand(0)};
4879 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
4880 return true;
4881 }
4882 return false;
4883}
4884
4885bool PPCDAGToDAGISel::trySelectLoopCountIntrinsic(SDNode *N) {
4886 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
4887 // value, for example when crbits is disabled. If so, select the
4888 // loop_decrement intrinsics now.
4889 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
4890 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
4891
4892 if (LHS.getOpcode() != ISD::AND || !isa<ConstantSDNode>(LHS.getOperand(1)) ||
4893 isNullConstant(LHS.getOperand(1)))
4894 return false;
4895
4896 if (LHS.getOperand(0).getOpcode() != ISD::INTRINSIC_W_CHAIN ||
4897 LHS.getOperand(0).getConstantOperandVal(1) != Intrinsic::loop_decrement)
4898 return false;
4899
4900 if (!isa<ConstantSDNode>(RHS))
4901 return false;
4902
4903 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
4904 "Counter decrement comparison is not EQ or NE");
4905
4906 SDValue OldDecrement = LHS.getOperand(0);
4907 assert(OldDecrement.hasOneUse() && "loop decrement has more than one use!");
4908
4909 SDLoc DecrementLoc(OldDecrement);
4910 SDValue ChainInput = OldDecrement.getOperand(0);
4911 SDValue DecrementOps[] = {Subtarget->isPPC64() ? getI64Imm(1, DecrementLoc)
4912 : getI32Imm(1, DecrementLoc)};
4913 unsigned DecrementOpcode =
4914 Subtarget->isPPC64() ? PPC::DecreaseCTR8loop : PPC::DecreaseCTRloop;
4915 SDNode *NewDecrement = CurDAG->getMachineNode(DecrementOpcode, DecrementLoc,
4916 MVT::i1, DecrementOps);
4917
4918 unsigned Val = RHS->getAsZExtVal();
4919 bool IsBranchOnTrue = (CC == ISD::SETEQ && Val) || (CC == ISD::SETNE && !Val);
4920 unsigned Opcode = IsBranchOnTrue ? PPC::BC : PPC::BCn;
4921
4922 ReplaceUses(LHS.getValue(0), LHS.getOperand(1));
4923 CurDAG->RemoveDeadNode(LHS.getNode());
4924
4925 // Mark the old loop_decrement intrinsic as dead.
4926 ReplaceUses(OldDecrement.getValue(1), ChainInput);
4927 CurDAG->RemoveDeadNode(OldDecrement.getNode());
4928
4929 SDValue Chain = CurDAG->getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
4930 ChainInput, N->getOperand(0));
4931
4932 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, SDValue(NewDecrement, 0),
4933 N->getOperand(4), Chain);
4934 return true;
4935}
4936
4937bool PPCDAGToDAGISel::tryAsSingleRLWINM(SDNode *N) {
4938 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4939 unsigned Imm;
4940 if (!isInt32Immediate(N->getOperand(1), Imm))
4941 return false;
4942
4943 SDLoc dl(N);
4944 SDValue Val = N->getOperand(0);
4945 unsigned SH, MB, ME;
4946 // If this is an and of a value rotated between 0 and 31 bits and then and'd
4947 // with a mask, emit rlwinm
4948 if (isRotateAndMask(Val.getNode(), Imm, false, SH, MB, ME)) {
4949 Val = Val.getOperand(0);
4950 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl),
4951 getI32Imm(ME, dl)};
4952 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4953 return true;
4954 }
4955
4956 // If this is just a masked value where the input is not handled, and
4957 // is not a rotate-left (handled by a pattern in the .td file), emit rlwinm
4958 if (isRunOfOnes(Imm, MB, ME) && Val.getOpcode() != ISD::ROTL) {
4959 SDValue Ops[] = {Val, getI32Imm(0, dl), getI32Imm(MB, dl),
4960 getI32Imm(ME, dl)};
4961 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
4962 return true;
4963 }
4964
4965 // AND X, 0 -> 0, not "rlwinm 32".
4966 if (Imm == 0) {
4967 ReplaceUses(SDValue(N, 0), N->getOperand(1));
4968 return true;
4969 }
4970
4971 return false;
4972}
4973
4974bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) {
4975 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
4977 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
4978 return false;
4979
4980 unsigned MB, ME;
4981 if (isRunOfOnes64(Imm64, MB, ME) && MB >= 32 && MB <= ME) {
4982 // MB ME
4983 // +----------------------+
4984 // |xxxxxxxxxxx00011111000|
4985 // +----------------------+
4986 // 0 32 64
4987 // We can only do it if the MB is larger than 32 and MB <= ME
4988 // as RLWINM will replace the contents of [0 - 32) with [32 - 64) even
4989 // we didn't rotate it.
4990 SDLoc dl(N);
4991 SDValue Ops[] = {N->getOperand(0), getI64Imm(0, dl), getI64Imm(MB - 32, dl),
4992 getI64Imm(ME - 32, dl)};
4993 CurDAG->SelectNodeTo(N, PPC::RLWINM8, MVT::i64, Ops);
4994 return true;
4995 }
4996
4997 return false;
4998}
4999
5000bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) {
5001 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5003 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64))
5004 return false;
5005
5006 // Do nothing if it is 16-bit imm as the pattern in the .td file handle
5007 // it well with "andi.".
5008 if (isUInt<16>(Imm64))
5009 return false;
5010
5011 SDLoc Loc(N);
5012 SDValue Val = N->getOperand(0);
5013
5014 // Optimized with two rldicl's as follows:
5015 // Add missing bits on left to the mask and check that the mask is a
5016 // wrapped run of ones, i.e.
5017 // Change pattern |0001111100000011111111|
5018 // to |1111111100000011111111|.
5019 unsigned NumOfLeadingZeros = llvm::countl_zero(Imm64);
5020 if (NumOfLeadingZeros != 0)
5021 Imm64 |= maskLeadingOnes<uint64_t>(NumOfLeadingZeros);
5022
5023 unsigned MB, ME;
5024 if (!isRunOfOnes64(Imm64, MB, ME))
5025 return false;
5026
5027 // ME MB MB-ME+63
5028 // +----------------------+ +----------------------+
5029 // |1111111100000011111111| -> |0000001111111111111111|
5030 // +----------------------+ +----------------------+
5031 // 0 63 0 63
5032 // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between.
5033 unsigned OnesOnLeft = ME + 1;
5034 unsigned ZerosInBetween = (MB - ME + 63) & 63;
5035 // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear
5036 // on the left the bits that are already zeros in the mask.
5037 Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val,
5038 getI64Imm(OnesOnLeft, Loc),
5039 getI64Imm(ZerosInBetween, Loc)),
5040 0);
5041 // MB-ME+63 ME MB
5042 // +----------------------+ +----------------------+
5043 // |0000001111111111111111| -> |0001111100000011111111|
5044 // +----------------------+ +----------------------+
5045 // 0 63 0 63
5046 // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the
5047 // left the number of ones we previously added.
5048 SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc),
5049 getI64Imm(NumOfLeadingZeros, Loc)};
5050 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5051 return true;
5052}
5053
5054bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) {
5055 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5056 unsigned Imm;
5057 if (!isInt32Immediate(N->getOperand(1), Imm))
5058 return false;
5059
5060 SDValue Val = N->getOperand(0);
5061 unsigned Imm2;
5062 // ISD::OR doesn't get all the bitfield insertion fun.
5063 // (and (or x, c1), c2) where isRunOfOnes(~(c1^c2)) might be a
5064 // bitfield insert.
5065 if (Val.getOpcode() != ISD::OR || !isInt32Immediate(Val.getOperand(1), Imm2))
5066 return false;
5067
5068 // The idea here is to check whether this is equivalent to:
5069 // (c1 & m) | (x & ~m)
5070 // where m is a run-of-ones mask. The logic here is that, for each bit in
5071 // c1 and c2:
5072 // - if both are 1, then the output will be 1.
5073 // - if both are 0, then the output will be 0.
5074 // - if the bit in c1 is 0, and the bit in c2 is 1, then the output will
5075 // come from x.
5076 // - if the bit in c1 is 1, and the bit in c2 is 0, then the output will
5077 // be 0.
5078 // If that last condition is never the case, then we can form m from the
5079 // bits that are the same between c1 and c2.
5080 unsigned MB, ME;
5081 if (isRunOfOnes(~(Imm ^ Imm2), MB, ME) && !(~Imm & Imm2)) {
5082 SDLoc dl(N);
5083 SDValue Ops[] = {Val.getOperand(0), Val.getOperand(1), getI32Imm(0, dl),
5084 getI32Imm(MB, dl), getI32Imm(ME, dl)};
5085 ReplaceNode(N, CurDAG->getMachineNode(PPC::RLWIMI, dl, MVT::i32, Ops));
5086 return true;
5087 }
5088
5089 return false;
5090}
5091
5092bool PPCDAGToDAGISel::tryAsSingleRLDCL(SDNode *N) {
5093 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5094
5096 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5097 return false;
5098
5099 SDValue Val = N->getOperand(0);
5100
5101 if (Val.getOpcode() != ISD::ROTL)
5102 return false;
5103
5104 // Looking to try to avoid a situation like this one:
5105 // %2 = tail call i64 @llvm.fshl.i64(i64 %word, i64 %word, i64 23)
5106 // %and1 = and i64 %2, 9223372036854775807
5107 // In this function we are looking to try to match RLDCL. However, the above
5108 // DAG would better match RLDICL instead which is not what we are looking
5109 // for here.
5110 SDValue RotateAmt = Val.getOperand(1);
5111 if (RotateAmt.getOpcode() == ISD::Constant)
5112 return false;
5113
5114 unsigned MB = 64 - llvm::countr_one(Imm64);
5115 SDLoc dl(N);
5116 SDValue Ops[] = {Val.getOperand(0), RotateAmt, getI32Imm(MB, dl)};
5117 CurDAG->SelectNodeTo(N, PPC::RLDCL, MVT::i64, Ops);
5118 return true;
5119}
5120
5121bool PPCDAGToDAGISel::tryAsSingleRLDICL(SDNode *N) {
5122 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5124 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) || !isMask_64(Imm64))
5125 return false;
5126
5127 // If this is a 64-bit zero-extension mask, emit rldicl.
5128 unsigned MB = 64 - llvm::countr_one(Imm64);
5129 unsigned SH = 0;
5130 unsigned Imm;
5131 SDValue Val = N->getOperand(0);
5132 SDLoc dl(N);
5133
5134 if (Val.getOpcode() == ISD::ANY_EXTEND) {
5135 auto Op0 = Val.getOperand(0);
5136 if (Op0.getOpcode() == ISD::SRL &&
5137 isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
5138
5139 auto ResultType = Val.getNode()->getValueType(0);
5140 auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl, ResultType);
5141 SDValue IDVal(ImDef, 0);
5142
5143 Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl, ResultType,
5144 IDVal, Op0.getOperand(0),
5145 getI32Imm(1, dl)),
5146 0);
5147 SH = 64 - Imm;
5148 }
5149 }
5150
5151 // If the operand is a logical right shift, we can fold it into this
5152 // instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
5153 // for n <= mb. The right shift is really a left rotate followed by a
5154 // mask, and this mask is a more-restrictive sub-mask of the mask implied
5155 // by the shift.
5156 if (Val.getOpcode() == ISD::SRL &&
5157 isInt32Immediate(Val.getOperand(1).getNode(), Imm) && Imm <= MB) {
5158 assert(Imm < 64 && "Illegal shift amount");
5159 Val = Val.getOperand(0);
5160 SH = 64 - Imm;
5161 }
5162
5163 SDValue Ops[] = {Val, getI32Imm(SH, dl), getI32Imm(MB, dl)};
5164 CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops);
5165 return true;
5166}
5167
5168bool PPCDAGToDAGISel::tryAsSingleRLDICR(SDNode *N) {
5169 assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected");
5171 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5172 !isMask_64(~Imm64))
5173 return false;
5174
5175 // If this is a negated 64-bit zero-extension mask,
5176 // i.e. the immediate is a sequence of ones from most significant side
5177 // and all zero for reminder, we should use rldicr.
5178 unsigned MB = 63 - llvm::countr_one(~Imm64);
5179 unsigned SH = 0;
5180 SDLoc dl(N);
5181 SDValue Ops[] = {N->getOperand(0), getI32Imm(SH, dl), getI32Imm(MB, dl)};
5182 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5183 return true;
5184}
5185
5186bool PPCDAGToDAGISel::tryAsSingleRLDIMI(SDNode *N) {
5187 assert(N->getOpcode() == ISD::OR && "ISD::OR SDNode expected");
5189 unsigned MB, ME;
5190 SDValue N0 = N->getOperand(0);
5191
5192 // We won't get fewer instructions if the imm is 32-bit integer.
5193 // rldimi requires the imm to have consecutive ones with both sides zero.
5194 // Also, make sure the first Op has only one use, otherwise this may increase
5195 // register pressure since rldimi is destructive.
5196 if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64) ||
5197 isUInt<32>(Imm64) || !isRunOfOnes64(Imm64, MB, ME) || !N0.hasOneUse())
5198 return false;
5199
5200 unsigned SH = 63 - ME;
5201 SDLoc Dl(N);
5202 // Use select64Imm for making LI instr instead of directly putting Imm64
5203 SDValue Ops[] = {
5204 N->getOperand(0),
5205 SDValue(selectI64Imm(CurDAG, getI64Imm(-1, Dl).getNode()), 0),
5206 getI32Imm(SH, Dl), getI32Imm(MB, Dl)};
5207 CurDAG->SelectNodeTo(N, PPC::RLDIMI, MVT::i64, Ops);
5208 return true;
5209}
5210
5211// Select - Convert the specified operand from a target-independent to a
5212// target-specific node if it hasn't already been changed.
5213void PPCDAGToDAGISel::Select(SDNode *N) {
5214 SDLoc dl(N);
5215 if (N->isMachineOpcode()) {
5216 N->setNodeId(-1);
5217 return; // Already selected.
5218 }
5219
5220 // In case any misguided DAG-level optimizations form an ADD with a
5221 // TargetConstant operand, crash here instead of miscompiling (by selecting
5222 // an r+r add instead of some kind of r+i add).
5223 if (N->getOpcode() == ISD::ADD &&
5224 N->getOperand(1).getOpcode() == ISD::TargetConstant)
5225 llvm_unreachable("Invalid ADD with TargetConstant operand");
5226
5227 // Try matching complex bit permutations before doing anything else.
5228 if (tryBitPermutation(N))
5229 return;
5230
5231 // Try to emit integer compares as GPR-only sequences (i.e. no use of CR).
5232 if (tryIntCompareInGPR(N))
5233 return;
5234
5235 switch (N->getOpcode()) {
5236 default: break;
5237
5238 case ISD::Constant:
5239 if (N->getValueType(0) == MVT::i64) {
5240 ReplaceNode(N, selectI64Imm(CurDAG, N));
5241 return;
5242 }
5243 break;
5244
5245 case ISD::INTRINSIC_VOID: {
5246 auto IntrinsicID = N->getConstantOperandVal(1);
5247 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
5248 IntrinsicID != Intrinsic::ppc_trapd &&
5249 IntrinsicID != Intrinsic::ppc_trap)
5250 break;
5251 unsigned Opcode = (IntrinsicID == Intrinsic::ppc_tdw ||
5252 IntrinsicID == Intrinsic::ppc_trapd)
5253 ? PPC::TDI
5254 : PPC::TWI;
5255 SmallVector<SDValue, 4> OpsWithMD;
5256 unsigned MDIndex;
5257 if (IntrinsicID == Intrinsic::ppc_tdw ||
5258 IntrinsicID == Intrinsic::ppc_tw) {
5259 SDValue Ops[] = {N->getOperand(4), N->getOperand(2), N->getOperand(3)};
5260 int16_t SImmOperand2;
5261 int16_t SImmOperand3;
5262 int16_t SImmOperand4;
5263 bool isOperand2IntS16Immediate =
5264 isIntS16Immediate(N->getOperand(2), SImmOperand2);
5265 bool isOperand3IntS16Immediate =
5266 isIntS16Immediate(N->getOperand(3), SImmOperand3);
5267 // We will emit PPC::TD or PPC::TW if the 2nd and 3rd operands are reg +
5268 // reg or imm + imm. The imm + imm form will be optimized to either an
5269 // unconditional trap or a nop in a later pass.
5270 if (isOperand2IntS16Immediate == isOperand3IntS16Immediate)
5271 Opcode = IntrinsicID == Intrinsic::ppc_tdw ? PPC::TD : PPC::TW;
5272 else if (isOperand3IntS16Immediate)
5273 // The 2nd and 3rd operands are reg + imm.
5274 Ops[2] = getI32Imm(int(SImmOperand3) & 0xFFFF, dl);
5275 else {
5276 // The 2nd and 3rd operands are imm + reg.
5277 bool isOperand4IntS16Immediate =
5278 isIntS16Immediate(N->getOperand(4), SImmOperand4);
5279 (void)isOperand4IntS16Immediate;
5280 assert(isOperand4IntS16Immediate &&
5281 "The 4th operand is not an Immediate");
5282 // We need to flip the condition immediate TO.
5283 int16_t TO = int(SImmOperand4) & 0x1F;
5284 // We swap the first and second bit of TO if they are not same.
5285 if ((TO & 0x1) != ((TO & 0x2) >> 1))
5286 TO = (TO & 0x1) ? TO + 1 : TO - 1;
5287 // We swap the fourth and fifth bit of TO if they are not same.
5288 if ((TO & 0x8) != ((TO & 0x10) >> 1))
5289 TO = (TO & 0x8) ? TO + 8 : TO - 8;
5290 Ops[0] = getI32Imm(TO, dl);
5291 Ops[1] = N->getOperand(3);
5292 Ops[2] = getI32Imm(int(SImmOperand2) & 0xFFFF, dl);
5293 }
5294 OpsWithMD = {Ops[0], Ops[1], Ops[2]};
5295 MDIndex = 5;
5296 } else {
5297 OpsWithMD = {getI32Imm(24, dl), N->getOperand(2), getI32Imm(0, dl)};
5298 MDIndex = 3;
5299 }
5300
5301 if (N->getNumOperands() > MDIndex) {
5302 SDValue MDV = N->getOperand(MDIndex);
5303 const MDNode *MD = cast<MDNodeSDNode>(MDV)->getMD();
5304 assert(MD->getNumOperands() != 0 && "Empty MDNode in operands!");
5305 assert((isa<MDString>(MD->getOperand(0)) && cast<MDString>(
5306 MD->getOperand(0))->getString().equals("ppc-trap-reason"))
5307 && "Unsupported annotation data type!");
5308 for (unsigned i = 1; i < MD->getNumOperands(); i++) {
5309 assert(isa<MDString>(MD->getOperand(i)) &&
5310 "Invalid data type for annotation ppc-trap-reason!");
5311 OpsWithMD.push_back(
5312 getI32Imm(std::stoi(cast<MDString>(
5313 MD->getOperand(i))->getString().str()), dl));
5314 }
5315 }
5316 OpsWithMD.push_back(N->getOperand(0)); // chain
5317 CurDAG->SelectNodeTo(N, Opcode, MVT::Other, OpsWithMD);
5318 return;
5319 }
5320
5322 // We emit the PPC::FSELS instruction here because of type conflicts with
5323 // the comparison operand. The FSELS instruction is defined to use an 8-byte
5324 // comparison like the FSELD version. The fsels intrinsic takes a 4-byte
5325 // value for the comparison. When selecting through a .td file, a type
5326 // error is raised. Must check this first so we never break on the
5327 // !Subtarget->isISA3_1() check.
5328 auto IntID = N->getConstantOperandVal(0);
5329 if (IntID == Intrinsic::ppc_fsels) {
5330 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3)};
5331 CurDAG->SelectNodeTo(N, PPC::FSELS, MVT::f32, Ops);
5332 return;
5333 }
5334
5335 if (IntID == Intrinsic::ppc_bcdadd_p || IntID == Intrinsic::ppc_bcdsub_p) {
5336 auto Pred = N->getConstantOperandVal(1);
5337 unsigned Opcode =
5338 IntID == Intrinsic::ppc_bcdadd_p ? PPC::BCDADD_rec : PPC::BCDSUB_rec;
5339 unsigned SubReg = 0;
5340 unsigned ShiftVal = 0;
5341 bool Reverse = false;
5342 switch (Pred) {
5343 case 0:
5344 SubReg = PPC::sub_eq;
5345 ShiftVal = 1;
5346 break;
5347 case 1:
5348 SubReg = PPC::sub_eq;
5349 ShiftVal = 1;
5350 Reverse = true;
5351 break;
5352 case 2:
5353 SubReg = PPC::sub_lt;
5354 ShiftVal = 3;
5355 break;
5356 case 3:
5357 SubReg = PPC::sub_lt;
5358 ShiftVal = 3;
5359 Reverse = true;
5360 break;
5361 case 4:
5362 SubReg = PPC::sub_gt;
5363 ShiftVal = 2;
5364 break;
5365 case 5:
5366 SubReg = PPC::sub_gt;
5367 ShiftVal = 2;
5368 Reverse = true;
5369 break;
5370 case 6:
5371 SubReg = PPC::sub_un;
5372 break;
5373 case 7:
5374 SubReg = PPC::sub_un;
5375 Reverse = true;
5376 break;
5377 }
5378
5379 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5380 SDValue Ops[] = {N->getOperand(2), N->getOperand(3),
5381 CurDAG->getTargetConstant(0, dl, MVT::i32)};
5382 SDValue BCDOp = SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, Ops), 0);
5383 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5384 // On Power10, we can use SETBC[R]. On prior architectures, we have to use
5385 // MFOCRF and shift/negate the value.
5386 if (Subtarget->isISA3_1()) {
5387 SDValue SubRegIdx = CurDAG->getTargetConstant(SubReg, dl, MVT::i32);
5388 SDValue CRBit = SDValue(
5389 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5390 CR6Reg, SubRegIdx, BCDOp.getValue(1)),
5391 0);
5392 CurDAG->SelectNodeTo(N, Reverse ? PPC::SETBCR : PPC::SETBC, MVT::i32,
5393 CRBit);
5394 } else {
5395 SDValue Move =
5396 SDValue(CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32, CR6Reg,
5397 BCDOp.getValue(1)),
5398 0);
5399 SDValue Ops[] = {Move, getI32Imm((32 - (4 + ShiftVal)) & 31, dl),
5400 getI32Imm(31, dl), getI32Imm(31, dl)};
5401 if (!Reverse)
5402 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5403 else {
5404 SDValue Shift = SDValue(
5405 CurDAG->getMachineNode(PPC::RLWINM, dl, MVT::i32, Ops), 0);
5406 CurDAG->SelectNodeTo(N, PPC::XORI, MVT::i32, Shift, getI32Imm(1, dl));
5407 }
5408 }
5409 return;
5410 }
5411
5412 if (!Subtarget->isISA3_1())
5413 break;
5414 unsigned Opcode = 0;
5415 switch (IntID) {
5416 default:
5417 break;
5418 case Intrinsic::ppc_altivec_vstribr_p:
5419 Opcode = PPC::VSTRIBR_rec;
5420 break;
5421 case Intrinsic::ppc_altivec_vstribl_p:
5422 Opcode = PPC::VSTRIBL_rec;
5423 break;
5424 case Intrinsic::ppc_altivec_vstrihr_p:
5425 Opcode = PPC::VSTRIHR_rec;
5426 break;
5427 case Intrinsic::ppc_altivec_vstrihl_p:
5428 Opcode = PPC::VSTRIHL_rec;
5429 break;
5430 }
5431 if (!Opcode)
5432 break;
5433
5434 // Generate the appropriate vector string isolate intrinsic to match.
5435 EVT VTs[] = {MVT::v16i8, MVT::Glue};
5436 SDValue VecStrOp =
5437 SDValue(CurDAG->getMachineNode(Opcode, dl, VTs, N->getOperand(2)), 0);
5438 // Vector string isolate instructions update the EQ bit of CR6.
5439 // Generate a SETBC instruction to extract the bit and place it in a GPR.
5440 SDValue SubRegIdx = CurDAG->getTargetConstant(PPC::sub_eq, dl, MVT::i32);
5441 SDValue CR6Reg = CurDAG->getRegister(PPC::CR6, MVT::i32);
5442 SDValue CRBit = SDValue(
5443 CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
5444 CR6Reg, SubRegIdx, VecStrOp.getValue(1)),
5445 0);
5446 CurDAG->SelectNodeTo(N, PPC::SETBC, MVT::i32, CRBit);
5447 return;
5448 }
5449
5450 case ISD::SETCC:
5451 case ISD::STRICT_FSETCC:
5453 if (trySETCC(N))
5454 return;
5455 break;
5456 // These nodes will be transformed into GETtlsADDR32 node, which
5457 // later becomes BL_TLS __tls_get_addr(sym at tlsgd)@PLT
5460 const Module *Mod = MF->getFunction().getParent();
5461 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5462 !Subtarget->isSecurePlt() || !Subtarget->isTargetELF() ||
5464 break;
5465 // Attach global base pointer on GETtlsADDR32 node in order to
5466 // generate secure plt code for TLS symbols.
5467 getGlobalBaseReg();
5468 } break;
5469 case PPCISD::CALL: {
5470 if (PPCLowering->getPointerTy(CurDAG->getDataLayout()) != MVT::i32 ||
5471 !TM.isPositionIndependent() || !Subtarget->isSecurePlt() ||
5472 !Subtarget->isTargetELF())
5473 break;
5474
5475 SDValue Op = N->getOperand(1);
5476
5477 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5478 if (GA->getTargetFlags() == PPCII::MO_PLT)
5479 getGlobalBaseReg();
5480 }
5481 else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
5482 if (ES->getTargetFlags() == PPCII::MO_PLT)
5483 getGlobalBaseReg();
5484 }
5485 }
5486 break;
5487
5489 ReplaceNode(N, getGlobalBaseReg());
5490 return;
5491
5492 case ISD::FrameIndex:
5493 selectFrameIndex(N, N);
5494 return;
5495
5496 case PPCISD::MFOCRF: {
5497 SDValue InGlue = N->getOperand(1);
5498 ReplaceNode(N, CurDAG->getMachineNode(PPC::MFOCRF, dl, MVT::i32,
5499 N->getOperand(0), InGlue));
5500 return;
5501 }
5502
5504 ReplaceNode(N, CurDAG->getMachineNode(PPC::ReadTB, dl, MVT::i32, MVT::i32,
5505 MVT::Other, N->getOperand(0)));
5506 return;
5507
5508 case PPCISD::SRA_ADDZE: {
5509 SDValue N0 = N->getOperand(0);
5510 SDValue ShiftAmt =
5511 CurDAG->getTargetConstant(*cast<ConstantSDNode>(N->getOperand(1))->
5512 getConstantIntValue(), dl,
5513 N->getValueType(0));
5514 if (N->getValueType(0) == MVT::i64) {
5515 SDNode *Op =
5516 CurDAG->getMachineNode(PPC::SRADI, dl, MVT::i64, MVT::Glue,
5517 N0, ShiftAmt);
5518 CurDAG->SelectNodeTo(N, PPC::ADDZE8, MVT::i64, SDValue(Op, 0),
5519 SDValue(Op, 1));
5520 return;
5521 } else {
5522 assert(N->getValueType(0) == MVT::i32 &&
5523 "Expecting i64 or i32 in PPCISD::SRA_ADDZE");
5524 SDNode *Op =
5525 CurDAG->getMachineNode(PPC::SRAWI, dl, MVT::i32, MVT::Glue,
5526 N0, ShiftAmt);
5527 CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32, SDValue(Op, 0),
5528 SDValue(Op, 1));
5529 return;
5530 }
5531 }
5532
5533 case ISD::STORE: {
5534 // Change TLS initial-exec (or TLS local-exec on AIX) D-form stores to
5535 // X-form stores.
5536 StoreSDNode *ST = cast<StoreSDNode>(N);
5537 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()) &&
5538 ST->getAddressingMode() != ISD::PRE_INC)
5539 if (tryTLSXFormStore(ST))
5540 return;
5541 break;
5542 }
5543 case ISD::LOAD: {
5544 // Handle preincrement loads.
5545 LoadSDNode *LD = cast<LoadSDNode>(N);
5546 EVT LoadedVT = LD->getMemoryVT();
5547
5548 // Normal loads are handled by code generated from the .td file.
5549 if (LD->getAddressingMode() != ISD::PRE_INC) {
5550 // Change TLS initial-exec (or TLS local-exec on AIX) D-form loads to
5551 // X-form loads.
5552 if (EnableTLSOpt && (Subtarget->isELFv2ABI() || Subtarget->isAIXABI()))
5553 if (tryTLSXFormLoad(LD))
5554 return;
5555 break;
5556 }
5557
5558 SDValue Offset = LD->getOffset();
5559 if (Offset.getOpcode() == ISD::TargetConstant ||
5560 Offset.getOpcode() == ISD::TargetGlobalAddress) {
5561
5562 unsigned Opcode;
5563 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5564 if (LD->getValueType(0) != MVT::i64) {
5565 // Handle PPC32 integer and normal FP loads.
5566 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5567 switch (LoadedVT.getSimpleVT().SimpleTy) {
5568 default: llvm_unreachable("Invalid PPC load type!");
5569 case MVT::f64: Opcode = PPC::LFDU; break;
5570 case MVT::f32: Opcode = PPC::LFSU; break;
5571 case MVT::i32: Opcode = PPC::LWZU; break;
5572 case MVT::i16: Opcode = isSExt ? PPC::LHAU : PPC::LHZU; break;
5573 case MVT::i1:
5574 case MVT::i8: Opcode = PPC::LBZU; break;
5575 }
5576 } else {
5577 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5578 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5579 switch (LoadedVT.getSimpleVT().SimpleTy) {
5580 default: llvm_unreachable("Invalid PPC load type!");
5581 case MVT::i64: Opcode = PPC::LDU; break;
5582 case MVT::i32: Opcode = PPC::LWZU8; break;
5583 case MVT::i16: Opcode = isSExt ? PPC::LHAU8 : PPC::LHZU8; break;
5584 case MVT::i1:
5585 case MVT::i8: Opcode = PPC::LBZU8; break;
5586 }
5587 }
5588
5589 SDValue Chain = LD->getChain();
5590 SDValue Base = LD->getBasePtr();
5591 SDValue Ops[] = { Offset, Base, Chain };
5592 SDNode *MN = CurDAG->getMachineNode(
5593 Opcode, dl, LD->getValueType(0),
5594 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5595 transferMemOperands(N, MN);
5596 ReplaceNode(N, MN);
5597 return;
5598 } else {
5599 unsigned Opcode;
5600 bool isSExt = LD->getExtensionType() == ISD::SEXTLOAD;
5601 if (LD->getValueType(0) != MVT::i64) {
5602 // Handle PPC32 integer and normal FP loads.
5603 assert((!isSExt || LoadedVT == MVT::i16) && "Invalid sext update load");
5604 switch (LoadedVT.getSimpleVT().SimpleTy) {
5605 default: llvm_unreachable("Invalid PPC load type!");
5606 case MVT::f64: Opcode = PPC::LFDUX; break;
5607 case MVT::f32: Opcode = PPC::LFSUX; break;
5608 case MVT::i32: Opcode = PPC::LWZUX; break;
5609 case MVT::i16: Opcode = isSExt ? PPC::LHAUX : PPC::LHZUX; break;
5610 case MVT::i1:
5611 case MVT::i8: Opcode = PPC::LBZUX; break;
5612 }
5613 } else {
5614 assert(LD->getValueType(0) == MVT::i64 && "Unknown load result type!");
5615 assert((!isSExt || LoadedVT == MVT::i16 || LoadedVT == MVT::i32) &&
5616 "Invalid sext update load");
5617 switch (LoadedVT.getSimpleVT().SimpleTy) {
5618 default: llvm_unreachable("Invalid PPC load type!");
5619 case MVT::i64: Opcode = PPC::LDUX; break;
5620 case MVT::i32: Opcode = isSExt ? PPC::LWAUX : PPC::LWZUX8; break;
5621 case MVT::i16: Opcode = isSExt ? PPC::LHAUX8 : PPC::LHZUX8; break;
5622 case MVT::i1:
5623 case MVT::i8: Opcode = PPC::LBZUX8; break;
5624 }
5625 }
5626
5627 SDValue Chain = LD->getChain();
5628 SDValue Base = LD->getBasePtr();
5629 SDValue Ops[] = { Base, Offset, Chain };
5630 SDNode *MN = CurDAG->getMachineNode(
5631 Opcode, dl, LD->getValueType(0),
5632 PPCLowering->getPointerTy(CurDAG->getDataLayout()), MVT::Other, Ops);
5633 transferMemOperands(N, MN);
5634 ReplaceNode(N, MN);
5635 return;
5636 }
5637 }
5638
5639 case ISD::AND:
5640 // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr
5641 if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDCL(N) ||
5642 tryAsSingleRLDICL(N) || tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) ||
5643 tryAsPairOfRLDICL(N))
5644 return;
5645
5646 // Other cases are autogenerated.
5647 break;
5648 case ISD::OR: {
5649 if (N->getValueType(0) == MVT::i32)
5650 if (tryBitfieldInsert(N))
5651 return;
5652
5653 int16_t Imm;
5654 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5655 isIntS16Immediate(N->getOperand(1), Imm)) {
5656 KnownBits LHSKnown = CurDAG->computeKnownBits(N->getOperand(0));
5657
5658 // If this is equivalent to an add, then we can fold it with the
5659 // FrameIndex calculation.
5660 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)Imm) == ~0ULL) {
5661 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5662 return;
5663 }
5664 }
5665
5666 // If this is 'or' against an imm with consecutive ones and both sides zero,
5667 // try to emit rldimi
5668 if (tryAsSingleRLDIMI(N))
5669 return;
5670
5671 // OR with a 32-bit immediate can be handled by ori + oris
5672 // without creating an immediate in a GPR.
5673 uint64_t Imm64 = 0;
5674 bool IsPPC64 = Subtarget->isPPC64();
5675 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5676 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5677 // If ImmHi (ImmHi) is zero, only one ori (oris) is generated later.
5678 uint64_t ImmHi = Imm64 >> 16;
5679 uint64_t ImmLo = Imm64 & 0xFFFF;
5680 if (ImmHi != 0 && ImmLo != 0) {
5681 SDNode *Lo = CurDAG->getMachineNode(PPC::ORI8, dl, MVT::i64,
5682 N->getOperand(0),
5683 getI16Imm(ImmLo, dl));
5684 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5685 CurDAG->SelectNodeTo(N, PPC::ORIS8, MVT::i64, Ops1);
5686 return;
5687 }
5688 }
5689
5690 // Other cases are autogenerated.
5691 break;
5692 }
5693 case ISD::XOR: {
5694 // XOR with a 32-bit immediate can be handled by xori + xoris
5695 // without creating an immediate in a GPR.
5696 uint64_t Imm64 = 0;
5697 bool IsPPC64 = Subtarget->isPPC64();
5698 if (IsPPC64 && isInt64Immediate(N->getOperand(1), Imm64) &&
5699 (Imm64 & ~0xFFFFFFFFuLL) == 0) {
5700 // If ImmHi (ImmHi) is zero, only one xori (xoris) is generated later.
5701 uint64_t ImmHi = Imm64 >> 16;
5702 uint64_t ImmLo = Imm64 & 0xFFFF;
5703 if (ImmHi != 0 && ImmLo != 0) {
5704 SDNode *Lo = CurDAG->getMachineNode(PPC::XORI8, dl, MVT::i64,
5705 N->getOperand(0),
5706 getI16Imm(ImmLo, dl));
5707 SDValue Ops1[] = { SDValue(Lo, 0), getI16Imm(ImmHi, dl)};
5708 CurDAG->SelectNodeTo(N, PPC::XORIS8, MVT::i64, Ops1);
5709 return;
5710 }
5711 }
5712
5713 break;
5714 }
5715 case ISD::ADD: {
5716 int16_t Imm;
5717 if (N->getOperand(0)->getOpcode() == ISD::FrameIndex &&
5718 isIntS16Immediate(N->getOperand(1), Imm)) {
5719 selectFrameIndex(N, N->getOperand(0).getNode(), (int64_t)Imm);
5720 return;
5721 }
5722
5723 break;
5724 }
5725 case ISD::SHL: {
5726 unsigned Imm, SH, MB, ME;
5727 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5728 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5729 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5730 getI32Imm(SH, dl), getI32Imm(MB, dl),
5731 getI32Imm(ME, dl) };
5732 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5733 return;
5734 }
5735
5736 // Other cases are autogenerated.
5737 break;
5738 }
5739 case ISD::SRL: {
5740 unsigned Imm, SH, MB, ME;
5741 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, Imm) &&
5742 isRotateAndMask(N, Imm, true, SH, MB, ME)) {
5743 SDValue Ops[] = { N->getOperand(0).getOperand(0),
5744 getI32Imm(SH, dl), getI32Imm(MB, dl),
5745 getI32Imm(ME, dl) };
5746 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5747 return;
5748 }
5749
5750 // Other cases are autogenerated.
5751 break;
5752 }
5753 case ISD::MUL: {
5754 SDValue Op1 = N->getOperand(1);
5755 if (Op1.getOpcode() != ISD::Constant ||
5756 (Op1.getValueType() != MVT::i64 && Op1.getValueType() != MVT::i32))
5757 break;
5758
5759 // If the multiplier fits int16, we can handle it with mulli.
5760 int64_t Imm = Op1->getAsZExtVal();
5761 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
5762 if (isInt<16>(Imm) || !Shift)
5763 break;
5764
5765 // If the shifted value fits int16, we can do this transformation:
5766 // (mul X, c1 << c2) -> (rldicr (mulli X, c1) c2). We do this in ISEL due to
5767 // DAGCombiner prefers (shl (mul X, c1), c2) -> (mul X, c1 << c2).
5768 uint64_t ImmSh = Imm >> Shift;
5769 if (!isInt<16>(ImmSh))
5770 break;
5771
5772 uint64_t SextImm = SignExtend64(ImmSh & 0xFFFF, 16);
5773 if (Op1.getValueType() == MVT::i64) {
5774 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i64);
5775 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI8, dl, MVT::i64,
5776 N->getOperand(0), SDImm);
5777
5778 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5779 getI32Imm(63 - Shift, dl)};
5780 CurDAG->SelectNodeTo(N, PPC::RLDICR, MVT::i64, Ops);
5781 return;
5782 } else {
5783 SDValue SDImm = CurDAG->getTargetConstant(SextImm, dl, MVT::i32);
5784 SDNode *MulNode = CurDAG->getMachineNode(PPC::MULLI, dl, MVT::i32,
5785 N->getOperand(0), SDImm);
5786
5787 SDValue Ops[] = {SDValue(MulNode, 0), getI32Imm(Shift, dl),
5788 getI32Imm(0, dl), getI32Imm(31 - Shift, dl)};
5789 CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops);
5790 return;
5791 }
5792 break;
5793 }
5794 // FIXME: Remove this once the ANDI glue bug is fixed:
5797 if (!ANDIGlueBug)
5798 break;
5799
5800 EVT InVT = N->getOperand(0).getValueType();
5801 assert((InVT == MVT::i64 || InVT == MVT::i32) &&
5802 "Invalid input type for ANDI_rec_1_EQ_BIT");
5803
5804 unsigned Opcode = (InVT == MVT::i64) ? PPC::ANDI8_rec : PPC::ANDI_rec;
5805 SDValue AndI(CurDAG->getMachineNode(Opcode, dl, InVT, MVT::Glue,
5806 N->getOperand(0),
5807 CurDAG->getTargetConstant(1, dl, InVT)),
5808 0);
5809 SDValue CR0Reg = CurDAG->getRegister(PPC::CR0, MVT::i32);
5810 SDValue SRIdxVal = CurDAG->getTargetConstant(
5811 N->getOpcode() == PPCISD::ANDI_rec_1_EQ_BIT ? PPC::sub_eq : PPC::sub_gt,
5812 dl, MVT::i32);
5813
5814 CurDAG->SelectNodeTo(N, TargetOpcode::EXTRACT_SUBREG, MVT::i1, CR0Reg,
5815 SRIdxVal, SDValue(AndI.getNode(), 1) /* glue */);
5816 return;
5817 }
5818 case ISD::SELECT_CC: {
5819 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
5820 EVT PtrVT =
5822 bool isPPC64 = (PtrVT == MVT::i64);
5823
5824 // If this is a select of i1 operands, we'll pattern match it.
5825 if (Subtarget->useCRBits() && N->getOperand(0).getValueType() == MVT::i1)
5826 break;
5827
5828 if (Subtarget->isISA3_0() && Subtarget->isPPC64()) {
5829 bool NeedSwapOps = false;
5830 bool IsUnCmp = false;
5831 if (mayUseP9Setb(N, CC, CurDAG, NeedSwapOps, IsUnCmp)) {
5832 SDValue LHS = N->getOperand(0);
5833 SDValue RHS = N->getOperand(1);
5834 if (NeedSwapOps)
5835 std::swap(LHS, RHS);
5836
5837 // Make use of SelectCC to generate the comparison to set CR bits, for
5838 // equality comparisons having one literal operand, SelectCC probably
5839 // doesn't need to materialize the whole literal and just use xoris to
5840 // check it first, it leads the following comparison result can't
5841 // exactly represent GT/LT relationship. So to avoid this we specify
5842 // SETGT/SETUGT here instead of SETEQ.
5843 SDValue GenCC =
5844 SelectCC(LHS, RHS, IsUnCmp ? ISD::SETUGT : ISD::SETGT, dl);
5845 CurDAG->SelectNodeTo(
5846 N, N->getSimpleValueType(0) == MVT::i64 ? PPC::SETB8 : PPC::SETB,
5847 N->getValueType(0), GenCC);
5848 NumP9Setb++;
5849 return;
5850 }
5851 }
5852
5853 // Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
5854 if (!isPPC64 && isNullConstant(N->getOperand(1)) &&
5855 isOneConstant(N->getOperand(2)) && isNullConstant(N->getOperand(3)) &&
5856 CC == ISD::SETNE &&
5857 // FIXME: Implement this optzn for PPC64.
5858 N->getValueType(0) == MVT::i32) {
5859 SDNode *Tmp =
5860 CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
5861 N->getOperand(0), getI32Imm(~0U, dl));
5862 CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32, SDValue(Tmp, 0),
5863 N->getOperand(0), SDValue(Tmp, 1));
5864 return;
5865 }
5866
5867 SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
5868
5869 if (N->getValueType(0) == MVT::i1) {
5870 // An i1 select is: (c & t) | (!c & f).
5871 bool Inv;
5872 unsigned Idx = getCRIdxForSetCC(CC, Inv);
5873
5874 unsigned SRI;
5875 switch (Idx) {
5876 default: llvm_unreachable("Invalid CC index");
5877 case 0: SRI = PPC::sub_lt; break;
5878 case 1: SRI = PPC::sub_gt; break;
5879 case 2: SRI = PPC::sub_eq; break;
5880 case 3: SRI = PPC::sub_un; break;
5881 }
5882
5883 SDValue CCBit = CurDAG->getTargetExtractSubreg(SRI, dl, MVT::i1, CCReg);
5884
5885 SDValue NotCCBit(CurDAG->getMachineNode(PPC::CRNOR, dl, MVT::i1,
5886 CCBit, CCBit), 0);
5887 SDValue C = Inv ? NotCCBit : CCBit,
5888 NotC = Inv ? CCBit : NotCCBit;
5889
5890 SDValue CAndT(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5891 C, N->getOperand(2)), 0);
5892 SDValue NotCAndF(CurDAG->getMachineNode(PPC::CRAND, dl, MVT::i1,
5893 NotC, N->getOperand(3)), 0);
5894
5895 CurDAG->SelectNodeTo(N, PPC::CROR, MVT::i1, CAndT, NotCAndF);
5896 return;
5897 }
5898
5899 unsigned BROpc =
5900 getPredicateForSetCC(CC, N->getOperand(0).getValueType(), Subtarget);
5901
5902 unsigned SelectCCOp;
5903 if (N->getValueType(0) == MVT::i32)
5904 SelectCCOp = PPC::SELECT_CC_I4;
5905 else if (N->getValueType(0) == MVT::i64)
5906 SelectCCOp = PPC::SELECT_CC_I8;
5907 else if (N->getValueType(0) == MVT::f32) {
5908 if (Subtarget->hasP8Vector())
5909 SelectCCOp = PPC::SELECT_CC_VSSRC;
5910 else if (Subtarget->hasSPE())
5911 SelectCCOp = PPC::SELECT_CC_SPE4;
5912 else
5913 SelectCCOp = PPC::SELECT_CC_F4;
5914 } else if (N->getValueType(0) == MVT::f64) {
5915 if (Subtarget->hasVSX())
5916 SelectCCOp = PPC::SELECT_CC_VSFRC;
5917 else if (Subtarget->hasSPE())
5918 SelectCCOp = PPC::SELECT_CC_SPE;
5919 else
5920 SelectCCOp = PPC::SELECT_CC_F8;
5921 } else if (N->getValueType(0) == MVT::f128)
5922 SelectCCOp = PPC::SELECT_CC_F16;
5923 else if (Subtarget->hasSPE())
5924 SelectCCOp = PPC::SELECT_CC_SPE;
5925 else if (N->getValueType(0) == MVT::v2f64 ||
5926 N->getValueType(0) == MVT::v2i64)
5927 SelectCCOp = PPC::SELECT_CC_VSRC;
5928 else
5929 SelectCCOp = PPC::SELECT_CC_VRRC;
5930
5931 SDValue Ops[] = { CCReg, N->getOperand(2), N->getOperand(3),
5932 getI32Imm(BROpc, dl) };
5933 CurDAG->SelectNodeTo(N, SelectCCOp, N->getValueType(0), Ops);
5934 return;
5935 }
5937 if (Subtarget->hasVSX() && (N->getValueType(0) == MVT::v2f64 ||
5938 N->getValueType(0) == MVT::v2i64)) {
5939 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
5940
5941 SDValue Op1 = N->getOperand(SVN->getMaskElt(0) < 2 ? 0 : 1),
5942 Op2 = N->getOperand(SVN->getMaskElt(1) < 2 ? 0 : 1);
5943 unsigned DM[2];
5944
5945 for (int i = 0; i < 2; ++i)
5946 if (SVN->getMaskElt(i) <= 0 || SVN->getMaskElt(i) == 2)
5947 DM[i] = 0;
5948 else
5949 DM[i] = 1;
5950
5951 if (Op1 == Op2 && DM[0] == 0 && DM[1] == 0 &&
5953 isa<LoadSDNode>(Op1.getOperand(0))) {
5954 LoadSDNode *LD = cast<LoadSDNode>(Op1.getOperand(0));
5956
5957 if (LD->isUnindexed() && LD->hasOneUse() && Op1.hasOneUse() &&
5958 (LD->getMemoryVT() == MVT::f64 ||
5959 LD->getMemoryVT() == MVT::i64) &&
5960 SelectAddrIdxOnly(LD->getBasePtr(), Base, Offset)) {
5961 SDValue Chain = LD->getChain();
5962 SDValue Ops[] = { Base, Offset, Chain };
5963 MachineMemOperand *MemOp = LD->getMemOperand();
5964 SDNode *NewN = CurDAG->SelectNodeTo(N, PPC::LXVDSX,
5965 N->getValueType(0), Ops);
5966 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NewN), {MemOp});
5967 return;
5968 }
5969 }
5970
5971 // For little endian, we must swap the input operands and adjust
5972 // the mask elements (reverse and invert them).
5973 if (Subtarget->isLittleEndian()) {
5974 std::swap(Op1, Op2);
5975 unsigned tmp = DM[0];
5976 DM[0] = 1 - DM[1];
5977 DM[1] = 1 - tmp;
5978 }
5979
5980 SDValue DMV = CurDAG->getTargetConstant(DM[1] | (DM[0] << 1), dl,
5981 MVT::i32);
5982 SDValue Ops[] = { Op1, Op2, DMV };
5983 CurDAG->SelectNodeTo(N, PPC::XXPERMDI, N->getValueType(0), Ops);
5984 return;
5985 }
5986
5987 break;
5988 case PPCISD::BDNZ:
5989 case PPCISD::BDZ: {
5990 bool IsPPC64 = Subtarget->isPPC64();
5991 SDValue Ops[] = { N->getOperand(1), N->getOperand(0) };
5992 CurDAG->SelectNodeTo(N, N->getOpcode() == PPCISD::BDNZ
5993 ? (IsPPC64 ? PPC::BDNZ8 : PPC::BDNZ)
5994 : (IsPPC64 ? PPC::BDZ8 : PPC::BDZ),
5995 MVT::Other, Ops);
5996 return;
5997 }
5998 case PPCISD::COND_BRANCH: {
5999 // Op #0 is the Chain.
6000 // Op #1 is the PPC::PRED_* number.
6001 // Op #2 is the CR#
6002 // Op #3 is the Dest MBB
6003 // Op #4 is the Flag.
6004 // Prevent PPC::PRED_* from being selected into LI.
6005 unsigned PCC = N->getConstantOperandVal(1);
6006 if (EnableBranchHint)
6007 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(3));
6008
6009 SDValue Pred = getI32Imm(PCC, dl);
6010 SDValue Ops[] = { Pred, N->getOperand(2), N->getOperand(3),
6011 N->getOperand(0), N->getOperand(4) };
6012 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6013 return;
6014 }
6015 case ISD::BR_CC: {
6016 if (tryFoldSWTestBRCC(N))
6017 return;
6018 if (trySelectLoopCountIntrinsic(N))
6019 return;
6020 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
6021 unsigned PCC =
6022 getPredicateForSetCC(CC, N->getOperand(2).getValueType(), Subtarget);
6023
6024 if (N->getOperand(2).getValueType() == MVT::i1) {
6025 unsigned Opc;
6026 bool Swap;
6027 switch (PCC) {
6028 default: llvm_unreachable("Unexpected Boolean-operand predicate");
6029 case PPC::PRED_LT: Opc = PPC::CRANDC; Swap = true; break;
6030 case PPC::PRED_LE: Opc = PPC::CRORC; Swap = true; break;
6031 case PPC::PRED_EQ: Opc = PPC::CREQV; Swap = false; break;
6032 case PPC::PRED_GE: Opc = PPC::CRORC; Swap = false; break;
6033 case PPC::PRED_GT: Opc = PPC::CRANDC; Swap = false; break;
6034 case PPC::PRED_NE: Opc = PPC::CRXOR; Swap = false; break;
6035 }
6036
6037 // A signed comparison of i1 values produces the opposite result to an
6038 // unsigned one if the condition code includes less-than or greater-than.
6039 // This is because 1 is the most negative signed i1 number and the most
6040 // positive unsigned i1 number. The CR-logical operations used for such
6041 // comparisons are non-commutative so for signed comparisons vs. unsigned
6042 // ones, the input operands just need to be swapped.
6044 Swap = !Swap;
6045
6046 SDValue BitComp(CurDAG->getMachineNode(Opc, dl, MVT::i1,
6047 N->getOperand(Swap ? 3 : 2),
6048 N->getOperand(Swap ? 2 : 3)), 0);
6049 CurDAG->SelectNodeTo(N, PPC::BC, MVT::Other, BitComp, N->getOperand(4),
6050 N->getOperand(0));
6051 return;
6052 }
6053
6054 if (EnableBranchHint)
6055 PCC |= getBranchHint(PCC, *FuncInfo, N->getOperand(4));
6056
6057 SDValue CondCode = SelectCC(N->getOperand(2), N->getOperand(3), CC, dl);
6058 SDValue Ops[] = { getI32Imm(PCC, dl), CondCode,
6059 N->getOperand(4), N->getOperand(0) };
6060 CurDAG->SelectNodeTo(N, PPC::BCC, MVT::Other, Ops);
6061 return;
6062 }
6063 case ISD::BRIND: {
6064 // FIXME: Should custom lower this.
6065 SDValue Chain = N->getOperand(0);
6066 SDValue Target = N->getOperand(1);
6067 unsigned Opc = Target.getValueType() == MVT::i32 ? PPC::MTCTR : PPC::MTCTR8;
6068 unsigned Reg = Target.getValueType() == MVT::i32 ? PPC::BCTR : PPC::BCTR8;
6069 Chain = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, Target,
6070 Chain), 0);
6071 CurDAG->SelectNodeTo(N, Reg, MVT::Other, Chain);
6072 return;
6073 }
6074 case PPCISD::TOC_ENTRY: {
6075 const bool isPPC64 = Subtarget->isPPC64();
6076 const bool isELFABI = Subtarget->isSVR4ABI();
6077 const bool isAIXABI = Subtarget->isAIXABI();
6078
6079 // PowerPC only supports the small, medium and large code models.
6080 const CodeModel::Model CModel = getCodeModel(*Subtarget, TM, N);
6081
6082 assert(!(CModel == CodeModel::Tiny || CModel == CodeModel::Kernel) &&
6083 "PowerPC doesn't support tiny or kernel code models.");
6084
6085 if (isAIXABI && CModel == CodeModel::Medium)
6086 report_fatal_error("Medium code model is not supported on AIX.");
6087
6088 // For 64-bit ELF small code model, we allow SelectCodeCommon to handle
6089 // this, selecting one of LDtoc, LDtocJTI, LDtocCPT, and LDtocBA. For AIX
6090 // small code model, we need to check for a toc-data attribute.
6091 if (isPPC64 && !isAIXABI && CModel == CodeModel::Small)
6092 break;
6093
6094 auto replaceWith = [this, &dl](unsigned OpCode, SDNode *TocEntry,
6095 EVT OperandTy) {
6096 SDValue GA = TocEntry->getOperand(0);
6097 SDValue TocBase = TocEntry->getOperand(1);
6098 SDNode *MN = CurDAG->getMachineNode(OpCode, dl, OperandTy, GA, TocBase);
6099 transferMemOperands(TocEntry, MN);
6100 ReplaceNode(TocEntry, MN);
6101 };
6102
6103 // Handle 32-bit small code model.
6104 if (!isPPC64 && CModel == CodeModel::Small) {
6105 // Transforms the ISD::TOC_ENTRY node to passed in Opcode, either
6106 // PPC::ADDItoc, or PPC::LWZtoc
6107 if (isELFABI) {
6108 assert(TM.isPositionIndependent() &&
6109 "32-bit ELF can only have TOC entries in position independent"
6110 " code.");
6111 // 32-bit ELF always uses a small code model toc access.
6112 replaceWith(PPC::LWZtoc, N, MVT::i32);
6113 return;
6114 }
6115
6116 assert(isAIXABI && "ELF ABI already handled");
6117
6118 if (hasTocDataAttr(N->getOperand(0),
6119 CurDAG->getDataLayout().getPointerSize())) {
6120 replaceWith(PPC::ADDItoc, N, MVT::i32);
6121 return;
6122 }
6123
6124 replaceWith(PPC::LWZtoc, N, MVT::i32);
6125 return;
6126 }
6127
6128 if (isPPC64 && CModel == CodeModel::Small) {
6129 assert(isAIXABI && "ELF ABI handled in common SelectCode");
6130
6131 if (hasTocDataAttr(N->getOperand(0),
6132 CurDAG->getDataLayout().getPointerSize())) {
6133 replaceWith(PPC::ADDItoc8, N, MVT::i64);
6134 return;
6135 }
6136 // Break if it doesn't have toc data attribute. Proceed with common
6137 // SelectCode.
6138 break;
6139 }
6140
6141 assert(CModel != CodeModel::Small && "All small code models handled.");
6142
6143 assert((isPPC64 || (isAIXABI && !isPPC64)) && "We are dealing with 64-bit"
6144 " ELF/AIX or 32-bit AIX in the following.");
6145
6146 // Transforms the ISD::TOC_ENTRY node for 32-bit AIX large code model mode
6147 // or 64-bit medium (ELF-only) or large (ELF and AIX) code model code. We
6148 // generate two instructions as described below. The first source operand
6149 // is a symbol reference. If it must be toc-referenced according to
6150 // Subtarget, we generate:
6151 // [32-bit AIX]
6152 // LWZtocL(@sym, ADDIStocHA(%r2, @sym))
6153 // [64-bit ELF/AIX]
6154 // LDtocL(@sym, ADDIStocHA8(%x2, @sym))
6155 // Otherwise we generate:
6156 // ADDItocL8(ADDIStocHA8(%x2, @sym), @sym)
6157 SDValue GA = N->getOperand(0);
6158 SDValue TOCbase = N->getOperand(1);
6159
6160 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
6161 SDNode *Tmp = CurDAG->getMachineNode(
6162 isPPC64 ? PPC::ADDIStocHA8 : PPC::ADDIStocHA, dl, VT, TOCbase, GA);
6163
6164 if (PPCLowering->isAccessedAsGotIndirect(GA)) {
6165 // If it is accessed as got-indirect, we need an extra LWZ/LD to load
6166 // the address.
6167 SDNode *MN = CurDAG->getMachineNode(
6168 isPPC64 ? PPC::LDtocL : PPC::LWZtocL, dl, VT, GA, SDValue(Tmp, 0));
6169
6170 transferMemOperands(N, MN);
6171 ReplaceNode(N, MN);
6172 return;
6173 }
6174
6175 // Build the address relative to the TOC-pointer.
6176 ReplaceNode(N, CurDAG->getMachineNode(PPC::ADDItocL8, dl, MVT::i64,
6177 SDValue(Tmp, 0), GA));
6178 return;
6179 }
6181 // Generate a PIC-safe GOT reference.
6182 assert(Subtarget->is32BitELFABI() &&
6183 "PPCISD::PPC32_PICGOT is only supported for 32-bit SVR4");
6184 CurDAG->SelectNodeTo(N, PPC::PPC32PICGOT,
6185 PPCLowering->getPointerTy(CurDAG->getDataLayout()),
6186 MVT::i32);
6187 return;
6188
6189 case PPCISD::VADD_SPLAT: {
6190 // This expands into one of three sequences, depending on whether
6191 // the first operand is odd or even, positive or negative.
6192 assert(isa<ConstantSDNode>(N->getOperand(0)) &&
6193 isa<ConstantSDNode>(N->getOperand(1)) &&
6194 "Invalid operand on VADD_SPLAT!");
6195
6196 int Elt = N->getConstantOperandVal(0);
6197 int EltSize = N->getConstantOperandVal(1);
6198 unsigned Opc1, Opc2, Opc3;
6199 EVT VT;
6200
6201 if (EltSize == 1) {
6202 Opc1 = PPC::VSPLTISB;
6203 Opc2 = PPC::VADDUBM;
6204 Opc3 = PPC::VSUBUBM;
6205 VT = MVT::v16i8;
6206 } else if (EltSize == 2) {
6207 Opc1 = PPC::VSPLTISH;
6208 Opc2 = PPC::VADDUHM;
6209 Opc3 = PPC::VSUBUHM;
6210 VT = MVT::v8i16;
6211 } else {
6212 assert(EltSize == 4 && "Invalid element size on VADD_SPLAT!");
6213 Opc1 = PPC::VSPLTISW;
6214 Opc2 = PPC::VADDUWM;
6215 Opc3 = PPC::VSUBUWM;
6216 VT = MVT::v4i32;
6217 }
6218
6219 if ((Elt & 1) == 0) {
6220 // Elt is even, in the range [-32,-18] + [16,30].
6221 //
6222 // Convert: VADD_SPLAT elt, size
6223 // Into: tmp = VSPLTIS[BHW] elt
6224 // VADDU[BHW]M tmp, tmp
6225 // Where: [BHW] = B for size = 1, H for size = 2, W for size = 4
6226 SDValue EltVal = getI32Imm(Elt >> 1, dl);
6227 SDNode *Tmp = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6228 SDValue TmpVal = SDValue(Tmp, 0);
6229 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, TmpVal, TmpVal));
6230 return;
6231 } else if (Elt > 0) {
6232 // Elt is odd and positive, in the range [17,31].
6233 //
6234 // Convert: VADD_SPLAT elt, size
6235 // Into: tmp1 = VSPLTIS[BHW] elt-16
6236 // tmp2 = VSPLTIS[BHW] -16
6237 // VSUBU[BHW]M tmp1, tmp2
6238 SDValue EltVal = getI32Imm(Elt - 16, dl);
6239 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6240 EltVal = getI32Imm(-16, dl);
6241 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6242 ReplaceNode(N, CurDAG->getMachineNode(Opc3, dl, VT, SDValue(Tmp1, 0),
6243 SDValue(Tmp2, 0)));
6244 return;
6245 } else {
6246 // Elt is odd and negative, in the range [-31,-17].
6247 //
6248 // Convert: VADD_SPLAT elt, size
6249 // Into: tmp1 = VSPLTIS[BHW] elt+16
6250 // tmp2 = VSPLTIS[BHW] -16
6251 // VADDU[BHW]M tmp1, tmp2
6252 SDValue EltVal = getI32Imm(Elt + 16, dl);
6253 SDNode *Tmp1 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6254 EltVal = getI32Imm(-16, dl);
6255 SDNode *Tmp2 = CurDAG->getMachineNode(Opc1, dl, VT, EltVal);
6256 ReplaceNode(N, CurDAG->getMachineNode(Opc2, dl, VT, SDValue(Tmp1, 0),
6257 SDValue(Tmp2, 0)));
6258 return;
6259 }
6260 }
6261 case PPCISD::LD_SPLAT: {
6262 // Here we want to handle splat load for type v16i8 and v8i16 when there is
6263 // no direct move, we don't need to use stack for this case. If target has
6264 // direct move, we should be able to get the best selection in the .td file.
6265 if (!Subtarget->hasAltivec() || Subtarget->hasDirectMove())
6266 break;
6267
6268 EVT Type = N->getValueType(0);
6269 if (Type != MVT::v16i8 && Type != MVT::v8i16)
6270 break;
6271
6272 // If the alignment for the load is 16 or bigger, we don't need the
6273 // permutated mask to get the required value. The value must be the 0
6274 // element in big endian target or 7/15 in little endian target in the
6275 // result vsx register of lvx instruction.
6276 // Select the instruction in the .td file.
6277 if (cast<MemIntrinsicSDNode>(N)->getAlign() >= Align(16) &&
6278 isOffsetMultipleOf(N, 16))
6279 break;
6280
6281 SDValue ZeroReg =
6282 CurDAG->getRegister(Subtarget->isPPC64() ? PPC::ZERO8 : PPC::ZERO,
6283 Subtarget->isPPC64() ? MVT::i64 : MVT::i32);
6284 unsigned LIOpcode = Subtarget->isPPC64() ? PPC::LI8 : PPC::LI;
6285 // v16i8 LD_SPLAT addr
6286 // ======>
6287 // Mask = LVSR/LVSL 0, addr
6288 // LoadLow = LVX 0, addr
6289 // Perm = VPERM LoadLow, LoadLow, Mask
6290 // Splat = VSPLTB 15/0, Perm
6291 //
6292 // v8i16 LD_SPLAT addr
6293 // ======>
6294 // Mask = LVSR/LVSL 0, addr
6295 // LoadLow = LVX 0, addr
6296 // LoadHigh = LVX (LI, 1), addr
6297 // Perm = VPERM LoadLow, LoadHigh, Mask
6298 // Splat = VSPLTH 7/0, Perm
6299 unsigned SplatOp = (Type == MVT::v16i8) ? PPC::VSPLTB : PPC::VSPLTH;
6300 unsigned SplatElemIndex =
6301 Subtarget->isLittleEndian() ? ((Type == MVT::v16i8) ? 15 : 7) : 0;
6302
6303 SDNode *Mask = CurDAG->getMachineNode(
6304 Subtarget->isLittleEndian() ? PPC::LVSR : PPC::LVSL, dl, Type, ZeroReg,
6305 N->getOperand(1));
6306
6307 SDNode *LoadLow =
6308 CurDAG->getMachineNode(PPC::LVX, dl, MVT::v16i8, MVT::Other,
6309 {ZeroReg, N->getOperand(1), N->getOperand(0)});
6310
6311 SDNode *LoadHigh = LoadLow;
6312 if (Type == MVT::v8i16) {
6313 LoadHigh = CurDAG->getMachineNode(
6314 PPC::LVX, dl, MVT::v16i8, MVT::Other,
6315 {SDValue(CurDAG->getMachineNode(
6316 LIOpcode, dl, MVT::i32,
6317 CurDAG->getTargetConstant(1, dl, MVT::i8)),
6318 0),
6319 N->getOperand(1), SDValue(LoadLow, 1)});
6320 }
6321
6322 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(LoadHigh, 1));
6323 transferMemOperands(N, LoadHigh);
6324
6325 SDNode *Perm =
6326 CurDAG->getMachineNode(PPC::VPERM, dl, Type, SDValue(LoadLow, 0),
6327 SDValue(LoadHigh, 0), SDValue(Mask, 0));
6328 CurDAG->SelectNodeTo(N, SplatOp, Type,
6329 CurDAG->getTargetConstant(SplatElemIndex, dl, MVT::i8),
6330 SDValue(Perm, 0));
6331 return;
6332 }
6333 }
6334
6335 SelectCode(N);
6336}
6337
6338// If the target supports the cmpb instruction, do the idiom recognition here.
6339// We don't do this as a DAG combine because we don't want to do it as nodes
6340// are being combined (because we might miss part of the eventual idiom). We
6341// don't want to do it during instruction selection because we want to reuse
6342// the logic for lowering the masking operations already part of the
6343// instruction selector.
6344SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
6345 SDLoc dl(N);
6346
6347 assert(N->getOpcode() == ISD::OR &&
6348 "Only OR nodes are supported for CMPB");
6349
6350 SDValue Res;
6351 if (!Subtarget->hasCMPB())
6352 return Res;
6353
6354 if (N->getValueType(0) != MVT::i32 &&
6355 N->getValueType(0) != MVT::i64)
6356 return Res;
6357
6358 EVT VT = N->getValueType(0);
6359
6360 SDValue RHS, LHS;
6361 bool BytesFound[8] = {false, false, false, false, false, false, false, false};
6362 uint64_t Mask = 0, Alt = 0;
6363
6364 auto IsByteSelectCC = [this](SDValue O, unsigned &b,
6365 uint64_t &Mask, uint64_t &Alt,
6366 SDValue &LHS, SDValue &RHS) {
6367 if (O.getOpcode() != ISD::SELECT_CC)
6368 return false;
6369 ISD::CondCode CC = cast<CondCodeSDNode>(O.getOperand(4))->get();
6370
6371 if (!isa<ConstantSDNode>(O.getOperand(2)) ||
6372 !isa<ConstantSDNode>(O.getOperand(3)))
6373 return false;
6374
6375 uint64_t PM = O.getConstantOperandVal(2);
6376 uint64_t PAlt = O.getConstantOperandVal(3);
6377 for (b = 0; b < 8; ++b) {
6378 uint64_t Mask = UINT64_C(0xFF) << (8*b);
6379 if (PM && (PM & Mask) == PM && (PAlt & Mask) == PAlt)
6380 break;
6381 }
6382
6383 if (b == 8)
6384 return false;
6385 Mask |= PM;
6386 Alt |= PAlt;
6387
6388 if (!isa<ConstantSDNode>(O.getOperand(1)) ||
6389 O.getConstantOperandVal(1) != 0) {
6390 SDValue Op0 = O.getOperand(0), Op1 = O.getOperand(1);
6391 if (Op0.getOpcode() == ISD::TRUNCATE)
6392 Op0 = Op0.getOperand(0);
6393 if (Op1.getOpcode() == ISD::TRUNCATE)
6394 Op1 = Op1.getOperand(0);
6395
6396 if (Op0.getOpcode() == ISD::SRL && Op1.getOpcode() == ISD::SRL &&
6397 Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
6398 isa<ConstantSDNode>(Op0.getOperand(1))) {
6399
6400 unsigned Bits = Op0.getValueSizeInBits();
6401 if (b != Bits/8-1)
6402 return false;
6403 if (Op0.getConstantOperandVal(1) != Bits-8)
6404 return false;
6405
6406 LHS = Op0.getOperand(0);
6407 RHS = Op1.getOperand(0);
6408 return true;
6409 }
6410
6411 // When we have small integers (i16 to be specific), the form present
6412 // post-legalization uses SETULT in the SELECT_CC for the
6413 // higher-order byte, depending on the fact that the
6414 // even-higher-order bytes are known to all be zero, for example:
6415 // select_cc (xor $lhs, $rhs), 256, 65280, 0, setult
6416 // (so when the second byte is the same, because all higher-order
6417 // bits from bytes 3 and 4 are known to be zero, the result of the
6418 // xor can be at most 255)
6419 if (Op0.getOpcode() == ISD::XOR && CC == ISD::SETULT &&
6420 isa<ConstantSDNode>(O.getOperand(1))) {
6421
6422 uint64_t ULim = O.getConstantOperandVal(1);
6423 if (ULim != (UINT64_C(1) << b*8))
6424 return false;
6425
6426 // Now we need to make sure that the upper bytes are known to be
6427 // zero.
6428 unsigned Bits = Op0.getValueSizeInBits();
6429 if (!CurDAG->MaskedValueIsZero(
6430 Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
6431 return false;
6432
6433 LHS = Op0.getOperand(0);
6434 RHS = Op0.getOperand(1);
6435 return true;
6436 }
6437
6438 return false;
6439 }
6440
6441 if (CC != ISD::SETEQ)
6442 return false;
6443
6444 SDValue Op = O.getOperand(0);
6445 if (Op.getOpcode() == ISD::AND) {
6446 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6447 return false;
6448 if (Op.getConstantOperandVal(1) != (UINT64_C(0xFF) << (8*b)))
6449 return false;
6450
6451 SDValue XOR = Op.getOperand(0);
6452 if (XOR.getOpcode() == ISD::TRUNCATE)
6453 XOR = XOR.getOperand(0);
6454 if (XOR.getOpcode() != ISD::XOR)
6455 return false;
6456
6457 LHS = XOR.getOperand(0);
6458 RHS = XOR.getOperand(1);
6459 return true;
6460 } else if (Op.getOpcode() == ISD::SRL) {
6461 if (!isa<ConstantSDNode>(Op.getOperand(1)))
6462 return false;
6463 unsigned Bits = Op.getValueSizeInBits();
6464 if (b != Bits/8-1)
6465 return false;
6466 if (Op.getConstantOperandVal(1) != Bits-8)
6467 return false;
6468
6469 SDValue XOR = Op.getOperand(0);
6470 if (XOR.getOpcode() == ISD::TRUNCATE)
6471 XOR = XOR.getOperand(0);
6472 if (XOR.getOpcode() != ISD::XOR)
6473 return false;
6474
6475 LHS = XOR.getOperand(0);
6476 RHS = XOR.getOperand(1);
6477 return true;
6478 }
6479
6480 return false;
6481 };
6482
6484 while (!Queue.empty()) {
6485 SDValue V = Queue.pop_back_val();
6486
6487 for (const SDValue &O : V.getNode()->ops()) {
6488 unsigned b = 0;
6489 uint64_t M = 0, A = 0;
6490 SDValue OLHS, ORHS;
6491 if (O.getOpcode() == ISD::OR) {
6492 Queue.push_back(O);
6493 } else if (IsByteSelectCC(O, b, M, A, OLHS, ORHS)) {
6494 if (!LHS) {
6495 LHS = OLHS;
6496 RHS = ORHS;
6497 BytesFound[b] = true;
6498 Mask |= M;
6499 Alt |= A;
6500 } else if ((LHS == ORHS && RHS == OLHS) ||
6501 (RHS == ORHS && LHS == OLHS)) {
6502 BytesFound[b] = true;
6503 Mask |= M;
6504 Alt |= A;
6505 } else {
6506 return Res;
6507 }
6508 } else {
6509 return Res;
6510 }
6511 }
6512 }
6513
6514 unsigned LastB = 0, BCnt = 0;
6515 for (unsigned i = 0; i < 8; ++i)
6516 if (BytesFound[LastB]) {
6517 ++BCnt;
6518 LastB = i;
6519 }
6520
6521 if (!LastB || BCnt < 2)
6522 return Res;
6523
6524 // Because we'll be zero-extending the output anyway if don't have a specific
6525 // value for each input byte (via the Mask), we can 'anyext' the inputs.
6526 if (LHS.getValueType() != VT) {
6527 LHS = CurDAG->getAnyExtOrTrunc(LHS, dl, VT);
6528 RHS = CurDAG->getAnyExtOrTrunc(RHS, dl, VT);
6529 }
6530
6531 Res = CurDAG->getNode(PPCISD::CMPB, dl, VT, LHS, RHS);
6532
6533 bool NonTrivialMask = ((int64_t) Mask) != INT64_C(-1);
6534 if (NonTrivialMask && !Alt) {
6535 // Res = Mask & CMPB
6536 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6537 CurDAG->getConstant(Mask, dl, VT));
6538 } else if (Alt) {
6539 // Res = (CMPB & Mask) | (~CMPB & Alt)
6540 // Which, as suggested here:
6541 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
6542 // can be written as:
6543 // Res = Alt ^ ((Alt ^ Mask) & CMPB)
6544 // useful because the (Alt ^ Mask) can be pre-computed.
6545 Res = CurDAG->getNode(ISD::AND, dl, VT, Res,
6546 CurDAG->getConstant(Mask ^ Alt, dl, VT));
6547 Res = CurDAG->getNode(ISD::XOR, dl, VT, Res,
6548 CurDAG->getConstant(Alt, dl, VT));
6549 }
6550
6551 return Res;
6552}
6553
6554// When CR bit registers are enabled, an extension of an i1 variable to a i32
6555// or i64 value is lowered in terms of a SELECT_I[48] operation, and thus
6556// involves constant materialization of a 0 or a 1 or both. If the result of
6557// the extension is then operated upon by some operator that can be constant
6558// folded with a constant 0 or 1, and that constant can be materialized using
6559// only one instruction (like a zero or one), then we should fold in those
6560// operations with the select.
6561void PPCDAGToDAGISel::foldBoolExts(SDValue &Res, SDNode *&N) {
6562 if (!Subtarget->useCRBits())
6563 return;
6564
6565 if (N->getOpcode() != ISD::ZERO_EXTEND &&
6566 N->getOpcode() != ISD::SIGN_EXTEND &&
6567 N->getOpcode() != ISD::ANY_EXTEND)
6568 return;
6569
6570 if (N->getOperand(0).getValueType() != MVT::i1)
6571 return;
6572
6573 if (!N->hasOneUse())
6574 return;
6575
6576 SDLoc dl(N);
6577 EVT VT = N->getValueType(0);
6578 SDValue Cond = N->getOperand(0);
6579 SDValue ConstTrue =
6580 CurDAG->getConstant(N->getOpcode() == ISD::SIGN_EXTEND ? -1 : 1, dl, VT);
6581 SDValue ConstFalse = CurDAG->getConstant(0, dl, VT);
6582
6583 do {
6584 SDNode *User = *N->use_begin();
6585 if (User->getNumOperands() != 2)
6586 break;
6587
6588 auto TryFold = [this, N, User, dl](SDValue Val) {
6589 SDValue UserO0 = User->getOperand(0), UserO1 = User->getOperand(1);
6590 SDValue O0 = UserO0.getNode() == N ? Val : UserO0;
6591 SDValue O1 = UserO1.getNode() == N ? Val : UserO1;
6592
6593 return CurDAG->FoldConstantArithmetic(User->getOpcode(), dl,
6594 User->getValueType(0), {O0, O1});
6595 };
6596
6597 // FIXME: When the semantics of the interaction between select and undef
6598 // are clearly defined, it may turn out to be unnecessary to break here.
6599 SDValue TrueRes = TryFold(ConstTrue);
6600 if (!TrueRes || TrueRes.isUndef())
6601 break;
6602 SDValue FalseRes = TryFold(ConstFalse);
6603 if (!FalseRes || FalseRes.isUndef())
6604 break;
6605
6606 // For us to materialize these using one instruction, we must be able to
6607 // represent them as signed 16-bit integers.
6608 uint64_t True = TrueRes->getAsZExtVal(), False = FalseRes->getAsZExtVal();
6609 if (!isInt<16>(True) || !isInt<16>(False))
6610 break;
6611
6612 // We can replace User with a new SELECT node, and try again to see if we
6613 // can fold the select with its user.
6614 Res = CurDAG->getSelect(dl, User->getValueType(0), Cond, TrueRes, FalseRes);
6615 N = User;
6616 ConstTrue = TrueRes;
6617 ConstFalse = FalseRes;
6618 } while (N->hasOneUse());
6619}
6620
6621void PPCDAGToDAGISel::PreprocessISelDAG() {
6622 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
6623
6624 bool MadeChange = false;
6625 while (Position != CurDAG->allnodes_begin()) {
6626 SDNode *N = &*--Position;
6627 if (N->use_empty())
6628 continue;
6629
6630 SDValue Res;
6631 switch (N->getOpcode()) {
6632 default: break;
6633 case ISD::OR:
6634 Res = combineToCMPB(N);
6635 break;
6636 }
6637
6638 if (!Res)
6639 foldBoolExts(Res, N);
6640
6641 if (Res) {
6642 LLVM_DEBUG(dbgs() << "PPC DAG preprocessing replacing:\nOld: ");
6643 LLVM_DEBUG(N->dump(CurDAG));
6644 LLVM_DEBUG(dbgs() << "\nNew: ");
6645 LLVM_DEBUG(Res.getNode()->dump(CurDAG));
6646 LLVM_DEBUG(dbgs() << "\n");
6647
6648 CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res);
6649 MadeChange = true;
6650 }
6651 }
6652
6653 if (MadeChange)
6654 CurDAG->RemoveDeadNodes();
6655}
6656
6657/// PostprocessISelDAG - Perform some late peephole optimizations
6658/// on the DAG representation.
6659void PPCDAGToDAGISel::PostprocessISelDAG() {
6660 // Skip peepholes at -O0.
6661 if (TM.getOptLevel() == CodeGenOptLevel::None)
6662 return;
6663
6664 PeepholePPC64();
6665 PeepholeCROps();
6666 PeepholePPC64ZExt();
6667}
6668
6669// Check if all users of this node will become isel where the second operand
6670// is the constant zero. If this is so, and if we can negate the condition,
6671// then we can flip the true and false operands. This will allow the zero to
6672// be folded with the isel so that we don't need to materialize a register
6673// containing zero.
6674bool PPCDAGToDAGISel::AllUsersSelectZero(SDNode *N) {
6675 for (const SDNode *User : N->uses()) {
6676 if (!User->isMachineOpcode())
6677 return false;
6678 if (User->getMachineOpcode() != PPC::SELECT_I4 &&
6679 User->getMachineOpcode() != PPC::SELECT_I8)
6680 return false;
6681
6682 SDNode *Op1 = User->getOperand(1).getNode();
6683 SDNode *Op2 = User->getOperand(2).getNode();
6684 // If we have a degenerate select with two equal operands, swapping will
6685 // not do anything, and we may run into an infinite loop.
6686 if (Op1 == Op2)
6687 return false;
6688
6689 if (!Op2->isMachineOpcode())
6690 return false;
6691
6692 if (Op2->getMachineOpcode() != PPC::LI &&
6693 Op2->getMachineOpcode() != PPC::LI8)
6694 return false;
6695
6696 if (!isNullConstant(Op2->getOperand(0)))
6697 return false;
6698 }
6699
6700 return true;
6701}
6702
6703void PPCDAGToDAGISel::SwapAllSelectUsers(SDNode *N) {
6704 SmallVector<SDNode *, 4> ToReplace;
6705 for (SDNode *User : N->uses()) {
6706 assert((User->getMachineOpcode() == PPC::SELECT_I4 ||
6707 User->getMachineOpcode() == PPC::SELECT_I8) &&
6708 "Must have all select users");
6709 ToReplace.push_back(User);
6710 }
6711
6712 for (SDNode *User : ToReplace) {
6713 SDNode *ResNode =
6714 CurDAG->getMachineNode(User->getMachineOpcode(), SDLoc(User),
6715 User->getValueType(0), User->getOperand(0),
6716 User->getOperand(2),
6717 User->getOperand(1));
6718
6719 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
6720 LLVM_DEBUG(User->dump(CurDAG));
6721 LLVM_DEBUG(dbgs() << "\nNew: ");
6722 LLVM_DEBUG(ResNode->dump(CurDAG));
6723 LLVM_DEBUG(dbgs() << "\n");
6724
6725 ReplaceUses(User, ResNode);
6726 }
6727}
6728
6729void PPCDAGToDAGISel::PeepholeCROps() {
6730 bool IsModified;
6731 do {
6732 IsModified = false;
6733 for (SDNode &Node : CurDAG->allnodes()) {
6734 MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(&Node);
6735 if (!MachineNode || MachineNode->use_empty())
6736 continue;
6737 SDNode *ResNode = MachineNode;
6738
6739 bool Op1Set = false, Op1Unset = false,
6740 Op1Not = false,
6741 Op2Set = false, Op2Unset = false,
6742 Op2Not = false;
6743
6744 unsigned Opcode = MachineNode->getMachineOpcode();
6745 switch (Opcode) {
6746 default: break;
6747 case PPC::CRAND:
6748 case PPC::CRNAND:
6749 case PPC::CROR:
6750 case PPC::CRXOR:
6751 case PPC::CRNOR:
6752 case PPC::CREQV:
6753 case PPC::CRANDC:
6754 case PPC::CRORC: {
6755 SDValue Op = MachineNode->getOperand(1);
6756 if (Op.isMachineOpcode()) {
6757 if (Op.getMachineOpcode() == PPC::CRSET)
6758 Op2Set = true;
6759 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6760 Op2Unset = true;
6761 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6762 Op.getOperand(0) == Op.getOperand(1)) ||
6763 Op.getMachineOpcode() == PPC::CRNOT)
6764 Op2Not = true;
6765 }
6766 [[fallthrough]];
6767 }
6768 case PPC::BC:
6769 case PPC::BCn:
6770 case PPC::SELECT_I4:
6771 case PPC::SELECT_I8:
6772 case PPC::SELECT_F4:
6773 case PPC::SELECT_F8:
6774 case PPC::SELECT_SPE:
6775 case PPC::SELECT_SPE4:
6776 case PPC::SELECT_VRRC:
6777 case PPC::SELECT_VSFRC:
6778 case PPC::SELECT_VSSRC:
6779 case PPC::SELECT_VSRC: {
6780 SDValue Op = MachineNode->getOperand(0);
6781 if (Op.isMachineOpcode()) {
6782 if (Op.getMachineOpcode() == PPC::CRSET)
6783 Op1Set = true;
6784 else if (Op.getMachineOpcode() == PPC::CRUNSET)
6785 Op1Unset = true;
6786 else if ((Op.getMachineOpcode() == PPC::CRNOR &&
6787 Op.getOperand(0) == Op.getOperand(1)) ||
6788 Op.getMachineOpcode() == PPC::CRNOT)
6789 Op1Not = true;
6790 }
6791 }
6792 break;
6793 }
6794
6795 bool SelectSwap = false;
6796 switch (Opcode) {
6797 default: break;
6798 case PPC::CRAND:
6799 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6800 // x & x = x
6801 ResNode = MachineNode->getOperand(0).getNode();
6802 else if (Op1Set)
6803 // 1 & y = y
6804 ResNode = MachineNode->getOperand(1).getNode();
6805 else if (Op2Set)
6806 // x & 1 = x
6807 ResNode = MachineNode->getOperand(0).getNode();
6808 else if (Op1Unset || Op2Unset)
6809 // x & 0 = 0 & y = 0
6810 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6811 MVT::i1);
6812 else if (Op1Not)
6813 // ~x & y = andc(y, x)
6814 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6815 MVT::i1, MachineNode->getOperand(1),
6816 MachineNode->getOperand(0).
6817 getOperand(0));
6818 else if (Op2Not)
6819 // x & ~y = andc(x, y)
6820 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6821 MVT::i1, MachineNode->getOperand(0),
6822 MachineNode->getOperand(1).
6823 getOperand(0));
6824 else if (AllUsersSelectZero(MachineNode)) {
6825 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
6826 MVT::i1, MachineNode->getOperand(0),
6827 MachineNode->getOperand(1));
6828 SelectSwap = true;
6829 }
6830 break;
6831 case PPC::CRNAND:
6832 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6833 // nand(x, x) -> nor(x, x)
6834 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6835 MVT::i1, MachineNode->getOperand(0),
6836 MachineNode->getOperand(0));
6837 else if (Op1Set)
6838 // nand(1, y) -> nor(y, y)
6839 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6840 MVT::i1, MachineNode->getOperand(1),
6841 MachineNode->getOperand(1));
6842 else if (Op2Set)
6843 // nand(x, 1) -> nor(x, x)
6844 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6845 MVT::i1, MachineNode->getOperand(0),
6846 MachineNode->getOperand(0));
6847 else if (Op1Unset || Op2Unset)
6848 // nand(x, 0) = nand(0, y) = 1
6849 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6850 MVT::i1);
6851 else if (Op1Not)
6852 // nand(~x, y) = ~(~x & y) = x | ~y = orc(x, y)
6853 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6854 MVT::i1, MachineNode->getOperand(0).
6855 getOperand(0),
6856 MachineNode->getOperand(1));
6857 else if (Op2Not)
6858 // nand(x, ~y) = ~x | y = orc(y, x)
6859 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6860 MVT::i1, MachineNode->getOperand(1).
6861 getOperand(0),
6862 MachineNode->getOperand(0));
6863 else if (AllUsersSelectZero(MachineNode)) {
6864 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
6865 MVT::i1, MachineNode->getOperand(0),
6866 MachineNode->getOperand(1));
6867 SelectSwap = true;
6868 }
6869 break;
6870 case PPC::CROR:
6871 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6872 // x | x = x
6873 ResNode = MachineNode->getOperand(0).getNode();
6874 else if (Op1Set || Op2Set)
6875 // x | 1 = 1 | y = 1
6876 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6877 MVT::i1);
6878 else if (Op1Unset)
6879 // 0 | y = y
6880 ResNode = MachineNode->getOperand(1).getNode();
6881 else if (Op2Unset)
6882 // x | 0 = x
6883 ResNode = MachineNode->getOperand(0).getNode();
6884 else if (Op1Not)
6885 // ~x | y = orc(y, x)
6886 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6887 MVT::i1, MachineNode->getOperand(1),
6888 MachineNode->getOperand(0).
6889 getOperand(0));
6890 else if (Op2Not)
6891 // x | ~y = orc(x, y)
6892 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
6893 MVT::i1, MachineNode->getOperand(0),
6894 MachineNode->getOperand(1).
6895 getOperand(0));
6896 else if (AllUsersSelectZero(MachineNode)) {
6897 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6898 MVT::i1, MachineNode->getOperand(0),
6899 MachineNode->getOperand(1));
6900 SelectSwap = true;
6901 }
6902 break;
6903 case PPC::CRXOR:
6904 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6905 // xor(x, x) = 0
6906 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6907 MVT::i1);
6908 else if (Op1Set)
6909 // xor(1, y) -> nor(y, y)
6910 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6911 MVT::i1, MachineNode->getOperand(1),
6912 MachineNode->getOperand(1));
6913 else if (Op2Set)
6914 // xor(x, 1) -> nor(x, x)
6915 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6916 MVT::i1, MachineNode->getOperand(0),
6917 MachineNode->getOperand(0));
6918 else if (Op1Unset)
6919 // xor(0, y) = y
6920 ResNode = MachineNode->getOperand(1).getNode();
6921 else if (Op2Unset)
6922 // xor(x, 0) = x
6923 ResNode = MachineNode->getOperand(0).getNode();
6924 else if (Op1Not)
6925 // xor(~x, y) = eqv(x, y)
6926 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6927 MVT::i1, MachineNode->getOperand(0).
6928 getOperand(0),
6929 MachineNode->getOperand(1));
6930 else if (Op2Not)
6931 // xor(x, ~y) = eqv(x, y)
6932 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6933 MVT::i1, MachineNode->getOperand(0),
6934 MachineNode->getOperand(1).
6935 getOperand(0));
6936 else if (AllUsersSelectZero(MachineNode)) {
6937 ResNode = CurDAG->getMachineNode(PPC::CREQV, SDLoc(MachineNode),
6938 MVT::i1, MachineNode->getOperand(0),
6939 MachineNode->getOperand(1));
6940 SelectSwap = true;
6941 }
6942 break;
6943 case PPC::CRNOR:
6944 if (Op1Set || Op2Set)
6945 // nor(1, y) -> 0
6946 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
6947 MVT::i1);
6948 else if (Op1Unset)
6949 // nor(0, y) = ~y -> nor(y, y)
6950 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6951 MVT::i1, MachineNode->getOperand(1),
6952 MachineNode->getOperand(1));
6953 else if (Op2Unset)
6954 // nor(x, 0) = ~x
6955 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6956 MVT::i1, MachineNode->getOperand(0),
6957 MachineNode->getOperand(0));
6958 else if (Op1Not)
6959 // nor(~x, y) = andc(x, y)
6960 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6961 MVT::i1, MachineNode->getOperand(0).
6962 getOperand(0),
6963 MachineNode->getOperand(1));
6964 else if (Op2Not)
6965 // nor(x, ~y) = andc(y, x)
6966 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
6967 MVT::i1, MachineNode->getOperand(1).
6968 getOperand(0),
6969 MachineNode->getOperand(0));
6970 else if (AllUsersSelectZero(MachineNode)) {
6971 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
6972 MVT::i1, MachineNode->getOperand(0),
6973 MachineNode->getOperand(1));
6974 SelectSwap = true;
6975 }
6976 break;
6977 case PPC::CREQV:
6978 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
6979 // eqv(x, x) = 1
6980 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
6981 MVT::i1);
6982 else if (Op1Set)
6983 // eqv(1, y) = y
6984 ResNode = MachineNode->getOperand(1).getNode();
6985 else if (Op2Set)
6986 // eqv(x, 1) = x
6987 ResNode = MachineNode->getOperand(0).getNode();
6988 else if (Op1Unset)
6989 // eqv(0, y) = ~y -> nor(y, y)
6990 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6991 MVT::i1, MachineNode->getOperand(1),
6992 MachineNode->getOperand(1));
6993 else if (Op2Unset)
6994 // eqv(x, 0) = ~x
6995 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
6996 MVT::i1, MachineNode->getOperand(0),
6997 MachineNode->getOperand(0));
6998 else if (Op1Not)
6999 // eqv(~x, y) = xor(x, y)
7000 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7001 MVT::i1, MachineNode->getOperand(0).
7002 getOperand(0),
7003 MachineNode->getOperand(1));
7004 else if (Op2Not)
7005 // eqv(x, ~y) = xor(x, y)
7006 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7007 MVT::i1, MachineNode->getOperand(0),
7008 MachineNode->getOperand(1).
7009 getOperand(0));
7010 else if (AllUsersSelectZero(MachineNode)) {
7011 ResNode = CurDAG->getMachineNode(PPC::CRXOR, SDLoc(MachineNode),
7012 MVT::i1, MachineNode->getOperand(0),
7013 MachineNode->getOperand(1));
7014 SelectSwap = true;
7015 }
7016 break;
7017 case PPC::CRANDC:
7018 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7019 // andc(x, x) = 0
7020 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7021 MVT::i1);
7022 else if (Op1Set)
7023 // andc(1, y) = ~y
7024 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7025 MVT::i1, MachineNode->getOperand(1),
7026 MachineNode->getOperand(1));
7027 else if (Op1Unset || Op2Set)
7028 // andc(0, y) = andc(x, 1) = 0
7029 ResNode = CurDAG->getMachineNode(PPC::CRUNSET, SDLoc(MachineNode),
7030 MVT::i1);
7031 else if (Op2Unset)
7032 // andc(x, 0) = x
7033 ResNode = MachineNode->getOperand(0).getNode();
7034 else if (Op1Not)
7035 // andc(~x, y) = ~(x | y) = nor(x, y)
7036 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7037 MVT::i1, MachineNode->getOperand(0).
7038 getOperand(0),
7039 MachineNode->getOperand(1));
7040 else if (Op2Not)
7041 // andc(x, ~y) = x & y
7042 ResNode = CurDAG->getMachineNode(PPC::CRAND, SDLoc(MachineNode),
7043 MVT::i1, MachineNode->getOperand(0),
7044 MachineNode->getOperand(1).
7045 getOperand(0));
7046 else if (AllUsersSelectZero(MachineNode)) {
7047 ResNode = CurDAG->getMachineNode(PPC::CRORC, SDLoc(MachineNode),
7048 MVT::i1, MachineNode->getOperand(1),
7049 MachineNode->getOperand(0));
7050 SelectSwap = true;
7051 }
7052 break;
7053 case PPC::CRORC:
7054 if (MachineNode->getOperand(0) == MachineNode->getOperand(1))
7055 // orc(x, x) = 1
7056 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7057 MVT::i1);
7058 else if (Op1Set || Op2Unset)
7059 // orc(1, y) = orc(x, 0) = 1
7060 ResNode = CurDAG->getMachineNode(PPC::CRSET, SDLoc(MachineNode),
7061 MVT::i1);
7062 else if (Op2Set)
7063 // orc(x, 1) = x
7064 ResNode = MachineNode->getOperand(0).getNode();
7065 else if (Op1Unset)
7066 // orc(0, y) = ~y
7067 ResNode = CurDAG->getMachineNode(PPC::CRNOR, SDLoc(MachineNode),
7068 MVT::i1, MachineNode->getOperand(1),
7069 MachineNode->getOperand(1));
7070 else if (Op1Not)
7071 // orc(~x, y) = ~(x & y) = nand(x, y)
7072 ResNode = CurDAG->getMachineNode(PPC::CRNAND, SDLoc(MachineNode),
7073 MVT::i1, MachineNode->getOperand(0).
7074 getOperand(0),
7075 MachineNode->getOperand(1));
7076 else if (Op2Not)
7077 // orc(x, ~y) = x | y
7078 ResNode = CurDAG->getMachineNode(PPC::CROR, SDLoc(MachineNode),
7079 MVT::i1, MachineNode->getOperand(0),
7080 MachineNode->getOperand(1).
7081 getOperand(0));
7082 else if (AllUsersSelectZero(MachineNode)) {
7083 ResNode = CurDAG->getMachineNode(PPC::CRANDC, SDLoc(MachineNode),
7084 MVT::i1, MachineNode->getOperand(1),
7085 MachineNode->getOperand(0));
7086 SelectSwap = true;
7087 }
7088 break;
7089 case PPC::SELECT_I4:
7090 case PPC::SELECT_I8:
7091 case PPC::SELECT_F4:
7092 case PPC::SELECT_F8:
7093 case PPC::SELECT_SPE:
7094 case PPC::SELECT_SPE4:
7095 case PPC::SELECT_VRRC:
7096 case PPC::SELECT_VSFRC:
7097 case PPC::SELECT_VSSRC:
7098 case PPC::SELECT_VSRC:
7099 if (Op1Set)
7100 ResNode = MachineNode->getOperand(1).getNode();
7101 else if (Op1Unset)
7102 ResNode = MachineNode->getOperand(2).getNode();
7103 else if (Op1Not)
7104 ResNode = CurDAG->getMachineNode(MachineNode->getMachineOpcode(),
7105 SDLoc(MachineNode),
7106 MachineNode->getValueType(0),
7107 MachineNode->getOperand(0).
7108 getOperand(0),
7109 MachineNode->getOperand(2),
7110 MachineNode->getOperand(1));
7111 break;
7112 case PPC::BC:
7113 case PPC::BCn:
7114 if (Op1Not)
7115 ResNode = CurDAG->getMachineNode(Opcode == PPC::BC ? PPC::BCn :
7116 PPC::BC,
7117 SDLoc(MachineNode),
7118 MVT::Other,
7119 MachineNode->getOperand(0).
7120 getOperand(0),
7121 MachineNode->getOperand(1),
7122 MachineNode->getOperand(2));
7123 // FIXME: Handle Op1Set, Op1Unset here too.
7124 break;
7125 }
7126
7127 // If we're inverting this node because it is used only by selects that
7128 // we'd like to swap, then swap the selects before the node replacement.
7129 if (SelectSwap)
7130 SwapAllSelectUsers(MachineNode);
7131
7132 if (ResNode != MachineNode) {
7133 LLVM_DEBUG(dbgs() << "CR Peephole replacing:\nOld: ");
7134 LLVM_DEBUG(MachineNode->dump(CurDAG));
7135 LLVM_DEBUG(dbgs() << "\nNew: ");
7136 LLVM_DEBUG(ResNode->dump(CurDAG));
7137 LLVM_DEBUG(dbgs() << "\n");
7138
7139 ReplaceUses(MachineNode, ResNode);
7140 IsModified = true;
7141 }
7142 }
7143 if (IsModified)
7144 CurDAG->RemoveDeadNodes();
7145 } while (IsModified);
7146}
7147
7148// Gather the set of 32-bit operations that are known to have their
7149// higher-order 32 bits zero, where ToPromote contains all such operations.
7151 SmallPtrSetImpl<SDNode *> &ToPromote) {
7152 if (!Op32.isMachineOpcode())
7153 return false;
7154
7155 // First, check for the "frontier" instructions (those that will clear the
7156 // higher-order 32 bits.
7157
7158 // For RLWINM and RLWNM, we need to make sure that the mask does not wrap
7159 // around. If it does not, then these instructions will clear the
7160 // higher-order bits.
7161 if ((Op32.getMachineOpcode() == PPC::RLWINM ||
7162 Op32.getMachineOpcode() == PPC::RLWNM) &&
7163 Op32.getConstantOperandVal(2) <= Op32.getConstantOperandVal(3)) {
7164 ToPromote.insert(Op32.getNode());
7165 return true;
7166 }
7167
7168 // SLW and SRW always clear the higher-order bits.
7169 if (Op32.getMachineOpcode() == PPC::SLW ||
7170 Op32.getMachineOpcode() == PPC::SRW) {
7171 ToPromote.insert(Op32.getNode());
7172 return true;
7173 }
7174
7175 // For LI and LIS, we need the immediate to be positive (so that it is not
7176 // sign extended).
7177 if (Op32.getMachineOpcode() == PPC::LI ||
7178 Op32.getMachineOpcode() == PPC::LIS) {
7179 if (!isUInt<15>(Op32.getConstantOperandVal(0)))
7180 return false;
7181
7182 ToPromote.insert(Op32.getNode());
7183 return true;
7184 }
7185
7186 // LHBRX and LWBRX always clear the higher-order bits.
7187 if (Op32.getMachineOpcode() == PPC::LHBRX ||
7188 Op32.getMachineOpcode() == PPC::LWBRX) {
7189 ToPromote.insert(Op32.getNode());
7190 return true;
7191 }
7192
7193 // CNT[LT]ZW always produce a 64-bit value in [0,32], and so is zero extended.
7194 if (Op32.getMachineOpcode() == PPC::CNTLZW ||
7195 Op32.getMachineOpcode() == PPC::CNTTZW) {
7196 ToPromote.insert(Op32.getNode());
7197 return true;
7198 }
7199
7200 // Next, check for those instructions we can look through.
7201
7202 // Assuming the mask does not wrap around, then the higher-order bits are
7203 // taken directly from the first operand.
7204 if (Op32.getMachineOpcode() == PPC::RLWIMI &&
7205 Op32.getConstantOperandVal(3) <= Op32.getConstantOperandVal(4)) {
7206 SmallPtrSet<SDNode *, 16> ToPromote1;
7207 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7208 return false;
7209
7210 ToPromote.insert(Op32.getNode());
7211 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7212 return true;
7213 }
7214
7215 // For OR, the higher-order bits are zero if that is true for both operands.
7216 // For SELECT_I4, the same is true (but the relevant operand numbers are
7217 // shifted by 1).
7218 if (Op32.getMachineOpcode() == PPC::OR ||
7219 Op32.getMachineOpcode() == PPC::SELECT_I4) {
7220 unsigned B = Op32.getMachineOpcode() == PPC::SELECT_I4 ? 1 : 0;
7221 SmallPtrSet<SDNode *, 16> ToPromote1;
7222 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+0), ToPromote1))
7223 return false;
7224 if (!PeepholePPC64ZExtGather(Op32.getOperand(B+1), ToPromote1))
7225 return false;
7226
7227 ToPromote.insert(Op32.getNode());
7228 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7229 return true;
7230 }
7231
7232 // For ORI and ORIS, we need the higher-order bits of the first operand to be
7233 // zero, and also for the constant to be positive (so that it is not sign
7234 // extended).
7235 if (Op32.getMachineOpcode() == PPC::ORI ||
7236 Op32.getMachineOpcode() == PPC::ORIS) {
7237 SmallPtrSet<SDNode *, 16> ToPromote1;
7238 if (!PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1))
7239 return false;
7240 if (!isUInt<15>(Op32.getConstantOperandVal(1)))
7241 return false;
7242
7243 ToPromote.insert(Op32.getNode());
7244 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7245 return true;
7246 }
7247
7248 // The higher-order bits of AND are zero if that is true for at least one of
7249 // the operands.
7250 if (Op32.getMachineOpcode() == PPC::AND) {
7251 SmallPtrSet<SDNode *, 16> ToPromote1, ToPromote2;
7252 bool Op0OK =
7253 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7254 bool Op1OK =
7255 PeepholePPC64ZExtGather(Op32.getOperand(1), ToPromote2);
7256 if (!Op0OK && !Op1OK)
7257 return false;
7258
7259 ToPromote.insert(Op32.getNode());
7260
7261 if (Op0OK)
7262 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7263
7264 if (Op1OK)
7265 ToPromote.insert(ToPromote2.begin(), ToPromote2.end());
7266
7267 return true;
7268 }
7269
7270 // For ANDI and ANDIS, the higher-order bits are zero if either that is true
7271 // of the first operand, or if the second operand is positive (so that it is
7272 // not sign extended).
7273 if (Op32.getMachineOpcode() == PPC::ANDI_rec ||
7274 Op32.getMachineOpcode() == PPC::ANDIS_rec) {
7275 SmallPtrSet<SDNode *, 16> ToPromote1;
7276 bool Op0OK =
7277 PeepholePPC64ZExtGather(Op32.getOperand(0), ToPromote1);
7278 bool Op1OK = isUInt<15>(Op32.getConstantOperandVal(1));
7279 if (!Op0OK && !Op1OK)
7280 return false;
7281
7282 ToPromote.insert(Op32.getNode());
7283
7284 if (Op0OK)
7285 ToPromote.insert(ToPromote1.begin(), ToPromote1.end());
7286
7287 return true;
7288 }
7289
7290 return false;
7291}
7292
7293void PPCDAGToDAGISel::PeepholePPC64ZExt() {
7294 if (!Subtarget->isPPC64())
7295 return;
7296
7297 // When we zero-extend from i32 to i64, we use a pattern like this:
7298 // def : Pat<(i64 (zext i32:$in)),
7299 // (RLDICL (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $in, sub_32),
7300 // 0, 32)>;
7301 // There are several 32-bit shift/rotate instructions, however, that will
7302 // clear the higher-order bits of their output, rendering the RLDICL
7303 // unnecessary. When that happens, we remove it here, and redefine the
7304 // relevant 32-bit operation to be a 64-bit operation.
7305
7306 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7307
7308 bool MadeChange = false;
7309 while (Position != CurDAG->allnodes_begin()) {
7310 SDNode *N = &*--Position;
7311 // Skip dead nodes and any non-machine opcodes.
7312 if (N->use_empty() || !N->isMachineOpcode())
7313 continue;
7314
7315 if (N->getMachineOpcode() != PPC::RLDICL)
7316 continue;
7317
7318 if (N->getConstantOperandVal(1) != 0 ||
7319 N->getConstantOperandVal(2) != 32)
7320 continue;
7321
7322 SDValue ISR = N->getOperand(0);
7323 if (!ISR.isMachineOpcode() ||
7324 ISR.getMachineOpcode() != TargetOpcode::INSERT_SUBREG)
7325 continue;
7326
7327 if (!ISR.hasOneUse())
7328 continue;
7329
7330 if (ISR.getConstantOperandVal(2) != PPC::sub_32)
7331 continue;
7332
7333 SDValue IDef = ISR.getOperand(0);
7334 if (!IDef.isMachineOpcode() ||
7335 IDef.getMachineOpcode() != TargetOpcode::IMPLICIT_DEF)
7336 continue;
7337
7338 // We now know that we're looking at a canonical i32 -> i64 zext. See if we
7339 // can get rid of it.
7340
7341 SDValue Op32 = ISR->getOperand(1);
7342 if (!Op32.isMachineOpcode())
7343 continue;
7344
7345 // There are some 32-bit instructions that always clear the high-order 32
7346 // bits, there are also some instructions (like AND) that we can look
7347 // through.
7348 SmallPtrSet<SDNode *, 16> ToPromote;
7349 if (!PeepholePPC64ZExtGather(Op32, ToPromote))
7350 continue;
7351
7352 // If the ToPromote set contains nodes that have uses outside of the set
7353 // (except for the original INSERT_SUBREG), then abort the transformation.
7354 bool OutsideUse = false;
7355 for (SDNode *PN : ToPromote) {
7356 for (SDNode *UN : PN->uses()) {
7357 if (!ToPromote.count(UN) && UN != ISR.getNode()) {
7358 OutsideUse = true;
7359 break;
7360 }
7361 }
7362
7363 if (OutsideUse)
7364 break;
7365 }
7366 if (OutsideUse)
7367 continue;
7368
7369 MadeChange = true;
7370
7371 // We now know that this zero extension can be removed by promoting to
7372 // nodes in ToPromote to 64-bit operations, where for operations in the
7373 // frontier of the set, we need to insert INSERT_SUBREGs for their
7374 // operands.
7375 for (SDNode *PN : ToPromote) {
7376 unsigned NewOpcode;
7377 switch (PN->getMachineOpcode()) {
7378 default:
7379 llvm_unreachable("Don't know the 64-bit variant of this instruction");
7380 case PPC::RLWINM: NewOpcode = PPC::RLWINM8; break;
7381 case PPC::RLWNM: NewOpcode = PPC::RLWNM8; break;
7382 case PPC::SLW: NewOpcode = PPC::SLW8; break;
7383 case PPC::SRW: NewOpcode = PPC::SRW8; break;
7384 case PPC::LI: NewOpcode = PPC::LI8; break;
7385 case PPC::LIS: NewOpcode = PPC::LIS8; break;
7386 case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
7387 case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
7388 case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
7389 case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
7390 case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
7391 case PPC::OR: NewOpcode = PPC::OR8; break;
7392 case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
7393 case PPC::ORI: NewOpcode = PPC::ORI8; break;
7394 case PPC::ORIS: NewOpcode = PPC::ORIS8; break;
7395 case PPC::AND: NewOpcode = PPC::AND8; break;
7396 case PPC::ANDI_rec:
7397 NewOpcode = PPC::ANDI8_rec;
7398 break;
7399 case PPC::ANDIS_rec:
7400 NewOpcode = PPC::ANDIS8_rec;
7401 break;
7402 }
7403
7404 // Note: During the replacement process, the nodes will be in an
7405 // inconsistent state (some instructions will have operands with values
7406 // of the wrong type). Once done, however, everything should be right
7407 // again.
7408
7410 for (const SDValue &V : PN->ops()) {
7411 if (!ToPromote.count(V.getNode()) && V.getValueType() == MVT::i32 &&
7412 !isa<ConstantSDNode>(V)) {
7413 SDValue ReplOpOps[] = { ISR.getOperand(0), V, ISR.getOperand(2) };
7414 SDNode *ReplOp =
7415 CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, SDLoc(V),
7416 ISR.getNode()->getVTList(), ReplOpOps);
7417 Ops.push_back(SDValue(ReplOp, 0));
7418 } else {
7419 Ops.push_back(V);
7420 }
7421 }
7422
7423 // Because all to-be-promoted nodes only have users that are other
7424 // promoted nodes (or the original INSERT_SUBREG), we can safely replace
7425 // the i32 result value type with i64.
7426
7427 SmallVector<EVT, 2> NewVTs;
7428 SDVTList VTs = PN->getVTList();
7429 for (unsigned i = 0, ie = VTs.NumVTs; i != ie; ++i)
7430 if (VTs.VTs[i] == MVT::i32)
7431 NewVTs.push_back(MVT::i64);
7432 else
7433 NewVTs.push_back(VTs.VTs[i]);
7434
7435 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole morphing:\nOld: ");
7436 LLVM_DEBUG(PN->dump(CurDAG));
7437
7438 CurDAG->SelectNodeTo(PN, NewOpcode, CurDAG->getVTList(NewVTs), Ops);
7439
7440 LLVM_DEBUG(dbgs() << "\nNew: ");
7441 LLVM_DEBUG(PN->dump(CurDAG));
7442 LLVM_DEBUG(dbgs() << "\n");
7443 }
7444
7445 // Now we replace the original zero extend and its associated INSERT_SUBREG
7446 // with the value feeding the INSERT_SUBREG (which has now been promoted to
7447 // return an i64).
7448
7449 LLVM_DEBUG(dbgs() << "PPC64 ZExt Peephole replacing:\nOld: ");
7450 LLVM_DEBUG(N->dump(CurDAG));
7451 LLVM_DEBUG(dbgs() << "\nNew: ");
7452 LLVM_DEBUG(Op32.getNode()->dump(CurDAG));
7453 LLVM_DEBUG(dbgs() << "\n");
7454
7455 ReplaceUses(N, Op32.getNode());
7456 }
7457
7458 if (MadeChange)
7459 CurDAG->RemoveDeadNodes();
7460}
7461
7462static bool isVSXSwap(SDValue N) {
7463 if (!N->isMachineOpcode())
7464 return false;
7465 unsigned Opc = N->getMachineOpcode();
7466
7467 // Single-operand XXPERMDI or the regular XXPERMDI/XXSLDWI where the immediate
7468 // operand is 2.
7469 if (Opc == PPC::XXPERMDIs) {
7470 return isa<ConstantSDNode>(N->getOperand(1)) &&
7471 N->getConstantOperandVal(1) == 2;
7472 } else if (Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) {
7473 return N->getOperand(0) == N->getOperand(1) &&
7474 isa<ConstantSDNode>(N->getOperand(2)) &&
7475 N->getConstantOperandVal(2) == 2;
7476 }
7477
7478 return false;
7479}
7480
7481// TODO: Make this complete and replace with a table-gen bit.
7483 if (!N->isMachineOpcode())
7484 return false;
7485 unsigned Opc = N->getMachineOpcode();
7486
7487 switch (Opc) {
7488 default:
7489 return false;
7490 case PPC::VAVGSB:
7491 case PPC::VAVGUB:
7492 case PPC::VAVGSH:
7493 case PPC::VAVGUH:
7494 case PPC::VAVGSW:
7495 case PPC::VAVGUW:
7496 case PPC::VMAXFP:
7497 case PPC::VMAXSB:
7498 case PPC::VMAXUB:
7499 case PPC::VMAXSH:
7500 case PPC::VMAXUH:
7501 case PPC::VMAXSW:
7502 case PPC::VMAXUW:
7503 case PPC::VMINFP:
7504 case PPC::VMINSB:
7505 case PPC::VMINUB:
7506 case PPC::VMINSH:
7507 case PPC::VMINUH:
7508 case PPC::VMINSW:
7509 case PPC::VMINUW:
7510 case PPC::VADDFP:
7511 case PPC::VADDUBM:
7512 case PPC::VADDUHM:
7513 case PPC::VADDUWM:
7514 case PPC::VSUBFP:
7515 case PPC::VSUBUBM:
7516 case PPC::VSUBUHM:
7517 case PPC::VSUBUWM:
7518 case PPC::VAND:
7519 case PPC::VANDC:
7520 case PPC::VOR:
7521 case PPC::VORC:
7522 case PPC::VXOR:
7523 case PPC::VNOR:
7524 case PPC::VMULUWM:
7525 return true;
7526 }
7527}
7528
7529// Try to simplify (xxswap (vec-op (xxswap) (xxswap))) where vec-op is
7530// lane-insensitive.
7531static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG) {
7532 // Our desired xxswap might be source of COPY_TO_REGCLASS.
7533 // TODO: Can we put this a common method for DAG?
7534 auto SkipRCCopy = [](SDValue V) {
7535 while (V->isMachineOpcode() &&
7536 V->getMachineOpcode() == TargetOpcode::COPY_TO_REGCLASS) {
7537 // All values in the chain should have single use.
7538 if (V->use_empty() || !V->use_begin()->isOnlyUserOf(V.getNode()))
7539 return SDValue();
7540 V = V->getOperand(0);
7541 }
7542 return V.hasOneUse() ? V : SDValue();
7543 };
7544
7545 SDValue VecOp = SkipRCCopy(N->getOperand(0));
7546 if (!VecOp || !isLaneInsensitive(VecOp))
7547 return;
7548
7549 SDValue LHS = SkipRCCopy(VecOp.getOperand(0)),
7550 RHS = SkipRCCopy(VecOp.getOperand(1));
7551 if (!LHS || !RHS || !isVSXSwap(LHS) || !isVSXSwap(RHS))
7552 return;
7553
7554 // These swaps may still have chain-uses here, count on dead code elimination
7555 // in following passes to remove them.
7556 DAG->ReplaceAllUsesOfValueWith(LHS, LHS.getOperand(0));
7557 DAG->ReplaceAllUsesOfValueWith(RHS, RHS.getOperand(0));
7558 DAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
7559}
7560
7561// Is an ADDI eligible for folding for non-TOC-based local-exec accesses?
7563 SDValue ADDIToFold) {
7564 // Check if ADDIToFold (the ADDI that we want to fold into local-exec
7565 // accesses), is truly an ADDI.
7566 if (!ADDIToFold.isMachineOpcode() ||
7567 (ADDIToFold.getMachineOpcode() != PPC::ADDI8))
7568 return false;
7569
7570 // The first operand of the ADDIToFold should be the thread pointer.
7571 // This transformation is only performed if the first operand of the
7572 // addi is the thread pointer.
7573 SDValue TPRegNode = ADDIToFold.getOperand(0);
7574 RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7575 const PPCSubtarget &Subtarget =
7577 if (!TPReg || (TPReg->getReg() != Subtarget.getThreadPointerRegister()))
7578 return false;
7579
7580 // The second operand of the ADDIToFold should be the global TLS address
7581 // (the local-exec TLS variable). We only perform the folding if the TLS
7582 // variable is the second operand.
7583 SDValue TLSVarNode = ADDIToFold.getOperand(1);
7584 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7585 if (!GA)
7586 return false;
7587
7588 // The local-exec TLS variable should only have the MO_TPREL_FLAG target flag,
7589 // so this optimization is not performed otherwise if the flag is not set.
7590 unsigned TargetFlags = GA->getTargetFlags();
7591 if (TargetFlags != PPCII::MO_TPREL_FLAG)
7592 return false;
7593
7594 // If all conditions are satisfied, the ADDI is valid for folding.
7595 return true;
7596}
7597
7598// For non-TOC-based local-exec access where an addi is feeding into another
7599// addi, fold this sequence into a single addi if possible.
7600// Before this optimization, the sequence appears as:
7601// addi rN, r13, sym@le
7602// addi rM, rN, imm
7603// After this optimization, we can fold the two addi into a single one:
7604// addi rM, r13, sym@le + imm
7606 if (N->getMachineOpcode() != PPC::ADDI8)
7607 return;
7608
7609 // InitialADDI is the addi feeding into N (also an addi), and the addi that
7610 // we want optimized out.
7611 SDValue InitialADDI = N->getOperand(0);
7612
7613 if (!isEligibleToFoldADDIForLocalExecAccesses(DAG, InitialADDI))
7614 return;
7615
7616 // At this point, InitialADDI can be folded into a non-TOC-based local-exec
7617 // access. The first operand of InitialADDI should be the thread pointer,
7618 // which has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7619 SDValue TPRegNode = InitialADDI.getOperand(0);
7620 [[maybe_unused]] RegisterSDNode *TPReg = dyn_cast<RegisterSDNode>(TPRegNode.getNode());
7621 [[maybe_unused]] const PPCSubtarget &Subtarget =
7623 assert((TPReg && (TPReg->getReg() == Subtarget.getThreadPointerRegister())) &&
7624 "Expecting the first operand to be a thread pointer for folding addi "
7625 "in local-exec accesses!");
7626
7627 // The second operand of the InitialADDI should be the global TLS address
7628 // (the local-exec TLS variable), with the MO_TPREL_FLAG target flag.
7629 // This has been checked in isEligibleToFoldADDIForLocalExecAccesses().
7630 SDValue TLSVarNode = InitialADDI.getOperand(1);
7631 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(TLSVarNode);
7632 assert(GA && "Expecting a valid GlobalAddressSDNode when folding addi into "
7633 "local-exec accesses!");
7634 unsigned TargetFlags = GA->getTargetFlags();
7635
7636 // The second operand of the addi that we want to preserve will be an
7637 // immediate. We add this immediate, together with the address of the TLS
7638 // variable found in InitialADDI, in order to preserve the correct TLS address
7639 // information during assembly printing. The offset is likely to be non-zero
7640 // when we end up in this case.
7641 int Offset = N->getConstantOperandVal(1);
7642 TLSVarNode = DAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA), MVT::i64,
7643 Offset, TargetFlags);
7644
7645 (void)DAG->UpdateNodeOperands(N, TPRegNode, TLSVarNode);
7646 if (InitialADDI.getNode()->use_empty())
7647 DAG->RemoveDeadNode(InitialADDI.getNode());
7648}
7649
7650void PPCDAGToDAGISel::PeepholePPC64() {
7651 SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end();
7652 bool HasAIXSmallLocalExecTLS = Subtarget->hasAIXSmallLocalExecTLS();
7653
7654 while (Position != CurDAG->allnodes_begin()) {
7655 SDNode *N = &*--Position;
7656 // Skip dead nodes and any non-machine opcodes.
7657 if (N->use_empty() || !N->isMachineOpcode())
7658 continue;
7659
7660 if (isVSXSwap(SDValue(N, 0)))
7661 reduceVSXSwap(N, CurDAG);
7662
7663 // This optimization is performed for non-TOC-based local-exec accesses.
7664 if (HasAIXSmallLocalExecTLS)
7666
7667 unsigned FirstOp;
7668 unsigned StorageOpcode = N->getMachineOpcode();
7669 bool RequiresMod4Offset = false;
7670
7671 switch (StorageOpcode) {
7672 default: continue;
7673
7674 case PPC::LWA:
7675 case PPC::LD:
7676 case PPC::DFLOADf64:
7677 case PPC::DFLOADf32:
7678 RequiresMod4Offset = true;
7679 [[fallthrough]];
7680 case PPC::LBZ:
7681 case PPC::LBZ8:
7682 case PPC::LFD:
7683 case PPC::LFS:
7684 case PPC::LHA:
7685 case PPC::LHA8:
7686 case PPC::LHZ:
7687 case PPC::LHZ8:
7688 case PPC::LWZ:
7689 case PPC::LWZ8:
7690 FirstOp = 0;
7691 break;
7692
7693 case PPC::STD:
7694 case PPC::DFSTOREf64:
7695 case PPC::DFSTOREf32:
7696 RequiresMod4Offset = true;
7697 [[fallthrough]];
7698 case PPC::STB:
7699 case PPC::STB8:
7700 case PPC::STFD:
7701 case PPC::STFS:
7702 case PPC::STH:
7703 case PPC::STH8:
7704 case PPC::STW:
7705 case PPC::STW8:
7706 FirstOp = 1;
7707 break;
7708 }
7709
7710 // If this is a load or store with a zero offset, or within the alignment,
7711 // we may be able to fold an add-immediate into the memory operation.
7712 // The check against alignment is below, as it can't occur until we check
7713 // the arguments to N
7714 if (!isa<ConstantSDNode>(N->getOperand(FirstOp)))
7715 continue;
7716
7717 SDValue Base = N->getOperand(FirstOp + 1);
7718 if (!Base.isMachineOpcode())
7719 continue;
7720
7721 unsigned Flags = 0;
7722 bool ReplaceFlags = true;
7723
7724 // When the feeding operation is an add-immediate of some sort,
7725 // determine whether we need to add relocation information to the
7726 // target flags on the immediate operand when we fold it into the
7727 // load instruction.
7728 //
7729 // For something like ADDItocL8, the relocation information is
7730 // inferred from the opcode; when we process it in the AsmPrinter,
7731 // we add the necessary relocation there. A load, though, can receive
7732 // relocation from various flavors of ADDIxxx, so we need to carry
7733 // the relocation information in the target flags.
7734 switch (Base.getMachineOpcode()) {
7735 default: continue;
7736
7737 case PPC::ADDI8:
7738 case PPC::ADDI:
7739 // In some cases (such as TLS) the relocation information
7740 // is already in place on the operand, so copying the operand
7741 // is sufficient.
7742 ReplaceFlags = false;
7743 break;
7744 case PPC::ADDIdtprelL:
7746 break;
7747 case PPC::ADDItlsldL:
7749 break;
7750 case PPC::ADDItocL8:
7752 break;
7753 }
7754
7755 SDValue ImmOpnd = Base.getOperand(1);
7756
7757 // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
7758 // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
7759 // we might have needed different @ha relocation values for the offset
7760 // pointers).
7761 int MaxDisplacement = 7;
7762 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7763 const GlobalValue *GV = GA->getGlobal();
7764 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7765 MaxDisplacement = std::min((int)Alignment.value() - 1, MaxDisplacement);
7766 }
7767
7768 bool UpdateHBase = false;
7769 SDValue HBase = Base.getOperand(0);
7770
7771 int Offset = N->getConstantOperandVal(FirstOp);
7772 if (ReplaceFlags) {
7773 if (Offset < 0 || Offset > MaxDisplacement) {
7774 // If we have a addi(toc@l)/addis(toc@ha) pair, and the addis has only
7775 // one use, then we can do this for any offset, we just need to also
7776 // update the offset (i.e. the symbol addend) on the addis also.
7777 if (Base.getMachineOpcode() != PPC::ADDItocL8)
7778 continue;
7779
7780 if (!HBase.isMachineOpcode() ||
7781 HBase.getMachineOpcode() != PPC::ADDIStocHA8)
7782 continue;
7783
7784 if (!Base.hasOneUse() || !HBase.hasOneUse())
7785 continue;
7786
7787 SDValue HImmOpnd = HBase.getOperand(1);
7788 if (HImmOpnd != ImmOpnd)
7789 continue;
7790
7791 UpdateHBase = true;
7792 }
7793 } else {
7794 // Global addresses can be folded, but only if they are sufficiently
7795 // aligned.
7796 if (RequiresMod4Offset) {
7797 if (GlobalAddressSDNode *GA =
7798 dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7799 const GlobalValue *GV = GA->getGlobal();
7800 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7801 if (Alignment < 4)
7802 continue;
7803 }
7804 }
7805
7806 // If we're directly folding the addend from an addi instruction, then:
7807 // 1. In general, the offset on the memory access must be zero.
7808 // 2. If the addend is a constant, then it can be combined with a
7809 // non-zero offset, but only if the result meets the encoding
7810 // requirements.
7811 if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
7812 Offset += C->getSExtValue();
7813
7814 if (RequiresMod4Offset && (Offset % 4) != 0)
7815 continue;
7816
7817 if (!isInt<16>(Offset))
7818 continue;
7819
7820 ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
7821 ImmOpnd.getValueType());
7822 } else if (Offset != 0) {
7823 // This optimization is performed for non-TOC-based local-exec accesses.
7824 if (HasAIXSmallLocalExecTLS &&
7826 // Add the non-zero offset information into the load or store
7827 // instruction to be used for non-TOC-based local-exec accesses.
7828 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd);
7829 assert(GA && "Expecting a valid GlobalAddressSDNode when folding "
7830 "addi into local-exec accesses!");
7831 ImmOpnd = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
7832 MVT::i64, Offset,
7833 GA->getTargetFlags());
7834 } else
7835 continue;
7836 }
7837 }
7838
7839 // We found an opportunity. Reverse the operands from the add
7840 // immediate and substitute them into the load or store. If
7841 // needed, update the target flags for the immediate operand to
7842 // reflect the necessary relocation information.
7843 LLVM_DEBUG(dbgs() << "Folding add-immediate into mem-op:\nBase: ");
7844 LLVM_DEBUG(Base->dump(CurDAG));
7845 LLVM_DEBUG(dbgs() << "\nN: ");
7846 LLVM_DEBUG(N->dump(CurDAG));
7847 LLVM_DEBUG(dbgs() << "\n");
7848
7849 // If the relocation information isn't already present on the
7850 // immediate operand, add it now.
7851 if (ReplaceFlags) {
7852 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
7853 SDLoc dl(GA);
7854 const GlobalValue *GV = GA->getGlobal();
7855 Align Alignment = GV->getPointerAlignment(CurDAG->getDataLayout());
7856 // We can't perform this optimization for data whose alignment
7857 // is insufficient for the instruction encoding.
7858 if (Alignment < 4 && (RequiresMod4Offset || (Offset % 4) != 0)) {
7859 LLVM_DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n");
7860 continue;
7861 }
7862 ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags);
7863 } else if (ConstantPoolSDNode *CP =
7864 dyn_cast<ConstantPoolSDNode>(ImmOpnd)) {
7865 const Constant *C = CP->getConstVal();
7866 ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlign(),
7867 Offset, Flags);
7868 }
7869 }
7870
7871 if (FirstOp == 1) // Store
7872 (void)CurDAG->UpdateNodeOperands(N, N->getOperand(0), ImmOpnd,
7873 Base.getOperand(0), N->getOperand(3));
7874 else // Load
7875 (void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
7876 N->getOperand(2));
7877
7878 if (UpdateHBase)
7879 (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
7880 ImmOpnd);
7881
7882 // The add-immediate may now be dead, in which case remove it.
7883 if (Base.getNode()->use_empty())
7884 CurDAG->RemoveDeadNode(Base.getNode());
7885 }
7886}
7887
7888/// createPPCISelDag - This pass converts a legalized DAG into a
7889/// PowerPC-specific DAG, ready for instruction scheduling.
7890///
7892 CodeGenOptLevel OptLevel) {
7893 return new PPCDAGToDAGISel(TM, OptLevel);
7894}
unsigned SubReg
MachineBasicBlock MachineBasicBlock::iterator MBBI
amdgpu AMDGPU Register Bank Select
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define clEnumValN(ENUMVAL, FLAGNAME, DESC)
Definition: CommandLine.h:693
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
const HexagonInstrInfo * TII
static MaybeAlign getAlign(Value *Ptr)
Definition: IRBuilder.cpp:530
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:531
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static cl::opt< bool > UseBitPermRewriter("ppc-use-bit-perm-rewriter", cl::init(true), cl::desc("use aggressive ppc isel for bit permutations"), cl::Hidden)
static bool canOptimizeTLSDFormToXForm(SelectionDAG *CurDAG, SDValue Base)
static cl::opt< bool > EnableBranchHint("ppc-use-branch-hint", cl::init(true), cl::desc("Enable static hinting of branches on ppc"), cl::Hidden)
static bool isThreadPointerAcquisitionNode(SDValue Base, SelectionDAG *CurDAG)
static bool PeepholePPC64ZExtGather(SDValue Op32, SmallPtrSetImpl< SDNode * > &ToPromote)
static bool isLaneInsensitive(SDValue N)
static unsigned allUsesTruncate(SelectionDAG *CurDAG, SDNode *N)
static CodeModel::Model getCodeModel(const PPCSubtarget &Subtarget, const TargetMachine &TM, const SDNode *Node)
static void reduceVSXSwap(SDNode *N, SelectionDAG *DAG)
static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned &Imm)
static PPC::Predicate getPredicateForSetCC(ISD::CondCode CC, const EVT &VT, const PPCSubtarget *Subtarget)
static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert)
getCRIdxForSetCC - Return the index of the condition register field associated with the SetCC conditi...
static bool isEligibleToFoldADDIForLocalExecAccesses(SelectionDAG *DAG, SDValue ADDIToFold)
static bool isInt64Immediate(SDNode *N, uint64_t &Imm)
isInt64Immediate - This method tests to see if the node is a 64-bit constant operand.
static void foldADDIForLocalExecAccesses(SDNode *N, SelectionDAG *DAG)
static bool isInt32Immediate(SDNode *N, unsigned &Imm)
isInt32Immediate - This method tests to see if the node is a 32-bit constant operand.
static int findContiguousZerosAtLeast(uint64_t Imm, unsigned Num)
static unsigned getBranchHint(unsigned PCC, const FunctionLoweringInfo &FuncInfo, const SDValue &DestMBB)
static bool mayUseP9Setb(SDNode *N, const ISD::CondCode &CC, SelectionDAG *DAG, bool &NeedSwapOps, bool &IsUnCmp)
static cl::opt< bool > EnableTLSOpt("ppc-tls-opt", cl::init(true), cl::desc("Enable tls optimization peephole"), cl::Hidden)
static unsigned int getVCmpInst(MVT VecVT, ISD::CondCode CC, bool HasVSX, bool &Swap, bool &Negate)
static bool hasTocDataAttr(SDValue Val, unsigned PointerSize)
static cl::opt< ICmpInGPRType > CmpInGPR("ppc-gpr-icmps", cl::Hidden, cl::init(ICGPR_All), cl::desc("Specify the types of comparisons to emit GPR-only code for."), cl::values(clEnumValN(ICGPR_None, "none", "Do not modify integer comparisons."), clEnumValN(ICGPR_All, "all", "All possible int comparisons in GPRs."), clEnumValN(ICGPR_I32, "i32", "Only i32 comparisons in GPRs."), clEnumValN(ICGPR_I64, "i64", "Only i64 comparisons in GPRs."), clEnumValN(ICGPR_NonExtIn, "nonextin", "Only comparisons where inputs don't need [sz]ext."), clEnumValN(ICGPR_Zext, "zext", "Only comparisons with zext result."), clEnumValN(ICGPR_ZextI32, "zexti32", "Only i32 comparisons with zext result."), clEnumValN(ICGPR_ZextI64, "zexti64", "Only i64 comparisons with zext result."), clEnumValN(ICGPR_Sext, "sext", "Only comparisons with sext result."), clEnumValN(ICGPR_SextI32, "sexti32", "Only i32 comparisons with sext result."), clEnumValN(ICGPR_SextI64, "sexti64", "Only i64 comparisons with sext result.")))
static SDNode * selectI64ImmDirectPrefix(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
#define PASS_NAME
#define DEBUG_TYPE
static SDNode * selectI64ImmDirect(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned &InstCnt)
static cl::opt< bool > BPermRewriterNoMasking("ppc-bit-perm-rewriter-stress-rotates", cl::desc("stress rotate selection in aggressive ppc isel for " "bit permutations"), cl::Hidden)
static bool isSWTestOp(SDValue N)
static SDNode * selectI64Imm(SelectionDAG *CurDAG, const SDLoc &dl, uint64_t Imm, unsigned *InstCnt=nullptr)
ICmpInGPRType
@ ICGPR_ZextI32
@ ICGPR_I64
@ ICGPR_All
@ ICGPR_None
@ ICGPR_NonExtIn
@ ICGPR_Sext
@ ICGPR_I32
@ ICGPR_SextI64
@ ICGPR_ZextI64
@ ICGPR_SextI32
@ ICGPR_Zext
static bool isVSXSwap(SDValue N)
Module * Mod
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
Value * RHS
Value * LHS
support::ulittle16_t & Lo
Definition: aarch32.cpp:206
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
APInt rotr(unsigned rotateAmt) const
Rotate right by rotateAmt.
Definition: APInt.cpp:1124
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
MachineBasicBlock * getBasicBlock() const
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:220
BranchProbability getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const
Get an edge's probability, relative to other out-edges of the Src.
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
unsigned getPointerSize(unsigned AS=0) const
Layout pointer size in bytes, rounded up to a whole number of bytes.
Definition: DataLayout.cpp:750
A debug info location.
Definition: DebugLoc.h:33
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
BranchProbabilityInfo * BPI
MachineBasicBlock * MBB
MBB - The current block.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists.
static StringRef getMemConstraintName(ConstraintCode C)
Definition: InlineAsm.h:467
unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
This class is used to represent ISD::LOAD nodes.
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Metadata node.
Definition: Metadata.h:1067
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1428
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1434
Machine Value Type.
SimpleValueType SimpleTy
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
A description of a memory reference used in the backend.
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
PICLevel::Level getPICLevel() const
Returns the PIC level (small or large model)
Definition: Module.cpp:595
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setROPProtectionHashSaveIndex(int Idx)
static int getRecordFormOpcode(unsigned Opcode)
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
const PPCTargetLowering * getTargetLowering() const override
Definition: PPCSubtarget.h:146
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
MCRegister getThreadPointerRegister() const
Definition: PPCSubtarget.h:283
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetELF() const
Definition: PPCSubtarget.h:210
CodeModel::Model getCodeModel(const TargetMachine &TM, const GlobalValue *GV) const
Calculates the effective code model for argument GV.
bool isELFv2ABI() const
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
Common code between 32-bit and 64-bit PowerPC targets.
Register getReg() const
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
void dump() const
Dump this node, for debugging.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
SDVTList getVTList() const
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getMachineOpcode() const
unsigned getOpcode() const
unsigned getNumOperands() const
SelectionDAGISel - This is the common base class used for SelectionDAG-based pattern-matching instruc...
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, InlineAsm::ConstraintCode ConstraintID, std::vector< SDValue > &OutOps)
SelectInlineAsmMemoryOperand - Select the specified address as a target addressing mode,...
virtual void PostprocessISelDAG()
PostprocessISelDAG() - This hook allows the target to hack on the graph right after selection.
virtual void PreprocessISelDAG()
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
bool runOnMachineFunction(MachineFunction &MF) override
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDNode * SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT)
These are used for target selectors to mutate the specified node to have the specified return type,...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:531
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:532
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getRegister(unsigned Reg, EVT VT)
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDValue > Ops)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
ilist< SDNode >::iterator allnodes_iterator
Definition: SelectionDAG.h:534
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
iterator end() const
Definition: SmallPtrSet.h:385
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
iterator begin() const
Definition: SmallPtrSet.h:380
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
unsigned getID() const
Return the register class ID number.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition: Use.cpp:31
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5219
An efficient, type-erasing, non-owning reference to a callable.
#define INT64_MAX
Definition: DataTypes.h:71
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BRIND
BRIND - Indirect branch.
Definition: ISDOpcodes.h:1052
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition: ISDOpcodes.h:158
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isBitwiseLogicOp(unsigned Opcode)
Whether this is bitwise logic opcode.
Definition: ISDOpcodes.h:1415
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1556
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1562
@ MO_TLSLD_LO
Definition: PPC.h:184
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_DTPREL_LO
These values identify relocations on immediates folded into memory operations.
Definition: PPC.h:183
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TOC_LO
Definition: PPC.h:185
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ SRL
These nodes represent PPC shifts.
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ CALL
CALL - A direct function call.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instructions such as LXVDSX,...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ FTSQRT
Test instruction for software square root.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
@ BR_NONTAKEN_HINT
Definition: PPCPredicates.h:64
@ Define
Register definition.
Reg
All possible values of the reg field in the ModR/M byte.
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:718
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:361
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition: MathExtras.h:136
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:246
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition: MathExtras.h:141
FunctionPass * createPPCISelDag(PPCTargetMachine &TM, CodeGenOptLevel OL)
createPPCISelDag - This pass converts a legalized DAG into a PowerPC-specific DAG,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2060
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
unsigned int NumVTs