//===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that Hexagon uses to lower LLVM code
// into a selection DAG.
//
//===----------------------------------------------------------------------===//

#include "HexagonISelLowering.h"
#include "Hexagon.h"
#include "HexagonMachineFunctionInfo.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "hexagon-lowering"

static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
  cl::init(true), cl::Hidden,
  cl::desc("Control jump table emission on Hexagon target"));

static cl::opt<bool>
    EnableHexSDNodeSched("enable-hexagon-sdnode-sched", cl::Hidden,
                         cl::desc("Enable Hexagon SDNode scheduling"));

static cl::opt<bool> EnableFastMath("ffast-math", cl::Hidden,
                                    cl::desc("Enable Fast Math processing"));

static cl::opt<int> MinimumJumpTables("minimum-jump-tables", cl::Hidden,
                                      cl::init(5),
                                      cl::desc("Set minimum jump tables"));

static cl::opt<int>
    MaxStoresPerMemcpyCL("max-store-memcpy", cl::Hidden, cl::init(6),
                         cl::desc("Max #stores to inline memcpy"));

static cl::opt<int>
    MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os", cl::Hidden, cl::init(4),
                                cl::desc("Max #stores to inline memcpy"));

static cl::opt<int>
    MaxStoresPerMemmoveCL("max-store-memmove", cl::Hidden, cl::init(6),
                          cl::desc("Max #stores to inline memmove"));

static cl::opt<int>
    MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os", cl::Hidden,
                                 cl::init(4),
                                 cl::desc("Max #stores to inline memmove"));

static cl::opt<int>
    MaxStoresPerMemsetCL("max-store-memset", cl::Hidden, cl::init(8),
                         cl::desc("Max #stores to inline memset"));

static cl::opt<int>
    MaxStoresPerMemsetOptSizeCL("max-store-memset-Os", cl::Hidden, cl::init(4),
                                cl::desc("Max #stores to inline memset"));

static cl::opt<bool> AlignLoads("hexagon-align-loads",
  cl::Hidden, cl::init(false),
  cl::desc("Rewrite unaligned loads as a pair of aligned loads"));

static cl::opt<bool>
    DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
                            cl::init(false),
                            cl::desc("Disable minimum alignment of 1 for "
                                     "arguments passed by value on stack"));
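
// These options are typically exercised through llc (an illustrative
// invocation, not a required workflow):
//   llc -march=hexagon -hexagon-emit-jump-tables=false test.ll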

namespace {

  class HexagonCCState : public CCState {
    unsigned NumNamedVarArgParams = 0;

  public:
    HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
                   SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
                   unsigned NumNamedArgs)
        : CCState(CC, IsVarArg, MF, locs, C),
          NumNamedVarArgParams(NumNamedArgs) {}
    unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
  };

} // end anonymous namespace


// Implement calling convention for Hexagon.

static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                       CCValAssign::LocInfo &LocInfo,
                       ISD::ArgFlagsTy &ArgFlags, CCState &State) {
  static const MCPhysReg ArgRegs[] = {
    Hexagon::R0, Hexagon::R1, Hexagon::R2,
    Hexagon::R3, Hexagon::R4, Hexagon::R5
  };
  const unsigned NumArgRegs = std::size(ArgRegs);
  unsigned RegNum = State.getFirstUnallocated(ArgRegs);

  // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
  if (RegNum != NumArgRegs && RegNum % 2 == 1)
    State.AllocateReg(ArgRegs[RegNum]);

  // Always return false here, as this function only makes sure that the first
  // unallocated register has an even register number and does not actually
  // allocate a register for the current argument.
  return false;
}
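
// Illustrative example: if an i32 argument has already taken R0 and the next
// argument is an i64, the generated calling convention runs CC_SkipOdd first,
// which burns R1 so that the i64 lands in the aligned pair R3:2 (D1); 64-bit
// values only live in even-aligned register pairs on Hexagon.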

#include "HexagonGenCallingConv.inc"


SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
      const {
  return SDValue();
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter. Sometimes what we are copying is the end of a
/// larger object, the part that does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile=*/false, /*AlwaysInline=*/false,
      /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
}

bool
HexagonTargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);

  if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
    return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
  return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
}

// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
// passed by value, the function prototype is modified to return void and
// the value is stored in memory pointed to by a pointer passed by the caller.
SDValue
HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                   bool IsVarArg,
                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                                   const SmallVectorImpl<SDValue> &OutVals,
                                   const SDLoc &dl, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values of ISD::RET
  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue Val = OutVals[i];

    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Val = DAG.getBitcast(VA.getLocVT(), Val);
        break;
      case CCValAssign::SExt:
        Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
        break;
      case CCValAssign::ZExt:
        Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
        break;
      case CCValAssign::AExt:
        Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
        break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Flag);

    // Guarantee that all emitted copies are stuck together with flags.
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
}

bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // If either no tail call or told not to tail call at all, don't.
  return CI->isTailCall();
}

Register HexagonTargetLowering::getRegisterByName(
      const char* RegName, LLT VT, const MachineFunction &) const {
  // Originally only r19 was supported (the Linux kernel uses it); all of the
  // named registers below are now recognized.
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("r0", Hexagon::R0)
                     .Case("r1", Hexagon::R1)
                     .Case("r2", Hexagon::R2)
                     .Case("r3", Hexagon::R3)
                     .Case("r4", Hexagon::R4)
                     .Case("r5", Hexagon::R5)
                     .Case("r6", Hexagon::R6)
                     .Case("r7", Hexagon::R7)
                     .Case("r8", Hexagon::R8)
                     .Case("r9", Hexagon::R9)
                     .Case("r10", Hexagon::R10)
                     .Case("r11", Hexagon::R11)
                     .Case("r12", Hexagon::R12)
                     .Case("r13", Hexagon::R13)
                     .Case("r14", Hexagon::R14)
                     .Case("r15", Hexagon::R15)
                     .Case("r16", Hexagon::R16)
                     .Case("r17", Hexagon::R17)
                     .Case("r18", Hexagon::R18)
                     .Case("r19", Hexagon::R19)
                     .Case("r20", Hexagon::R20)
                     .Case("r21", Hexagon::R21)
                     .Case("r22", Hexagon::R22)
                     .Case("r23", Hexagon::R23)
                     .Case("r24", Hexagon::R24)
                     .Case("r25", Hexagon::R25)
                     .Case("r26", Hexagon::R26)
                     .Case("r27", Hexagon::R27)
                     .Case("r28", Hexagon::R28)
                     .Case("r29", Hexagon::R29)
                     .Case("r30", Hexagon::R30)
                     .Case("r31", Hexagon::R31)
                     .Case("r1:0", Hexagon::D0)
                     .Case("r3:2", Hexagon::D1)
                     .Case("r5:4", Hexagon::D2)
                     .Case("r7:6", Hexagon::D3)
                     .Case("r9:8", Hexagon::D4)
                     .Case("r11:10", Hexagon::D5)
                     .Case("r13:12", Hexagon::D6)
                     .Case("r15:14", Hexagon::D7)
                     .Case("r17:16", Hexagon::D8)
                     .Case("r19:18", Hexagon::D9)
                     .Case("r21:20", Hexagon::D10)
                     .Case("r23:22", Hexagon::D11)
                     .Case("r25:24", Hexagon::D12)
                     .Case("r27:26", Hexagon::D13)
                     .Case("r29:28", Hexagon::D14)
                     .Case("r31:30", Hexagon::D15)
                     .Case("sp", Hexagon::R29)
                     .Case("fp", Hexagon::R30)
                     .Case("lr", Hexagon::R31)
                     .Case("p0", Hexagon::P0)
                     .Case("p1", Hexagon::P1)
                     .Case("p2", Hexagon::P2)
                     .Case("p3", Hexagon::P3)
                     .Case("sa0", Hexagon::SA0)
                     .Case("lc0", Hexagon::LC0)
                     .Case("sa1", Hexagon::SA1)
                     .Case("lc1", Hexagon::LC1)
                     .Case("m0", Hexagon::M0)
                     .Case("m1", Hexagon::M1)
                     .Case("usr", Hexagon::USR)
                     .Case("ugp", Hexagon::UGP)
                     .Case("cs0", Hexagon::CS0)
                     .Case("cs1", Hexagon::CS1)
                     .Default(Register());
  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}
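
// A sketch of the IR that reaches this hook (illustrative): the named-register
// intrinsics pass the register name as metadata, e.g.
//   %v = call i32 @llvm.read_register.i32(metadata !0)
//   !0 = !{!"r19"}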

/// LowerCallResult - Lower the result values of an ISD::CALL into the
/// appropriate copies out of appropriate physical registers. This assumes that
/// Chain/Glue are the input chain/glue to use, and that TheCall is the call
/// being lowered. Returns a SDNode with the same number of values as the
/// ISD::CALL.
SDValue HexagonTargetLowering::LowerCallResult(
    SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;

  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
  else
    CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    SDValue RetVal;
    if (RVLocs[i].getValVT() == MVT::i1) {
      // Return values of type MVT::i1 require special handling. The reason
      // is that MVT::i1 is associated with the PredRegs register class, but
      // values of that type are still returned in R0. Generate an explicit
      // copy into a predicate register from R0, and treat the value of the
      // predicate register as the call result.
      auto &MRI = DAG.getMachineFunction().getRegInfo();
      SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                       MVT::i32, Glue);
      // FR0 = (Value, Chain, Glue)
      Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
      SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
                                     FR0.getValue(0), FR0.getValue(2));
      // TPR = (Chain, Glue)
      // Don't glue this CopyFromReg, because it copies from a virtual
      // register. If it is glued to the call, InstrEmitter will add it
      // as an implicit def to the call (EmitMachineNode).
      RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
      Glue = TPR.getValue(1);
      Chain = TPR.getValue(0);
    } else {
      RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
                                  RVLocs[i].getValVT(), Glue);
      Glue = RetVal.getValue(2);
      Chain = RetVal.getValue(1);
    }
    InVals.push_back(RetVal.getValue(0));
  }

  return Chain;
}

/// LowerCall - Functions arguments are copied from virtual regs to
/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
SDValue
HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  bool DoesNotReturn = CLI.DoesNotReturn;

  bool IsStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
  if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);

  // Linux ABI treats var-arg calls the same way as regular ones.
  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
                        NumParams);

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
  else if (DisableArgsMinAlignment)
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
  else
    CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);

  if (CLI.IsTailCall) {
    bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                        IsVarArg, IsStructRet, StructAttrFlag, Outs,
                        OutVals, Ins, DAG);
    for (const CCValAssign &VA : ArgLocs) {
      if (VA.isMemLoc()) {
        CLI.IsTailCall = false;
        break;
      }
    }
    LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
                                         : "Argument must be passed on stack. "
                                           "Not eligible for Tail Call\n"));
  }
  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();
  SmallVector<std::pair<Register, SDValue>, 16> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  SDValue StackPtr =
      DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);

  bool NeedsArgAlign = false;
  Align LargestAlignSeen;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    // Record if we need > 8 byte alignment on an argument.
    bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    NeedsArgAlign |= ArgAlign;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
      default:
        // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
        llvm_unreachable("Unknown loc info!");
      case CCValAssign::Full:
        break;
      case CCValAssign::BCvt:
        Arg = DAG.getBitcast(VA.getLocVT(), Arg);
        break;
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
        break;
      case CCValAssign::AExt:
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
        break;
    }

    if (VA.isMemLoc()) {
      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
                                        StackPtr.getValueType());
      MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
      if (ArgAlign)
        LargestAlignSeen = std::max(
            LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
      if (Flags.isByVal()) {
        // The argument is a struct passed by value. According to LLVM, "Arg"
        // is a pointer.
        MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
                                                        Flags, DAG, dl));
      } else {
        MachinePointerInfo LocPI = MachinePointerInfo::getStack(
            DAG.getMachineFunction(), LocMemOffset);
        SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
        MemOpChains.push_back(S);
      }
      continue;
    }

    // Arguments that can be passed in a register must be kept in the
    // RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
  }

  if (NeedsArgAlign && Subtarget.hasV60Ops()) {
    LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
    LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    MFI.ensureMaxAlignment(LargestAlignSeen);
  }
  // Transform all store nodes into one single node because all store
  // nodes are independent of each other.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  SDValue Glue;
  if (!CLI.IsTailCall) {
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    Glue = Chain.getValue(1);
  }

  // Build a sequence of copy-to-reg nodes chained together with token
  // chain and flag operands which copy the outgoing args into registers.
  // The Glue is necessary since all emitted instructions must be
  // stuck together.
  if (!CLI.IsTailCall) {
    for (const auto &R : RegsToPass) {
      Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
      Glue = Chain.getValue(1);
    }
  } else {
    // For tail calls lower the arguments to the 'real' stack slot.
    //
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.
    //
    // Do not flag preceding copytoreg stuff together with the following stuff.
    Glue = SDValue();
    for (const auto &R : RegsToPass) {
      Chain = DAG.getCopyToReg(Chain, dl, R.first, R.second, Glue);
      Glue = Chain.getValue(1);
    }
    Glue = SDValue();
  }

  bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
  unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
  } else if (ExternalSymbolSDNode *S =
             dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (const auto &R : RegsToPass)
    Ops.push_back(DAG.getRegister(R.first, R.second.getValueType()));

  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (Glue.getNode())
    Ops.push_back(Glue);

  if (CLI.IsTailCall) {
    MFI.setHasTailCall();
    return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
  }

  // Set this here because we need to know this for "hasFP" in frame lowering.
  // The target-independent code calls getFrameRegister before setting it, and
  // getFrameRegister uses hasFP to determine whether the function has FP.
  MFI.setHasCalls(true);

  unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
  Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
  Glue = Chain.getValue(1);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, dl);
  Glue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
                         InVals, OutVals, Callee);
}

/// Returns true by value, base pointer and offset pointer and addressing
/// mode by reference if this node can be combined with a load / store to
/// form a post-indexed load / store.
bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
      SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
      SelectionDAG &DAG) const {
  LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
  if (!LSN)
    return false;
  EVT VT = LSN->getMemoryVT();
  if (!VT.isSimple())
    return false;
  bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
                     VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
                     VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
                     VT == MVT::v4i16 || VT == MVT::v8i8 ||
                     Subtarget.isHVXVectorType(VT.getSimpleVT());
  if (!IsLegalType)
    return false;

  if (Op->getOpcode() != ISD::ADD)
    return false;
  Base = Op->getOperand(0);
  Offset = Op->getOperand(1);
  if (!isa<ConstantSDNode>(Offset.getNode()))
    return false;
  AM = ISD::POST_INC;

  int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
  return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
}
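
// When this hook succeeds, a pattern like "v = *p; p += 4" can be selected as
// a single post-incremented access, e.g. (illustrative Hexagon assembly):
//   r1 = memw(r0++#4)
// isValidAutoIncImm() verifies that the constant fits the auto-increment
// range for the access size.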

SDValue
HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  unsigned LR = HRI.getRARegister();

  if ((Op.getOpcode() != ISD::INLINEASM &&
       Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
    return Op;

  unsigned NumOps = Op.getNumOperands();
  if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
    --NumOps;  // Ignore the flag operand.

  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    ++i;  // Skip the ID value.

    switch (InlineAsm::getKind(Flags)) {
      default:
        llvm_unreachable("Bad flags!");
      case InlineAsm::Kind_RegUse:
      case InlineAsm::Kind_Imm:
      case InlineAsm::Kind_Mem:
        i += NumVals;
        break;
      case InlineAsm::Kind_Clobber:
      case InlineAsm::Kind_RegDef:
      case InlineAsm::Kind_RegDefEarlyClobber: {
        for (; NumVals; --NumVals, ++i) {
          Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
          if (Reg != LR)
            continue;
          HMFI.setHasClobberLR(true);
          return Op;
        }
        break;
      }
    }
  }

  return Op;
}

// Need to transform ISD::PREFETCH into something that doesn't inherit
// all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
// SDNPMayStore.
SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Addr = Op.getOperand(1);
  // Lower it to DCFETCH($reg, #0). A "pat" will try to merge the offset in,
  // if the "reg" is fed by an "add".
  SDLoc DL(Op);
  SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
  return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
}
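
// DCFETCH eventually selects to the data-cache prefetch instruction, roughly
// "dcfetch(Rs + #u11:3)"; keeping a zero offset here leaves room for the
// selection pattern to fold in a constant offset from a feeding "add".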

// Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
// is marked as having side-effects, while the register read on Hexagon does
// not have any. TableGen refuses to accept the direct pattern from that node
// to the A4_tfrcpp.
SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDLoc dl(Op);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
  return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
}

SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
      SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  // Lower the hexagon_prefetch builtin to DCFETCH, as above.
  if (IntNo == Intrinsic::hexagon_prefetch) {
    SDValue Addr = Op.getOperand(2);
    SDLoc DL(Op);
    SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
  }
  return SDValue();
}

SDValue
HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  SDValue Align = Op.getOperand(2);
  SDLoc dl(Op);

  ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
  assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");

  unsigned A = AlignConst->getSExtValue();
  auto &HFI = *Subtarget.getFrameLowering();
  // "Zero" means natural stack alignment.
  if (A == 0)
    A = HFI.getStackAlign().value();

  LLVM_DEBUG({
    dbgs() << __func__ << " Align: " << A << " Size: ";
    Size.getNode()->dump(&DAG);
    dbgs() << "\n";
  });

  SDValue AC = DAG.getConstant(A, dl, MVT::i32);
  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
  SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);

  DAG.ReplaceAllUsesOfValueWith(Op, AA);
  return AA;
}

SDValue HexagonTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Linux ABI treats var-arg calls the same way as regular ones.
  bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
                        *DAG.getContext(),
                        MF.getFunction().getFunctionType()->getNumParams());

  if (Subtarget.useHVXOps())
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
  else if (DisableArgsMinAlignment)
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
  else
    CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);

  // For LLVM, in the case when returning a struct by value (>8byte),
  // the first argument is a pointer that points to the location on caller's
  // stack where the return value will be stored. For Hexagon, the location on
  // caller's stack is passed only when the struct size is smaller than or
  // equal to 8 bytes. If not, no address will be passed into the callee and
  // the callee returns the result directly through R0/R1.
  auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
    switch (RC.getID()) {
      case Hexagon::IntRegsRegClassID:
        return Reg - Hexagon::R0 + 1;
      case Hexagon::DoubleRegsRegClassID:
        return (Reg - Hexagon::D0 + 1) * 2;
      case Hexagon::HvxVRRegClassID:
        return Reg - Hexagon::V0 + 1;
      case Hexagon::HvxWRRegClassID:
        return (Reg - Hexagon::W0 + 1) * 2;
    }
    llvm_unreachable("Unexpected register class");
  };

  auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
  auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
  HFL.FirstVarArgSavedReg = 0;
  HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    ISD::ArgFlagsTy Flags = Ins[i].Flags;
    bool ByVal = Flags.isByVal();

    // Arguments passed in registers:
    // 1. 32- and 64-bit values and HVX vectors are passed directly,
    // 2. Large structs are passed via an address, and the address is
    //    passed in a register.
    if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
      llvm_unreachable("ByValSize must be bigger than 8 bytes");

    bool InReg = VA.isRegLoc() &&
                 (!ByVal || (ByVal && Flags.getByValSize() > 8));

    if (InReg) {
      MVT RegVT = VA.getLocVT();
      if (VA.getLocInfo() == CCValAssign::BCvt)
        RegVT = VA.getValVT();

      const TargetRegisterClass *RC = getRegClassFor(RegVT);
      Register VReg = MRI.createVirtualRegister(RC);
      SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);

      // Treat values of type MVT::i1 specially: they are passed in
      // registers of type i32, but they need to remain as values of
      // type i1 for consistency of the argument lowering.
      if (VA.getValVT() == MVT::i1) {
        assert(RegVT.getSizeInBits() <= 32);
        SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
                                Copy, DAG.getConstant(1, dl, RegVT));
        Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
                            ISD::SETNE);
      } else {
#ifndef NDEBUG
        unsigned RegSize = RegVT.getSizeInBits();
        assert(RegSize == 32 || RegSize == 64 ||
               Subtarget.isHVXVectorType(RegVT));
#endif
      }
      InVals.push_back(Copy);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
    } else {
      assert(VA.isMemLoc() && "Argument should be passed in memory");

      // If it's a byval parameter, then we need to compute the
      // "real" size, not the size of the pointer.
      unsigned ObjSize = Flags.isByVal()
                            ? Flags.getByValSize()
                            : VA.getLocVT().getStoreSizeInBits() / 8;

      // Create the frame index object for this incoming parameter.
      int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
      int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
      SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);

      if (Flags.isByVal()) {
        // If it's a pass-by-value aggregate, then do not dereference the stack
        // location. Instead, we should generate a reference to the stack
        // location.
        InVals.push_back(FIN);
      } else {
        SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                MachinePointerInfo::getFixedStack(MF, FI, 0));
        InVals.push_back(L);
      }
    }
  }

  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
      MRI.addLiveIn(Hexagon::R0+i);
  }

  if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
    HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));

    // Create the frame index for the start of the register saved area.
    int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
    bool RequiresPadding = (NumVarArgRegs & 1);
    int RegSaveAreaSizePlusPadding = RequiresPadding
                                        ? (NumVarArgRegs + 1) * 4
                                        : NumVarArgRegs * 4;

    if (RegSaveAreaSizePlusPadding > 0) {
      // The offset to the saved register area should be 8-byte aligned.
      int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
      if (RegAreaStart % 8)
        RegAreaStart = (RegAreaStart + 7) & -8;

      int RegSaveAreaFrameIndex =
          MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
      HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);

      // This will point to the next argument passed via stack.
      int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      HMFI.setVarArgsFrameIndex(FI);
    } else {
      // This will point to the next argument passed via stack, when
      // there is no saved register area.
      int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
      int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
      HMFI.setRegSavedAreaStartFrameIndex(FI);
      HMFI.setVarArgsFrameIndex(FI);
    }
  }


  if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
    // This will point to the next argument passed via stack.
    int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    HMFI.setVarArgsFrameIndex(FI);
  }

  return Chain;
}

SDValue
HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  // VASTART stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  MachineFunction &MF = DAG.getMachineFunction();
  HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
  SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  if (!Subtarget.isEnvironmentMusl()) {
    return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }
  auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
  auto &HFL = *Subtarget.getFrameLowering();
  SDLoc DL(Op);
  SmallVector<SDValue, 8> MemOps;

  // Get the frame index of the va_list.
  SDValue FIN = Op.getOperand(1);

  // If the first vararg register is odd, add 4 bytes to the start of the
  // saved register area to point to the first register location. This is
  // because the saved register area has to be 8-byte aligned. In case of an
  // odd start register, there will be 4 bytes of padding at the beginning of
  // the saved register area. If all registers are used up, the following
  // condition will handle it correctly.
  SDValue SavedRegAreaStartFrameIndex =
      DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);

  auto PtrVT = getPointerTy(DAG.getDataLayout());

  if (HFL.FirstVarArgSavedReg & 1)
    SavedRegAreaStartFrameIndex =
        DAG.getNode(ISD::ADD, DL, PtrVT,
                    DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
                                      MVT::i32),
                    DAG.getIntPtrConstant(4, DL));

  // Store the saved register area start pointer.
  SDValue Store =
      DAG.getStore(Op.getOperand(0), DL,
                   SavedRegAreaStartFrameIndex,
                   FIN, MachinePointerInfo(SV));
  MemOps.push_back(Store);

  // Store the saved register area end pointer.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
                    FIN, DAG.getIntPtrConstant(4, DL));
  Store = DAG.getStore(Op.getOperand(0), DL,
                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
                                         PtrVT),
                       FIN, MachinePointerInfo(SV, 4));
  MemOps.push_back(Store);

  // Store the overflow area pointer.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
                    FIN, DAG.getIntPtrConstant(4, DL));
  Store = DAG.getStore(Op.getOperand(0), DL,
                       DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
                                         PtrVT),
                       FIN, MachinePointerInfo(SV, 8));
  MemOps.push_back(Store);

  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
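
// For reference, the three stores above lay out the Musl Hexagon va_list,
// which is effectively (a sketch; field names are illustrative, not the
// verbatim libc definition):
//   struct __va_list {
//     void *__current_saved_reg_area_pointer; // next register-save slot
//     void *__saved_reg_area_end_pointer;     // end of the register-save area
//     void *__overflow_area_pointer;          // next stack (overflow) slot
//   };
// LowerVACOPY below copies all 12 bytes of this structure at once.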

SDValue
HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  // Assert that the linux ABI is enabled for the current compilation.
  assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
  SDValue Chain = Op.getOperand(0);
  SDValue DestPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);
  // The va_list is 12 bytes (three pointers), so copy all 12 bytes from the
  // source va_list to the destination.
  return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
                       DAG.getIntPtrConstant(12, DL), Align(4),
                       /*isVolatile*/ false, false, false,
                       MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
}

SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  const SDLoc &dl(Op);
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  MVT ResTy = ty(Op);
  MVT OpTy = ty(LHS);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    return DAG.getSetCC(dl, ResTy,
                        DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
                        DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
  }

  // Treat all other vector types as legal.
  if (ResTy.isVector())
    return Op;

  // Comparisons of short integers should use sign-extend, not zero-extend,
  // since we can represent small negative values in the compare instructions.
  // The LLVM default is to use zero-extend arbitrarily in these cases.
  auto isSExtFree = [this](SDValue N) {
    switch (N.getOpcode()) {
      case ISD::TRUNCATE: {
        // A sign-extend of a truncate of a sign-extend is free.
        SDValue Op = N.getOperand(0);
        if (Op.getOpcode() != ISD::AssertSext)
          return false;
        EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
        unsigned ThisBW = ty(N).getSizeInBits();
        unsigned OrigBW = OrigTy.getSizeInBits();
        // The type that was sign-extended to get the AssertSext must be
        // narrower than the type of N (so that N still has the same value
        // as the original).
        return ThisBW >= OrigBW;
      }
      case ISD::LOAD:
        // We have sign-extended loads.
        return true;
    }
    return false;
  };

  if (OpTy == MVT::i8 || OpTy == MVT::i16) {
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
    bool IsNegative = C && C->getAPIntValue().isNegative();
    if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
      return DAG.getSetCC(dl, ResTy,
                          DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
                          DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
  }

  return SDValue();
}
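
// Illustration: for a comparison like "i16 %x < -1", sign-extending both
// sides to i32 keeps the -1 as 0xffffffff, so this maps onto a single 32-bit
// compare; zero-extension would turn -1 into 0xffff and change the result.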

SDValue
HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue PredOp = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
  MVT OpTy = ty(Op1);
  const SDLoc &dl(Op);

  if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
    MVT ElemTy = OpTy.getVectorElementType();
    assert(ElemTy.isScalarInteger());
    MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
                                  OpTy.getVectorNumElements());
    // Generate (trunc (select (_, sext, sext))).
    return DAG.getSExtOrTrunc(
              DAG.getSelect(dl, WideTy, PredOp,
                            DAG.getSExtOrTrunc(Op1, dl, WideTy),
                            DAG.getSExtOrTrunc(Op2, dl, WideTy)),
              dl, OpTy);
  }

  return SDValue();
}

SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
  EVT ValTy = Op.getValueType();
  ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
  Constant *CVal = nullptr;
  bool isVTi1Type = false;
  if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
    if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
      IRBuilder<> IRB(CV->getContext());
      SmallVector<Constant*, 128> NewConst;
      unsigned VecLen = CV->getNumOperands();
      assert(isPowerOf2_32(VecLen) &&
             "conversion only supported for pow2 VectorSize");
      for (unsigned i = 0; i < VecLen; ++i)
        NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));

      CVal = ConstantVector::get(NewConst);
      isVTi1Type = true;
    }
  }
  Align Alignment = CPN->getAlign();
  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;

  unsigned Offset = 0;
  SDValue T;
  if (CPN->isMachineConstantPoolEntry())
    T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
                                  Offset, TF);
  else if (isVTi1Type)
    T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
  else
    T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
                                  TF);

  assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
         "Inconsistent target flag encountered");

  if (IsPositionIndependent)
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
  return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
}

SDValue
HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  int Idx = cast<JumpTableSDNode>(Op)->getIndex();
  if (isPositionIndependent()) {
    SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
  }

  SDValue T = DAG.getTargetJumpTable(Idx, VT);
  return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
}

SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  Register Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}

SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
                                         HRI.getFrameRegister(), VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());
  return FrameAddr;
}

SDValue
HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
  SDLoc dl(Op);
  return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}

SDValue
HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  auto *GAN = cast<GlobalAddressSDNode>(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  auto *GV = GAN->getGlobal();
  int64_t Offset = GAN->getOffset();

  auto &HLOF = *HTM.getObjFileLowering();
  Reloc::Model RM = HTM.getRelocationModel();

  if (RM == Reloc::Static) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
    const GlobalObject *GO = GV->getAliaseeObject();
    if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
      return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
    return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
  }

  bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  if (UsePCRel) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
                                            HexagonII::MO_PCREL);
    return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
  }

  // Use GOT index.
  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
  SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
  SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
  return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
}

SDValue
HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDLoc dl(Op);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  Reloc::Model RM = HTM.getRelocationModel();
  if (RM == Reloc::Static) {
    SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
    return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
  }

  SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
}

SDValue
HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
      const {
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
                                               HexagonII::MO_PCREL);
  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
}

SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
      GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
      unsigned char OperandFlags) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc dl(GA);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);
  // Create the operands for the call. The operands should contain, in order:
  // 1. Chain SDValue
  // 2. Callee, which in this case is the global address value.
  // 3. Registers live into the call. In this case it is just R0, as we
  //    have a single argument to be passed.
  // 4. Glue.
  // Note: The order is important.

  const auto &HRI = *Subtarget.getRegisterInfo();
  const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
  assert(Mask && "Missing call preserved mask for calling convention");
  SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
                    DAG.getRegisterMask(Mask), Glue };
  Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);

  // Inform MFI that the function has calls.
  MFI.setAdjustsStack(true);

  Glue = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
}

//
// Lower using the initial-exec model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);

  bool IsPositionIndependent = isPositionIndependent();
  unsigned char TF =
      IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;

  // First generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
                                           Offset, TF);

  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  if (IsPositionIndependent) {
    // Generate the GOT pointer in case of position independent code.
    SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);

    // Add the TLS symbol address to the GOT pointer. This gives a
    // GOT-relative relocation for the symbol.
    Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
  }

  // Load the offset value for the TLS symbol. This offset is relative to
  // the thread pointer.
  SDValue LoadOffset =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());

  // The address of the thread-local variable is the thread pointer plus the
  // offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
}

//
// Lower using the local-exec model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Get the thread pointer.
  SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
  // Generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_TPREL);
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);

  // The address of the thread-local variable is the thread pointer plus the
  // offset of the variable.
  return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
}

//
// Lower using the general-dynamic model for TLS addresses.
//
SDValue
HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
      SelectionDAG &DAG) const {
  SDLoc dl(GA);
  int64_t Offset = GA->getOffset();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // First generate the TLS symbol address.
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
                                           HexagonII::MO_GDGOT);

  // Then, generate the GOT pointer.
  SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);

  // Add the TLS symbol and the GOT pointer.
  SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
  SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);

  // Copy over the argument to R0.
  SDValue InFlag;
  Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
  InFlag = Chain.getValue(1);

  unsigned Flags = DAG.getSubtarget<HexagonSubtarget>().useLongCalls()
                       ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
                       : HexagonII::MO_GDPLT;

  return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
                           Hexagon::R0, Flags);
}

//
// Lower TLS addresses.
//
// For now, for the dynamic models, we only support the general-dynamic model.
//
SDValue
HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
      SelectionDAG &DAG) const {
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);

  switch (HTM.getTLSModel(GA->getGlobal())) {
    case TLSModel::GeneralDynamic:
    case TLSModel::LocalDynamic:
      return LowerToTLSGeneralDynamicModel(GA, DAG);
    case TLSModel::InitialExec:
      return LowerToTLSInitialExecModel(GA, DAG);
    case TLSModel::LocalExec:
      return LowerToTLSLocalExecModel(GA, DAG);
  }
  llvm_unreachable("Bogus TLS model");
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//

HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
                                             const HexagonSubtarget &ST)
    : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
      Subtarget(ST) {
  auto &HRI = *Subtarget.getRegisterInfo();

  setPrefLoopAlignment(Align(4));
  setPrefFunctionAlignment(Align(4));
  setMinFunctionAlignment(Align(2));
  setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
  setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
  setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);

  if (EnableHexSDNodeSched)
    setSchedulingPreference(Sched::VLIW);
  else
    setSchedulingPreference(Sched::Source);

  // Limits for inline expansion of memcpy/memmove
  MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
  MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
  MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
  MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
  MaxStoresPerMemset = MaxStoresPerMemsetCL;
  MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;

  //
  // Set up register classes.
  //

  addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
  addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
  addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
  addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
  addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
  addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);

  addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
  addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);

  //
  // Handling of scalar operations.
  //
  // All operations default to "legal", except:
  // - indexed loads and stores (pre-/post-incremented),
  // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
  //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
  //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
  //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
  // which default to "expand" for at least one type.

  // Misc operations.

  // Custom legalize GlobalAddress nodes into CONST32.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  // Hexagon needs to optimize cases with negative constants.
  setOperationAction(ISD::SETCC, MVT::i8,    Custom);
  setOperationAction(ISD::SETCC, MVT::i16,   Custom);
  setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
  setOperationAction(ISD::SETCC, MVT::v2i16, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAEND,   MVT::Other, Expand);
  setOperationAction(ISD::VAARG,   MVT::Other, Expand);
  if (Subtarget.isEnvironmentMusl())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  if (EmitJumpTables)
    setMinimumJumpTableEntries(MinimumJumpTables);
  else
    setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  for (unsigned LegalIntOp :
       {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
    setOperationAction(LegalIntOp, MVT::i32, Legal);
    setOperationAction(LegalIntOp, MVT::i64, Legal);
  }

  // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
  // but they only operate on i64.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::UADDO, VT, Custom);
    setOperationAction(ISD::USUBO, VT, Custom);
    setOperationAction(ISD::SADDO, VT, Expand);
    setOperationAction(ISD::SSUBO, VT, Expand);
    setOperationAction(ISD::ADDCARRY, VT, Custom);
    setOperationAction(ISD::SUBCARRY, VT, Custom);
  }

  // Popcount can count # of 1s in i64 but returns i32.

  for (unsigned IntExpOp :
       {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
        ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
        ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
        ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
    for (MVT VT : MVT::integer_valuetypes())
      setOperationAction(IntExpOp, VT, Expand);
  }

  for (unsigned FPExpOp :
       {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
        ISD::FPOW, ISD::FCOPYSIGN}) {
    for (MVT VT : MVT::fp_valuetypes())
      setOperationAction(FPExpOp, VT, Expand);
  }

  // No extending loads from i32.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
  }
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Turn FP extload into load/fpextend.
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);

  // Expand BR_CC and SELECT_CC for all integer and fp types.
  for (MVT VT : MVT::integer_valuetypes()) {
    setOperationAction(ISD::BR_CC,     VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::BR_CC,     VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  setOperationAction(ISD::BR_CC, MVT::i1, Expand);

  //
  // Handling of vector operations.
  //

  // Set the action for vector operations to "expand", then override it with
  // either "custom" or "legal" for specific cases.
  static const unsigned VectExpOps[] = {
    // Integer arithmetic:
    ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
    ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::SADDO,
    ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
    // Logical/bit:
    ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,      ISD::ROTR,
    ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
    // Floating point arithmetic/math functions:
    ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,       ISD::FDIV,
    ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,     ISD::FSIN,
    ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,     ISD::FLOG10,
    ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,    ISD::FRINT,
    ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR, ISD::FMINNUM,  ISD::FMAXNUM,
    ISD::FSINCOS,
    // Misc:
    ISD::BR_CC,   ISD::SELECT_CC, ISD::ConstantPool,
    // Vector:
    ISD::BUILD_VECTOR,       ISD::SCALAR_TO_VECTOR,
    ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
    ISD::EXTRACT_SUBVECTOR,  ISD::INSERT_SUBVECTOR,
    ISD::CONCAT_VECTORS,     ISD::VECTOR_SHUFFLE,
    ISD::SPLAT_VECTOR,
  };

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    for (unsigned VectExpOp : VectExpOps)
      setOperationAction(VectExpOp, VT, Expand);

    // Expand all extending loads and truncating stores:
    for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
      if (TargetVT == VT)
        continue;
      setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
      setTruncStoreAction(VT, TargetVT, Expand);
    }

    // Normalize all inputs to SELECT to be vectors of i32.
    if (VT.getVectorElementType() != MVT::i32) {
      MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, VT32);
    }
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Extending loads from (native) vectors of i8 into (native) vectors of i16
  // are legal.
  setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
  setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);

  // Types natively supported:
  for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
                       MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
    setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
    setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
    setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);

    setOperationAction(ISD::ADD, NativeVT, Legal);
    setOperationAction(ISD::SUB, NativeVT, Legal);
    setOperationAction(ISD::MUL, NativeVT, Legal);
    setOperationAction(ISD::AND, NativeVT, Legal);
    setOperationAction(ISD::OR,  NativeVT, Legal);
    setOperationAction(ISD::XOR, NativeVT, Legal);

    if (NativeVT.getVectorElementType() != MVT::i1)
      setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
  }

  for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::SMIN, VT, Legal);
    setOperationAction(ISD::SMAX, VT, Legal);
    setOperationAction(ISD::UMIN, VT, Legal);
    setOperationAction(ISD::UMAX, VT, Legal);
  }

  // Custom lower unaligned loads.
  // Also, for both loads and stores, verify the alignment of the address
  // in case it is a compile-time constant. This is a usability feature to
  // provide a meaningful error message to users.
  for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
                 MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom-lower load/stores of boolean vectors.
  for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v4i16,
                 MVT::v2i32}) {
    setCondCodeAction(ISD::SETNE,  VT, Expand);
    setCondCodeAction(ISD::SETLE,  VT, Expand);
    setCondCodeAction(ISD::SETGE,  VT, Expand);
    setCondCodeAction(ISD::SETLT,  VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
  }

  // Custom-lower bitcasts from i8 to v8i1.
  setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
  setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
  setOperationAction(ISD::VSELECT,        MVT::v4i8,  Custom);
  setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);

  // V5+.
  setOperationAction(ISD::FMA,  MVT::f64, Expand);
  setOperationAction(ISD::FADD, MVT::f64, Expand);
  setOperationAction(ISD::FSUB, MVT::f64, Expand);
  setOperationAction(ISD::FMUL, MVT::f64, Expand);

  setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
  setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);

  setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);

  // Special handling for half-precision floating point conversions.
  // Lower half float conversions into library calls.
  setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
  setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
  setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  setTruncStoreAction(MVT::f64, MVT::f16, Expand);

  // Handling of indexed loads/stores: default is "expand".
  //
  for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
                 MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
    setIndexedLoadAction(ISD::POST_INC, VT, Legal);
    setIndexedStoreAction(ISD::POST_INC, VT, Legal);
  }

  // Subtarget-specific operation actions.
  //
  if (Subtarget.hasV60Ops()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  }
  if (Subtarget.hasV66Ops()) {
    setOperationAction(ISD::FADD, MVT::f64, Legal);
    setOperationAction(ISD::FSUB, MVT::f64, Legal);
  }
  if (Subtarget.hasV67Ops()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMUL,    MVT::f64, Legal);
  }

  if (Subtarget.useHVXOps())
    initializeHVXLowering();

  computeRegisterProperties(&HRI);

1820  //
1821  // Library calls for unsupported operations
1822  //
1823  bool FastMath = EnableFastMath;
1824 
1825  setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
1826  setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
1827  setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
1828  setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
1829  setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
1830  setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
1831  setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
1832  setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
1833 
1834  setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
1835  setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
1836  setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
1837  setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
1838  setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
1839  setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
1840 
1841  // This is the only fast library function for sqrtd.
1842  if (FastMath)
1843  setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
1844 
1845  // Prefix is: nothing for "slow-math",
1846  // "fast2_" for V5+ fast-math double-precision
1847  // (actually, keep fast-math and fast-math2 separate for now)
1848  if (FastMath) {
1849  setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
1850  setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
1851  setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
1852  setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
1853  setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
1854  } else {
1855  setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
1856  setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
1857  setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
1858  setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
1859  setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
1860  }
1861 
1862  if (FastMath)
1863  setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
1864  else
1865  setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
1866 
1867  // Routines to handle fp16 storage type.
1868  setLibcallName(RTLIB::FPROUND_F32_F16, "__truncsfhf2");
1869  setLibcallName(RTLIB::FPROUND_F64_F16, "__truncdfhf2");
1870  setLibcallName(RTLIB::FPEXT_F16_F32, "__extendhfsf2");
1871 
1872  // These cause problems when the shift amount is non-constant.
1873  setLibcallName(RTLIB::SHL_I128, nullptr);
1874  setLibcallName(RTLIB::SRL_I128, nullptr);
1875  setLibcallName(RTLIB::SRA_I128, nullptr);
1876 }
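// Illustrative effect (not from the original source): with the "ffast-math"
// option defined above enabled, an f64 FADD that was marked Expand lowers to
// a call to __hexagon_fast_adddf3; without it, the same node lowers to a call
// to __hexagon_adddf3.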
1877 
1878 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
1879  switch ((HexagonISD::NodeType)Opcode) {
1880  case HexagonISD::ADDC: return "HexagonISD::ADDC";
1881  case HexagonISD::SUBC: return "HexagonISD::SUBC";
1882  case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
1883  case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
1884  case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
1885  case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
1886  case HexagonISD::CALL: return "HexagonISD::CALL";
1887  case HexagonISD::CALLnr: return "HexagonISD::CALLnr";
1888  case HexagonISD::CALLR: return "HexagonISD::CALLR";
1889  case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
1890  case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
1891  case HexagonISD::CONST32: return "HexagonISD::CONST32";
1892  case HexagonISD::CP: return "HexagonISD::CP";
1893  case HexagonISD::DCFETCH: return "HexagonISD::DCFETCH";
1894  case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
1895  case HexagonISD::TSTBIT: return "HexagonISD::TSTBIT";
1896  case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
1897  case HexagonISD::INSERT: return "HexagonISD::INSERT";
1898  case HexagonISD::JT: return "HexagonISD::JT";
1899  case HexagonISD::RET_FLAG: return "HexagonISD::RET_FLAG";
1900  case HexagonISD::TC_RETURN: return "HexagonISD::TC_RETURN";
1901  case HexagonISD::VASL: return "HexagonISD::VASL";
1902  case HexagonISD::VASR: return "HexagonISD::VASR";
1903  case HexagonISD::VLSR: return "HexagonISD::VLSR";
1904  case HexagonISD::MFSHL: return "HexagonISD::MFSHL";
1905  case HexagonISD::MFSHR: return "HexagonISD::MFSHR";
1906  case HexagonISD::SSAT: return "HexagonISD::SSAT";
1907  case HexagonISD::USAT: return "HexagonISD::USAT";
1908  case HexagonISD::SMUL_LOHI: return "HexagonISD::SMUL_LOHI";
1909  case HexagonISD::UMUL_LOHI: return "HexagonISD::UMUL_LOHI";
1910  case HexagonISD::USMUL_LOHI: return "HexagonISD::USMUL_LOHI";
1911  case HexagonISD::VEXTRACTW: return "HexagonISD::VEXTRACTW";
1912  case HexagonISD::VINSERTW0: return "HexagonISD::VINSERTW0";
1913  case HexagonISD::VROR: return "HexagonISD::VROR";
1914  case HexagonISD::READCYCLE: return "HexagonISD::READCYCLE";
1915  case HexagonISD::PTRUE: return "HexagonISD::PTRUE";
1916  case HexagonISD::PFALSE: return "HexagonISD::PFALSE";
1917  case HexagonISD::D2P: return "HexagonISD::D2P";
1918  case HexagonISD::P2D: return "HexagonISD::P2D";
1919  case HexagonISD::V2Q: return "HexagonISD::V2Q";
1920  case HexagonISD::Q2V: return "HexagonISD::Q2V";
1921  case HexagonISD::QCAT: return "HexagonISD::QCAT";
1922  case HexagonISD::QTRUE: return "HexagonISD::QTRUE";
1923  case HexagonISD::QFALSE: return "HexagonISD::QFALSE";
1924  case HexagonISD::TL_EXTEND: return "HexagonISD::TL_EXTEND";
1925  case HexagonISD::TL_TRUNCATE: return "HexagonISD::TL_TRUNCATE";
1926  case HexagonISD::TYPECAST: return "HexagonISD::TYPECAST";
1927  case HexagonISD::VALIGN: return "HexagonISD::VALIGN";
1928  case HexagonISD::VALIGNADDR: return "HexagonISD::VALIGNADDR";
1929  case HexagonISD::ISEL: return "HexagonISD::ISEL";
1930  case HexagonISD::OP_END: break;
1931  }
1932  return nullptr;
1933 }
1934 
1935 bool
1936 HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, Align NeedAlign,
1937  const SDLoc &dl, SelectionDAG &DAG) const {
1938  auto *CA = dyn_cast<ConstantSDNode>(Ptr);
1939  if (!CA)
1940  return true;
1941  unsigned Addr = CA->getZExtValue();
1942  Align HaveAlign =
1943  Addr != 0 ? Align(1ull << countTrailingZeros(Addr)) : NeedAlign;
1944  if (HaveAlign >= NeedAlign)
1945  return true;
1946 
1947  static int DK_MisalignedTrap = llvm::getNextAvailablePluginDiagnosticKind();
1948 
1949  struct DiagnosticInfoMisalignedTrap : public DiagnosticInfo {
1950  DiagnosticInfoMisalignedTrap(StringRef M)
1951  : DiagnosticInfo(DK_MisalignedTrap, DS_Remark), Msg(M) {}
1952  void print(DiagnosticPrinter &DP) const override {
1953  DP << Msg;
1954  }
1955  static bool classof(const DiagnosticInfo *DI) {
1956  return DI->getKind() == DK_MisalignedTrap;
1957  }
1958  StringRef Msg;
1959  };
1960 
1961  std::string ErrMsg;
1962  raw_string_ostream O(ErrMsg);
1963  O << "Misaligned constant address: " << format_hex(Addr, 10)
1964  << " has alignment " << HaveAlign.value()
1965  << ", but the memory access requires " << NeedAlign.value();
1966  if (DebugLoc DL = dl.getDebugLoc())
1967  DL.print(O << ", at ");
1968  O << ". The instruction has been replaced with a trap.";
1969 
1970  DAG.getContext()->diagnose(DiagnosticInfoMisalignedTrap(O.str()));
1971  return false;
1972 }
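// Illustrative example (not from the original source): for a 4-byte access at
// the constant address 0x1002, countTrailingZeros gives HaveAlign = 2, which
// is below NeedAlign = 4, so the remark "Misaligned constant address:
// 0x00001002 has alignment 2, but the memory access requires 4" is emitted
// and the caller replaces the access with a trap.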
1973 
1974 SDValue
1975 HexagonTargetLowering::replaceMemWithUndef(SDValue Op, SelectionDAG &DAG)
1976  const {
1977  const SDLoc &dl(Op);
1978  auto *LS = cast<LSBaseSDNode>(Op.getNode());
1979  assert(!LS->isIndexed() && "Not expecting indexed ops on constant address");
1980 
1981  SDValue Chain = LS->getChain();
1982  SDValue Trap = DAG.getNode(ISD::TRAP, dl, MVT::Other, Chain);
1983  if (LS->getOpcode() == ISD::LOAD)
1984  return DAG.getMergeValues({DAG.getUNDEF(ty(Op)), Trap}, dl);
1985  return Trap;
1986 }
1987 
1988 // Bit-reverse Load Intrinsic: Check if the instruction is a bit-reverse load
1989 // intrinsic.
1990 static bool isBrevLdIntrinsic(const Value *Inst) {
1991  unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
1992  return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
1993  ID == Intrinsic::hexagon_L2_loadri_pbr ||
1994  ID == Intrinsic::hexagon_L2_loadrh_pbr ||
1995  ID == Intrinsic::hexagon_L2_loadruh_pbr ||
1996  ID == Intrinsic::hexagon_L2_loadrb_pbr ||
1997  ID == Intrinsic::hexagon_L2_loadrub_pbr);
1998 }
1999 
2000 // Bit-reverse Load Intrinsic: Crawl up and figure out the object from the
2001 // previous instruction. So far we only handle bitcast, extractvalue and
2002 // bit-reverse load intrinsic instructions. Should we handle CGEP?
2003 static Value *getBrevLdObject(Value *V) {
2004  if (Operator::getOpcode(V) == Instruction::ExtractValue ||
2005  Operator::getOpcode(V) == Instruction::BitCast)
2006  V = cast<Operator>(V)->getOperand(0);
2007  else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
2008  V = cast<Instruction>(V)->getOperand(0);
2009  return V;
2010 }
2011 
2012 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
2013 // a back edge. If the back edge comes from the intrinsic itself, the incoming
2014 // edge is returned.
2015 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
2016  const BasicBlock *Parent = PN->getParent();
2017  int Idx = -1;
2018  for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
2019  BasicBlock *Blk = PN->getIncomingBlock(i);
2020  // Determine if the back edge originates from the intrinsic.
2021  if (Blk == Parent) {
2022  Value *BackEdgeVal = PN->getIncomingValue(i);
2023  Value *BaseVal;
2024  // Loop until we see the same Value again or we reach IntrBaseVal.
2025  do {
2026  BaseVal = BackEdgeVal;
2027  BackEdgeVal = getBrevLdObject(BackEdgeVal);
2028  } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
2029  // If the getBrevLdObject returns IntrBaseVal, we should return the
2030  // incoming edge.
2031  if (IntrBaseVal == BackEdgeVal)
2032  continue;
2033  Idx = i;
2034  break;
2035  } else // Set the node to incoming edge.
2036  Idx = i;
2037  }
2038  assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
2039  return PN->getIncomingValue(Idx);
2040 }
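// Illustrative example (not from the original source): for a single-block loop
//   %p = phi i8* [ %base, %entry ], [ %nextp, %loop ]
//   %r = call { i32, i8* } @llvm.hexagon.L2.loadri.pbr(i8* %p, i32 %mod)
//   %nextp = extractvalue { i32, i8* } %r, 1
// crawling the back-edge value %nextp leads back to %p (IntrBaseVal), so the
// incoming value %base is returned as the underlying object.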
2041 
2042 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
2043 // pointer points to, for the bit-reverse load intrinsic. Setting this to
2044 // memoperand might help alias analysis to figure out the dependencies.
2045 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
2046  Value *IntrBaseVal = V;
2047  Value *BaseVal;
2048  // Loop until getBrevLdObject returns the same Value, which implies we have
2049  // either figured out the object or hit a PHI.
2050  do {
2051  BaseVal = V;
2052  V = getBrevLdObject(V);
2053  } while (BaseVal != V);
2054 
2055  // Identify the object from PHINode.
2056  if (const PHINode *PN = dyn_cast<PHINode>(V))
2057  return returnEdge(PN, IntrBaseVal);
2058  // For non-PHI nodes, the object is the last value returned by getBrevLdObject.
2059  else
2060  return V;
2061 }
2062 
2063 /// Given an intrinsic, checks if on the target the intrinsic will need to map
2064 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
2065 /// true and store the intrinsic information into the IntrinsicInfo that was
2066 /// passed to the function.
2067 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
2068  const CallInst &I,
2069  MachineFunction &MF,
2070  unsigned Intrinsic) const {
2071  switch (Intrinsic) {
2072  case Intrinsic::hexagon_L2_loadrd_pbr:
2073  case Intrinsic::hexagon_L2_loadri_pbr:
2074  case Intrinsic::hexagon_L2_loadrh_pbr:
2075  case Intrinsic::hexagon_L2_loadruh_pbr:
2076  case Intrinsic::hexagon_L2_loadrb_pbr:
2077  case Intrinsic::hexagon_L2_loadrub_pbr: {
2078  Info.opc = ISD::INTRINSIC_W_CHAIN;
2079  auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
2080  auto &Cont = I.getCalledFunction()->getParent()->getContext();
2081  // The intrinsic function call is of the form { ElTy, i8* }
2082  // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
2083  // should be derived from ElTy.
2084  Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
2085  Info.memVT = MVT::getVT(ElTy);
2086  llvm::Value *BasePtrVal = I.getOperand(0);
2087  Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
2088  // The offset value comes through the Modifier register. For now, assume the
2089  // offset is 0.
2090  Info.offset = 0;
2091  Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
2092  Info.flags = MachineMemOperand::MOLoad;
2093  return true;
2094  }
2095  case Intrinsic::hexagon_V6_vgathermw:
2096  case Intrinsic::hexagon_V6_vgathermw_128B:
2097  case Intrinsic::hexagon_V6_vgathermh:
2098  case Intrinsic::hexagon_V6_vgathermh_128B:
2099  case Intrinsic::hexagon_V6_vgathermhw:
2100  case Intrinsic::hexagon_V6_vgathermhw_128B:
2101  case Intrinsic::hexagon_V6_vgathermwq:
2102  case Intrinsic::hexagon_V6_vgathermwq_128B:
2103  case Intrinsic::hexagon_V6_vgathermhq:
2104  case Intrinsic::hexagon_V6_vgathermhq_128B:
2105  case Intrinsic::hexagon_V6_vgathermhwq:
2106  case Intrinsic::hexagon_V6_vgathermhwq_128B: {
2107  const Module &M = *I.getParent()->getParent()->getParent();
2108  Info.opc = ISD::INTRINSIC_W_CHAIN;
2109  Type *VecTy = I.getArgOperand(1)->getType();
2110  Info.memVT = MVT::getVT(VecTy);
2111  Info.ptrVal = I.getArgOperand(0);
2112  Info.offset = 0;
2113  Info.align =
2114  MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
2115  Info.flags = MachineMemOperand::MOLoad |
2116  MachineMemOperand::MOStore |
2117  MachineMemOperand::MOVolatile;
2118  return true;
2119  }
2120  default:
2121  break;
2122  }
2123  return false;
2124 }
2125 
2126 bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
2127  return X.getValueType().isScalarInteger(); // 'tstbit'
2128 }
2129 
2130 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
2131  return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
2132 }
2133 
2134 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
2135  if (!VT1.isSimple() || !VT2.isSimple())
2136  return false;
2137  return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
2138 }
2139 
2140 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
2141  const MachineFunction &MF, EVT VT) const {
2142  return isOperationLegalOrCustom(ISD::FMA, VT);
2143 }
2144 
2145 // Should we expand the build vector with shuffles?
2146 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
2147  unsigned DefinedValues) const {
2148  return false;
2149 }
2150 
2151 bool HexagonTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2152  unsigned Index) const {
2153  assert(ResVT.getVectorElementType() == SrcVT.getVectorElementType());
2154  if (!ResVT.isSimple() || !SrcVT.isSimple())
2155  return false;
2156 
2157  MVT ResTy = ResVT.getSimpleVT(), SrcTy = SrcVT.getSimpleVT();
2158  if (ResTy.getVectorElementType() != MVT::i1)
2159  return true;
2160 
2161  // Non-HVX bool vectors are relatively cheap.
2162  return SrcTy.getVectorNumElements() <= 8;
2163 }
2164 
2165 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
2166  EVT VT) const {
2167  return true;
2168 }
2169 
2170 TargetLoweringBase::LegalizeTypeAction
2171 HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
2172  unsigned VecLen = VT.getVectorMinNumElements();
2173  MVT ElemTy = VT.getVectorElementType();
2174 
2175  if (VecLen == 1 || VT.isScalableVector())
2176  return TargetLoweringBase::TypeScalarizeVector;
2177 
2178  if (Subtarget.useHVXOps()) {
2179  unsigned Action = getPreferredHvxVectorAction(VT);
2180  if (Action != ~0u)
2181  return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
2182  }
2183 
2184  // Always widen (remaining) vectors of i1.
2185  if (ElemTy == MVT::i1)
2186  return TargetLoweringBase::TypeWidenVector;
2187  // Widen non-power-of-2 vectors. Such types cannot be split right now,
2188  // and computeRegisterProperties will override "split" with "widen",
2189  // which can cause other issues.
2190  if (!isPowerOf2_32(VecLen))
2191  return TargetLoweringBase::TypeWidenVector;
2192 
2193  return TargetLoweringBase::TypeSplitVector;
2194 }
2195 
2196 TargetLoweringBase::LegalizeAction
2197 HexagonTargetLowering::getCustomOperationAction(SDNode &Op) const {
2198  if (Subtarget.useHVXOps()) {
2199  unsigned Action = getCustomHvxOperationAction(Op);
2200  if (Action != ~0u)
2201  return static_cast<TargetLoweringBase::LegalizeAction>(Action);
2202  }
2203  return TargetLoweringBase::Legal;
2204 }
2205 
2206 std::pair<SDValue, int>
2207 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
2208  if (Addr.getOpcode() == ISD::ADD) {
2209  SDValue Op1 = Addr.getOperand(1);
2210  if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
2211  return { Addr.getOperand(0), CN->getSExtValue() };
2212  }
2213  return { Addr, 0 };
2214 }
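// Illustrative example (not from the original source): for an address of the
// form (add %x, Constant:i32<8>) this returns {%x, 8}; any other address is
// returned unchanged with offset 0.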
2215 
2216 // Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
2217 // to select data from, V3 is the permutation.
2218 SDValue
2219 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
2220  const {
2221  const auto *SVN = cast<ShuffleVectorSDNode>(Op);
2222  ArrayRef<int> AM = SVN->getMask();
2223  assert(AM.size() <= 8 && "Unexpected shuffle mask");
2224  unsigned VecLen = AM.size();
2225 
2226  MVT VecTy = ty(Op);
2227  assert(!Subtarget.isHVXVectorType(VecTy, true) &&
2228  "HVX shuffles should be legal");
2229  assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
2230 
2231  SDValue Op0 = Op.getOperand(0);
2232  SDValue Op1 = Op.getOperand(1);
2233  const SDLoc &dl(Op);
2234 
2235  // If the inputs are not the same as the output, bail. This is not an
2236  // error situation, but complicates the handling and the default expansion
2237  // (into BUILD_VECTOR) should be adequate.
2238  if (ty(Op0) != VecTy || ty(Op1) != VecTy)
2239  return SDValue();
2240 
2241  // Normalize the mask so that the first non-negative index comes from
2242  // the first operand.
2243  SmallVector<int,8> Mask(AM.begin(), AM.end());
2244  unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
2245  if (F == AM.size())
2246  return DAG.getUNDEF(VecTy);
2247  if (AM[F] >= int(VecLen)) {
2248  ShuffleVectorSDNode::commuteShuffleMask(Mask, VecLen);
2249  std::swap(Op0, Op1);
2250  }
2251 
2252  // Express the shuffle mask in terms of bytes.
2253  SmallVector<int,8> ByteMask;
2254  unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
2255  for (int M : Mask) {
2256  if (M < 0) {
2257  for (unsigned j = 0; j != ElemBytes; ++j)
2258  ByteMask.push_back(-1);
2259  } else {
2260  for (unsigned j = 0; j != ElemBytes; ++j)
2261  ByteMask.push_back(M*ElemBytes + j);
2262  }
2263  }
2264  assert(ByteMask.size() <= 8);
2265 
2266  // All non-undef (non-negative) indexes are well within [0..127], so they
2267  // fit in a single byte. Build two 64-bit words:
2268  // - MaskIdx where each byte is the corresponding index (for non-negative
2269  // indexes), and 0xFF for negative indexes, and
2270  // - MaskUnd that has 0xFF for each negative index.
2271  uint64_t MaskIdx = 0;
2272  uint64_t MaskUnd = 0;
2273  for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
2274  unsigned S = 8*i;
2275  uint64_t M = ByteMask[i] & 0xFF;
2276  if (M == 0xFF)
2277  MaskUnd |= M << S;
2278  MaskIdx |= M << S;
2279  }
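 // Illustrative example (not from the original source): shuffling two v4i8
 // inputs with mask {0,2,4,6} yields ByteMask {0,2,4,6}, hence
 // MaskIdx = 0x06040200 and MaskUnd = 0, which matches the first "byte pack"
 // pattern below and is emitted as S2_vtrunehb of the Op1:Op0 register pair.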
2280 
2281  if (ByteMask.size() == 4) {
2282  // Identity.
2283  if (MaskIdx == (0x03020100 | MaskUnd))
2284  return Op0;
2285  // Byte swap.
2286  if (MaskIdx == (0x00010203 | MaskUnd)) {
2287  SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
2288  SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
2289  return DAG.getBitcast(VecTy, T1);
2290  }
2291 
2292  // Byte packs.
2293  SDValue Concat10 =
2294  getCombine(Op1, Op0, dl, typeJoin({ty(Op1), ty(Op0)}), DAG);
2295  if (MaskIdx == (0x06040200 | MaskUnd))
2296  return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
2297  if (MaskIdx == (0x07050301 | MaskUnd))
2298  return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
2299 
2300  SDValue Concat01 =
2301  getCombine(Op0, Op1, dl, typeJoin({ty(Op0), ty(Op1)}), DAG);
2302  if (MaskIdx == (0x02000604 | MaskUnd))
2303  return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
2304  if (MaskIdx == (0x03010705 | MaskUnd))
2305  return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
2306  }
2307 
2308  if (ByteMask.size() == 8) {
2309  // Identity.
2310  if (MaskIdx == (0x0706050403020100ull | MaskUnd))
2311  return Op0;
2312  // Byte swap.
2313  if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
2314  SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
2315  SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
2316  return DAG.getBitcast(VecTy, T1);
2317  }
2318 
2319  // Halfword picks.
2320  if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
2321  return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
2322  if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
2323  return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
2324  if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
2325  return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
2326  if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
2327  return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
2328  if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
2329  VectorPair P = opSplit(Op0, dl, DAG);
2330  return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
2331  }
2332 
2333  // Byte packs.
2334  if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
2335  return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
2336  if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
2337  return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
2338  }
2339 
2340  return SDValue();
2341 }
2342 
2343 SDValue
2344 HexagonTargetLowering::getSplatValue(SDValue Op, SelectionDAG &DAG) const {
2345  switch (Op.getOpcode()) {
2346  case ISD::BUILD_VECTOR:
2347  if (SDValue S = cast<BuildVectorSDNode>(Op)->getSplatValue())
2348  return S;
2349  break;
2350  case ISD::SPLAT_VECTOR:
2351  return Op.getOperand(0);
2352  }
2353  return SDValue();
2354 }
2355 
2356 // Create a Hexagon-specific node for shifting a vector by an integer.
2357 SDValue
2358 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
2359  const {
2360  unsigned NewOpc;
2361  switch (Op.getOpcode()) {
2362  case ISD::SHL:
2363  NewOpc = HexagonISD::VASL;
2364  break;
2365  case ISD::SRA:
2366  NewOpc = HexagonISD::VASR;
2367  break;
2368  case ISD::SRL:
2369  NewOpc = HexagonISD::VLSR;
2370  break;
2371  default:
2372  llvm_unreachable("Unexpected shift opcode");
2373  }
2374 
2375  if (SDValue Sp = getSplatValue(Op.getOperand(1), DAG))
2376  return DAG.getNode(NewOpc, SDLoc(Op), ty(Op), Op.getOperand(0), Sp);
2377  return SDValue();
2378 }
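// Illustrative example (not from the original source):
// (shl v4i16:V, (splat_vector i32 3)) becomes (HexagonISD::VASL v4i16:V, i32 3);
// if the shift amount is not a recognizable splat, SDValue() is returned.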
2379 
2380 SDValue
2381 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
2382  const SDLoc &dl(Op);
2383 
2384  // First try to convert the shift (by vector) to a shift by a scalar.
2385  // If we first split the shift, the shift amount will become 'extract
2386  // subvector', and will no longer be recognized as scalar.
2387  SDValue Res = Op;
2388  if (SDValue S = getVectorShiftByInt(Op, DAG))
2389  Res = S;
2390 
2391  unsigned Opc = Res.getOpcode();
2392  switch (Opc) {
2393  case HexagonISD::VASR:
2394  case HexagonISD::VLSR:
2395  case HexagonISD::VASL:
2396  break;
2397  default:
2398  // No instructions for shifts by non-scalars.
2399  return SDValue();
2400  }
2401 
2402  MVT ResTy = ty(Res);
2403  if (ResTy.getVectorElementType() != MVT::i8)
2404  return Res;
2405 
2406  // For shifts of i8, extend the inputs to i16, then truncate back to i8.
2407  assert(ResTy.getVectorElementType() == MVT::i8);
2408  SDValue Val = Res.getOperand(0), Amt = Res.getOperand(1);
2409 
2410  auto ShiftPartI8 = [&dl, &DAG, this](unsigned Opc, SDValue V, SDValue A) {
2411  MVT Ty = ty(V);
2412  MVT ExtTy = MVT::getVectorVT(MVT::i16, Ty.getVectorNumElements());
2413  SDValue ExtV = Opc == HexagonISD::VASR ? DAG.getSExtOrTrunc(V, dl, ExtTy)
2414  : DAG.getZExtOrTrunc(V, dl, ExtTy);
2415  SDValue ExtS = DAG.getNode(Opc, dl, ExtTy, {ExtV, A});
2416  return DAG.getZExtOrTrunc(ExtS, dl, Ty);
2417  };
2418 
2419  if (ResTy.getSizeInBits() == 32)
2420  return ShiftPartI8(Opc, Val, Amt);
2421 
2422  auto [LoV, HiV] = opSplit(Val, dl, DAG);
2423  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy,
2424  {ShiftPartI8(Opc, LoV, Amt), ShiftPartI8(Opc, HiV, Amt)});
2425 }
2426 
2427 SDValue
2428 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
2429  if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
2430  return Op;
2431  return SDValue();
2432 }
2433 
2434 SDValue
2435 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
2436  MVT ResTy = ty(Op);
2437  SDValue InpV = Op.getOperand(0);
2438  MVT InpTy = ty(InpV);
2439  assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
2440  const SDLoc &dl(Op);
2441 
2442  // Handle conversion from i8 to v8i1.
2443  if (InpTy == MVT::i8) {
2444  if (ResTy == MVT::v8i1) {
2445  SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
2446  SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
2447  return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
2448  }
2449  return SDValue();
2450  }
2451 
2452  return Op;
2453 }
2454 
2455 bool
2456 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
2457  MVT VecTy, SelectionDAG &DAG,
2458  MutableArrayRef<ConstantInt*> Consts) const {
2459  MVT ElemTy = VecTy.getVectorElementType();
2460  unsigned ElemWidth = ElemTy.getSizeInBits();
2461  IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
2462  bool AllConst = true;
2463 
2464  for (unsigned i = 0, e = Values.size(); i != e; ++i) {
2465  SDValue V = Values[i];
2466  if (V.isUndef()) {
2467  Consts[i] = ConstantInt::get(IntTy, 0);
2468  continue;
2469  }
2470  // Make sure to always cast to IntTy.
2471  if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
2472  const ConstantInt *CI = CN->getConstantIntValue();
2473  Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
2474  } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
2475  const ConstantFP *CF = CN->getConstantFPValue();
2476  APInt A = CF->getValueAPF().bitcastToAPInt();
2477  Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
2478  } else {
2479  AllConst = false;
2480  }
2481  }
2482  return AllConst;
2483 }
2484 
2485 SDValue
2486 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
2487  MVT VecTy, SelectionDAG &DAG) const {
2488  MVT ElemTy = VecTy.getVectorElementType();
2489  assert(VecTy.getVectorNumElements() == Elem.size());
2490 
2491  SmallVector<ConstantInt*,4> Consts(Elem.size());
2492  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2493 
2494  unsigned First, Num = Elem.size();
2495  for (First = 0; First != Num; ++First) {
2496  if (!isUndef(Elem[First]))
2497  break;
2498  }
2499  if (First == Num)
2500  return DAG.getUNDEF(VecTy);
2501 
2502  if (AllConst &&
2503  llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2504  return getZero(dl, VecTy, DAG);
2505 
2506  if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2507  assert(Elem.size() == 2);
2508  if (AllConst) {
2509  // The 'Consts' array will have all values as integers regardless
2510  // of the vector element type.
2511  uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
2512  Consts[1]->getZExtValue() << 16;
2513  return DAG.getBitcast(VecTy, DAG.getConstant(V, dl, MVT::i32));
2514  }
2515  SDValue E0, E1;
2516  if (ElemTy == MVT::f16) {
2517  E0 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[0]), dl, MVT::i32);
2518  E1 = DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Elem[1]), dl, MVT::i32);
2519  } else {
2520  E0 = Elem[0];
2521  E1 = Elem[1];
2522  }
2523  SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {E1, E0}, DAG);
2524  return DAG.getBitcast(VecTy, N);
2525  }
2526 
2527  if (ElemTy == MVT::i8) {
2528  // First try generating a constant.
2529  if (AllConst) {
2530  int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
2531  (Consts[1]->getZExtValue() & 0xFF) << 8 |
2532  (Consts[2]->getZExtValue() & 0xFF) << 16 |
2533  Consts[3]->getZExtValue() << 24;
2534  return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
2535  }
2536 
2537  // Then try splat.
2538  bool IsSplat = true;
2539  for (unsigned i = First+1; i != Num; ++i) {
2540  if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2541  continue;
2542  IsSplat = false;
2543  break;
2544  }
2545  if (IsSplat) {
2546  // Legalize the operand of SPLAT_VECTOR.
2547  SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
2548  return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2549  }
2550 
2551  // Generate
2552  // (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
2553  // (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
2554  assert(Elem.size() == 4);
2555  SDValue Vs[4];
2556  for (unsigned i = 0; i != 4; ++i) {
2557  Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
2558  Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
2559  }
2560  SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
2561  SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
2562  SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
2563  SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
2564  SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
2565 
2566  SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
2567  return DAG.getBitcast(MVT::v4i8, R);
2568  }
2569 
2570 #ifndef NDEBUG
2571  dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
2572 #endif
2573  llvm_unreachable("Unexpected vector element type");
2574 }
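// Illustrative example (not from the original source): a BUILD_VECTOR of the
// v2i16 constants <1, 2> packs into the i32 constant 0x00020001, which is
// then bitcast back to v2i16; no vector instructions are needed.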
2575 
2576 SDValue
2577 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
2578  MVT VecTy, SelectionDAG &DAG) const {
2579  MVT ElemTy = VecTy.getVectorElementType();
2580  assert(VecTy.getVectorNumElements() == Elem.size());
2581 
2582  SmallVector<ConstantInt*,8> Consts(Elem.size());
2583  bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
2584 
2585  unsigned First, Num = Elem.size();
2586  for (First = 0; First != Num; ++First) {
2587  if (!isUndef(Elem[First]))
2588  break;
2589  }
2590  if (First == Num)
2591  return DAG.getUNDEF(VecTy);
2592 
2593  if (AllConst &&
2594  llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
2595  return getZero(dl, VecTy, DAG);
2596 
2597  // First try splat if possible.
2598  if (ElemTy == MVT::i16 || ElemTy == MVT::f16) {
2599  bool IsSplat = true;
2600  for (unsigned i = First+1; i != Num; ++i) {
2601  if (Elem[i] == Elem[First] || isUndef(Elem[i]))
2602  continue;
2603  IsSplat = false;
2604  break;
2605  }
2606  if (IsSplat) {
2607  // Legalize the operand of SPLAT_VECTOR
2608  SDValue S = ElemTy == MVT::f16 ? DAG.getBitcast(MVT::i16, Elem[First])
2609  : Elem[First];
2610  SDValue Ext = DAG.getZExtOrTrunc(S, dl, MVT::i32);
2611  return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
2612  }
2613  }
2614 
2615  // Then try constant.
2616  if (AllConst) {
2617  uint64_t Val = 0;
2618  unsigned W = ElemTy.getSizeInBits();
2619  uint64_t Mask = (1ull << W) - 1;
2620  for (unsigned i = 0; i != Num; ++i)
2621  Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
2622  SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
2623  return DAG.getBitcast(VecTy, V0);
2624  }
2625 
2626  // Build two 32-bit vectors and concatenate.
2627  MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
2628  SDValue L = (ElemTy == MVT::i32)
2629  ? Elem[0]
2630  : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
2631  SDValue H = (ElemTy == MVT::i32)
2632  ? Elem[1]
2633  : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
2634  return getCombine(H, L, dl, VecTy, DAG);
2635 }
2636 
2637 SDValue
2638 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
2639  const SDLoc &dl, MVT ValTy, MVT ResTy,
2640  SelectionDAG &DAG) const {
2641  MVT VecTy = ty(VecV);
2642  assert(!ValTy.isVector() ||
2643  VecTy.getVectorElementType() == ValTy.getVectorElementType());
2644  if (VecTy.getVectorElementType() == MVT::i1)
2645  return extractVectorPred(VecV, IdxV, dl, ValTy, ResTy, DAG);
2646 
2647  unsigned VecWidth = VecTy.getSizeInBits();
2648  unsigned ValWidth = ValTy.getSizeInBits();
2649  unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
2650  assert((VecWidth % ElemWidth) == 0);
2651  assert(VecWidth == 32 || VecWidth == 64);
2652 
2653  // Cast everything to scalar integer types.
2654  MVT ScalarTy = tyScalar(VecTy);
2655  VecV = DAG.getBitcast(ScalarTy, VecV);
2656 
2657  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2658  SDValue ExtV;
2659 
2660  if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
2661  unsigned Off = IdxN->getZExtValue() * ElemWidth;
2662  if (VecWidth == 64 && ValWidth == 32) {
2663  assert(Off == 0 || Off == 32);
2664  ExtV = Off == 0 ? LoHalf(VecV, DAG) : HiHalf(VecV, DAG);
2665  } else if (Off == 0 && (ValWidth % 8) == 0) {
2666  ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
2667  } else {
2668  SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
2669  // The return type of EXTRACTU must be the same as the type of the
2670  // input vector.
2671  ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2672  {VecV, WidthV, OffV});
2673  }
2674  } else {
2675  if (ty(IdxV) != MVT::i32)
2676  IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2677  SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2678  DAG.getConstant(ElemWidth, dl, MVT::i32));
2679  ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
2680  {VecV, WidthV, OffV});
2681  }
2682 
2683  // Cast ExtV to the requested result type.
2684  ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
2685  ExtV = DAG.getBitcast(ResTy, ExtV);
2686  return ExtV;
2687 }
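// Illustrative example (not from the original source): extracting element 1
// from a v2i32 held in a 64-bit register pair is just HiHalf; with a run-time
// index the offset becomes (mul IdxV, 32) and an EXTRACTU node is emitted.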
2688 
2689 SDValue
2690 HexagonTargetLowering::extractVectorPred(SDValue VecV, SDValue IdxV,
2691  const SDLoc &dl, MVT ValTy, MVT ResTy,
2692  SelectionDAG &DAG) const {
2693  // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
2694  // without any coprocessors).
2695  MVT VecTy = ty(VecV);
2696  unsigned VecWidth = VecTy.getSizeInBits();
2697  unsigned ValWidth = ValTy.getSizeInBits();
2698  assert(VecWidth == VecTy.getVectorNumElements() &&
2699  "Vector elements should equal vector width size");
2700  assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
2701 
2702  // Check if this is an extract of the lowest bit.
2703  if (auto *IdxN = dyn_cast<ConstantSDNode>(IdxV)) {
2704  // Extracting the lowest bit is a no-op, but it changes the type,
2705  // so it must be kept as an operation to avoid errors related to
2706  // type mismatches.
2707  if (IdxN->isZero() && ValTy.getSizeInBits() == 1)
2708  return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
2709  }
2710 
2711  // If the value extracted is a single bit, use tstbit.
2712  if (ValWidth == 1) {
2713  SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2714  SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
2715  SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
2716  return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
2717  }
2718 
2719  // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
2720  // a predicate register. The elements of the vector are repeated
2721  // in the register (if necessary) so that the total number is 8.
2722  // The extracted subvector will need to be expanded in such a way.
2723  unsigned Scale = VecWidth / ValWidth;
2724 
2725  // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
2726  // position 0.
2727  assert(ty(IdxV) == MVT::i32);
2728  unsigned VecRep = 8 / VecWidth;
2729  SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
2730  DAG.getConstant(8*VecRep, dl, MVT::i32));
2731  SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2732  SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
2733  while (Scale > 1) {
2734  // The longest possible subvector is at most 32 bits, so it is always
2735  // contained in the low subregister.
2736  T1 = LoHalf(T1, DAG);
2737  T1 = expandPredicate(T1, dl, DAG);
2738  Scale /= 2;
2739  }
2740 
2741  return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
2742 }
2743 
2744 SDValue
2745 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
2746  const SDLoc &dl, MVT ValTy,
2747  SelectionDAG &DAG) const {
2748  MVT VecTy = ty(VecV);
2749  if (VecTy.getVectorElementType() == MVT::i1)
2750  return insertVectorPred(VecV, ValV, IdxV, dl, ValTy, DAG);
2751 
2752  unsigned VecWidth = VecTy.getSizeInBits();
2753  unsigned ValWidth = ValTy.getSizeInBits();
2754  assert(VecWidth == 32 || VecWidth == 64);
2755  assert((VecWidth % ValWidth) == 0);
2756 
2757  // Cast everything to scalar integer types.
2758  MVT ScalarTy = MVT::getIntegerVT(VecWidth);
2759  // The actual type of ValV may be different than ValTy (which is related
2760  // to the vector type).
2761  unsigned VW = ty(ValV).getSizeInBits();
2762  ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
2763  VecV = DAG.getBitcast(ScalarTy, VecV);
2764  if (VW != VecWidth)
2765  ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
2766 
2767  SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
2768  SDValue InsV;
2769 
2770  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
2771  unsigned W = C->getZExtValue() * ValWidth;
2772  SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
2773  InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2774  {VecV, ValV, WidthV, OffV});
2775  } else {
2776  if (ty(IdxV) != MVT::i32)
2777  IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
2778  SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
2779  InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
2780  {VecV, ValV, WidthV, OffV});
2781  }
2782 
2783  return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
2784 }
2785 
2786 SDValue
2787 HexagonTargetLowering::insertVectorPred(SDValue VecV, SDValue ValV,
2788  SDValue IdxV, const SDLoc &dl,
2789  MVT ValTy, SelectionDAG &DAG) const {
2790  MVT VecTy = ty(VecV);
2791  unsigned VecLen = VecTy.getVectorNumElements();
2792 
2793  if (ValTy == MVT::i1) {
2794  SDValue ToReg = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
2795  SDValue Ext = DAG.getSExtOrTrunc(ValV, dl, MVT::i32);
2796  SDValue Width = DAG.getConstant(8 / VecLen, dl, MVT::i32);
2797  SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2798  SDValue Ins =
2799  DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, {ToReg, Ext, Width, Idx});
2800  return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Ins}, DAG);
2801  }
2802 
2803  assert(ValTy.getVectorElementType() == MVT::i1);
2804  SDValue ValR = ValTy.isVector()
2805  ? DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV)
2806  : DAG.getSExtOrTrunc(ValV, dl, MVT::i64);
2807 
2808  unsigned Scale = VecLen / ValTy.getVectorNumElements();
2809  assert(Scale > 1);
2810 
2811  for (unsigned R = Scale; R > 1; R /= 2) {
2812  ValR = contractPredicate(ValR, dl, DAG);
2813  ValR = getCombine(DAG.getUNDEF(MVT::i32), ValR, dl, MVT::i64, DAG);
2814  }
2815 
2816  SDValue Width = DAG.getConstant(64 / Scale, dl, MVT::i32);
2817  SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, Width);
2818  SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
2819  SDValue Ins =
2820  DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, {VecR, ValR, Width, Idx});
2821  return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
2822 }
2823 
2824 SDValue
2825 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
2826  SelectionDAG &DAG) const {
2827  assert(ty(Vec32).getSizeInBits() == 32);
2828  if (isUndef(Vec32))
2829  return DAG.getUNDEF(MVT::i64);
2830  SDValue P = DAG.getBitcast(MVT::v4i8, Vec32);
2831  SDValue X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i16, P);
2832  return DAG.getBitcast(MVT::i64, X);
2833 }
2834 
2835 SDValue
2836 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
2837  SelectionDAG &DAG) const {
2838  assert(ty(Vec64).getSizeInBits() == 64);
2839  if (isUndef(Vec64))
2840  return DAG.getUNDEF(MVT::i32);
2841  // Collect even bytes:
2842  SDValue A = DAG.getBitcast(MVT::v8i8, Vec64);
2843  SDValue S = DAG.getVectorShuffle(MVT::v8i8, dl, A, DAG.getUNDEF(MVT::v8i8),
2844  {0, 2, 4, 6, 1, 3, 5, 7});
2845  return extractVector(S, DAG.getConstant(0, dl, MVT::i32), dl, MVT::v4i8,
2846  MVT::i32, DAG);
2847 }
2848 
2849 SDValue
2850 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
2851  const {
2852  if (Ty.isVector()) {
2853  unsigned W = Ty.getSizeInBits();
2854  if (W <= 64)
2855  return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
2856  return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
2857  }
2858 
2859  if (Ty.isInteger())
2860  return DAG.getConstant(0, dl, Ty);
2861  if (Ty.isFloatingPoint())
2862  return DAG.getConstantFP(0.0, dl, Ty);
2863  llvm_unreachable("Invalid type for zero");
2864 }
2865 
2866 SDValue
2867 HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
2868  const {
2869  MVT ValTy = ty(Val);
2870  assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
2871 
2872  unsigned ValLen = ValTy.getVectorNumElements();
2873  unsigned ResLen = ResTy.getVectorNumElements();
2874  if (ValLen == ResLen)
2875  return Val;
2876 
2877  const SDLoc &dl(Val);
2878  assert(ValLen < ResLen);
2879  assert(ResLen % ValLen == 0);
2880 
2881  SmallVector<SDValue, 4> Concats = {Val};
2882  for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
2883  Concats.push_back(DAG.getUNDEF(ValTy));
2884 
2885  return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
2886 }
2887 
2888 SDValue
2889 HexagonTargetLowering::getCombine(SDValue Hi, SDValue Lo, const SDLoc &dl,
2890  MVT ResTy, SelectionDAG &DAG) const {
2891  MVT ElemTy = ty(Hi);
2892  assert(ElemTy == ty(Lo));
2893 
2894  if (!ElemTy.isVector()) {
2895  assert(ElemTy.isScalarInteger());
2896  MVT PairTy = MVT::getIntegerVT(2 * ElemTy.getSizeInBits());
2897  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, PairTy, Lo, Hi);
2898  return DAG.getBitcast(ResTy, Pair);
2899  }
2900 
2901  unsigned Width = ElemTy.getSizeInBits();
2902  MVT IntTy = MVT::getIntegerVT(Width);
2903  MVT PairTy = MVT::getIntegerVT(2 * Width);
2904  SDValue Pair =
2905  DAG.getNode(ISD::BUILD_PAIR, dl, PairTy,
2906  {DAG.getBitcast(IntTy, Lo), DAG.getBitcast(IntTy, Hi)});
2907  return DAG.getBitcast(ResTy, Pair);
2908 }
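// Illustrative example (not from the original source): getCombine of two i32
// values Hi and Lo produces (bitcast ResTy (build_pair i64 Lo, Hi)), i.e. Lo
// ends up in the low 32 bits of the resulting 64-bit register.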
2909 
2910 SDValue
2911 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
2912  MVT VecTy = ty(Op);
2913  unsigned BW = VecTy.getSizeInBits();
2914  const SDLoc &dl(Op);
2915  SmallVector<SDValue,8> Ops;
2916  for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
2917  Ops.push_back(Op.getOperand(i));
2918 
2919  if (BW == 32)
2920  return buildVector32(Ops, dl, VecTy, DAG);
2921  if (BW == 64)
2922  return buildVector64(Ops, dl, VecTy, DAG);
2923 
2924  if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
2925  // Check if this is a special case of all-0 or all-1.
2926  bool All0 = true, All1 = true;
2927  for (SDValue P : Ops) {
2928  auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
2929  if (CN == nullptr) {
2930  All0 = All1 = false;
2931  break;
2932  }
2933  uint32_t C = CN->getZExtValue();
2934  All0 &= (C == 0);
2935  All1 &= (C == 1);
2936  }
2937  if (All0)
2938  return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
2939  if (All1)
2940  return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
2941 
2942  // For each i1 element in the resulting predicate register, put 1
2943  // shifted by the index of the element into a general-purpose register,
2944  // then OR them together and transfer the result back into a predicate register.
2945  SDValue Rs[8];
2946  SDValue Z = getZero(dl, MVT::i32, DAG);
2947  // Always produce 8 bits, repeat inputs if necessary.
2948  unsigned Rep = 8 / VecTy.getVectorNumElements();
2949  for (unsigned i = 0; i != 8; ++i) {
2950  SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
2951  Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
2952  }
2953  for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
2954  for (unsigned i = 0, e = A.size()/2; i != e; ++i)
2955  Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
2956  }
2957  // Move the value directly to a predicate register.
2958  return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
2959  }
2960 
2961  return SDValue();
2962 }
2963 
2964 SDValue
2965 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
2966  SelectionDAG &DAG) const {
2967  MVT VecTy = ty(Op);
2968  const SDLoc &dl(Op);
2969  if (VecTy.getSizeInBits() == 64) {
2970  assert(Op.getNumOperands() == 2);
2971  return getCombine(Op.getOperand(1), Op.getOperand(0), dl, VecTy, DAG);
2972  }
2973 
2974  MVT ElemTy = VecTy.getVectorElementType();
2975  if (ElemTy == MVT::i1) {
2976  assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
2977  MVT OpTy = ty(Op.getOperand(0));
2978  // Scale is how many times the operands need to be contracted to match
2979  // the representation in the target register.
2980  unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
2981  assert(Scale == Op.getNumOperands() && Scale > 1);
2982 
2983  // First, convert all bool vectors to integers, then generate pairwise
2984  // inserts to form values of doubled length. Up until there are only
2985  // two values left to concatenate, all of these values will fit in a
2986  // 32-bit integer, so keep them as i32 to use 32-bit inserts.
2987  SmallVector<SDValue,4> Words[2];
2988  unsigned IdxW = 0;
2989 
2990  for (SDValue P : Op.getNode()->op_values()) {
2991  SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
2992  for (unsigned R = Scale; R > 1; R /= 2) {
2993  W = contractPredicate(W, dl, DAG);
2994  W = getCombine(DAG.getUNDEF(MVT::i32), W, dl, MVT::i64, DAG);
2995  }
2996  W = LoHalf(W, DAG);
2997  Words[IdxW].push_back(W);
2998  }
2999 
3000  while (Scale > 2) {
3001  SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
3002  Words[IdxW ^ 1].clear();
3003 
3004  for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
3005  SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
3006  // Insert W1 into W0 right next to the significant bits of W0.
3007  SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
3008  {W0, W1, WidthV, WidthV});
3009  Words[IdxW ^ 1].push_back(T);
3010  }
3011  IdxW ^= 1;
3012  Scale /= 2;
3013  }
3014 
3015  // At this point there should only be two words left, and Scale should be 2.
3016  assert(Scale == 2 && Words[IdxW].size() == 2);
3017 
3018  SDValue WW = getCombine(Words[IdxW][1], Words[IdxW][0], dl, MVT::i64, DAG);
3019  return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
3020  }
3021 
3022  return SDValue();
3023 }
3024 
3025 SDValue
3026 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3027  SelectionDAG &DAG) const {
3028  SDValue Vec = Op.getOperand(0);
3029  MVT ElemTy = ty(Vec).getVectorElementType();
3030  return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
3031 }
3032 
3033 SDValue
3034 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3035  SelectionDAG &DAG) const {
3036  return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
3037  ty(Op), ty(Op), DAG);
3038 }
3039 
3040 SDValue
3041 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
3042  SelectionDAG &DAG) const {
3043  return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
3044  SDLoc(Op), ty(Op).getVectorElementType(), DAG);
3045 }
3046 
3047 SDValue
3048 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
3049  SelectionDAG &DAG) const {
3050  SDValue ValV = Op.getOperand(1);
3051  return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
3052  SDLoc(Op), ty(ValV), DAG);
3053 }
3054 
3055 bool
3056 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
3057  // Assuming the caller does not have either a signext or zeroext modifier, and
3058  // only one value is accepted, any reasonable truncation is allowed.
3059  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
3060  return false;
3061 
3062  // FIXME: in principle up to 64-bit could be made safe, but it would be very
3063  // fragile at the moment: any support for multiple value returns would be
3064  // liable to disallow tail calls involving i64 -> iN truncation in many cases.
3065  return Ty1->getPrimitiveSizeInBits() <= 32;
3066 }
3067 
3068 SDValue
3069 HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
3070  MVT Ty = ty(Op);
3071  const SDLoc &dl(Op);
3072  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
3073  MVT MemTy = LN->getMemoryVT().getSimpleVT();
3074  ISD::LoadExtType ET = LN->getExtensionType();
3075 
3076  bool LoadPred = MemTy == MVT::v2i1 || MemTy == MVT::v4i1 || MemTy == MVT::v8i1;
3077  if (LoadPred) {
3078  SDValue NL = DAG.getLoad(
3079  LN->getAddressingMode(), ISD::ZEXTLOAD, MVT::i32, dl, LN->getChain(),
3080  LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
3081  /*MemoryVT*/ MVT::i8, LN->getAlign(), LN->getMemOperand()->getFlags(),
3082  LN->getAAInfo(), LN->getRanges());
3083  LN = cast<LoadSDNode>(NL.getNode());
3084  }
3085 
3086  Align ClaimAlign = LN->getAlign();
3087  if (!validateConstPtrAlignment(LN->getBasePtr(), ClaimAlign, dl, DAG))
3088  return replaceMemWithUndef(Op, DAG);
3089 
3090  // Call LowerUnalignedLoad for all loads, it recognizes loads that
3091  // don't need extra aligning.
3092  SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
3093  if (LoadPred) {
3094  SDValue TP = getInstr(Hexagon::C2_tfrrp, dl, MemTy, {LU}, DAG);
3095  if (ET == ISD::SEXTLOAD) {
3096  TP = DAG.getSExtOrTrunc(TP, dl, Ty);
3097  } else if (ET != ISD::NON_EXTLOAD) {
3098  TP = DAG.getZExtOrTrunc(TP, dl, Ty);
3099  }
3100  SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
3101  return DAG.getMergeValues({TP, Ch}, dl);
3102  }
3103  return LU;
3104 }
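// Illustrative example (not from the original source): a load of v4i1 is
// rewritten above as a zero-extending i8 load, then C2_tfrrp transfers the
// loaded byte into a predicate register, and any requested sign/zero
// extension is applied to the predicate value afterwards.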
3105 
3106 SDValue
3107 HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
3108  const SDLoc &dl(Op);
3109  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
3110  SDValue Val = SN->getValue();
3111  MVT Ty = ty(Val);
3112 
3113  if (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1) {
3114  // Store the exact predicate (all bits).
3115  SDValue TR = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {Val}, DAG);
3116  SDValue NS = DAG.getTruncStore(SN->getChain(), dl, TR, SN->getBasePtr(),
3117  MVT::i8, SN->getMemOperand());
3118  if (SN->isIndexed()) {
3119  NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
3120  SN->getAddressingMode());
3121  }
3122  SN = cast<StoreSDNode>(NS.getNode());
3123  }
3124 
3125  Align ClaimAlign = SN->getAlign();
3126  if (!validateConstPtrAlignment(SN->getBasePtr(), ClaimAlign, dl, DAG))
3127  return replaceMemWithUndef(Op, DAG);
3128 
3129  MVT StoreTy = SN->getMemoryVT().getSimpleVT();
3130  Align NeedAlign = Subtarget.getTypeAlignment(StoreTy);
3131  if (ClaimAlign < NeedAlign)
3132  return expandUnalignedStore(SN, DAG);
3133  return SDValue(SN, 0);
3134 }
3135 
3136 SDValue
3137 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
3138  const {
3139  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
3140  MVT LoadTy = ty(Op);
3141  unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy).value();
3142  unsigned HaveAlign = LN->getAlign().value();
3143  if (HaveAlign >= NeedAlign)
3144  return Op;
3145 
3146  const SDLoc &dl(Op);
3147  const DataLayout &DL = DAG.getDataLayout();
3148  LLVMContext &Ctx = *DAG.getContext();
3149 
3150  // If the load aligning is disabled or the load can be broken up into two
3151  // smaller legal loads, do the default (target-independent) expansion.
3152  bool DoDefault = false;
3153  // Handle it in the default way if this is an indexed load.
3154  if (!LN->isUnindexed())
3155  DoDefault = true;
3156 
3157  if (!AlignLoads) {
3158  if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
3159  *LN->getMemOperand()))
3160  return Op;
3161  DoDefault = true;
3162  }
3163  if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
3164  // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
3165  MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
3166  : MVT::getVectorVT(MVT::i8, HaveAlign);
3167  DoDefault =
3168  allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
3169  }
3170  if (DoDefault) {
3171  std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
3172  return DAG.getMergeValues({P.first, P.second}, dl);
3173  }
3174 
3175  // The code below generates two loads, both aligned as NeedAlign, and
3176  // with the distance of NeedAlign between them. For that to cover the
3177  // bits that need to be loaded (and without overlapping), the size of
3178  // the loads should be equal to NeedAlign. This is true for all loadable
3179  // types, but add an assertion in case something changes in the future.
3180  assert(LoadTy.getSizeInBits() == 8*NeedAlign);
3181 
3182  unsigned LoadLen = NeedAlign;
3183  SDValue Base = LN->getBasePtr();
3184  SDValue Chain = LN->getChain();
3185  auto BO = getBaseAndOffset(Base);
3186  unsigned BaseOpc = BO.first.getOpcode();
3187  if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
3188  return Op;
3189 
3190  if (BO.second % LoadLen != 0) {
3191  BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
3192  DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
3193  BO.second -= BO.second % LoadLen;
3194  }
3195  SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
3196  ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
3197  DAG.getConstant(NeedAlign, dl, MVT::i32))
3198  : BO.first;
3199  SDValue Base0 =
3200  DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
3201  SDValue Base1 = DAG.getMemBasePlusOffset(
3202  BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);
3203 
3204  MachineMemOperand *WideMMO = nullptr;
3205  if (MachineMemOperand *MMO = LN->getMemOperand()) {
3206  MachineFunction &MF = DAG.getMachineFunction();
3207  WideMMO = MF.getMachineMemOperand(
3208  MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
3209  MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
3210  MMO->getSuccessOrdering(), MMO->getFailureOrdering());
3211  }
3212 
3213  SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
3214  SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
3215 
3216  SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
3217  {Load1, Load0, BaseNoOff.getOperand(0)});
3218  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3219  Load0.getValue(1), Load1.getValue(1));
3220  SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
3221  return M;
3222 }
3223 
3224 SDValue
3225 HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
3226  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
3227  auto *CY = dyn_cast<ConstantSDNode>(Y);
3228  if (!CY)
3229  return SDValue();
3230 
3231  const SDLoc &dl(Op);
3232  SDVTList VTs = Op.getNode()->getVTList();
3233  assert(VTs.NumVTs == 2);
3234  assert(VTs.VTs[1] == MVT::i1);
3235  unsigned Opc = Op.getOpcode();
3236 
3237  if (CY) {
3238  uint32_t VY = CY->getZExtValue();
3239  assert(VY != 0 && "This should have been folded");
3240  // X +/- 1
3241  if (VY != 1)
3242  return SDValue();
3243 
3244  if (Opc == ISD::UADDO) {
3245  SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
3246  SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
3247  ISD::SETEQ);
3248  return DAG.getMergeValues({Op, Ov}, dl);
3249  }
3250  if (Opc == ISD::USUBO) {
3251  SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
3252  SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
3253  DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
3254  return DAG.getMergeValues({Op, Ov}, dl);
3255  }
3256  }
3257 
3258  return SDValue();
3259 }
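// Illustrative example (not from the original source): (uaddo x, 1) overflows
// exactly when x was all-ones, i.e. when the sum wraps to 0, so the overflow
// bit is computed as (x + 1) == 0; similarly (usubo x, 1) underflows exactly
// when the difference wraps to -1.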
3260 
3261 SDValue
3262 HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
3263  const SDLoc &dl(Op);
3264  unsigned Opc = Op.getOpcode();
3265  SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
3266 
3267  if (Opc == ISD::ADDCARRY)
3268  return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
3269  { X, Y, C });
3270 
3271  EVT CarryTy = C.getValueType();
3272  SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
3273  { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
3274  SDValue Out[] = { SubC.getValue(0),
3275  DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
3276  return DAG.getMergeValues(Out, dl);
3277 }
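// Illustrative note (not from the original source): ISD::SUBCARRY uses a
// borrow flag while HexagonISD::SUBC uses a carry flag, so the carry-in and
// carry-out are both logically negated around the SUBC node above.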
3278 
3279 SDValue
3280 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
3281  SDValue Chain = Op.getOperand(0);
3282  SDValue Offset = Op.getOperand(1);
3283  SDValue Handler = Op.getOperand(2);
3284  SDLoc dl(Op);
3285  auto PtrVT = getPointerTy(DAG.getDataLayout());
3286 
3287  // Mark function as containing a call to EH_RETURN.
3288  HexagonMachineFunctionInfo *FuncInfo =
3289  DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
3290  FuncInfo->setHasEHReturn();
3291 
3292  unsigned OffsetReg = Hexagon::R28;
3293 
3294  SDValue StoreAddr =
3295  DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
3296  DAG.getIntPtrConstant(4, dl));
3297  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
3298  Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
3299 
3300  // Not needed: we already use it as an explicit input to EH_RETURN.
3301  // MF.getRegInfo().addLiveOut(OffsetReg);
3302 
3303  return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
3304 }
3305 
3306 SDValue
3307 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3308  unsigned Opc = Op.getOpcode();
3309 
3310  // Handle INLINEASM first.
3311  if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
3312  return LowerINLINEASM(Op, DAG);
3313 
3314  if (isHvxOperation(Op.getNode(), DAG)) {
3315  // If HVX lowering returns nothing, try the default lowering.
3316  if (SDValue V = LowerHvxOperation(Op, DAG))
3317  return V;
3318  }
3319 
3320  switch (Opc) {
3321  default:
3322 #ifndef NDEBUG
3323  Op.getNode()->dumpr(&DAG);
3324  if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
3325  errs() << "Error: check for a non-legal type in this operation\n";
3326 #endif
3327  llvm_unreachable("Should not custom lower this!");
3328  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
3329  case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, DAG);
3330  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
3331  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
3332  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3333  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
3334  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
3335  case ISD::BITCAST: return LowerBITCAST(Op, DAG);
3336  case ISD::LOAD: return LowerLoad(Op, DAG);
3337  case ISD::STORE: return LowerStore(Op, DAG);
3338  case ISD::UADDO:
3339  case ISD::USUBO: return LowerUAddSubO(Op, DAG);
3340  case ISD::ADDCARRY:
3341  case ISD::SUBCARRY: return LowerAddSubCarry(Op, DAG);
3342  case ISD::SRA:
3343  case ISD::SHL:
3344  case ISD::SRL: return LowerVECTOR_SHIFT(Op, DAG);
3345  case ISD::ROTL: return LowerROTL(Op, DAG);
3346  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
3347  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
3348  case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
3349  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
3350  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
3351  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
3352  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG);
3353  case ISD::GlobalAddress: return LowerGLOBALADDRESS(Op, DAG);
3354  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
3355  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
3356  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
3357  case ISD::VASTART: return LowerVASTART(Op, DAG);
3358  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
3359  case ISD::SETCC: return LowerSETCC(Op, DAG);
3360  case ISD::VSELECT: return LowerVSELECT(Op, DAG);
3361  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3362  case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
3363  case ISD::PREFETCH: return LowerPREFETCH(Op, DAG);
3364  case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, DAG);
3365  break;
3366  }
3367 
3368  return SDValue();
3369 }
3370 
3371 void
3372 HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
3373  SmallVectorImpl<SDValue> &Results,
3374  SelectionDAG &DAG) const {
3375  if (isHvxOperation(N, DAG)) {
3376  LowerHvxOperationWrapper(N, Results, DAG);
3377  if (!Results.empty())
3378  return;
3379  }
3380 
3381  SDValue Op(N, 0);
3382  unsigned Opc = N->getOpcode();
3383 
3384  switch (Opc) {
3385  case HexagonISD::SSAT:
3386  case HexagonISD::USAT:
3387  Results.push_back(opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG));
3388  break;
3389  case ISD::STORE:
3390  // We are only custom-lowering stores to verify the alignment of the
3391  // address if it is a compile-time constant. Since a store can be
3392  // modified during type-legalization (the value being stored may need
3393  // legalization), return empty Results here to indicate that we don't
3394  // really make any changes in the custom lowering.
3395  return;
3396  default:
3397  TargetLowering::LowerOperationWrapper(N, Results, DAG);
3398  break;
3399  }
3400 }
3401 
3402 void
3403 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
3404  SmallVectorImpl<SDValue> &Results,
3405  SelectionDAG &DAG) const {
3406  if (isHvxOperation(N, DAG)) {
3407  ReplaceHvxNodeResults(N, Results, DAG);
3408  if (!Results.empty())
3409  return;
3410  }
3411 
3412  const SDLoc &dl(N);
3413  switch (N->getOpcode()) {
3414  case ISD::SRL:
3415  case ISD::SRA:
3416  case ISD::SHL:
3417  return;
3418  case ISD::BITCAST:
3419  // Handle a bitcast from v8i1 to i8.
3420  if (N->getValueType(0) == MVT::i8) {
3421  if (N->getOperand(0).getValueType() == MVT::v8i1) {
3422  SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
3423  N->getOperand(0), DAG);
3424  SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
3425  Results.push_back(T);
3426  }
3427  }
3428  break;
3429  }
3430 }
3431 
3432 SDValue
3433 HexagonTargetLowering::PerformDAGCombine(SDNode *N,
3434  DAGCombinerInfo &DCI) const {
3435  if (isHvxOperation(N, DCI.DAG)) {
3436  if (SDValue V = PerformHvxDAGCombine(N, DCI))
3437  return V;
3438  return SDValue();
3439  }
3440 
3441  SDValue Op(N, 0);
3442  const SDLoc &dl(Op);
3443  unsigned Opc = Op.getOpcode();
3444 
3445  if (Opc == ISD::TRUNCATE) {
3446  SDValue Op0 = Op.getOperand(0);
3447  // fold (truncate (build pair x, y)) -> (truncate x) or x
3448  if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3449  EVT TruncTy = Op.getValueType();
3450  SDValue Elem0 = Op0.getOperand(0);
3451  // if we match the low element of the pair, just return it.
3452  if (Elem0.getValueType() == TruncTy)
3453  return Elem0;
3454  // otherwise, if the low part is still too large, apply the truncate.
3455  if (Elem0.getValueType().bitsGT(TruncTy))
3456  return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
3457  }
3458  }
3459 
3460  if (DCI.isBeforeLegalizeOps())
3461  return SDValue();
3462 
3463  if (Opc == HexagonISD::P2D) {
3464  SDValue P = Op.getOperand(0);
3465  switch (P.getOpcode()) {
3466  case HexagonISD::PTRUE:
3467  return DCI.DAG.getConstant(-1, dl, ty(Op));
3468  case HexagonISD::PFALSE:
3469  return getZero(dl, ty(Op), DCI.DAG);
3470  default:
3471  break;
3472  }
3473  } else if (Opc == ISD::VSELECT) {
3474  // This is pretty much duplicated in HexagonISelLoweringHVX...
3475  //
3476  // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
3477  SDValue Cond = Op.getOperand(0);
3478  if (Cond->getOpcode() == ISD::XOR) {
3479  SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3480  if (C1->getOpcode() == HexagonISD::PTRUE) {
3481  SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
3482  Op.getOperand(2), Op.getOperand(1));
3483  return VSel;
3484  }
3485  }
3486  } else if (Opc == ISD::TRUNCATE) {
3487  SDValue Op0 = Op.getOperand(0);
3488  // fold (truncate (build pair x, y)) -> (truncate x) or x
3489  if (Op0.getOpcode() == ISD::BUILD_PAIR) {
3490  MVT TruncTy = ty(Op);
3491  SDValue Elem0 = Op0.getOperand(0);
3492  // if we match the low element of the pair, just return it.
3493  if (ty(Elem0) == TruncTy)
3494  return Elem0;
3495  // otherwise, if the low part is still too large, apply the truncate.
3496  if (ty(Elem0).bitsGT(TruncTy))
3497  return DCI.DAG.getNode(ISD::TRUNCATE, dl, TruncTy, Elem0);
3498  }
3499  } else if (Opc == ISD::OR) {
3500  // fold (or (shl xx, s), (zext y)) -> (COMBINE (shl xx, s-32), y)
3501  // if s >= 32
3502  auto fold0 = [&, this](SDValue Op) {
3503  if (ty(Op) != MVT::i64)
3504  return SDValue();
3505  SDValue Shl = Op.getOperand(0);
3506  SDValue Zxt = Op.getOperand(1);
3507  if (Shl.getOpcode() != ISD::SHL)
3508  std::swap(Shl, Zxt);
3509 
3510  if (Shl.getOpcode() != ISD::SHL || Zxt.getOpcode() != ISD::ZERO_EXTEND)
3511  return SDValue();
3512 
3513  SDValue Z = Zxt.getOperand(0);
3514  auto *Amt = dyn_cast<ConstantSDNode>(Shl.getOperand(1));
3515  if (Amt && Amt->getZExtValue() >= 32 && ty(Z).getSizeInBits() <= 32) {
3516  unsigned A = Amt->getZExtValue();
3517  SDValue S = Shl.getOperand(0);
3518  SDValue T0 = DCI.DAG.getNode(ISD::SHL, dl, ty(S), S,
3519  DCI.DAG.getConstant(A - 32, dl, MVT::i32));
3520  SDValue T1 = DCI.DAG.getZExtOrTrunc(T0, dl, MVT::i32);
3521  SDValue T2 = DCI.DAG.getZExtOrTrunc(Z, dl, MVT::i32);
3522  return DCI.DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64, {T1, T2});
3523  }
3524  return SDValue();
3525  };
3526 
3527  if (SDValue R = fold0(Op))
3528  return R;
3529  }
3530 
3531  return SDValue();
3532 }
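// A standalone, runnable check of the bit identity behind the OR fold above,
// using plain integers in place of SDValues (all names here are illustrative,
// not part of this file): for s >= 32 and a 32-bit y,
// (x << s) | zext(y) == COMBINE(trunc32(x << (s-32)), y).
#include <cassert>
#include <cstdint>

static uint64_t combine(uint32_t Hi, uint32_t Lo) { // HexagonISD::COMBINE
  return (uint64_t(Hi) << 32) | Lo;
}

int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  uint32_t Y = 0xcafebabe;
  for (unsigned S = 32; S < 64; ++S) {
    uint64_t Or = (X << S) | Y;            // (or (shl x, s), (zext y))
    uint32_t Hi = uint32_t(X << (S - 32)); // (shl x, s-32), truncated to i32
    assert(Or == combine(Hi, Y));
  }
  return 0;
}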
3533 
3534 /// Returns relocation base for the given PIC jumptable.
3535 SDValue
3536 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3537  SelectionDAG &DAG) const {
3538  int Idx = cast<JumpTableSDNode>(Table)->getIndex();
3539  EVT VT = Table.getValueType();
3540  SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
3541  return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
3542 }
3543 
3544 //===----------------------------------------------------------------------===//
3545 // Inline Assembly Support
3546 //===----------------------------------------------------------------------===//
3547 
3548 TargetLowering::ConstraintType
3549 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
3550  if (Constraint.size() == 1) {
3551  switch (Constraint[0]) {
3552  case 'q':
3553  case 'v':
3554  if (Subtarget.useHVXOps())
3555  return C_RegisterClass;
3556  break;
3557  case 'a':
3558  return C_RegisterClass;
3559  default:
3560  break;
3561  }
3562  }
3563  return TargetLowering::getConstraintType(Constraint);
3564 }
3565 
3566 std::pair<unsigned, const TargetRegisterClass*>
3567 HexagonTargetLowering::getRegForInlineAsmConstraint(
3568  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
3569 
3570  if (Constraint.size() == 1) {
3571  switch (Constraint[0]) {
3572  case 'r': // R0-R31
3573  switch (VT.SimpleTy) {
3574  default:
3575  return {0u, nullptr};
3576  case MVT::i1:
3577  case MVT::i8:
3578  case MVT::i16:
3579  case MVT::i32:
3580  case MVT::f32:
3581  return {0u, &Hexagon::IntRegsRegClass};
3582  case MVT::i64:
3583  case MVT::f64:
3584  return {0u, &Hexagon::DoubleRegsRegClass};
3585  }
3586  break;
3587  case 'a': // M0-M1
3588  if (VT != MVT::i32)
3589  return {0u, nullptr};
3590  return {0u, &Hexagon::ModRegsRegClass};
3591  case 'q': // q0-q3
3592  switch (VT.getSizeInBits()) {
3593  default:
3594  return {0u, nullptr};
3595  case 64:
3596  case 128:
3597  return {0u, &Hexagon::HvxQRRegClass};
3598  }
3599  break;
3600  case 'v': // V0-V31
3601  switch (VT.getSizeInBits()) {
3602  default:
3603  return {0u, nullptr};
3604  case 512:
3605  return {0u, &Hexagon::HvxVRRegClass};
3606  case 1024:
3607  if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
3608  return {0u, &Hexagon::HvxVRRegClass};
3609  return {0u, &Hexagon::HvxWRRegClass};
3610  case 2048:
3611  return {0u, &Hexagon::HvxWRRegClass};
3612  }
3613  break;
3614  default:
3615  return {0u, nullptr};
3616  }
3617  }
3618 
3619  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3620 }
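// Illustrative use of the constraints handled above, assuming a Hexagon
// toolchain with GCC-style inline asm: "r" selects a scalar register R0-R31,
// "a" a modifier register (M0/M1), and "v"/"q" an HVX vector or predicate
// register. A minimal sketch:
int addOne(int X) {
  int R;
  asm("%0 = add(%1, #1)" : "=r"(R) : "r"(X));
  return R;
}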
3621 
3622 /// isFPImmLegal - Returns true if the target can instruction select the
3623 /// specified FP immediate natively. If false, the legalizer will
3624 /// materialize the FP immediate as a load from a constant pool.
3625 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
3626  bool ForCodeSize) const {
3627  return true;
3628 }
3629 
3630 /// isLegalAddressingMode - Return true if the addressing mode represented by
3631 /// AM is legal for this target, for a load/store of the specified type.
3632 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
3633  const AddrMode &AM, Type *Ty,
3634  unsigned AS, Instruction *I) const {
3635  if (Ty->isSized()) {
3636  // When LSR detects uses of the same base address to access different
3637  // types (e.g. unions), it will assume a conservative type for these
3638  // uses:
3639  // LSR Use: Kind=Address of void in addrspace(4294967295), ...
3640  // The type Ty passed here would then be "void". Skip the alignment
3641  // checks, but do not return false right away, since that confuses
3642  // LSR into crashing.
3643  Align A = DL.getABITypeAlign(Ty);
3644  // The base offset must be a multiple of the alignment.
3645  if (!isAligned(A, AM.BaseOffs))
3646  return false;
3647  // The shifted offset must fit in 11 bits.
3648  if (!isInt<11>(AM.BaseOffs >> Log2(A)))
3649  return false;
3650  }
3651 
3652  // No global is ever allowed as a base.
3653  if (AM.BaseGV)
3654  return false;
3655 
3656  int Scale = AM.Scale;
3657  if (Scale < 0)
3658  Scale = -Scale;
3659  switch (Scale) {
3660  case 0: // No scale reg, "r+i", "r", or just "i".
3661  break;
3662  default: // No scaled addressing mode.
3663  return false;
3664  }
3665  return true;
3666 }
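// A standalone sketch of the offset rule above (hypothetical helper, not
// part of this file): the base offset must be a multiple of the access
// alignment, and the offset scaled down by that alignment must fit in a
// signed 11-bit field.
#include <cstdint>

static bool isValidBaseOffset(int64_t Offset, uint64_t Align) {
  if (Offset % int64_t(Align) != 0)         // multiple of the alignment
    return false;
  unsigned Log2A = 0;
  while ((uint64_t(1) << Log2A) < Align)    // Log2(A)
    ++Log2A;
  int64_t Scaled = Offset >> Log2A;         // shifted offset
  return Scaled >= -1024 && Scaled <= 1023; // isInt<11>
}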
3667 
3668 /// Return true if folding a constant offset with the given GlobalAddress is
3669 /// legal. It is frequently not legal in PIC relocation models.
3670 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
3671  const {
3672  return HTM.getRelocationModel() == Reloc::Static;
3673 }
3674 
3675 /// isLegalICmpImmediate - Return true if the specified immediate is legal
3676 /// icmp immediate, that is the target has icmp instructions which can compare
3677 /// a register against the immediate without having to materialize the
3678 /// immediate into a register.
3679 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3680  return Imm >= -512 && Imm <= 511;
3681 }
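// The accepted range is a signed 10-bit immediate. A compile-time check of
// the same predicate (illustrative, not part of this file):
#include <cstdint>

constexpr bool isLegalICmpImm(int64_t Imm) { return Imm >= -512 && Imm <= 511; }
static_assert(isLegalICmpImm(-512) && isLegalICmpImm(511), "s10 range");
static_assert(!isLegalICmpImm(512), "out of range: must be materialized");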
3682 
3683 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
3684 /// for tail call optimization. Targets which want to do tail call
3685 /// optimization should implement this function.
3686 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
3687  SDValue Callee,
3688  CallingConv::ID CalleeCC,
3689  bool IsVarArg,
3690  bool IsCalleeStructRet,
3691  bool IsCallerStructRet,
3692  const SmallVectorImpl<ISD::OutputArg> &Outs,
3693  const SmallVectorImpl<SDValue> &OutVals,
3694  const SmallVectorImpl<ISD::InputArg> &Ins,
3695  SelectionDAG& DAG) const {
3696  const Function &CallerF = DAG.getMachineFunction().getFunction();
3697  CallingConv::ID CallerCC = CallerF.getCallingConv();
3698  bool CCMatch = CallerCC == CalleeCC;
3699 
3700  // ***************************************************************************
3701  // Look for obvious safe cases to perform tail call optimization that do not
3702  // require ABI changes.
3703  // ***************************************************************************
3704 
3705  // If this is a tail call via a function pointer, then don't do it!
3706  if (!isa<GlobalAddressSDNode>(Callee) &&
3707  !isa<ExternalSymbolSDNode>(Callee)) {
3708  return false;
3709  }
3710 
3711  // Do not optimize if the calling conventions do not match and the conventions
3712  // used are not C or Fast.
3713  if (!CCMatch) {
3714  bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
3715  bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
3716  // If R & E, then ok.
3717  if (!R || !E)
3718  return false;
3719  }
3720 
3721  // Do not tail call optimize vararg calls.
3722  if (IsVarArg)
3723  return false;
3724 
3725  // Also avoid tail call optimization if either caller or callee uses struct
3726  // return semantics.
3727  if (IsCalleeStructRet || IsCallerStructRet)
3728  return false;
3729 
3730  // In addition to the cases above, we also disable tail call optimization
3731  // if the calling convention requires at least one outgoing argument to be
3732  // passed on the stack. We cannot check that here because at this point
3733  // that information is not available.
3734  return true;
3735 }
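// Illustrative calls that the checks above accept or reject (hypothetical
// functions): a direct call to a known symbol may become a tail call, while
// a call through a function pointer never does.
extern int callee(int);
int direct(int X) { return callee(X); }             // GlobalAddress: eligible
int indirect(int (*F)(int), int X) { return F(X); } // function pointer: rejected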
3736 
3737 /// Returns the target specific optimal type for load and store operations as
3738 /// a result of memset, memcpy, and memmove lowering.
3739 ///
3740 /// If DstAlign is zero, that means the destination alignment can satisfy
3741 /// any constraint. Similarly, if SrcAlign is zero it means there isn't
3742 /// a need to check it against alignment requirement, probably because the
3743 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
3744 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
3745 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
3746 /// does not need to be loaded. It returns EVT::Other if the type should be
3747 /// determined using generic target-independent logic.
3748 EVT HexagonTargetLowering::getOptimalMemOpType(
3749  const MemOp &Op, const AttributeList &FuncAttributes) const {
3750  if (Op.size() >= 8 && Op.isAligned(Align(8)))
3751  return MVT::i64;
3752  if (Op.size() >= 4 && Op.isAligned(Align(4)))
3753  return MVT::i32;
3754  if (Op.size() >= 2 && Op.isAligned(Align(2)))
3755  return MVT::i16;
3756  return MVT::Other;
3757 }
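// A standalone sketch of the chunk choice above (hypothetical helper, not
// part of this file): pick the widest unit whose size and alignment are
// both satisfied, otherwise defer to the generic logic.
#include <cstddef>

enum class Chunk { I64, I32, I16, Other };

static Chunk optimalMemOpChunk(size_t Size, size_t Align) {
  if (Size >= 8 && Align % 8 == 0) return Chunk::I64;
  if (Size >= 4 && Align % 4 == 0) return Chunk::I32;
  if (Size >= 2 && Align % 2 == 0) return Chunk::I16;
  return Chunk::Other;
}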
3758 
3759 bool HexagonTargetLowering::allowsMemoryAccess(
3760  LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
3761  Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const {
3762  MVT SVT = VT.getSimpleVT();
3763  if (Subtarget.isHVXVectorType(SVT, true))
3764  return allowsHvxMemoryAccess(SVT, Flags, Fast);
3765  return TargetLoweringBase::allowsMemoryAccess(
3766  Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
3767 }
3768 
3769 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
3770  EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
3771  unsigned *Fast) const {
3772  MVT SVT = VT.getSimpleVT();
3773  if (Subtarget.isHVXVectorType(SVT, true))
3774  return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
3775  if (Fast)
3776  *Fast = 0;
3777  return false;
3778 }
3779 
3780 std::pair<const TargetRegisterClass*, uint8_t>
3781 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
3782  MVT VT) const {
3783  if (Subtarget.isHVXVectorType(VT, true)) {
3784  unsigned BitWidth = VT.getSizeInBits();
3785  unsigned VecWidth = Subtarget.getVectorLength() * 8;
3786 
3787  if (VT.getVectorElementType() == MVT::i1)
3788  return std::make_pair(&Hexagon::HvxQRRegClass, 1);
3789  if (BitWidth == VecWidth)
3790  return std::make_pair(&Hexagon::HvxVRRegClass, 1);
3791  assert(BitWidth == 2 * VecWidth);
3792  return std::make_pair(&Hexagon::HvxWRRegClass, 1);
3793  }
3794 
3795  return TargetLowering::findRepresentativeClass(TRI, VT);
3796 }
3797 
3798 bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
3799  ISD::LoadExtType ExtTy, EVT NewVT) const {
3800  // TODO: This may be worth removing. Check regression tests for diffs.
3801  if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
3802  return false;
3803 
3804  auto *L = cast<LoadSDNode>(Load);
3805  std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
3806  // Small-data object, do not shrink.
3807  if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
3808  return false;
3809  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
3810  auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
3811  const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
3812  return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
3813  }
3814  return true;
3815 }
3816 
3817 void HexagonTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
3818  SDNode *Node) const {
3819  AdjustHvxInstrPostInstrSelection(MI, Node);
3820 }
3821 
3822 Value *HexagonTargetLowering::emitLoadLinked(IRBuilderBase &Builder,
3823  Type *ValueTy, Value *Addr,
3824  AtomicOrdering Ord) const {
3825  BasicBlock *BB = Builder.GetInsertBlock();
3826  Module *M = BB->getParent()->getParent();
3827  unsigned SZ = ValueTy->getPrimitiveSizeInBits();
3828  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
3829  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
3830  : Intrinsic::hexagon_L4_loadd_locked;
3831  Function *Fn = Intrinsic::getDeclaration(M, IntID);
3832 
3833  auto PtrTy = cast<PointerType>(Addr->getType());
3834  PointerType *NewPtrTy =
3835  Builder.getIntNTy(SZ)->getPointerTo(PtrTy->getAddressSpace());
3836  Addr = Builder.CreateBitCast(Addr, NewPtrTy);
3837 
3838  Value *Call = Builder.CreateCall(Fn, Addr, "larx");
3839 
3840  return Builder.CreateBitCast(Call, ValueTy);
3841 }
3842 
3843 /// Perform a store-conditional operation to Addr. Return the status of the
3844 /// store. This should be 0 if the store succeeded, non-zero otherwise.
3845 Value *HexagonTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
3846  Value *Val, Value *Addr,
3847  AtomicOrdering Ord) const {
3848  BasicBlock *BB = Builder.GetInsertBlock();
3849  Module *M = BB->getParent()->getParent();
3850  Type *Ty = Val->getType();
3851  unsigned SZ = Ty->getPrimitiveSizeInBits();
3852 
3853  Type *CastTy = Builder.getIntNTy(SZ);
3854  assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
3855  Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
3856  : Intrinsic::hexagon_S4_stored_locked;
3857  Function *Fn = Intrinsic::getDeclaration(M, IntID);
3858 
3859  unsigned AS = Addr->getType()->getPointerAddressSpace();
3860  Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
3861  Val = Builder.CreateBitCast(Val, CastTy);
3862 
3863  Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
3864  Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
3865  Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
3866  return Ext;
3867 }
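// A conceptual sketch of the retry loop that the AtomicExpand pass builds
// around emitLoadLinked/emitStoreConditional above. The two helpers below
// are stand-ins for the L2_loadw_locked/S2_storew_locked intrinsics, which
// on hardware establish and check a reservation on the address.
#include <cstdint>

static uint32_t load_locked(volatile uint32_t *Addr) { return *Addr; }
static bool store_conditional(volatile uint32_t *Addr, uint32_t V) {
  *Addr = V;
  return true; // hardware may report failure here, forcing a retry
}

uint32_t atomicSwapSketch(volatile uint32_t *Addr, uint32_t NewVal) {
  uint32_t Old;
  do {
    Old = load_locked(Addr);
  } while (!store_conditional(Addr, NewVal));
  return Old;
}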
3868 
3869 TargetLowering::AtomicExpansionKind
3870 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
3871  // Do not expand loads and stores that don't exceed 64 bits.
3872  return LI->getType()->getPrimitiveSizeInBits() > 64
3873  ? AtomicExpansionKind::LLSC
3874  : AtomicExpansionKind::None;
3875 }
3876 
3877 TargetLowering::AtomicExpansionKind
3878 HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
3879  // Do not expand loads and stores that don't exceed 64 bits.
3880  return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64
3881  ? AtomicExpansionKind::Expand
3882  : AtomicExpansionKind::None;
3883 }
3884 
3885 TargetLowering::AtomicExpansionKind
3886 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
3887  AtomicCmpXchgInst *AI) const {
3888  return AtomicExpansionKind::LLSC;
3889 }
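// Taken together, the three policies above mean that 32- and 64-bit atomic
// loads and stores stay single instructions, while compare-and-swap is
// expanded by AtomicExpand into a loop around the load-locked and
// store-conditional helpers defined earlier in this file. A minimal sketch:
#include <atomic>

std::atomic<int> Flag{0};

bool claim() {
  int Expected = 0;
  return Flag.compare_exchange_strong(Expected, 1); // becomes an LL/SC loop
}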