LLVM 23.0.0git
R600ISelLowering.cpp
Go to the documentation of this file.
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600ISelLowering.h"
15#include "AMDGPU.h"
18#include "R600Defines.h"
20#include "R600Subtarget.h"
21#include "R600TargetMachine.h"
23#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include "llvm/IR/IntrinsicsR600.h"
26
27using namespace llvm;
28
29#include "R600GenCallingConv.inc"
30
32 const R600Subtarget &STI)
33 : AMDGPUTargetLowering(TM, STI, STI), Subtarget(&STI),
34 Gen(STI.getGeneration()) {
35 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
36 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
37 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
38 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
39 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
40 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
41
44
45 computeRegisterProperties(Subtarget->getRegisterInfo());
46
47 // Legalize loads and stores to the private address space.
48 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
49
50 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
51 // spaces, so it is custom lowered to handle those where it isn't.
53 for (MVT VT : MVT::integer_valuetypes()) {
54 setLoadExtAction(Op, VT, MVT::i1, Promote);
55 setLoadExtAction(Op, VT, MVT::i8, Custom);
56 setLoadExtAction(Op, VT, MVT::i16, Custom);
57 }
58
59 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
61 MVT::v2i1, Expand);
62
64 MVT::v4i1, Expand);
65
66 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
67 Custom);
68
69 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
70 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
71 // We need to include these since trunc STORES to PRIVATE need
72 // special handling to accommodate RMW
73 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
74 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
75 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
76 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
77 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
78 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
79 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
80 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
81 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
82 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
83
84 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
85 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
86 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
87
88 // Set condition code actions
92 MVT::f32, Expand);
93
95 MVT::i32, Expand);
96
98
99 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
100
101 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
103
105
107 {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
108 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
109 Expand);
110
112 MVT::f64, Custom);
113
114 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
115
116 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
117 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
118 Custom);
119
120 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
121 Expand);
122
123 // ADD, SUB overflow.
124 // TODO: turn these into Legal?
125 if (Subtarget->hasCARRY())
127
128 if (Subtarget->hasBORROW())
130
131 // Expand sign extension of vectors
132 if (!Subtarget->hasBFE())
134
135 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
136
137 if (!Subtarget->hasBFE())
139 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
140
141 if (!Subtarget->hasBFE())
143 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
144
146 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
147
149
151
153 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
154
156 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
157
158 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
159 // to be Legal/Custom in order to avoid library calls.
161 Custom);
162
163 if (!Subtarget->hasFMA())
164 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
165
166 // FIXME: May need no denormals check
168
169 if (!Subtarget->hasBFI())
170 // fcopysign can be done in a single instruction with BFI.
171 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
172
173 if (!Subtarget->hasBCNT(32))
175
176 if (!Subtarget->hasBCNT(64))
178
179 if (Subtarget->hasFFBH())
181
182 if (Subtarget->hasFFBL())
184
185 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
186 // need it for R600.
187 if (Subtarget->hasBFE())
189
192
193 // LLVM will expand these to atomic_cmp_swap(0)
194 // and atomic_swap, respectively.
196
197 // We need to custom lower some of the intrinsics
199 Custom);
200
202
205}
206
208 if (std::next(I) == I->getParent()->end())
209 return false;
210 return std::next(I)->getOpcode() == R600::RETURN;
211}
212
215 MachineBasicBlock *BB) const {
216 MachineFunction *MF = BB->getParent();
217 MachineRegisterInfo &MRI = MF->getRegInfo();
219 const R600InstrInfo *TII = Subtarget->getInstrInfo();
220
221 switch (MI.getOpcode()) {
222 default:
223 // Replace LDS_*_RET instructions that don't have any uses with the
224 // equivalent LDS_*_NORET instructions.
225 if (TII->isLDSRetInstr(MI.getOpcode())) {
226 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
227 assert(DstIdx != -1);
229 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
230 // LDS_1A2D support and remove this special case.
231 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
232 MI.getOpcode() == R600::LDS_CMPST_RET)
233 return BB;
234
235 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
236 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
237 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
238 NewMI.add(MO);
239 } else {
241 }
242 break;
243
244 case R600::FABS_R600: {
245 MachineInstr *NewMI = TII->buildDefaultInstruction(
246 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
247 MI.getOperand(1).getReg());
248 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
249 break;
250 }
251
252 case R600::FNEG_R600: {
253 MachineInstr *NewMI = TII->buildDefaultInstruction(
254 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
255 MI.getOperand(1).getReg());
256 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
257 break;
258 }
259
260 case R600::MASK_WRITE: {
261 Register maskedRegister = MI.getOperand(0).getReg();
262 assert(maskedRegister.isVirtual());
263 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
264 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
265 break;
266 }
267
268 case R600::MOV_IMM_F32:
269 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
270 .getFPImm()
271 ->getValueAPF()
272 .bitcastToAPInt()
273 .getZExtValue());
274 break;
275
276 case R600::MOV_IMM_I32:
277 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
278 MI.getOperand(1).getImm());
279 break;
280
281 case R600::MOV_IMM_GLOBAL_ADDR: {
282 //TODO: Perhaps combine this instruction with the next if possible
283 auto MIB = TII->buildDefaultInstruction(
284 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
285 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
286 //TODO: Ugh this is rather ugly
287 const MachineOperand &MO = MI.getOperand(1);
288 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
289 MO.getTargetFlags());
290 break;
291 }
292
293 case R600::CONST_COPY: {
294 MachineInstr *NewMI = TII->buildDefaultInstruction(
295 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
296 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
297 MI.getOperand(1).getImm());
298 break;
299 }
300
301 case R600::RAT_WRITE_CACHELESS_32_eg:
302 case R600::RAT_WRITE_CACHELESS_64_eg:
303 case R600::RAT_WRITE_CACHELESS_128_eg:
304 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
305 .add(MI.getOperand(0))
306 .add(MI.getOperand(1))
307 .addImm(isEOP(I)); // Set End of program bit
308 break;
309
310 case R600::RAT_STORE_TYPED_eg:
311 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
312 .add(MI.getOperand(0))
313 .add(MI.getOperand(1))
314 .add(MI.getOperand(2))
315 .addImm(isEOP(I)); // Set End of program bit
316 break;
317
318 case R600::BRANCH:
319 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
320 .add(MI.getOperand(0));
321 break;
322
323 case R600::BRANCH_COND_f32: {
324 MachineInstr *NewMI =
325 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
326 R600::PREDICATE_BIT)
327 .add(MI.getOperand(1))
328 .addImm(R600::PRED_SETNE)
329 .addImm(0); // Flags
330 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
331 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
332 .add(MI.getOperand(0))
333 .addReg(R600::PREDICATE_BIT, RegState::Kill);
334 break;
335 }
336
337 case R600::BRANCH_COND_i32: {
338 MachineInstr *NewMI =
339 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
340 R600::PREDICATE_BIT)
341 .add(MI.getOperand(1))
342 .addImm(R600::PRED_SETNE_INT)
343 .addImm(0); // Flags
344 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
345 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
346 .add(MI.getOperand(0))
347 .addReg(R600::PREDICATE_BIT, RegState::Kill);
348 break;
349 }
350
351 case R600::EG_ExportSwz:
352 case R600::R600_ExportSwz: {
353 // Instruction is left unmodified if it's not the last one of its type
354 bool isLastInstructionOfItsType = true;
355 unsigned InstExportType = MI.getOperand(1).getImm();
356 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
357 EndBlock = BB->end(); NextExportInst != EndBlock;
358 NextExportInst = std::next(NextExportInst)) {
359 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
360 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
361 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
362 .getImm();
363 if (CurrentInstExportType == InstExportType) {
364 isLastInstructionOfItsType = false;
365 break;
366 }
367 }
368 }
369 bool EOP = isEOP(I);
370 if (!EOP && !isLastInstructionOfItsType)
371 return BB;
372 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
373 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
374 .add(MI.getOperand(0))
375 .add(MI.getOperand(1))
376 .add(MI.getOperand(2))
377 .add(MI.getOperand(3))
378 .add(MI.getOperand(4))
379 .add(MI.getOperand(5))
380 .add(MI.getOperand(6))
381 .addImm(CfInst)
382 .addImm(EOP);
383 break;
384 }
385 case R600::RETURN: {
386 return BB;
387 }
388 }
389
390 MI.eraseFromParent();
391 return BB;
392}
393
394//===----------------------------------------------------------------------===//
395// Custom DAG Lowering Operations
396//===----------------------------------------------------------------------===//
397
401 switch (Op.getOpcode()) {
402 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
403 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
404 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
405 case ISD::SHL_PARTS:
406 case ISD::SRA_PARTS:
407 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
408 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
409 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
410 case ISD::FCOS:
411 case ISD::FSIN: return LowerTrig(Op, DAG);
412 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
413 case ISD::STORE: return LowerSTORE(Op, DAG);
414 case ISD::LOAD: {
415 SDValue Result = LowerLOAD(Op, DAG);
416 assert((!Result.getNode() ||
417 Result.getNode()->getNumValues() == 2) &&
418 "Load should return a value and a chain");
419 return Result;
420 }
421
422 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
423 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
424 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
426 return lowerADDRSPACECAST(Op, DAG);
427 case ISD::INTRINSIC_VOID: {
428 SDValue Chain = Op.getOperand(0);
429 unsigned IntrinsicID = Op.getConstantOperandVal(1);
430 switch (IntrinsicID) {
431 case Intrinsic::r600_store_swizzle: {
432 SDLoc DL(Op);
433 const SDValue Args[8] = {
434 Chain,
435 Op.getOperand(2), // Export Value
436 Op.getOperand(3), // ArrayBase
437 Op.getOperand(4), // Type
438 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
439 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
440 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
441 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
442 };
443 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
444 }
445
446 // default for switch(IntrinsicID)
447 default: break;
448 }
449 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
450 break;
451 }
453 unsigned IntrinsicID = Op.getConstantOperandVal(0);
454 EVT VT = Op.getValueType();
455 SDLoc DL(Op);
456 switch (IntrinsicID) {
457 case Intrinsic::r600_tex:
458 case Intrinsic::r600_texc: {
459 unsigned TextureOp;
460 switch (IntrinsicID) {
461 case Intrinsic::r600_tex:
462 TextureOp = 0;
463 break;
464 case Intrinsic::r600_texc:
465 TextureOp = 1;
466 break;
467 default:
468 llvm_unreachable("unhandled texture operation");
469 }
470
471 SDValue TexArgs[19] = {
472 DAG.getConstant(TextureOp, DL, MVT::i32),
473 Op.getOperand(1),
474 DAG.getConstant(0, DL, MVT::i32),
475 DAG.getConstant(1, DL, MVT::i32),
476 DAG.getConstant(2, DL, MVT::i32),
477 DAG.getConstant(3, DL, MVT::i32),
478 Op.getOperand(2),
479 Op.getOperand(3),
480 Op.getOperand(4),
481 DAG.getConstant(0, DL, MVT::i32),
482 DAG.getConstant(1, DL, MVT::i32),
483 DAG.getConstant(2, DL, MVT::i32),
484 DAG.getConstant(3, DL, MVT::i32),
485 Op.getOperand(5),
486 Op.getOperand(6),
487 Op.getOperand(7),
488 Op.getOperand(8),
489 Op.getOperand(9),
490 Op.getOperand(10)
491 };
492 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
493 }
494 case Intrinsic::r600_dot4: {
495 SDValue Args[8] = {
496 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
497 DAG.getConstant(0, DL, MVT::i32)),
498 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
499 DAG.getConstant(0, DL, MVT::i32)),
500 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
501 DAG.getConstant(1, DL, MVT::i32)),
502 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
503 DAG.getConstant(1, DL, MVT::i32)),
504 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
505 DAG.getConstant(2, DL, MVT::i32)),
506 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
507 DAG.getConstant(2, DL, MVT::i32)),
508 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
509 DAG.getConstant(3, DL, MVT::i32)),
510 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
511 DAG.getConstant(3, DL, MVT::i32))
512 };
513 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
514 }
515
516 case Intrinsic::r600_implicitarg_ptr: {
519 return DAG.getConstant(ByteOffset, DL, PtrVT);
520 }
521 case Intrinsic::r600_read_ngroups_x:
522 return LowerImplicitParameter(DAG, VT, DL, 0);
523 case Intrinsic::r600_read_ngroups_y:
524 return LowerImplicitParameter(DAG, VT, DL, 1);
525 case Intrinsic::r600_read_ngroups_z:
526 return LowerImplicitParameter(DAG, VT, DL, 2);
527 case Intrinsic::r600_read_global_size_x:
528 return LowerImplicitParameter(DAG, VT, DL, 3);
529 case Intrinsic::r600_read_global_size_y:
530 return LowerImplicitParameter(DAG, VT, DL, 4);
531 case Intrinsic::r600_read_global_size_z:
532 return LowerImplicitParameter(DAG, VT, DL, 5);
533 case Intrinsic::r600_read_local_size_x:
534 return LowerImplicitParameter(DAG, VT, DL, 6);
535 case Intrinsic::r600_read_local_size_y:
536 return LowerImplicitParameter(DAG, VT, DL, 7);
537 case Intrinsic::r600_read_local_size_z:
538 return LowerImplicitParameter(DAG, VT, DL, 8);
539
540 case Intrinsic::r600_read_tgid_x:
541 case Intrinsic::amdgcn_workgroup_id_x:
542 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
543 R600::T1_X, VT);
544 case Intrinsic::r600_read_tgid_y:
545 case Intrinsic::amdgcn_workgroup_id_y:
546 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
547 R600::T1_Y, VT);
548 case Intrinsic::r600_read_tgid_z:
549 case Intrinsic::amdgcn_workgroup_id_z:
550 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
551 R600::T1_Z, VT);
552 case Intrinsic::r600_read_tidig_x:
553 case Intrinsic::amdgcn_workitem_id_x:
554 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
555 R600::T0_X, VT);
556 case Intrinsic::r600_read_tidig_y:
557 case Intrinsic::amdgcn_workitem_id_y:
558 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
559 R600::T0_Y, VT);
560 case Intrinsic::r600_read_tidig_z:
561 case Intrinsic::amdgcn_workitem_id_z:
562 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
563 R600::T0_Z, VT);
564
565 case Intrinsic::r600_recipsqrt_ieee:
566 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
567
568 case Intrinsic::r600_recipsqrt_clamped:
569 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
570 default:
571 return Op;
572 }
573
574 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
575 break;
576 }
577 } // end switch(Op.getOpcode())
578 return SDValue();
579}
580
583 SelectionDAG &DAG) const {
584 switch (N->getOpcode()) {
585 default:
587 return;
588 case ISD::FP_TO_UINT:
589 if (N->getValueType(0) == MVT::i1) {
590 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
591 return;
592 }
593 // Since we don't care about out of bounds values we can use FP_TO_SINT for
594 // uints too. The DAGLegalizer code for uint considers some extra cases
595 // which are not necessary here.
596 [[fallthrough]];
597 case ISD::FP_TO_SINT: {
598 if (N->getValueType(0) == MVT::i1) {
599 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
600 return;
601 }
602
603 SDValue Result;
604 if (expandFP_TO_SINT(N, Result, DAG))
605 Results.push_back(Result);
606 return;
607 }
608 case ISD::SDIVREM: {
609 SDValue Op = SDValue(N, 1);
610 SDValue RES = LowerSDIVREM(Op, DAG);
611 Results.push_back(RES);
612 Results.push_back(RES.getValue(1));
613 break;
614 }
615 case ISD::UDIVREM: {
616 SDValue Op = SDValue(N, 0);
618 break;
619 }
620 }
621}
622
/// Rebuild \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR node of the same
/// vector type. Every element of \p Vector is extracted individually and the
/// extracted scalars are passed, in index order, as the operands of the new
/// node.
623SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
624 SDValue Vector) const {
625 SDLoc DL(Vector);
626 EVT VecVT = Vector.getValueType();
627 EVT EltVT = VecVT.getVectorElementType();
629
 // Collect one EXTRACT_VECTOR_ELT per lane, using a constant lane index.
630 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
631 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
632 DAG.getVectorIdxConstant(i, DL)));
633 }
634
635 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
636}
637
/// Custom lowering for ISD::EXTRACT_VECTOR_ELT.
///
/// An extract whose index is a ConstantSDNode is returned unchanged. Otherwise
/// the source vector is first rebuilt through vectorToVerticalVector() and a
/// new EXTRACT_VECTOR_ELT with the same result type and index is created on
/// that rebuilt vector.
638SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
639 SelectionDAG &DAG) const {
640 SDLoc DL(Op);
641 SDValue Vector = Op.getOperand(0);
642 SDValue Index = Op.getOperand(1);
643
 // Nothing to do for a constant index: keep the node as it is.
644 if (isa<ConstantSDNode>(Index) ||
646 return Op;
647
 // Non-constant index: extract from the vertical-vector form instead.
648 Vector = vectorToVerticalVector(DAG, Vector);
649 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
650 Vector, Index);
651}
652
/// Custom lowering for ISD::INSERT_VECTOR_ELT.
///
/// An insert whose index is a ConstantSDNode is returned unchanged. Otherwise
/// the destination vector is rebuilt through vectorToVerticalVector(), the
/// element is inserted into that rebuilt vector, and the result of the insert
/// is itself passed through vectorToVerticalVector() before being returned.
653SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
654 SelectionDAG &DAG) const {
655 SDLoc DL(Op);
656 SDValue Vector = Op.getOperand(0);
657 SDValue Value = Op.getOperand(1);
658 SDValue Index = Op.getOperand(2);
659
 // Nothing to do for a constant index: keep the node as it is.
660 if (isa<ConstantSDNode>(Index) ||
662 return Op;
663
 // Non-constant index: insert into the vertical-vector form instead.
664 Vector = vectorToVerticalVector(DAG, Vector);
665 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
666 Vector, Value, Index);
667 return vectorToVerticalVector(DAG, Insert);
668}
669
/// Custom lowering for ISD::GlobalAddress.
///
/// The statements below create a target global address for the referenced
/// global, typed as a pointer in AMDGPUAS::CONSTANT_ADDRESS, and wrap it in an
/// AMDGPUISD::CONST_DATA_PTR node of that same pointer type.
670SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI,
671 SDValue Op,
672 SelectionDAG &DAG) const {
673 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
676
677 const DataLayout &DL = DAG.getDataLayout();
678 const GlobalValue *GV = GSD->getGlobal();
 // Pointer type used for both the target address and the wrapping node.
679 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
680
681 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
682 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
683}
684
685SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
686 // On hw >= R700, COS/SIN input must be between -1. and 1.
687 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
688 EVT VT = Op.getValueType();
689 SDValue Arg = Op.getOperand(0);
690 SDLoc DL(Op);
691
692 // TODO: Should this propagate fast-math-flags?
693 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
694 DAG.getNode(ISD::FADD, DL, VT,
695 DAG.getNode(ISD::FMUL, DL, VT, Arg,
696 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
697 DAG.getConstantFP(0.5, DL, MVT::f32)));
698 unsigned TrigNode;
699 switch (Op.getOpcode()) {
700 case ISD::FCOS:
701 TrigNode = AMDGPUISD::COS_HW;
702 break;
703 case ISD::FSIN:
704 TrigNode = AMDGPUISD::SIN_HW;
705 break;
706 default:
707 llvm_unreachable("Wrong trig opcode");
708 }
709 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
710 DAG.getNode(ISD::FADD, DL, VT, FractPart,
711 DAG.getConstantFP(-0.5, DL, MVT::f32)));
712 if (Gen >= AMDGPUSubtarget::R700)
713 return TrigVal;
714 // On R600 hw, COS/SIN input must be between -Pi and Pi.
715 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
716 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
717}
718
719SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
720 SelectionDAG &DAG) const {
721 SDValue Lo, Hi;
722 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
723 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
724}
725
726SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
727 unsigned mainop, unsigned ovf) const {
728 SDLoc DL(Op);
729 EVT VT = Op.getValueType();
730
731 SDValue Lo = Op.getOperand(0);
732 SDValue Hi = Op.getOperand(1);
733
734 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
735 // Extend sign.
736 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
737 DAG.getValueType(MVT::i1));
738
739 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
740
741 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
742}
743
/// Build the replacement for an FP_TO_UINT whose result type is i1
/// (ReplaceNodeResults calls this only in that case).
///
/// The result is a single node of type MVT::i1 whose operands include \p Op
/// and the f32 constant 1.0.
/// NOTE(review): presumably a comparison of \p Op against 1.0 -- the opcode
/// and the trailing operand are not shown here; confirm.
744SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
745 SDLoc DL(Op);
746 return DAG.getNode(
748 DL,
749 MVT::i1,
750 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
752}
753
/// Build the replacement for an FP_TO_SINT whose result type is i1
/// (ReplaceNodeResults calls this only in that case).
///
/// The result is a single node of type MVT::i1 whose operands include \p Op
/// and the f32 constant -1.0.
/// NOTE(review): presumably a comparison of \p Op against -1.0 -- the opcode
/// and the trailing operand are not shown here; confirm.
754SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
755 SDLoc DL(Op);
756 return DAG.getNode(
758 DL,
759 MVT::i1,
760 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
762}
763
/// Emit a load of type \p VT for the implicit parameter located
/// \p DwordOffset 32-bit words into the implicit parameter area.
///
/// The load is chained to the DAG entry node and its address operand is the
/// i32 constant DwordOffset * 4. The MachinePointerInfo is built from a null
/// pointer constant of type PtrType.
764SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
765 const SDLoc &DL,
766 unsigned DwordOffset) const {
 // Dwords are four bytes wide.
767 unsigned ByteOffset = DwordOffset * 4;
768 PointerType *PtrType =
770
771 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
772 assert(isInt<16>(ByteOffset));
773
774 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
775 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
776 MachinePointerInfo(ConstantPointerNull::get(PtrType)));
777}
778
779bool R600TargetLowering::isZero(SDValue Op) const {
780 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
781 return Cst->isZero();
782 if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
783 return CstFP->isZero();
784 return false;
785}
786
787bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
788 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
789 return CFP->isOne();
790 }
791 return isAllOnesConstant(Op);
792}
793
794bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
795 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
796 return CFP->getValueAPF().isZero();
797 }
798 return isNullConstant(Op);
799}
800
/// Custom lowering for ISD::SELECT_CC (operands: LHS, RHS, True, False, CC).
///
/// The strategies below are tried in order and the first that applies wins:
///  1. For an f32 result, the legacy fmin/fmax combine.
///  2. A form whose True/False operands are the hardware true/false
///     constants (matched by SET*), swapping True/False and inverting or
///     swapping the condition when that yields a legal condition code.
///  3. A compare-against-zero form (matched by CND*), moving a zero LHS to the
///     RHS and bitcasting True/False to the compare type when they differ.
///  4. Otherwise a SELECT_CC producing a hardware true/false value, fed into a
///     second SELECT_CC that tests that value against the hardware false
///     constant.
801SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
802 SDLoc DL(Op);
803 EVT VT = Op.getValueType();
804
805 SDValue LHS = Op.getOperand(0);
806 SDValue RHS = Op.getOperand(1);
807 SDValue True = Op.getOperand(2);
808 SDValue False = Op.getOperand(3);
809 SDValue CC = Op.getOperand(4);
810 SDValue Temp;
811
812 if (VT == MVT::f32) {
813 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
814 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
815 if (MinMax)
816 return MinMax;
817 }
818
819 // LHS and RHS are guaranteed to be the same value type
820 EVT CompareVT = LHS.getValueType();
821
822 // Check if we can lower this to a native operation.
823
824 // Try to lower to a SET* instruction:
825 //
826 // SET* can match the following patterns:
827 //
828 // select_cc f32, f32, -1, 0, cc_supported
829 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
830 // select_cc i32, i32, -1, 0, cc_supported
831 //
832
833 // Move hardware True/False values to the correct operand.
834 if (isHWTrueValue(False) && isHWFalseValue(True)) {
835 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
836 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
837 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
838 std::swap(False, True);
839 CC = DAG.getCondCode(InverseCC);
840 } else {
 // The inverse alone is not legal; try it with the compare operands swapped.
841 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
842 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
843 std::swap(False, True);
844 std::swap(LHS, RHS);
845 CC = DAG.getCondCode(SwapInvCC);
846 }
847 }
848 }
849
850 if (isHWTrueValue(True) && isHWFalseValue(False) &&
851 (CompareVT == VT || VT == MVT::i32)) {
852 // This can be matched by a SET* instruction.
853 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
854 }
855
856 // Try to lower to a CND* instruction:
857 //
858 // CND* can match the following patterns:
859 //
860 // select_cc f32, 0.0, f32, f32, cc_supported
861 // select_cc f32, 0.0, i32, i32, cc_supported
862 // select_cc i32, 0, f32, f32, cc_supported
863 // select_cc i32, 0, i32, i32, cc_supported
864 //
865
866 // Try to move the zero value to the RHS
867 if (isZero(LHS)) {
868 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
869 // Try swapping the operands
870 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
871 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
872 std::swap(LHS, RHS);
873 CC = DAG.getCondCode(CCSwapped);
874 } else {
875 // Try inverting the condition and then swapping the operands
876 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
877 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
878 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
879 std::swap(True, False);
880 std::swap(LHS, RHS);
881 CC = DAG.getCondCode(CCSwapped);
882 }
883 }
884 }
885 if (isZero(RHS)) {
886 SDValue Cond = LHS;
887 SDValue Zero = RHS;
888 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
889 if (CompareVT != VT) {
890 // Bitcast True / False to the correct types. This will end up being
891 // a nop, but it allows us to define only a single pattern in the
892 // .TD files for each CND* instruction rather than having to have
893 // one pattern for integer True/False and one for fp True/False
894 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
895 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
896 }
897
 // Not-equal style conditions are replaced by their inverse, with the
 // True/False operands exchanged to compensate.
898 switch (CCOpcode) {
899 case ISD::SETONE:
900 case ISD::SETUNE:
901 case ISD::SETNE:
902 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
903 Temp = True;
904 True = False;
905 False = Temp;
906 break;
907 default:
908 break;
909 }
910 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
911 Cond, Zero,
912 True, False,
913 DAG.getCondCode(CCOpcode));
 // The select was built in the compare type; convert back to the result type.
914 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
915 }
916
917 // If we make it this far it means we have no native instructions to handle
918 // this SELECT_CC, so we must lower it.
919 SDValue HWTrue, HWFalse;
920
921 if (CompareVT == MVT::f32) {
922 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
923 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
924 } else if (CompareVT == MVT::i32) {
925 HWTrue = DAG.getAllOnesConstant(DL, CompareVT);
926 HWFalse = DAG.getConstant(0, DL, CompareVT);
927 }
928 else {
929 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
930 }
931
932 // Lower this unsupported SELECT_CC into a combination of two supported
933 // SELECT_CC operations.
934 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
935
936 return DAG.getNode(ISD::SELECT_CC, DL, VT,
937 Cond, HWFalse,
938 True, False,
940}
941
942SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
943 SelectionDAG &DAG) const {
944 SDLoc SL(Op);
945 EVT VT = Op.getValueType();
946
947 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
948 unsigned SrcAS = ASC->getSrcAddressSpace();
949 unsigned DestAS = ASC->getDestAddressSpace();
950
951 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
952 return DAG.getSignedConstant(AMDGPU::getNullPointerValue(DestAS), SL, VT);
953
954 return Op;
955}
956
957/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
958/// convert these pointers to a register index. Each register holds
959/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
960/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
961/// for indirect addressing.
962SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
963 unsigned StackWidth,
964 SelectionDAG &DAG) const {
965 unsigned SRLPad;
966 switch(StackWidth) {
967 case 1:
968 SRLPad = 2;
969 break;
970 case 2:
971 SRLPad = 3;
972 break;
973 case 4:
974 SRLPad = 4;
975 break;
976 default: llvm_unreachable("Invalid stack width");
977 }
978
979 SDLoc DL(Ptr);
980 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
981 DAG.getConstant(SRLPad, DL, MVT::i32));
982}
983
984void R600TargetLowering::getStackAddress(unsigned StackWidth,
985 unsigned ElemIdx,
986 unsigned &Channel,
987 unsigned &PtrIncr) const {
988 switch (StackWidth) {
989 default:
990 case 1:
991 Channel = 0;
992 if (ElemIdx > 0) {
993 PtrIncr = 1;
994 } else {
995 PtrIncr = 0;
996 }
997 break;
998 case 2:
999 Channel = ElemIdx % 2;
1000 if (ElemIdx == 2) {
1001 PtrIncr = 1;
1002 } else {
1003 PtrIncr = 0;
1004 }
1005 break;
1006 case 4:
1007 Channel = ElemIdx;
1008 PtrIncr = 0;
1009 break;
1010 }
1011}
1012
/// Lower a sub-dword (i8 or i16) store to the private address space into a
/// read-modify-write of the dword that contains it: load the dword, clear the
/// destination bits, OR in the shifted value, and store the dword back.
///
/// If the incoming chain is an AMDGPUISD::DUMMY_CHAIN node, the dummy is
/// skipped for the load and every user of the old chain is redirected to a new
/// DUMMY_CHAIN that wraps the new store.
///
/// \returns the new dword store node.
1013SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1014 SelectionDAG &DAG) const {
1015 SDLoc DL(Store);
1016 //TODO: Who creates the i8 stores?
1017 assert(Store->isTruncatingStore()
1018 || Store->getValue().getValueType() == MVT::i8);
1019 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1020
 // Mask covering the stored bits before they are shifted into position.
1021 SDValue Mask;
1022 if (Store->getMemoryVT() == MVT::i8) {
1023 assert(Store->getAlign() >= 1);
1024 Mask = DAG.getConstant(0xff, DL, MVT::i32);
1025 } else if (Store->getMemoryVT() == MVT::i16) {
1026 assert(Store->getAlign() >= 2);
1027 Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1028 } else {
1029 llvm_unreachable("Unsupported private trunc store");
1030 }
1031
1032 SDValue OldChain = Store->getChain();
 // A DUMMY_CHAIN on the incoming chain marks this store as VectorTrunc.
1033 bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1034 // Skip dummy
1035 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1036 SDValue BasePtr = Store->getBasePtr();
1037 SDValue Offset = Store->getOffset();
1038 EVT MemVT = Store->getMemoryVT();
1039
 // Effective address: base pointer plus the offset operand when it is not
 // undef.
1040 SDValue LoadPtr = BasePtr;
1041 if (!Offset.isUndef()) {
1042 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1043 }
1044
1045 // Get dword location
1046 // TODO: this should be eliminated by the future SHR ptr, 2
 // Clearing the two low address bits yields the dword-aligned address.
1047 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1048 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1049
1050 // Load dword
1051 // TODO: can we be smarter about machine pointer info?
1052 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1053 SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1054
 // Continue on the load's output chain so the store below is ordered after it.
1055 Chain = Dst.getValue(1);
1056
1057 // Get offset in dword
1058 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1059 DAG.getConstant(0x3, DL, MVT::i32));
1060
1061 // Convert byte offset to bit shift
 // (shift left by 3, i.e. byte index * 8)
1062 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1063 DAG.getConstant(3, DL, MVT::i32));
1064
1065 // TODO: Contrary to the name of the function,
1066 // it also handles sub i32 non-truncating stores (like i1)
1067 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1068 Store->getValue());
1069
1070 // Mask the value to the right type
1071 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1072
1073 // Shift the value in place
1074 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1075 MaskedValue, ShiftAmt);
1076
1077 // Shift the mask in place
1078 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1079
1080 // Invert the mask. NOTE: if we had native ROL instructions we could
1081 // use inverted mask
1082 DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1083
1084 // Cleanup the target bits
1085 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1086
1087 // Add the new bits
1088 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1089
1090 // Store dword
1091 // TODO: Can we be smarter about MachinePointerInfo?
1092 SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1093
1094 // If we are part of expanded vector, make our neighbors depend on this store
1095 if (VectorTrunc) {
1096 // Make all other vector elements depend on this store
1097 Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1098 DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1099 }
1100 return NewStore;
1101}
1102
/// Custom lowering for store nodes. The visible paths are:
///  - vector stores matching the first condition are scalarized (truncating
///    private ones get an extra DUMMY_CHAIN first);
///  - under-aligned stores that allowsMisalignedMemoryAccesses rejects are
///    expanded with expandUnalignedStore;
///  - truncating global stores become AMDGPUISD::STORE_MSKOR;
///  - global stores of at least 32 bits and private stores of at least 32 bits
///    get their pointer shifted right by 2 and wrapped in AMDGPUISD::DWORDADDR;
///  - private stores narrower than i32 go to lowerPrivateTruncStore.
/// An empty SDValue is returned when nothing is changed here.
1103SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1104 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1105 unsigned AS = StoreNode->getAddressSpace();
1106
1107 SDValue Chain = StoreNode->getChain();
1108 SDValue Ptr = StoreNode->getBasePtr();
1109 SDValue Value = StoreNode->getValue();
1110
1111 EVT VT = Value.getValueType();
1112 EVT MemVT = StoreNode->getMemoryVT();
1113 EVT PtrVT = Ptr.getValueType();
1114
1115 SDLoc DL(Op);
1116
1117 const bool TruncatingStore = StoreNode->isTruncatingStore();
1118
1119 // Neither LOCAL nor PRIVATE can do vectors at the moment
1121 TruncatingStore) &&
1122 VT.isVector()) {
1123 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1124 // Add an extra level of chain to isolate this vector
1125 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1126 SmallVector<SDValue, 4> NewOps(StoreNode->ops());
1127 NewOps[0] = NewChain;
1128 StoreNode = cast<StoreSDNode>(DAG.UpdateNodeOperands(StoreNode, NewOps));
1129 }
1130
1131 return scalarizeVectorStore(StoreNode, DAG);
1132 }
1133
 // Expand stores whose alignment is below the store size unless the target
 // hook accepts the misaligned access.
1134 Align Alignment = StoreNode->getAlign();
1135 if (Alignment < MemVT.getStoreSize() &&
1136 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1137 StoreNode->getMemOperand()->getFlags(),
1138 nullptr)) {
1139 return expandUnalignedStore(StoreNode, DAG);
1140 }
1141
 // Pointer shifted right by 2 (byte address -> dword address).
1142 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1143 DAG.getConstant(2, DL, PtrVT));
1144
1145 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1146 // It is beneficial to create MSKOR here instead of combiner to avoid
1147 // artificial dependencies introduced by RMW
1148 if (TruncatingStore) {
1149 assert(VT.bitsLE(MVT::i32));
1150 SDValue MaskConstant;
1151 if (MemVT == MVT::i8) {
1152 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1153 } else {
1154 assert(MemVT == MVT::i16);
1155 assert(StoreNode->getAlign() >= 2);
1156 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1157 }
1158
 // Byte index within the dword (low two pointer bits), then times 8 to get
 // the bit shift.
1159 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1160 DAG.getConstant(0x00000003, DL, PtrVT));
1161 SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1162 DAG.getConstant(3, DL, VT));
1163
1164 // Put the mask in correct place
1165 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1166
1167 // Put the value bits in correct place
1168 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1169 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1170
1171 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1172 // vector instead.
 // v4i32 input: shifted value in element 0, shifted mask in element 3,
 // zeros in between.
1173 SDValue Src[4] = {
1174 ShiftedValue,
1175 DAG.getConstant(0, DL, MVT::i32),
1176 DAG.getConstant(0, DL, MVT::i32),
1177 Mask
1178 };
1179 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1180 SDValue Args[3] = { Chain, Input, DWordAddr };
1181 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1182 Op->getVTList(), Args, MemVT,
1183 StoreNode->getMemOperand());
1184 }
1185 if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1186 // Convert pointer from byte address to dword address.
1187 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1188
1189 if (StoreNode->isIndexed()) {
1190 llvm_unreachable("Indexed stores not supported yet");
1191 } else {
1192 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1193 }
1194 return Chain;
1195 }
1196 }
1197
1198 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1199 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1200 return SDValue();
1201
1202 if (MemVT.bitsLT(MVT::i32))
1203 return lowerPrivateTruncStore(StoreNode, DAG);
1204
1205 // Standard i32+ store, tag it with DWORDADDR to note that the address
1206 // has been shifted
1207 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1208 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1209 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1210 }
1211
1212 // Tagged i32+ stores will be matched by patterns
1213 return SDValue();
1214}
1215
1216// Returns 512 + (kc_bank << 12) for a recognised address space (the visible
// returns cover kc_bank 0 through 15, i.e. 512 + 4096 * kc_bank), or -1 for
// any other address space.
1217static int
1219 switch (AddressSpace) {
1221 return 512;
1223 return 512 + 4096;
1225 return 512 + 4096 * 2;
1227 return 512 + 4096 * 3;
1229 return 512 + 4096 * 4;
1231 return 512 + 4096 * 5;
1233 return 512 + 4096 * 6;
1235 return 512 + 4096 * 7;
1237 return 512 + 4096 * 8;
1239 return 512 + 4096 * 9;
1241 return 512 + 4096 * 10;
1243 return 512 + 4096 * 11;
1245 return 512 + 4096 * 12;
1247 return 512 + 4096 * 13;
1249 return 512 + 4096 * 14;
1251 return 512 + 4096 * 15;
1252 default:
 // Not one of the handled address spaces.
1253 return -1;
1254 }
1255}
1256
1257SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1258 SelectionDAG &DAG) const {
1259 SDLoc DL(Op);
1260 LoadSDNode *Load = cast<LoadSDNode>(Op);
1261 ISD::LoadExtType ExtType = Load->getExtensionType();
1262 EVT MemVT = Load->getMemoryVT();
1263 assert(Load->getAlign() >= MemVT.getStoreSize());
1264
1265 SDValue BasePtr = Load->getBasePtr();
1266 SDValue Chain = Load->getChain();
1267 SDValue Offset = Load->getOffset();
1268
1269 SDValue LoadPtr = BasePtr;
1270 if (!Offset.isUndef()) {
1271 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1272 }
1273
1274 // Get dword location
1275 // NOTE: this should be eliminated by the future SHR ptr, 2
1276 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1277 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1278
1279 // Load dword
1280 // TODO: can we be smarter about machine pointer info?
1281 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1282 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1283
1284 // Get offset within the register.
1285 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1286 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1287
1288 // Bit offset of target byte (byteIdx * 8).
1289 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1290 DAG.getConstant(3, DL, MVT::i32));
1291
1292 // Shift to the right.
1293 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1294
1295 // Eliminate the upper bits by setting them to ...
1296 EVT MemEltVT = MemVT.getScalarType();
1297
1298 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1299 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1300 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1301 } else { // ... or zeros.
1302 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1303 }
1304
1305 SDValue Ops[] = {
1306 Ret,
1307 Read.getValue(1) // This should be our output chain
1308 };
1309
1310 return DAG.getMergeValues(Ops, DL);
1311}
1312
/// Custom lowering for load nodes. The visible paths are:
///  - extending private loads narrower than i32 go to lowerPrivateExtLoad;
///  - vector loads matching the second condition are scalarized;
///  - loads from an address space with a constant block (non-extending or
///    zero-extending) become constBufferLoad or a CONST_ADDRESS node;
///  - remaining SEXTLOADs are expanded to EXTLOAD + SIGN_EXTEND_INREG;
///  - private loads whose pointer is not yet DWORDADDR get the pointer shifted
///    right by 2 and tagged.
/// An empty SDValue is returned when nothing is changed here.
1313SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1314 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1315 unsigned AS = LoadNode->getAddressSpace();
1316 EVT MemVT = LoadNode->getMemoryVT();
1317 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1318
1319 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1320 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1321 return lowerPrivateExtLoad(Op, DAG);
1322 }
1323
1324 SDLoc DL(Op);
1325 EVT VT = Op.getValueType();
1326 SDValue Chain = LoadNode->getChain();
1327 SDValue Ptr = LoadNode->getBasePtr();
1328
 // Vector loads matching this condition are split into scalar loads.
1329 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1331 VT.isVector()) {
1332 SDValue Ops[2];
1333 std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1334 return DAG.getMergeValues(Ops, DL);
1335 }
1336
1337 // This is still used for explicit load from addrspace(8)
 // ConstantAddressBlock returns -1 for address spaces it does not handle.
1338 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1339 if (ConstantBlock > -1 &&
1340 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1341 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1343 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1344 isa<ConstantSDNode>(Ptr)) {
1345 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1346 }
1347 // TODO: Does this even work?
1348 // non-constant ptr can't be folded, keeps it as a v4f32 load
 // The pointer operand is the byte pointer shifted right by 4.
1349 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1350 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1351 DAG.getConstant(4, DL, MVT::i32)),
1352 DAG.getConstant(LoadNode->getAddressSpace() -
1354 DL, MVT::i32));
1355
 // Scalar results take element 0 of the v4i32 value.
1356 if (!VT.isVector()) {
1357 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1358 DAG.getConstant(0, DL, MVT::i32));
1359 }
1360
1361 SDValue MergedValues[2] = {
1362 Result,
1363 Chain
1364 };
1365 return DAG.getMergeValues(MergedValues, DL);
1366 }
1367
1368 // For most operations returning SDValue() will result in the node being
1369 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1370 // need to manually expand loads that may be legal in some address spaces and
1371 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1372 // compute shaders, since the data is sign extended when it is uploaded to the
1373 // buffer. However SEXT loads from other address spaces are not supported, so
1374 // we need to expand them here.
1375 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1376 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1377 SDValue NewLoad = DAG.getExtLoad(
1378 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1379 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1380 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1381 DAG.getValueType(MemVT));
1382
 // NOTE(review): the merged chain is the incoming Chain, not
 // NewLoad.getValue(1) as lowerPrivateExtLoad does for its dword load;
 // confirm that dropping the new load's output chain is intended.
1383 SDValue MergedValues[2] = { Res, Chain };
1384 return DAG.getMergeValues(MergedValues, DL);
1385 }
1386
1387 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1388 return SDValue();
1389 }
1390
1391 // DWORDADDR ISD marks already shifted address
1392 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1393 assert(VT == MVT::i32);
1394 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1395 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1396 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1397 }
1398 return SDValue();
1399}
1400
1401SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1402 SDValue Chain = Op.getOperand(0);
1403 SDValue Cond = Op.getOperand(1);
1404 SDValue Jump = Op.getOperand(2);
1405
1406 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1407 Chain, Jump, Cond);
1408}
1409
1410SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1411 SelectionDAG &DAG) const {
1412 MachineFunction &MF = DAG.getMachineFunction();
1413 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1414
1415 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1416
1417 unsigned FrameIndex = FIN->getIndex();
1418 Register IgnoredFrameReg;
1419 StackOffset Offset =
1420 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1421 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1422 SDLoc(Op), Op.getValueType());
1423}
1424
1426 bool IsVarArg) const {
 // Maps the calling convention CC to an assignment function. C, Fast and Cold
 // are treated as unreachable here, the case labels ahead of the CC_R600
 // return are not shown in this listing, and anything else is reported as a
 // fatal usage error. IsVarArg is not used in the visible lines.
1427 switch (CC) {
1430 case CallingConv::C:
1431 case CallingConv::Fast:
1432 case CallingConv::Cold:
1433 llvm_unreachable("kernels should not be handled here");
1441 return CC_R600;
1442 default:
1443 reportFatalUsageError("unsupported calling convention");
1444 }
1445}
1446
1447/// XXX Only kernel functions are supported, so we can assume for now that
1448/// every function is a kernel function, but in the future we should use
1449/// separate calling conventions for kernel and non-kernel functions.
///
/// For each incoming argument one value is appended to \p InVals: a
/// CopyFromReg of a live-in register for shader calling conventions, or a
/// load at the argument's location offset otherwise. The incoming \p Chain is
/// returned unchanged.
1451 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1452 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1453 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1455 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1456 *DAG.getContext());
1458
 // Shaders use the calling-convention assignment function; everything else
 // goes through analyzeFormalArgumentsCompute.
1459 if (AMDGPU::isShader(CallConv)) {
1460 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1461 } else {
1462 analyzeFormalArgumentsCompute(CCInfo, Ins);
1463 }
1464
1465 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1466 CCValAssign &VA = ArgLocs[i];
1467 const ISD::InputArg &In = Ins[i];
1468 EVT VT = In.VT;
1469 EVT MemVT = VA.getLocVT();
1470 if (!VT.isVector() && MemVT.isVector()) {
1471 // Get load source type if scalarized.
1472 MemVT = MemVT.getVectorElementType();
1473 }
1474
 // Keep the memory type integer whenever the argument type is integer.
1475 if (VT.isInteger() && !MemVT.isInteger())
1476 MemVT = MemVT.changeTypeToInteger();
1477
1478 if (AMDGPU::isShader(CallConv)) {
1479 Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1480 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1481 InVals.push_back(Register);
1482 continue;
1483 }
1484
1485 // i64 isn't a legal type, so the register type used ends up as i32, which
1486 // isn't expected here. It attempts to create this sextload, but it ends up
1487 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1488 // for <1 x i64>.
1489
1490 // The first 36 bytes of the input buffer contains information about
1491 // thread group and global sizes.
 // Ext is changed only when the memory and value scalar sizes differ:
 // EXTLOAD for floating point, SEXTLOAD otherwise.
1493 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1494 if (VT.isFloatingPoint()) {
1495 Ext = ISD::EXTLOAD;
1496 } else {
1497 // FIXME: This should really check the extload type, but the handling of
1498 // extload vector parameters seems to be broken.
1499
1500 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1501 Ext = ISD::SEXTLOAD;
1502 }
1503 }
1504
1505 // Compute the offset from the value.
1506 // XXX - I think PartOffset should give you this, but it seems to give the
1507 // size of the register which isn't useful.
1508
1509 unsigned PartOffset = VA.getLocMemOffset();
1510 Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);
1511
 // Unindexed load addressed by the constant PartOffset, with an undef offset
 // operand.
1513 SDValue Arg = DAG.getLoad(
1514 ISD::UNINDEXED, Ext, VT, DL, Chain,
1515 DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1516 PtrInfo,
1517 MemVT, Alignment, MachineMemOperand::MONonTemporal |
1520
1521 InVals.push_back(Arg);
1522 }
1523 return Chain;
1524}
1525
1527 EVT VT) const {
 // Non-vector types yield MVT::i32; the return for vector types is not shown
 // in this listing.
1528 if (!VT.isVector())
1529 return MVT::i32;
1531}
1532
1534 const MachineFunction &MF) const {
1535 // Local and Private addresses do not handle vectors. Limit to i32
 // Inside the guarded branch only memory types of at most 32 bits are
 // accepted; everything outside it is accepted.
1537 return (MemVT.getSizeInBits() <= 32);
1538 }
1539 return true;
1540}
1541
1543 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1544 unsigned *IsFast) const {
 // Returns true only for simple types wider than i32 whose alignment is at
 // least 4. IsFast may be null; when it is not, it is reset to 0 first.
 // AddrSpace and Flags are not used in the visible lines.
1545 if (IsFast)
1546 *IsFast = 0;
1547
1548 if (!VT.isSimple() || VT == MVT::Other)
1549 return false;
1550
1551 if (VT.bitsLT(MVT::i32))
1552 return false;
1553
1554 // TODO: This is a rough estimate.
 // NOTE(review): *IsFast is set to 1 before the final check, so it can be 1
 // while the function returns false; confirm callers ignore it in that case.
1555 if (IsFast)
1556 *IsFast = 1;
1557
1558 return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1559}
1560
1562 SelectionDAG &DAG, SDValue VectorEntry,
1563 DenseMap<unsigned, unsigned> &RemapSwizzle) {
 // Rebuilds the four-element vector VectorEntry, replacing elements that a
 // swizzle selector can express with undef and recording the selector in
 // RemapSwizzle (which must be empty on entry): 7 for undef elements, 4 for FP
 // constants where isZero() holds, 5 where isOne() holds, and the index of an
 // earlier identical element for duplicates. Returns the rebuilt build vector.
1564 assert(RemapSwizzle.empty());
1565
1566 SDLoc DL(VectorEntry);
1567 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1568
 // Extract each of the four elements individually.
1569 SDValue NewBldVec[4];
1570 for (unsigned i = 0; i < 4; i++)
1571 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1572 DAG.getIntPtrConstant(i, DL));
1573
1574 for (unsigned i = 0; i < 4; i++) {
1575 if (NewBldVec[i].isUndef())
1576 // We mask write here to teach later passes that the ith element of this
1577 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1578 // break false dependencies and additionally make assembly easier to read.
1579 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1580 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1581 if (C->isZero()) {
1582 RemapSwizzle[i] = 4; // SEL_0
1583 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1584 } else if (C->isOne()) {
1585 RemapSwizzle[i] = 5; // SEL_1
1586 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1587 }
1588 }
1589
1590 if (NewBldVec[i].isUndef())
1591 continue;
1592
 // An element identical to an earlier one is dropped and remapped to the
 // first matching earlier index.
1593 for (unsigned j = 0; j < i; j++) {
1594 if (NewBldVec[i] == NewBldVec[j]) {
1595 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1596 RemapSwizzle[i] = j;
1597 break;
1598 }
1599 }
1600 }
1601
1602 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1603 NewBldVec);
1604}
1605
1607 DenseMap<unsigned, unsigned> &RemapSwizzle) {
 // Rebuilds the four-element vector VectorEntry after at most one swap that
 // moves an EXTRACT_VECTOR_ELT element to the position named by its index
 // operand. RemapSwizzle (which must be empty on entry) starts as the identity
 // over 0..3 and has the same two entries swapped. Returns the rebuilt vector.
1608 assert(RemapSwizzle.empty());
1609
1610 SDLoc DL(VectorEntry);
1611 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1612
1613 SDValue NewBldVec[4];
1614 bool isUnmovable[4] = {false, false, false, false};
1615 for (unsigned i = 0; i < 4; i++)
1616 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1617 DAG.getIntPtrConstant(i, DL));
1618
 // Identity mapping; an extract that already sits at its own index pins that
 // position.
1619 for (unsigned i = 0; i < 4; i++) {
1620 RemapSwizzle[i] = i;
1621 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1622 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1623 if (i == Idx)
1624 isUnmovable[Idx] = true;
1625 }
1626 }
1627
 // NOTE(review): Idx indexes the four-entry arrays without a range check;
 // presumably the extract index is always below 4 here, confirm.
1628 for (unsigned i = 0; i < 4; i++) {
1629 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1630 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1631 if (isUnmovable[Idx])
1632 continue;
1633 // Swap i and Idx
1634 std::swap(NewBldVec[Idx], NewBldVec[i]);
1635 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
 // Only the first eligible swap is performed.
1636 break;
1637 }
1638 }
1639
1640 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1641 NewBldVec);
1642}
1643
1644SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1645 SelectionDAG &DAG,
1646 const SDLoc &DL) const {
1647 // Old -> New swizzle values
1648 DenseMap<unsigned, unsigned> SwizzleRemap;
1649
1650 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1651 for (unsigned i = 0; i < 4; i++) {
1652 unsigned Idx = Swz[i]->getAsZExtVal();
1653 auto It = SwizzleRemap.find(Idx);
1654 if (It != SwizzleRemap.end())
1655 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1656 }
1657
1658 SwizzleRemap.clear();
1659 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1660 for (unsigned i = 0; i < 4; i++) {
1661 unsigned Idx = Swz[i]->getAsZExtVal();
1662 auto It = SwizzleRemap.find(Idx);
1663 if (It != SwizzleRemap.end())
1664 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1665 }
1666
1667 return BuildVector;
1668}
1669
/// Replace a load from a constant buffer with a build vector of
/// AMDGPUISD::CONST_ADDRESS nodes, one per 4-byte slot.
///
/// Only non-extending loads whose scalar memory type is i32 and whose
/// alignment is at least 4 are handled; otherwise an empty SDValue is
/// returned. \p Block is passed to ConstantAddressBlock to get the base.
/// \returns merged values: the loaded value (element 0 for scalar loads) and
///          the load's incoming chain.
1670SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1671 SelectionDAG &DAG) const {
1672 SDLoc DL(LoadNode);
1673 EVT VT = LoadNode->getValueType(0);
1674 SDValue Chain = LoadNode->getChain();
1675 SDValue Ptr = LoadNode->getBasePtr();
1677
1678 //TODO: Support smaller loads
1679 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1680 return SDValue();
1681
1682 if (LoadNode->getAlign() < Align(4))
1683 return SDValue();
1684
1685 int ConstantBlock = ConstantAddressBlock(Block);
1686
1687 SDValue Slots[4];
1688 for (unsigned i = 0; i < 4; i++) {
1689 // We want Const position encoded with the following formula :
1690 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1691 // const_index is Ptr computed by llvm using an alignment of 16.
1692 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1693 // then div by 4 at the ISel step
1694 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1695 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1696 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1697 }
 // Scalar loads build a full v4i32; vector loads use their own type and
 // element count.
 // NOTE(review): Slots has four entries; presumably VT never has more than
 // four elements here, confirm.
1698 EVT NewVT = MVT::v4i32;
1699 unsigned NumElements = 4;
1700 if (VT.isVector()) {
1701 NewVT = VT;
1702 NumElements = VT.getVectorNumElements();
1703 }
1704 SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
1705 if (!VT.isVector()) {
1706 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1707 DAG.getConstant(0, DL, MVT::i32));
1708 }
1709 SDValue MergedValues[2] = {
1710 Result,
1711 Chain
1712 };
1713 return DAG.getMergeValues(MergedValues, DL);
1714}
1715
1716//===----------------------------------------------------------------------===//
1717// Custom DAG Optimizations
1718//===----------------------------------------------------------------------===//
1719
1721 DAGCombinerInfo &DCI) const {
 // Target-specific combines, dispatched on the opcode of N. Several case
 // labels, the first statement of the SELECT_CC case and the final return are
 // not shown in this listing; the comments below describe only the visible
 // statements.
1722 SelectionDAG &DAG = DCI.DAG;
1723 SDLoc DL(N);
1724
1725 switch (N->getOpcode()) {
1726 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1727 case ISD::FP_ROUND: {
1728 SDValue Arg = N->getOperand(0);
1729 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1730 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1731 Arg.getOperand(0));
1732 }
1733 break;
1734 }
1735
1736 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1737 // (i32 select_cc f32, f32, -1, 0 cc)
1738 //
1739 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1740 // this to one of the SET*_DX10 instructions.
1741 case ISD::FP_TO_SINT: {
1742 SDValue FNeg = N->getOperand(0);
1743 if (FNeg.getOpcode() != ISD::FNEG) {
1744 return SDValue();
1745 }
1746 SDValue SelectCC = FNeg.getOperand(0);
1747 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1748 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1749 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1750 !isHWTrueValue(SelectCC.getOperand(2)) ||
1751 !isHWFalseValue(SelectCC.getOperand(3))) {
1752 return SDValue();
1753 }
1754
1755 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1756 SelectCC.getOperand(0), // LHS
1757 SelectCC.getOperand(1), // RHS
1758 DAG.getAllOnesConstant(DL, MVT::i32), // True
1759 DAG.getConstant(0, DL, MVT::i32), // False
1760 SelectCC.getOperand(4)); // CC
1761 }
1762
1763 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1764 // => build_vector elt0, ... , NewEltIdx, ... , eltN
1766 SDValue InVec = N->getOperand(0);
1767 SDValue InVal = N->getOperand(1);
1768 SDValue EltNo = N->getOperand(2);
1769
1770 // If the inserted element is an UNDEF, just use the input vector.
1771 if (InVal.isUndef())
1772 return InVec;
1773
1774 EVT VT = InVec.getValueType();
1775
1776 // If we can't generate a legal BUILD_VECTOR, exit
1778 return SDValue();
1779
1780 // Check that we know which element is being inserted
1781 if (!isa<ConstantSDNode>(EltNo))
1782 return SDValue();
1783 unsigned Elt = EltNo->getAsZExtVal();
1784
1785 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1786 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1787 // vector elements.
1789 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1790 Ops.append(InVec.getNode()->op_begin(),
1791 InVec.getNode()->op_end());
1792 } else if (InVec.isUndef()) {
1793 unsigned NElts = VT.getVectorNumElements();
1794 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1795 } else {
1796 return SDValue();
1797 }
1798
1799 // Insert the element
 // An index at or beyond Ops.size() leaves the operand list unchanged.
1800 if (Elt < Ops.size()) {
1801 // All the operands of BUILD_VECTOR must have the same type;
1802 // we enforce that here.
1803 EVT OpVT = Ops[0].getValueType();
1804 if (InVal.getValueType() != OpVT)
1805 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1806 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1807 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1808 Ops[Elt] = InVal;
1809 }
1810
1811 // Return the new vector
1812 return DAG.getBuildVector(VT, DL, Ops);
1813 }
1814
1815 // Extract_vec (Build_vector) generated by custom lowering
1816 // also needs to be customly combined
1818 SDValue Arg = N->getOperand(0);
1819 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1820 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
 // NOTE(review): Element is used as an operand index without a range
 // check; presumably it is always in range here, confirm.
1821 unsigned Element = Const->getZExtValue();
1822 return Arg->getOperand(Element);
1823 }
1824 }
1825 if (Arg.getOpcode() == ISD::BITCAST &&
1829 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1830 unsigned Element = Const->getZExtValue();
1831 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1832 Arg->getOperand(0).getOperand(Element));
1833 }
1834 }
1835 break;
1836 }
1837
1838 case ISD::SELECT_CC: {
1839 // Try common optimizations
1841 return Ret;
1842
1843 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1844 // selectcc x, y, a, b, inv(cc)
1845 //
1846 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1847 // selectcc x, y, a, b, cc
1848 SDValue LHS = N->getOperand(0);
1849 if (LHS.getOpcode() != ISD::SELECT_CC) {
1850 return SDValue();
1851 }
1852
1853 SDValue RHS = N->getOperand(1);
1854 SDValue True = N->getOperand(2);
1855 SDValue False = N->getOperand(3);
1856 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1857
 // The inner select must use the same true/false values, and the outer
 // comparison must be against the false value.
1858 if (LHS.getOperand(2).getNode() != True.getNode() ||
1859 LHS.getOperand(3).getNode() != False.getNode() ||
1860 RHS.getNode() != False.getNode()) {
1861 return SDValue();
1862 }
1863
1864 switch (NCC) {
1865 default: return SDValue();
1866 case ISD::SETNE: return LHS;
1867 case ISD::SETEQ: {
1868 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1869 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
 // The inverted condition is used only before operation legalization or
 // when it is legal for the compared type.
1870 if (DCI.isBeforeLegalizeOps() ||
1871 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1872 return DAG.getSelectCC(DL,
1873 LHS.getOperand(0),
1874 LHS.getOperand(1),
1875 LHS.getOperand(2),
1876 LHS.getOperand(3),
1877 LHSCC);
1878 break;
1879 }
1880 }
1881 return SDValue();
1882 }
1883
1885 SDValue Arg = N->getOperand(1);
1886 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1887 break;
1888
 // Operand 1 is filled in below by OptimizeSwizzle, which also rewrites the
 // four swizzle operands starting at NewArgs[4].
1889 SDValue NewArgs[8] = {
1890 N->getOperand(0), // Chain
1891 SDValue(),
1892 N->getOperand(2), // ArrayBase
1893 N->getOperand(3), // Type
1894 N->getOperand(4), // SWZ_X
1895 N->getOperand(5), // SWZ_Y
1896 N->getOperand(6), // SWZ_Z
1897 N->getOperand(7) // SWZ_W
1898 };
1899 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1900 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1901 }
1903 SDValue Arg = N->getOperand(1);
1904 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1905 break;
1906
 // All 19 operands are copied; operand 1 is then replaced and the four
 // operands starting at NewArgs[2] are rewritten by OptimizeSwizzle.
1907 SDValue NewArgs[19] = {
1908 N->getOperand(0),
1909 N->getOperand(1),
1910 N->getOperand(2),
1911 N->getOperand(3),
1912 N->getOperand(4),
1913 N->getOperand(5),
1914 N->getOperand(6),
1915 N->getOperand(7),
1916 N->getOperand(8),
1917 N->getOperand(9),
1918 N->getOperand(10),
1919 N->getOperand(11),
1920 N->getOperand(12),
1921 N->getOperand(13),
1922 N->getOperand(14),
1923 N->getOperand(15),
1924 N->getOperand(16),
1925 N->getOperand(17),
1926 N->getOperand(18),
1927 };
1928 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1929 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1930 }
1931
1932 case ISD::LOAD: {
1933 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1934 SDValue Ptr = LoadNode->getBasePtr();
 // Matching PARAM_I_ADDRESS loads are read through constant buffer 0.
1935 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1937 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1938 break;
1939 }
1940
1941 default: break;
1942 }
1943
1945}
1946
1947bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1948 SDValue &Src, SDValue &Neg, SDValue &Abs,
1949 SDValue &Sel, SDValue &Imm,
1950 SelectionDAG &DAG) const {
1951 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1952 if (!Src.isMachineOpcode())
1953 return false;
1954
1955 switch (Src.getMachineOpcode()) {
1956 case R600::FNEG_R600:
1957 if (!Neg.getNode())
1958 return false;
1959 Src = Src.getOperand(0);
1960 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1961 return true;
1962 case R600::FABS_R600:
1963 if (!Abs.getNode())
1964 return false;
1965 Src = Src.getOperand(0);
1966 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1967 return true;
1968 case R600::CONST_COPY: {
1969 unsigned Opcode = ParentNode->getMachineOpcode();
1970 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1971
1972 if (!Sel.getNode())
1973 return false;
1974
1975 SDValue CstOffset = Src.getOperand(0);
1976 if (ParentNode->getValueType(0).isVector())
1977 return false;
1978
1979 // Gather constants values
1980 int SrcIndices[] = {
1981 TII->getOperandIdx(Opcode, R600::OpName::src0),
1982 TII->getOperandIdx(Opcode, R600::OpName::src1),
1983 TII->getOperandIdx(Opcode, R600::OpName::src2),
1984 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1985 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1986 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1987 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1988 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1989 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1990 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1991 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1992 };
1993 std::vector<unsigned> Consts;
1994 for (int OtherSrcIdx : SrcIndices) {
1995 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1996 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1997 continue;
1998 if (HasDst) {
1999 OtherSrcIdx--;
2000 OtherSelIdx--;
2001 }
2002 if (RegisterSDNode *Reg =
2003 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2004 if (Reg->getReg() == R600::ALU_CONST) {
2005 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2006 }
2007 }
2008 }
2009
2010 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2011 Consts.push_back(Cst->getZExtValue());
2012 if (!TII->fitsConstReadLimitations(Consts)) {
2013 return false;
2014 }
2015
2016 Sel = CstOffset;
2017 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2018 return true;
2019 }
2020 case R600::MOV_IMM_GLOBAL_ADDR:
2021 // Check if the Imm slot is used. Taken from below.
2022 if (Imm->getAsZExtVal())
2023 return false;
2024 Imm = Src.getOperand(0);
2025 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2026 return true;
2027 case R600::MOV_IMM_I32:
2028 case R600::MOV_IMM_F32: {
2029 unsigned ImmReg = R600::ALU_LITERAL_X;
2030 uint64_t ImmValue = 0;
2031
2032 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2033 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2034 float FloatValue = FPC->getValueAPF().convertToFloat();
2035 if (FloatValue == 0.0) {
2036 ImmReg = R600::ZERO;
2037 } else if (FloatValue == 0.5) {
2038 ImmReg = R600::HALF;
2039 } else if (FloatValue == 1.0) {
2040 ImmReg = R600::ONE;
2041 } else {
2042 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2043 }
2044 } else {
2045 uint64_t Value = Src.getConstantOperandVal(0);
2046 if (Value == 0) {
2047 ImmReg = R600::ZERO;
2048 } else if (Value == 1) {
2049 ImmReg = R600::ONE_INT;
2050 } else {
2051 ImmValue = Value;
2052 }
2053 }
2054
2055 // Check that we aren't already using an immediate.
2056 // XXX: It's possible for an instruction to have more than one
2057 // immediate operand, but this is not supported yet.
2058 if (ImmReg == R600::ALU_LITERAL_X) {
2059 if (!Imm.getNode())
2060 return false;
2061 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2062 if (C->getZExtValue())
2063 return false;
2064 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2065 }
2066 Src = DAG.getRegister(ImmReg, MVT::i32);
2067 return true;
2068 }
2069 default:
2070 return false;
2071 }
2072}
2073
2074/// Fold the instructions after selecting them
2075SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2076 SelectionDAG &DAG) const {
2077 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2078 if (!Node->isMachineOpcode())
2079 return Node;
2080
2081 unsigned Opcode = Node->getMachineOpcode();
2082 SDValue FakeOp;
2083
2084 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2085
2086 if (Opcode == R600::DOT_4) {
2087 int OperandIdx[] = {
2088 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2089 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2090 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2091 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2092 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2093 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2094 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2095 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2096 };
2097 int NegIdx[] = {
2098 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2099 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2100 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2101 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2102 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2103 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2104 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2105 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2106 };
2107 int AbsIdx[] = {
2108 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2109 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2110 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2111 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2112 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2113 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2114 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2115 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2116 };
2117 for (unsigned i = 0; i < 8; i++) {
2118 if (OperandIdx[i] < 0)
2119 return Node;
2120 SDValue &Src = Ops[OperandIdx[i] - 1];
2121 SDValue &Neg = Ops[NegIdx[i] - 1];
2122 SDValue &Abs = Ops[AbsIdx[i] - 1];
2123 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2124 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2125 if (HasDst)
2126 SelIdx--;
2127 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2128 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2129 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2130 }
2131 } else if (Opcode == R600::REG_SEQUENCE) {
2132 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2133 SDValue &Src = Ops[i];
2134 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2135 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2136 }
2137 } else {
2138 if (!TII->hasInstrModifiers(Opcode))
2139 return Node;
2140 int OperandIdx[] = {
2141 TII->getOperandIdx(Opcode, R600::OpName::src0),
2142 TII->getOperandIdx(Opcode, R600::OpName::src1),
2143 TII->getOperandIdx(Opcode, R600::OpName::src2)
2144 };
2145 int NegIdx[] = {
2146 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2147 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2148 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2149 };
2150 int AbsIdx[] = {
2151 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2152 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2153 -1
2154 };
2155 for (unsigned i = 0; i < 3; i++) {
2156 if (OperandIdx[i] < 0)
2157 return Node;
2158 SDValue &Src = Ops[OperandIdx[i] - 1];
2159 SDValue &Neg = Ops[NegIdx[i] - 1];
2160 SDValue FakeAbs;
2161 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2162 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2163 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2164 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2165 if (HasDst) {
2166 SelIdx--;
2167 ImmIdx--;
2168 }
2169 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2170 SDValue &Imm = Ops[ImmIdx];
2171 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2172 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2173 }
2174 }
2175
2176 return Node;
2177}
2178
2180R600TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const {
2181 switch (RMW->getOperation()) {
2192 // FIXME: Cayman at least appears to have instructions for this, but the
2193 // instruction definitions appear to be missing.
2195 case AtomicRMWInst::Xchg: {
2196 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2197 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2198 if (ValSize == 32 || ValSize == 64)
2201 }
2202 default:
2203 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2204 unsigned Size = IntTy->getBitWidth();
2205 if (Size == 32 || Size == 64)
2207 }
2208
2210 }
2211
2212 llvm_unreachable("covered atomicrmw op switch");
2213}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Interfaces for producing common pass manager configurations.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
static bool isUndef(const MachineInstr &MI)
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define MO_FLAG_NEG
Definition R600Defines.h:15
#define MO_FLAG_ABS
Definition R600Defines.h:16
#define MO_FLAG_MASK
Definition R600Defines.h:17
#define MO_FLAG_PUSH
Definition R600Defines.h:18
static bool isEOP(MachineBasicBlock::iterator I)
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
static int ConstantAddressBlock(unsigned AddressSpace)
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
R600 DAG Lowering interface definition.
Provides R600 specific target descriptions.
AMDGPU R600 specific subclass of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
const SmallVectorImpl< MachineOperand > & Cond
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
Value * RHS
Value * LHS
unsigned getStackWidth(const MachineFunction &MF) const
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
AMDGPUTargetLowering(const TargetMachine &TM, const TargetSubtargetInfo &STI, const AMDGPUSubtarget &AMDGPUSTI)
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:5988
APInt bitcastToAPInt() const
Definition APFloat.h:1436
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
an instruction that atomically reads a memory location, combines it with another value,...
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
int64_t getLocMemOffset() const
const APFloat & getValueAPF() const
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
uint64_t getZExtValue() const
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
bool empty() const
Definition DenseMap.h:173
iterator end()
Definition DenseMap.h:143
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:357
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Machine Value Type.
static auto integer_valuetypes()
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
unsigned getTargetFlags() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
const R600InstrInfo * getInstrInfo() const override
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVMContext * getContext() const
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Primary interface to the complete machine description for the target machine.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:827
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:861
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:796
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:804
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:819
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:934
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:795
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:967
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:841
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
int32_t getLDSNoRetOp(uint32_t Opcode)
constexpr float pif
Definition MathExtras.h:53
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
@ Custom
The result value requires a custom uniformity check.
Definition Uniformity.h:31
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:307
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:323
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition ValueTypes.h:315
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:331
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...