//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPU.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
#include "R600TargetMachine.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

using namespace llvm;

#include "R600GenCallingConv.inc"

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(Op, VT, MVT::i1, Promote);
      setLoadExtAction(Op, VT, MVT::i8, Custom);
      setLoadExtAction(Op, VT, MVT::i16, Custom);
    }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
                   MVT::v2i1, Expand);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32,
                   MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
                     Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT,
                     ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE,
                     ISD::SETUGT, ISD::SETULT, ISD::SETULE},
                    MVT::f32, Expand);

  setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},
                    MVT::i32, Expand);

  setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);

  setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
                     MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);

  setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
  setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
                     Custom);

  setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
                     Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT,
                     {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT,
                     {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, MVT::i32,
                     Custom);

  if (!Subtarget->hasFMA())
    setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);

  // FIXME: May need no denormals check
  setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI())
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs)
    setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,
                       Expand);

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other,
                     Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT,
                       ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT});
}

static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}
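
// Note: isEOP() is how the custom inserter below decides whether an export or
// RAT write should set its "end of program" bit -- it does so exactly when the
// next instruction in the block is a RETURN.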

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      //        LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
        NewMI.add(MO);
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case R600::MASK_WRITE: {
    Register maskedRegister = MI.getOperand(0).getReg();
    assert(maskedRegister.isVirtual());
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    // TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    // TODO: Ugh this is rather ugly
    const MachineOperand &MO = MI.getOperand(1);
    MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
                                    MO.getTargetFlags());
    break;
  }

  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
                                     EndBlock = BB->end();
         NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType =
            NextExportInst->getOperand(1).getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::ADDRSPACECAST:
    return lowerADDRSPACECAST(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
    case Intrinsic::amdgcn_workgroup_id_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
    case Intrinsic::amdgcn_workgroup_id_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
    case Intrinsic::amdgcn_workgroup_id_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
    case Intrinsic::amdgcn_workitem_id_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
    case Intrinsic::amdgcn_workitem_id_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
    case Intrinsic::amdgcn_workitem_id_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    [[fallthrough]];
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
                               DAG.getVectorIdxConstant(i, DL)));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(numbers::pif, DL, MVT::f32));
}
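
// Illustration of the range reduction above: the constant 0.15915494309 is
// 1 / (2 * pi), so for Arg == pi the FRACT input is pi * (1 / (2 * pi)) + 0.5
// == 1.0, FRACT yields 0.0, and the hardware op sees -0.5, i.e. minus half a
// revolution -- consistent with sin(-pi) == sin(pi) == 0.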

SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Lo, Hi;
  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
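
// The CARRY/BORROW node computes the 1-bit unsigned overflow flag; the
// SIGN_EXTEND_INREG from i1 above widens it to the all-ones/all-zeros mask
// required by setBooleanContents(ZeroOrNegativeOneBooleanContent).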

SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType =
      PointerType::get(*DAG.getContext(), AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}
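
// The DwordOffset values used by the r600_read_* intrinsics in LowerOperation
// index into the 36-byte kernel input header: dwords 0-2 hold ngroups.{x,y,z},
// dwords 3-5 global_size.{x,y,z}, and dwords 6-8 local_size.{x,y,z}.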

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isZero();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}
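
// isHWTrueValue/isHWFalseValue recognize the canonical hardware booleans
// (1.0f or all-ones for true, 0.0f or 0 for false) that the SET* and CND*
// patterns matched by LowerSELECT_CC below rely on.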

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc SL(Op);
  EVT VT = Op.getValueType();

  const R600TargetMachine &TM =
      static_cast<const R600TargetMachine &>(getTargetMachine());

  const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
  unsigned SrcAS = ASC->getSrcAddressSpace();
  unsigned DestAS = ASC->getDestAddressSpace();

  if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
    return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);

  return Op;
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
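
// For example, with StackWidth == 1 each stack row is a single 4-byte channel,
// so byte addresses are shifted right by 2; with StackWidth == 2 a row covers
// two channels (8 bytes, shift by 3); and with StackWidth == 4 a row is a full
// 16-byte register (shift by 4).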

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
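
// For example, with StackWidth == 2, ElemIdx == 1 lands in channel 1 of the
// current register (PtrIncr == 0), while ElemIdx == 2 wraps to channel 0 of
// the next register (PtrIncr == 1).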

SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  // TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore() ||
         Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlign() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlign() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip the dummy chain if present.
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
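
// Worked example of the read-modify-write above: an i8 store to byte address
// 5 loads the dword at address 4, computes ByteIdx == 1 and ShiftAmt == 8,
// clears bits 15:8 of the loaded dword with ~(0xff << 8), ORs in
// (Value & 0xff) << 8, and stores the merged dword back.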

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
          StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
          StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  Align Alignment = StoreNode->getAlign();
  if (Alignment < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
                                      StoreNode->getMemOperand()->getFlags(),
                                      nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlign() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlign() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    SDValue Ops[2];
    std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
    return DAG.getMergeValues(Ops, DL);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      // TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
                           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                                       DAG.getConstant(4, DL, MVT::i32)),
                           DAG.getConstant(LoadNode->getAddressSpace() -
                                               AMDGPUAS::CONSTANT_BUFFER_0,
                                           DL, MVT::i32));
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  Register IgnoredFrameReg;
  StackOffset Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
                         SDLoc(Op), Op.getValueType());
}

CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<ISD::InputArg, 8> LocalIns;

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned PartOffset = VA.getLocMemOffset();
    Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);

    MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const MachineFunction &MF) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *IsFast) const {
  if (IsFast)
    *IsFast = 0;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = 1;

  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
}

static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;

    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
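
// The remapped selects follow the R600 operand-swizzle encoding: 0-3 choose
// the X/Y/Z/W channels, 4 (SEL_0) and 5 (SEL_1) read the constants 0.0 and
// 1.0, and 7 (SEL_MASK_WRITE) masks the write of that channel entirely.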

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.contains(Idx))
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.contains(Idx))
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert(isa<ConstantSDNode>(Ptr));

  // TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 ||
      !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlign() < Align(4))
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}
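
// Example of the encoding above: a load from kc_bank 0 at byte offset 16 has
// const_index == 1, so its X channel becomes ((512 + 1) << 2) + 0 == 2052,
// which ISel later divides by 4 to recover the constant-register index.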

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

1723 DAGCombinerInfo &DCI) const {
1724 SelectionDAG &DAG = DCI.DAG;
1725 SDLoc DL(N);
1726
1727 switch (N->getOpcode()) {
1728 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1729 case ISD::FP_ROUND: {
1730 SDValue Arg = N->getOperand(0);
1731 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1732 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1733 Arg.getOperand(0));
1734 }
1735 break;
1736 }
1737
1738 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1739 // (i32 select_cc f32, f32, -1, 0 cc)
1740 //
1741 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1742 // this to one of the SET*_DX10 instructions.
1743 case ISD::FP_TO_SINT: {
1744 SDValue FNeg = N->getOperand(0);
1745 if (FNeg.getOpcode() != ISD::FNEG) {
1746 return SDValue();
1747 }
1748 SDValue SelectCC = FNeg.getOperand(0);
1749 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1750 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1751 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1752 !isHWTrueValue(SelectCC.getOperand(2)) ||
1753 !isHWFalseValue(SelectCC.getOperand(3))) {
1754 return SDValue();
1755 }
1756
1757 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1758 SelectCC.getOperand(0), // LHS
1759 SelectCC.getOperand(1), // RHS
1760 DAG.getConstant(-1, DL, MVT::i32), // True
1761 DAG.getConstant(0, DL, MVT::i32), // False
1762 SelectCC.getOperand(4)); // CC
1763 }
1764
1765 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1766 // => build_vector elt0, ... , NewEltIdx, ... , eltN
1768 SDValue InVec = N->getOperand(0);
1769 SDValue InVal = N->getOperand(1);
1770 SDValue EltNo = N->getOperand(2);
1771
1772 // If the inserted element is an UNDEF, just use the input vector.
1773 if (InVal.isUndef())
1774 return InVec;
1775
1776 EVT VT = InVec.getValueType();
1777
1778 // If we can't generate a legal BUILD_VECTOR, exit
1780 return SDValue();
1781
1782 // Check that we know which element is being inserted
1783 if (!isa<ConstantSDNode>(EltNo))
1784 return SDValue();
1785 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1786
1787 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1788 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1789 // vector elements.
1791 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1792 Ops.append(InVec.getNode()->op_begin(),
1793 InVec.getNode()->op_end());
1794 } else if (InVec.isUndef()) {
1795 unsigned NElts = VT.getVectorNumElements();
1796 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1797 } else {
1798 return SDValue();
1799 }
1800
1801 // Insert the element
1802 if (Elt < Ops.size()) {
1803 // All the operands of BUILD_VECTOR must have the same type;
1804 // we enforce that here.
1805 EVT OpVT = Ops[0].getValueType();
1806 if (InVal.getValueType() != OpVT)
1807 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1808 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1809 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1810 Ops[Elt] = InVal;
1811 }
1812
1813 // Return the new vector
1814 return DAG.getBuildVector(VT, DL, Ops);
1815 }
1816
1817 // Extract_vec (Build_vector) generated by custom lowering
1818 // also needs a custom combine here
1819 case ISD::EXTRACT_VECTOR_ELT: {
1820 SDValue Arg = N->getOperand(0);
1821 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1822 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1823 unsigned Element = Const->getZExtValue();
1824 return Arg->getOperand(Element);
1825 }
1826 }
1827 if (Arg.getOpcode() == ISD::BITCAST &&
1828 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1829 (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1830 Arg.getValueType().getVectorNumElements())) {
1831 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1832 unsigned Element = Const->getZExtValue();
1833 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1834 Arg->getOperand(0).getOperand(Element));
1835 }
1836 }
1837 break;
1838 }
1839
1840 case ISD::SELECT_CC: {
1841 // Try common optimizations
1842 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1843 return Ret;
1844
1845 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1846 // selectcc x, y, a, b, inv(cc)
1847 //
1848 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1849 // selectcc x, y, a, b, cc
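// Rationale: the inner select_cc yields 'b' when its condition is false,
// so comparing its result against 'b' with seteq tests the inverted inner
// condition, while setne tests the original one.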
1850 SDValue LHS = N->getOperand(0);
1851 if (LHS.getOpcode() != ISD::SELECT_CC) {
1852 return SDValue();
1853 }
1854
1855 SDValue RHS = N->getOperand(1);
1856 SDValue True = N->getOperand(2);
1857 SDValue False = N->getOperand(3);
1858 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1859
1860 if (LHS.getOperand(2).getNode() != True.getNode() ||
1861 LHS.getOperand(3).getNode() != False.getNode() ||
1862 RHS.getNode() != False.getNode()) {
1863 return SDValue();
1864 }
1865
1866 switch (NCC) {
1867 default: return SDValue();
1868 case ISD::SETNE: return LHS;
1869 case ISD::SETEQ: {
1870 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1871 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1872 if (DCI.isBeforeLegalizeOps() ||
1873 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1874 return DAG.getSelectCC(DL,
1875 LHS.getOperand(0),
1876 LHS.getOperand(1),
1877 LHS.getOperand(2),
1878 LHS.getOperand(3),
1879 LHSCC);
1880 break;
1881 }
1882 }
1883 return SDValue();
1884 }
1885
1886 case AMDGPUISD::R600_EXPORT: {
1887 SDValue Arg = N->getOperand(1);
1888 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1889 break;
1890
1891 SDValue NewArgs[8] = {
1892 N->getOperand(0), // Chain
1893 SDValue(),
1894 N->getOperand(2), // ArrayBase
1895 N->getOperand(3), // Type
1896 N->getOperand(4), // SWZ_X
1897 N->getOperand(5), // SWZ_Y
1898 N->getOperand(6), // SWZ_Z
1899 N->getOperand(7) // SWZ_W
1900 };
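// OptimizeSwizzle compacts the exported BUILD_VECTOR and rewrites the four
// SWZ_* selectors (NewArgs[4..7]) to match the new element order.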
1901 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1902 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1903 }
1904 case AMDGPUISD::TEXTURE_FETCH: {
1905 SDValue Arg = N->getOperand(1);
1906 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1907 break;
1908
1909 SDValue NewArgs[19] = {
1910 N->getOperand(0),
1911 N->getOperand(1),
1912 N->getOperand(2),
1913 N->getOperand(3),
1914 N->getOperand(4),
1915 N->getOperand(5),
1916 N->getOperand(6),
1917 N->getOperand(7),
1918 N->getOperand(8),
1919 N->getOperand(9),
1920 N->getOperand(10),
1921 N->getOperand(11),
1922 N->getOperand(12),
1923 N->getOperand(13),
1924 N->getOperand(14),
1925 N->getOperand(15),
1926 N->getOperand(16),
1927 N->getOperand(17),
1928 N->getOperand(18),
1929 };
1930 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1931 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1932 }
1933
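// Kernel parameters live in the PARAM_I_ADDRESS space; when the address is
// a compile-time constant, the load can be folded into a CONST_ADDRESS
// read from constant buffer 0.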
1934 case ISD::LOAD: {
1935 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1936 SDValue Ptr = LoadNode->getBasePtr();
1937 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1938 isa<ConstantSDNode>(Ptr))
1939 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1940 break;
1941 }
1942
1943 default: break;
1944 }
1945
1946 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1947}
1948
1949bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1950 SDValue &Src, SDValue &Neg, SDValue &Abs,
1951 SDValue &Sel, SDValue &Imm,
1952 SelectionDAG &DAG) const {
1953 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1954 if (!Src.isMachineOpcode())
1955 return false;
1956
1957 switch (Src.getMachineOpcode()) {
1958 case R600::FNEG_R600:
1959 if (!Neg.getNode())
1960 return false;
1961 Src = Src.getOperand(0);
1962 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1963 return true;
1964 case R600::FABS_R600:
1965 if (!Abs.getNode())
1966 return false;
1967 Src = Src.getOperand(0);
1968 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1969 return true;
1970 case R600::CONST_COPY: {
1971 unsigned Opcode = ParentNode->getMachineOpcode();
1972 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1973
1974 if (!Sel.getNode())
1975 return false;
1976
1977 SDValue CstOffset = Src.getOperand(0);
1978 if (ParentNode->getValueType(0).isVector())
1979 return false;
1980
1981 // Gather constant values
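// (an ALU clause can read only a limited number of distinct constants, so
// collect every constant the parent node already uses and let
// fitsConstReadLimitations() decide whether one more fits).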
1982 int SrcIndices[] = {
1983 TII->getOperandIdx(Opcode, R600::OpName::src0),
1984 TII->getOperandIdx(Opcode, R600::OpName::src1),
1985 TII->getOperandIdx(Opcode, R600::OpName::src2),
1986 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1987 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1988 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1989 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1990 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1991 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1992 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1993 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1994 };
1995 std::vector<unsigned> Consts;
1996 for (int OtherSrcIdx : SrcIndices) {
1997 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1998 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1999 continue;
2000 if (HasDst) {
2001 OtherSrcIdx--;
2002 OtherSelIdx--;
2003 }
2004 if (RegisterSDNode *Reg =
2005 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2006 if (Reg->getReg() == R600::ALU_CONST) {
2007 ConstantSDNode *Cst
2008 = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2009 Consts.push_back(Cst->getZExtValue());
2010 }
2011 }
2012 }
2013
2014 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2015 Consts.push_back(Cst->getZExtValue());
2016 if (!TII->fitsConstReadLimitations(Consts)) {
2017 return false;
2018 }
2019
2020 Sel = CstOffset;
2021 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2022 return true;
2023 }
2024 case R600::MOV_IMM_GLOBAL_ADDR:
2025 // Check if the Imm slot is already used (same check as the MOV_IMM_* cases below).
2026 if (cast<ConstantSDNode>(Imm)->getZExtValue())
2027 return false;
2028 Imm = Src.getOperand(0);
2029 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2030 return true;
2031 case R600::MOV_IMM_I32:
2032 case R600::MOV_IMM_F32: {
2033 unsigned ImmReg = R600::ALU_LITERAL_X;
2034 uint64_t ImmValue = 0;
2035
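// Common immediates have dedicated inline-constant registers (ZERO, HALF,
// ONE, ONE_INT); using them keeps the single ALU_LITERAL_X slot free for
// an arbitrary literal value.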
2036 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2037 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2038 float FloatValue = FPC->getValueAPF().convertToFloat();
2039 if (FloatValue == 0.0) {
2040 ImmReg = R600::ZERO;
2041 } else if (FloatValue == 0.5) {
2042 ImmReg = R600::HALF;
2043 } else if (FloatValue == 1.0) {
2044 ImmReg = R600::ONE;
2045 } else {
2046 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2047 }
2048 } else {
2049 ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2050 uint64_t Value = C->getZExtValue();
2051 if (Value == 0) {
2052 ImmReg = R600::ZERO;
2053 } else if (Value == 1) {
2054 ImmReg = R600::ONE_INT;
2055 } else {
2056 ImmValue = Value;
2057 }
2058 }
2059
2060 // Check that we aren't already using an immediate.
2061 // XXX: It's possible for an instruction to have more than one
2062 // immediate operand, but this is not supported yet.
2063 if (ImmReg == R600::ALU_LITERAL_X) {
2064 if (!Imm.getNode())
2065 return false;
2066 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2067 if (C->getZExtValue())
2068 return false;
2069 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2070 }
2071 Src = DAG.getRegister(ImmReg, MVT::i32);
2072 return true;
2073 }
2074 default:
2075 return false;
2076 }
2077}
2078
2079/// Fold the operands of instructions after they have been selected
2080SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2081 SelectionDAG &DAG) const {
2082 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2083 if (!Node->isMachineOpcode())
2084 return Node;
2085
2086 unsigned Opcode = Node->getMachineOpcode();
2087 SDValue FakeOp;
2088
2089 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2090
2091 if (Opcode == R600::DOT_4) {
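// DOT_4 reads two four-channel vectors; each of the eight per-channel
// sources carries its own neg/abs modifiers and constant-select operand,
// so attempt the fold on every channel independently.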
2092 int OperandIdx[] = {
2093 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2094 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2095 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2096 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2097 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2098 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2099 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2100 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2101 };
2102 int NegIdx[] = {
2103 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2104 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2105 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2106 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2107 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2108 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2109 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2110 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2111 };
2112 int AbsIdx[] = {
2113 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2114 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2115 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2116 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2117 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2118 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2119 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2120 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2121 };
2122 for (unsigned i = 0; i < 8; i++) {
2123 if (OperandIdx[i] < 0)
2124 return Node;
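// getOperandIdx() counts the MachineInstr's dst operand, which is a result
// rather than an operand on the SDNode, hence the -1 adjustment.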
2125 SDValue &Src = Ops[OperandIdx[i] - 1];
2126 SDValue &Neg = Ops[NegIdx[i] - 1];
2127 SDValue &Abs = Ops[AbsIdx[i] - 1];
2128 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2129 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2130 if (HasDst)
2131 SelIdx--;
2132 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2133 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2134 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2135 }
2136 } else if (Opcode == R600::REG_SEQUENCE) {
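// REG_SEQUENCE operands are (regclass-id, value, subreg-index, value,
// subreg-index, ...); only the value operands at odd indices are
// candidates for folding.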
2137 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2138 SDValue &Src = Ops[i];
2139 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2140 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2141 }
2142 } else {
2143 if (!TII->hasInstrModifiers(Opcode))
2144 return Node;
2145 int OperandIdx[] = {
2146 TII->getOperandIdx(Opcode, R600::OpName::src0),
2147 TII->getOperandIdx(Opcode, R600::OpName::src1),
2148 TII->getOperandIdx(Opcode, R600::OpName::src2)
2149 };
2150 int NegIdx[] = {
2151 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2152 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2153 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2154 };
2155 int AbsIdx[] = {
2156 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2157 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2158 -1
2159 };
2160 for (unsigned i = 0; i < 3; i++) {
2161 if (OperandIdx[i] < 0)
2162 return Node;
2163 SDValue &Src = Ops[OperandIdx[i] - 1];
2164 SDValue &Neg = Ops[NegIdx[i] - 1];
2165 SDValue FakeAbs;
2166 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2167 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2168 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2169 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2170 if (HasDst) {
2171 SelIdx--;
2172 ImmIdx--;
2173 }
2174 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2175 SDValue &Imm = Ops[ImmIdx];
2176 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2177 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2178 }
2179 }
2180
2181 return Node;
2182}
2183
2184 TargetLowering::AtomicExpansionKind
2185 R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2186 switch (RMW->getOperation()) {
2187 case AtomicRMWInst::UIncWrap:
2188 case AtomicRMWInst::UDecWrap:
2189 // FIXME: Cayman at least appears to have instructions for this, but the
2190 // instruction definitions appear to be missing.
2191 return AtomicExpansionKind::CmpXChg;
2192 default:
2193 break;
2194 }
2195
2196 return AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(RMW);
2197}