LLVM 20.0.0git
R600ISelLowering.cpp
Go to the documentation of this file.
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600ISelLowering.h"
15#include "AMDGPU.h"
17#include "R600Defines.h"
18#include "R600InstrInfo.h"
20#include "R600Subtarget.h"
21#include "R600TargetMachine.h"
23#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include "llvm/IR/IntrinsicsR600.h"
25
26using namespace llvm;
27
28#include "R600GenCallingConv.inc"
29
31 const R600Subtarget &STI)
32 : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
33 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
34 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
35 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
36 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
37 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
38 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
39
42
44
45 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
47
48 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
49 // spaces, so it is custom lowered to handle those where it isn't.
51 for (MVT VT : MVT::integer_valuetypes()) {
52 setLoadExtAction(Op, VT, MVT::i1, Promote);
53 setLoadExtAction(Op, VT, MVT::i8, Custom);
54 setLoadExtAction(Op, VT, MVT::i16, Custom);
55 }
56
57 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
59 MVT::v2i1, Expand);
60
62 MVT::v4i1, Expand);
63
64 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
65 Custom);
66
67 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
68 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
69 // We need to include these since trunc STORES to PRIVATE need
70 // special handling to accommodate RMW
71 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
72 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
73 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
74 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
75 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
76 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
77 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
78 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
79 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
80 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
81
82 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
83 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
84 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
85
86 // Set condition code actions
90 MVT::f32, Expand);
91
93 MVT::i32, Expand);
94
96
97 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
98
99 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
101
103
105 MVT::f64, Custom);
106
107 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
108
109 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
110 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
111 Custom);
112
113 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
114 Expand);
115
116 // ADD, SUB overflow.
117 // TODO: turn these into Legal?
118 if (Subtarget->hasCARRY())
120
121 if (Subtarget->hasBORROW())
123
124 // Expand sign extension of vectors
125 if (!Subtarget->hasBFE())
127
128 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
129
130 if (!Subtarget->hasBFE())
132 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
133
134 if (!Subtarget->hasBFE())
136 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
137
139 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
140
142
144
146 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
147
149 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
150
151 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
152 // to be Legal/Custom in order to avoid library calls.
154 Custom);
155
156 if (!Subtarget->hasFMA())
157 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
158
159 // FIXME: May need no denormals check
161
162 if (!Subtarget->hasBFI())
163 // fcopysign can be done in a single instruction with BFI.
164 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
165
166 if (!Subtarget->hasBCNT(32))
168
169 if (!Subtarget->hasBCNT(64))
171
172 if (Subtarget->hasFFBH())
174
175 if (Subtarget->hasFFBL())
177
178 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
179 // need it for R600.
180 if (Subtarget->hasBFE())
182
185
186 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
187 for (MVT VT : ScalarIntVTs)
189 Expand);
190
191 // LLVM will expand these to atomic_cmp_swap(0)
192 // and atomic_swap, respectively.
194
195 // We need to custom lower some of the intrinsics
197 Custom);
198
200
203}
204
206 if (std::next(I) == I->getParent()->end())
207 return false;
208 return std::next(I)->getOpcode() == R600::RETURN;
209}
210
213 MachineBasicBlock *BB) const {
214 MachineFunction *MF = BB->getParent();
217 const R600InstrInfo *TII = Subtarget->getInstrInfo();
218
219 switch (MI.getOpcode()) {
220 default:
221 // Replace LDS_*_RET instruction that don't have any uses with the
222 // equivalent LDS_*_NORET instruction.
223 if (TII->isLDSRetInstr(MI.getOpcode())) {
224 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
225 assert(DstIdx != -1);
227 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
228 // LDS_1A2D support and remove this special case.
229 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
230 MI.getOpcode() == R600::LDS_CMPST_RET)
231 return BB;
232
233 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
234 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
235 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
236 NewMI.add(MO);
237 } else {
239 }
240 break;
241
242 case R600::FABS_R600: {
243 MachineInstr *NewMI = TII->buildDefaultInstruction(
244 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
245 MI.getOperand(1).getReg());
246 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
247 break;
248 }
249
250 case R600::FNEG_R600: {
251 MachineInstr *NewMI = TII->buildDefaultInstruction(
252 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
253 MI.getOperand(1).getReg());
254 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
255 break;
256 }
257
258 case R600::MASK_WRITE: {
259 Register maskedRegister = MI.getOperand(0).getReg();
260 assert(maskedRegister.isVirtual());
261 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
262 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
263 break;
264 }
265
266 case R600::MOV_IMM_F32:
267 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
268 .getFPImm()
269 ->getValueAPF()
270 .bitcastToAPInt()
271 .getZExtValue());
272 break;
273
274 case R600::MOV_IMM_I32:
275 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
276 MI.getOperand(1).getImm());
277 break;
278
279 case R600::MOV_IMM_GLOBAL_ADDR: {
280 //TODO: Perhaps combine this instruction with the next if possible
281 auto MIB = TII->buildDefaultInstruction(
282 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
283 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
284 //TODO: Ugh this is rather ugly
285 const MachineOperand &MO = MI.getOperand(1);
286 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
287 MO.getTargetFlags());
288 break;
289 }
290
291 case R600::CONST_COPY: {
292 MachineInstr *NewMI = TII->buildDefaultInstruction(
293 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
294 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
295 MI.getOperand(1).getImm());
296 break;
297 }
298
299 case R600::RAT_WRITE_CACHELESS_32_eg:
300 case R600::RAT_WRITE_CACHELESS_64_eg:
301 case R600::RAT_WRITE_CACHELESS_128_eg:
302 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
303 .add(MI.getOperand(0))
304 .add(MI.getOperand(1))
305 .addImm(isEOP(I)); // Set End of program bit
306 break;
307
308 case R600::RAT_STORE_TYPED_eg:
309 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
310 .add(MI.getOperand(0))
311 .add(MI.getOperand(1))
312 .add(MI.getOperand(2))
313 .addImm(isEOP(I)); // Set End of program bit
314 break;
315
316 case R600::BRANCH:
317 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
318 .add(MI.getOperand(0));
319 break;
320
321 case R600::BRANCH_COND_f32: {
322 MachineInstr *NewMI =
323 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
324 R600::PREDICATE_BIT)
325 .add(MI.getOperand(1))
326 .addImm(R600::PRED_SETNE)
327 .addImm(0); // Flags
328 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
329 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
330 .add(MI.getOperand(0))
331 .addReg(R600::PREDICATE_BIT, RegState::Kill);
332 break;
333 }
334
335 case R600::BRANCH_COND_i32: {
336 MachineInstr *NewMI =
337 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
338 R600::PREDICATE_BIT)
339 .add(MI.getOperand(1))
340 .addImm(R600::PRED_SETNE_INT)
341 .addImm(0); // Flags
342 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
343 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
344 .add(MI.getOperand(0))
345 .addReg(R600::PREDICATE_BIT, RegState::Kill);
346 break;
347 }
348
349 case R600::EG_ExportSwz:
350 case R600::R600_ExportSwz: {
351 // Instruction is left unmodified if its not the last one of its type
352 bool isLastInstructionOfItsType = true;
353 unsigned InstExportType = MI.getOperand(1).getImm();
354 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
355 EndBlock = BB->end(); NextExportInst != EndBlock;
356 NextExportInst = std::next(NextExportInst)) {
357 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
358 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
359 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
360 .getImm();
361 if (CurrentInstExportType == InstExportType) {
362 isLastInstructionOfItsType = false;
363 break;
364 }
365 }
366 }
367 bool EOP = isEOP(I);
368 if (!EOP && !isLastInstructionOfItsType)
369 return BB;
370 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
371 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
372 .add(MI.getOperand(0))
373 .add(MI.getOperand(1))
374 .add(MI.getOperand(2))
375 .add(MI.getOperand(3))
376 .add(MI.getOperand(4))
377 .add(MI.getOperand(5))
378 .add(MI.getOperand(6))
379 .addImm(CfInst)
380 .addImm(EOP);
381 break;
382 }
383 case R600::RETURN: {
384 return BB;
385 }
386 }
387
388 MI.eraseFromParent();
389 return BB;
390}
391
392//===----------------------------------------------------------------------===//
393// Custom DAG Lowering Operations
394//===----------------------------------------------------------------------===//
395
399 switch (Op.getOpcode()) {
400 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
401 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
402 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
403 case ISD::SHL_PARTS:
404 case ISD::SRA_PARTS:
405 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
406 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
407 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
408 case ISD::FCOS:
409 case ISD::FSIN: return LowerTrig(Op, DAG);
410 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
411 case ISD::STORE: return LowerSTORE(Op, DAG);
412 case ISD::LOAD: {
413 SDValue Result = LowerLOAD(Op, DAG);
414 assert((!Result.getNode() ||
415 Result.getNode()->getNumValues() == 2) &&
416 "Load should return a value and a chain");
417 return Result;
418 }
419
420 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
421 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
422 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
424 return lowerADDRSPACECAST(Op, DAG);
425 case ISD::INTRINSIC_VOID: {
426 SDValue Chain = Op.getOperand(0);
427 unsigned IntrinsicID = Op.getConstantOperandVal(1);
428 switch (IntrinsicID) {
429 case Intrinsic::r600_store_swizzle: {
430 SDLoc DL(Op);
431 const SDValue Args[8] = {
432 Chain,
433 Op.getOperand(2), // Export Value
434 Op.getOperand(3), // ArrayBase
435 Op.getOperand(4), // Type
436 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
437 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
438 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
439 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
440 };
441 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
442 }
443
444 // default for switch(IntrinsicID)
445 default: break;
446 }
447 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
448 break;
449 }
451 unsigned IntrinsicID = Op.getConstantOperandVal(0);
452 EVT VT = Op.getValueType();
453 SDLoc DL(Op);
454 switch (IntrinsicID) {
455 case Intrinsic::r600_tex:
456 case Intrinsic::r600_texc: {
457 unsigned TextureOp;
458 switch (IntrinsicID) {
459 case Intrinsic::r600_tex:
460 TextureOp = 0;
461 break;
462 case Intrinsic::r600_texc:
463 TextureOp = 1;
464 break;
465 default:
466 llvm_unreachable("unhandled texture operation");
467 }
468
469 SDValue TexArgs[19] = {
470 DAG.getConstant(TextureOp, DL, MVT::i32),
471 Op.getOperand(1),
472 DAG.getConstant(0, DL, MVT::i32),
473 DAG.getConstant(1, DL, MVT::i32),
474 DAG.getConstant(2, DL, MVT::i32),
475 DAG.getConstant(3, DL, MVT::i32),
476 Op.getOperand(2),
477 Op.getOperand(3),
478 Op.getOperand(4),
479 DAG.getConstant(0, DL, MVT::i32),
480 DAG.getConstant(1, DL, MVT::i32),
481 DAG.getConstant(2, DL, MVT::i32),
482 DAG.getConstant(3, DL, MVT::i32),
483 Op.getOperand(5),
484 Op.getOperand(6),
485 Op.getOperand(7),
486 Op.getOperand(8),
487 Op.getOperand(9),
488 Op.getOperand(10)
489 };
490 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
491 }
492 case Intrinsic::r600_dot4: {
493 SDValue Args[8] = {
494 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
495 DAG.getConstant(0, DL, MVT::i32)),
496 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
497 DAG.getConstant(0, DL, MVT::i32)),
498 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
499 DAG.getConstant(1, DL, MVT::i32)),
500 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
501 DAG.getConstant(1, DL, MVT::i32)),
502 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
503 DAG.getConstant(2, DL, MVT::i32)),
504 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
505 DAG.getConstant(2, DL, MVT::i32)),
506 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
507 DAG.getConstant(3, DL, MVT::i32)),
508 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
509 DAG.getConstant(3, DL, MVT::i32))
510 };
511 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
512 }
513
514 case Intrinsic::r600_implicitarg_ptr: {
517 return DAG.getConstant(ByteOffset, DL, PtrVT);
518 }
519 case Intrinsic::r600_read_ngroups_x:
520 return LowerImplicitParameter(DAG, VT, DL, 0);
521 case Intrinsic::r600_read_ngroups_y:
522 return LowerImplicitParameter(DAG, VT, DL, 1);
523 case Intrinsic::r600_read_ngroups_z:
524 return LowerImplicitParameter(DAG, VT, DL, 2);
525 case Intrinsic::r600_read_global_size_x:
526 return LowerImplicitParameter(DAG, VT, DL, 3);
527 case Intrinsic::r600_read_global_size_y:
528 return LowerImplicitParameter(DAG, VT, DL, 4);
529 case Intrinsic::r600_read_global_size_z:
530 return LowerImplicitParameter(DAG, VT, DL, 5);
531 case Intrinsic::r600_read_local_size_x:
532 return LowerImplicitParameter(DAG, VT, DL, 6);
533 case Intrinsic::r600_read_local_size_y:
534 return LowerImplicitParameter(DAG, VT, DL, 7);
535 case Intrinsic::r600_read_local_size_z:
536 return LowerImplicitParameter(DAG, VT, DL, 8);
537
538 case Intrinsic::r600_read_tgid_x:
539 case Intrinsic::amdgcn_workgroup_id_x:
540 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
541 R600::T1_X, VT);
542 case Intrinsic::r600_read_tgid_y:
543 case Intrinsic::amdgcn_workgroup_id_y:
544 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
545 R600::T1_Y, VT);
546 case Intrinsic::r600_read_tgid_z:
547 case Intrinsic::amdgcn_workgroup_id_z:
548 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
549 R600::T1_Z, VT);
550 case Intrinsic::r600_read_tidig_x:
551 case Intrinsic::amdgcn_workitem_id_x:
552 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
553 R600::T0_X, VT);
554 case Intrinsic::r600_read_tidig_y:
555 case Intrinsic::amdgcn_workitem_id_y:
556 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
557 R600::T0_Y, VT);
558 case Intrinsic::r600_read_tidig_z:
559 case Intrinsic::amdgcn_workitem_id_z:
560 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
561 R600::T0_Z, VT);
562
563 case Intrinsic::r600_recipsqrt_ieee:
564 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
565
566 case Intrinsic::r600_recipsqrt_clamped:
567 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
568 default:
569 return Op;
570 }
571
572 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
573 break;
574 }
575 } // end switch(Op.getOpcode())
576 return SDValue();
577}
578
581 SelectionDAG &DAG) const {
582 switch (N->getOpcode()) {
583 default:
585 return;
586 case ISD::FP_TO_UINT:
587 if (N->getValueType(0) == MVT::i1) {
588 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
589 return;
590 }
591 // Since we don't care about out of bounds values we can use FP_TO_SINT for
592 // uints too. The DAGLegalizer code for uint considers some extra cases
593 // which are not necessary here.
594 [[fallthrough]];
595 case ISD::FP_TO_SINT: {
596 if (N->getValueType(0) == MVT::i1) {
597 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
598 return;
599 }
600
601 SDValue Result;
602 if (expandFP_TO_SINT(N, Result, DAG))
603 Results.push_back(Result);
604 return;
605 }
606 case ISD::SDIVREM: {
607 SDValue Op = SDValue(N, 1);
608 SDValue RES = LowerSDIVREM(Op, DAG);
609 Results.push_back(RES);
610 Results.push_back(RES.getValue(1));
611 break;
612 }
613 case ISD::UDIVREM: {
614 SDValue Op = SDValue(N, 0);
616 break;
617 }
618 }
619}
620
/// Rebuild \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR node of the same
/// vector type, feeding it one EXTRACT_VECTOR_ELT per element of the input.
///
/// \param DAG    DAG in which the new nodes are created.
/// \param Vector Vector value to convert.
/// \returns the BUILD_VERTICAL_VECTOR node.
// NOTE(review): the declaration of `Args` is not visible in this listing (the
// line between the EltVT definition and the loop is missing) -- confirm
// against the upstream file.
621SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
622 SDValue Vector) const {
623 SDLoc DL(Vector);
624 EVT VecVT = Vector.getValueType();
625 EVT EltVT = VecVT.getVectorElementType();
627
// Collect every element, in index order, as a scalar extract.
628 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
629 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
630 DAG.getVectorIdxConstant(i, DL)));
631 }
632
633 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
634}
635
/// Custom lowering for EXTRACT_VECTOR_ELT.
///
/// Operand 0 is the vector and operand 1 the element index. When the early
/// return below fires, \p Op is handed back unchanged; otherwise the vector
/// is converted with vectorToVerticalVector() and the extract is re-emitted
/// on the converted vector with the same index and result type.
// NOTE(review): the second half of the early-return condition (after the
// `||`) is not visible in this listing -- confirm against the upstream file.
636SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
637 SelectionDAG &DAG) const {
638 SDLoc DL(Op);
639 SDValue Vector = Op.getOperand(0);
640 SDValue Index = Op.getOperand(1);
641
// A constant index is one of the cases that needs no rewriting.
642 if (isa<ConstantSDNode>(Index) ||
644 return Op;
645
646 Vector = vectorToVerticalVector(DAG, Vector);
647 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
648 Vector, Index);
649}
650
/// Custom lowering for INSERT_VECTOR_ELT.
///
/// Operand 0 is the vector, operand 1 the value to insert and operand 2 the
/// element index. When the early return below fires, \p Op is handed back
/// unchanged; otherwise the vector is converted with
/// vectorToVerticalVector(), the insert is re-emitted on the converted
/// vector, and the result is passed through vectorToVerticalVector() again.
// NOTE(review): the second half of the early-return condition (after the
// `||`) is not visible in this listing -- confirm against the upstream file.
651SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
652 SelectionDAG &DAG) const {
653 SDLoc DL(Op);
654 SDValue Vector = Op.getOperand(0);
655 SDValue Value = Op.getOperand(1);
656 SDValue Index = Op.getOperand(2);
657
// A constant index is one of the cases that needs no rewriting.
658 if (isa<ConstantSDNode>(Index) ||
660 return Op;
661
662 Vector = vectorToVerticalVector(DAG, Vector);
663 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
664 Vector, Value, Index);
665 return vectorToVerticalVector(DAG, Insert);
666}
667
/// Custom lowering for GlobalAddress nodes.
///
/// In the visible code the referenced global is turned into a target global
/// address of type ConstPtrVT and wrapped in an AMDGPUISD::CONST_DATA_PTR
/// node, which is returned.
// NOTE(review): several lines of this function are missing from this listing,
// including the definition of `ConstPtrVT` and whatever precedes the
// DataLayout lookup -- there may be an early-return path that is not shown.
// Confirm against the upstream file.
668SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
669 SDValue Op,
670 SelectionDAG &DAG) const {
671 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
674
675 const DataLayout &DL = DAG.getDataLayout();
676 const GlobalValue *GV = GSD->getGlobal();
678
679 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
680 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
681}
682
683SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
684 // On hw >= R700, COS/SIN input must be between -1. and 1.
685 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
686 EVT VT = Op.getValueType();
687 SDValue Arg = Op.getOperand(0);
688 SDLoc DL(Op);
689
690 // TODO: Should this propagate fast-math-flags?
691 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
692 DAG.getNode(ISD::FADD, DL, VT,
693 DAG.getNode(ISD::FMUL, DL, VT, Arg,
694 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
695 DAG.getConstantFP(0.5, DL, MVT::f32)));
696 unsigned TrigNode;
697 switch (Op.getOpcode()) {
698 case ISD::FCOS:
699 TrigNode = AMDGPUISD::COS_HW;
700 break;
701 case ISD::FSIN:
702 TrigNode = AMDGPUISD::SIN_HW;
703 break;
704 default:
705 llvm_unreachable("Wrong trig opcode");
706 }
707 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
708 DAG.getNode(ISD::FADD, DL, VT, FractPart,
709 DAG.getConstantFP(-0.5, DL, MVT::f32)));
710 if (Gen >= AMDGPUSubtarget::R700)
711 return TrigVal;
712 // On R600 hw, COS/SIN input must be between -Pi and Pi.
713 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
714 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
715}
716
717SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
718 SelectionDAG &DAG) const {
719 SDValue Lo, Hi;
720 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
721 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
722}
723
724SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
725 unsigned mainop, unsigned ovf) const {
726 SDLoc DL(Op);
727 EVT VT = Op.getValueType();
728
729 SDValue Lo = Op.getOperand(0);
730 SDValue Hi = Op.getOperand(1);
731
732 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
733 // Extend sign.
734 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
735 DAG.getValueType(MVT::i1));
736
737 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
738
739 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
740}
741
/// Build the i1-typed replacement for a floating-point-to-unsigned
/// conversion. The visible operands of the created node are \p Op and the f32
/// constant 1.0.
// NOTE(review): the opcode line and the final operand line of the getNode()
// call are missing from this listing -- confirm the node kind and trailing
// operand against the upstream file.
742SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
743 SDLoc DL(Op);
744 return DAG.getNode(
746 DL,
747 MVT::i1,
748 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
750}
751
/// Build the i1-typed replacement for a floating-point-to-signed conversion.
/// The visible operands of the created node are \p Op and the f32 constant
/// -1.0.
// NOTE(review): the opcode line and the final operand line of the getNode()
// call are missing from this listing -- confirm the node kind and trailing
// operand against the upstream file.
752SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
753 SDLoc DL(Op);
754 return DAG.getNode(
756 DL,
757 MVT::i1,
758 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
760}
761
/// Emit a load of an implicit parameter identified by a dword index.
///
/// \param DAG         DAG in which the load is created.
/// \param VT          Type of the loaded value.
/// \param DL          Debug location for the new nodes.
/// \param DwordOffset Index of the parameter in 4-byte units; it is scaled by
///                    4 to obtain the byte offset used as the load address.
/// \returns the load node, chained to the DAG entry node, whose pointer
///          operand is the byte offset as an i32 constant.
// NOTE(review): two lines after the ByteOffset computation and the trailing
// argument(s) of getLoad() are missing from this listing -- confirm the
// pointer-info arguments against the upstream file.
762SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
763 const SDLoc &DL,
764 unsigned DwordOffset) const {
765 unsigned ByteOffset = DwordOffset * 4;
768
769 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
770 assert(isInt<16>(ByteOffset));
771
772 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
773 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
775}
776
777bool R600TargetLowering::isZero(SDValue Op) const {
778 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
779 return Cst->isZero();
780 if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
781 return CstFP->isZero();
782 return false;
783}
784
785bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
786 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
787 return CFP->isExactlyValue(1.0);
788 }
789 return isAllOnesConstant(Op);
790}
791
792bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
793 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
794 return CFP->getValueAPF().isZero();
795 }
796 return isNullConstant(Op);
797}
798
/// Custom lowering for SELECT_CC (operands: LHS, RHS, True, False, CC).
///
/// The strategies below are tried in order; the first that applies returns:
///  1. For an f32 result, let combineFMinMaxLegacy() turn the select into a
///     min/max node.
///  2. SET* form: True/False are the hardware true/false constants (after
///     optionally inverting or swapping the condition to put them in place).
///  3. CND* form: the comparison is against zero (after optionally moving the
///     zero to the RHS); True/False are bitcast to the compare type if needed.
///  4. Fallback: emit one SELECT_CC producing a hardware true/false value and
///     a second SELECT_CC that tests that value against hardware false.
// NOTE(review): the last operand line of the final getNode() call is missing
// from this listing -- confirm the condition code used by the second
// SELECT_CC against the upstream file.
799SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
800 SDLoc DL(Op);
801 EVT VT = Op.getValueType();
802
803 SDValue LHS = Op.getOperand(0);
804 SDValue RHS = Op.getOperand(1);
805 SDValue True = Op.getOperand(2);
806 SDValue False = Op.getOperand(3);
807 SDValue CC = Op.getOperand(4);
808 SDValue Temp;
809
810 if (VT == MVT::f32) {
811 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
812 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
813 if (MinMax)
814 return MinMax;
815 }
816
817 // LHS and RHS are guaranteed to be the same value type
818 EVT CompareVT = LHS.getValueType();
819
820 // Check if we can lower this to a native operation.
821
822 // Try to lower to a SET* instruction:
823 //
824 // SET* can match the following patterns:
825 //
826 // select_cc f32, f32, -1, 0, cc_supported
827 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
828 // select_cc i32, i32, -1, 0, cc_supported
829 //
830
831 // Move hardware True/False values to the correct operand.
832 if (isHWTrueValue(False) && isHWFalseValue(True)) {
833 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
834 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
835 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
836 std::swap(False, True);
837 CC = DAG.getCondCode(InverseCC);
838 } else {
839 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
840 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
841 std::swap(False, True);
842 std::swap(LHS, RHS);
843 CC = DAG.getCondCode(SwapInvCC);
844 }
845 }
846 }
847
848 if (isHWTrueValue(True) && isHWFalseValue(False) &&
849 (CompareVT == VT || VT == MVT::i32)) {
850 // This can be matched by a SET* instruction.
851 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
852 }
853
854 // Try to lower to a CND* instruction:
855 //
856 // CND* can match the following patterns:
857 //
858 // select_cc f32, 0.0, f32, f32, cc_supported
859 // select_cc f32, 0.0, i32, i32, cc_supported
860 // select_cc i32, 0, f32, f32, cc_supported
861 // select_cc i32, 0, i32, i32, cc_supported
862 //
863
864 // Try to move the zero value to the RHS
865 if (isZero(LHS)) {
866 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
867 // Try swapping the operands
868 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
869 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
870 std::swap(LHS, RHS);
871 CC = DAG.getCondCode(CCSwapped);
872 } else {
873 // Try inverting the condition and then swapping the operands
874 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
875 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
876 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
877 std::swap(True, False);
878 std::swap(LHS, RHS);
879 CC = DAG.getCondCode(CCSwapped);
880 }
881 }
882 }
883 if (isZero(RHS)) {
884 SDValue Cond = LHS;
885 SDValue Zero = RHS;
886 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
887 if (CompareVT != VT) {
888 // Bitcast True / False to the correct types. This will end up being
889 // a nop, but it allows us to define only a single pattern in the
890 // .TD files for each CND* instruction rather than having to have
891 // one pattern for integer True/False and one for fp True/False
892 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
893 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
894 }
895
// "Not equal" conditions are rewritten as their inverse with the True and
// False values exchanged, which selects the same value.
896 switch (CCOpcode) {
897 case ISD::SETONE:
898 case ISD::SETUNE:
899 case ISD::SETNE:
900 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
901 Temp = True;
902 True = False;
903 False = Temp;
904 break;
905 default:
906 break;
907 }
908 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
909 Cond, Zero,
910 True, False,
911 DAG.getCondCode(CCOpcode));
912 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
913 }
914
915 // If we make it this far it means we have no native instructions to handle
916 // this SELECT_CC, so we must lower it.
917 SDValue HWTrue, HWFalse;
918
919 if (CompareVT == MVT::f32) {
920 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
921 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
922 } else if (CompareVT == MVT::i32) {
923 HWTrue = DAG.getConstant(-1, DL, CompareVT);
924 HWFalse = DAG.getConstant(0, DL, CompareVT);
925 }
926 else {
927 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
928 }
929
930 // Lower this unsupported SELECT_CC into a combination of two supported
931 // SELECT_CC operations.
932 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
933
934 return DAG.getNode(ISD::SELECT_CC, DL, VT,
935 Cond, HWFalse,
936 True, False,
938}
939
940SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
941 SelectionDAG &DAG) const {
942 SDLoc SL(Op);
943 EVT VT = Op.getValueType();
944
945 const R600TargetMachine &TM =
946 static_cast<const R600TargetMachine &>(getTargetMachine());
947
948 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
949 unsigned SrcAS = ASC->getSrcAddressSpace();
950 unsigned DestAS = ASC->getDestAddressSpace();
951
952 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
953 return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
954
955 return Op;
956}
957
958/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
959/// convert these pointers to a register index. Each register holds
960/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
961/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
962/// for indirect addressing.
963SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
964 unsigned StackWidth,
965 SelectionDAG &DAG) const {
966 unsigned SRLPad;
967 switch(StackWidth) {
968 case 1:
969 SRLPad = 2;
970 break;
971 case 2:
972 SRLPad = 3;
973 break;
974 case 4:
975 SRLPad = 4;
976 break;
977 default: llvm_unreachable("Invalid stack width");
978 }
979
980 SDLoc DL(Ptr);
981 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
982 DAG.getConstant(SRLPad, DL, MVT::i32));
983}
984
985void R600TargetLowering::getStackAddress(unsigned StackWidth,
986 unsigned ElemIdx,
987 unsigned &Channel,
988 unsigned &PtrIncr) const {
989 switch (StackWidth) {
990 default:
991 case 1:
992 Channel = 0;
993 if (ElemIdx > 0) {
994 PtrIncr = 1;
995 } else {
996 PtrIncr = 0;
997 }
998 break;
999 case 2:
1000 Channel = ElemIdx % 2;
1001 if (ElemIdx == 2) {
1002 PtrIncr = 1;
1003 } else {
1004 PtrIncr = 0;
1005 }
1006 break;
1007 case 4:
1008 Channel = ElemIdx;
1009 PtrIncr = 0;
1010 break;
1011 }
1012}
1013
/// Lower an i8 or i16 store to the private address space as a
/// read-modify-write of the dword that contains the target bytes: load the
/// dword, clear the destination bits, OR in the shifted value and store the
/// dword back.
///
/// When the incoming chain is an AMDGPUISD::DUMMY_CHAIN node the store is
/// treated as one element of an expanded vector store: the dummy is skipped
/// for this store's own chain, and afterwards all users of the old chain are
/// redirected to a fresh DUMMY_CHAIN that depends on the store created here.
///
/// \param Store  The store to lower; its memory type must be i8 or i16.
/// \param DAG    The DAG in which the replacement nodes are created.
/// \returns the new dword store.
1014SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1015 SelectionDAG &DAG) const {
1016 SDLoc DL(Store);
1017 //TODO: Who creates the i8 stores?
1018 assert(Store->isTruncatingStore()
1019 || Store->getValue().getValueType() == MVT::i8);
1020 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1021
 // Pick the (unshifted) bit mask covering the stored width.
1022 SDValue Mask;
1023 if (Store->getMemoryVT() == MVT::i8) {
1024 assert(Store->getAlign() >= 1);
1025 Mask = DAG.getConstant(0xff, DL, MVT::i32);
1026 } else if (Store->getMemoryVT() == MVT::i16) {
1027 assert(Store->getAlign() >= 2);
1028 Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1029 } else {
1030 llvm_unreachable("Unsupported private trunc store");
1031 }
1032
1033 SDValue OldChain = Store->getChain();
1034 bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1035 // Skip dummy
1036 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1037 SDValue BasePtr = Store->getBasePtr();
1038 SDValue Offset = Store->getOffset();
1039 EVT MemVT = Store->getMemoryVT();
1040
 // Effective byte address: base pointer plus the offset, when one is present.
1041 SDValue LoadPtr = BasePtr;
1042 if (!Offset.isUndef()) {
1043 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1044 }
1045
1046 // Get dword location
1047 // TODO: this should be eliminated by the future SHR ptr, 2
1048 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1049 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1050
1051 // Load dword
1052 // TODO: can we be smarter about machine pointer info?
 // NOTE(review): PtrInfo is used below but its declaration is not visible in
 // this listing (source line 1053 is absent) — confirm against the original.
1054 SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1055
 // Continue the chain from the load so the store below is ordered after it.
1056 Chain = Dst.getValue(1);
1057
1058 // Get offset in dword
1059 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1060 DAG.getConstant(0x3, DL, MVT::i32));
1061
1062 // Convert byte offset to bit shift
1063 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1064 DAG.getConstant(3, DL, MVT::i32));
1065
1066 // TODO: Contrary to the name of the function,
1067 // it also handles sub i32 non-truncating stores (like i1)
1068 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1069 Store->getValue());
1070
1071 // Mask the value to the right type
1072 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1073
1074 // Shift the value in place
1075 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1076 MaskedValue, ShiftAmt);
1077
1078 // Shift the mask in place
1079 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1080
1081 // Invert the mask. NOTE: if we had native ROL instructions we could
1082 // use inverted mask
1083 DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1084
1085 // Cleanup the target bits
1086 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1087
1088 // Add the new bits
1089 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1090
1091 // Store dword
1092 // TODO: Can we be smarter about MachinePointerInfo?
1093 SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1094
1095 // If we are part of expanded vector, make our neighbors depend on this store
1096 if (VectorTrunc) {
1097 // Make all other vector elements depend on this store
1098 Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1099 DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1100 }
1101 return NewStore;
1102}
1103
/// Custom lowering for store nodes.
///
/// In order, the visible code:
///  - scalarizes vector stores that satisfy the address-space / truncation
///    condition (for truncating private stores the chain is first wrapped in
///    an AMDGPUISD::DUMMY_CHAIN and the store is recreated on that chain);
///  - expands stores whose alignment is below the store size when
///    allowsMisalignedMemoryAccesses() rejects them;
///  - for the global address space, builds a mask/value vector for truncating
///    stores, and tags the pointer with AMDGPUISD::DWORDADDR for stores of at
///    least 32 bits whose pointer is not tagged yet;
///  - returns an empty SDValue for anything that is not the private address
///    space at that point;
///  - for the private address space, hands sub-32-bit stores to
///    lowerPrivateTruncStore() and tags the pointer of wider stores with
///    AMDGPUISD::DWORDADDR.
///
/// \returns the replacement node, or an empty SDValue when no custom lowering
///          is produced here.
1104SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1105 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1106 unsigned AS = StoreNode->getAddressSpace();
1107
1108 SDValue Chain = StoreNode->getChain();
1109 SDValue Ptr = StoreNode->getBasePtr();
1110 SDValue Value = StoreNode->getValue();
1111
1112 EVT VT = Value.getValueType();
1113 EVT MemVT = StoreNode->getMemoryVT();
1114 EVT PtrVT = Ptr.getValueType();
1115
1116 SDLoc DL(Op);
1117
1118 const bool TruncatingStore = StoreNode->isTruncatingStore();
1119
1120 // Neither LOCAL nor PRIVATE can do vectors at the moment
 // NOTE(review): the opening of this `if` condition (source line 1121) is
 // absent from this listing; only its tail is visible below.
1122 TruncatingStore) &&
1123 VT.isVector()) {
1124 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1125 // Add an extra level of chain to isolate this vector
1126 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1127 // TODO: can the chain be replaced without creating a new store?
1128 SDValue NewStore = DAG.getTruncStore(
1129 NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1130 StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1131 StoreNode->getAAInfo());
1132 StoreNode = cast<StoreSDNode>(NewStore);
1133 }
1134
1135 return scalarizeVectorStore(StoreNode, DAG);
1136 }
1137
 // Under-aligned stores that the target does not accept are expanded.
1138 Align Alignment = StoreNode->getAlign();
1139 if (Alignment < MemVT.getStoreSize() &&
1140 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1141 StoreNode->getMemOperand()->getFlags(),
1142 nullptr)) {
1143 return expandUnalignedStore(StoreNode, DAG);
1144 }
1145
 // Byte address shifted right by two, i.e. the dword index.
1146 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1147 DAG.getConstant(2, DL, PtrVT));
1148
1149 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1150 // It is beneficial to create MSKOR here instead of combiner to avoid
1151 // artificial dependencies introduced by RMW
1152 if (TruncatingStore) {
1153 assert(VT.bitsLE(MVT::i32));
1154 SDValue MaskConstant;
1155 if (MemVT == MVT::i8) {
1156 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1157 } else {
1158 assert(MemVT == MVT::i16);
1159 assert(StoreNode->getAlign() >= 2);
1160 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1161 }
1162
 // Bit position of the addressed byte inside its dword: (Ptr & 3) * 8.
1163 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1164 DAG.getConstant(0x00000003, DL, PtrVT));
1165 SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1166 DAG.getConstant(3, DL, VT));
1167
1168 // Put the mask in correct place
1169 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1170
1171 // Put the value bits in correct place
1172 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1173 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1174
1175 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1176 // vector instead.
1177 SDValue Src[4] = {
1178 ShiftedValue,
1179 DAG.getConstant(0, DL, MVT::i32),
1180 DAG.getConstant(0, DL, MVT::i32),
1181 Mask
1182 };
1183 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1184 SDValue Args[3] = { Chain, Input, DWordAddr };
 // NOTE(review): the start of the statement that consumes Args (source line
 // 1185) is absent from this listing; only its trailing arguments remain.
1186 Op->getVTList(), Args, MemVT,
1187 StoreNode->getMemOperand());
1188 }
1189 if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1190 // Convert pointer from byte address to dword address.
1191 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1192
1193 if (StoreNode->isIndexed()) {
1194 llvm_unreachable("Indexed stores not supported yet");
1195 } else {
1196 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1197 }
1198 return Chain;
1199 }
1200 }
1201
1202 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1203 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1204 return SDValue();
1205
1206 if (MemVT.bitsLT(MVT::i32))
1207 return lowerPrivateTruncStore(StoreNode, DAG);
1208
1209 // Standard i32+ store, tag it with DWORDADDR to note that the address
1210 // has been shifted
1211 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1212 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1213 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1214 }
1215
1216 // Tagged i32+ stores will be matched by patterns
1217 return SDValue();
1218}
1219
1220// return (512 + (kc_bank << 12)
// Maps an address-space value to a base of the form 512 + 4096 * k, where k
// runs from 0 to 15 across the visible returns; any value that matches no
// case yields -1.
// NOTE(review): the function signature and every `case` label are absent from
// this listing, so which address space selects which k cannot be confirmed
// here. Elsewhere in this file it is called as ConstantAddressBlock(<address
// space>), and its result is compared against -1.
1221static int
1223 switch (AddressSpace) {
1225 return 512;
1227 return 512 + 4096;
1229 return 512 + 4096 * 2;
1231 return 512 + 4096 * 3;
1233 return 512 + 4096 * 4;
1235 return 512 + 4096 * 5;
1237 return 512 + 4096 * 6;
1239 return 512 + 4096 * 7;
1241 return 512 + 4096 * 8;
1243 return 512 + 4096 * 9;
1245 return 512 + 4096 * 10;
1247 return 512 + 4096 * 11;
1249 return 512 + 4096 * 12;
1251 return 512 + 4096 * 13;
1253 return 512 + 4096 * 14;
1255 return 512 + 4096 * 15;
1256 default:
 // Not one of the handled address spaces.
1257 return -1;
1258 }
1259}
1260
/// Lower an extending load by loading the whole dword that contains the
/// addressed bytes, shifting the wanted bytes down to bit 0 and then sign- or
/// zero-extending them in-register.
///
/// Within this file it is reached from LowerLOAD for extending loads of less
/// than 32 bits from the private address space.
///
/// \returns a merged pair: the extended i32 value and the chain output of the
///          dword load.
1261SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1262 SelectionDAG &DAG) const {
1263 SDLoc DL(Op);
1264 LoadSDNode *Load = cast<LoadSDNode>(Op);
1265 ISD::LoadExtType ExtType = Load->getExtensionType();
1266 EVT MemVT = Load->getMemoryVT();
 // The load must be naturally aligned for its memory type.
1267 assert(Load->getAlign() >= MemVT.getStoreSize());
1268
1269 SDValue BasePtr = Load->getBasePtr();
1270 SDValue Chain = Load->getChain();
1271 SDValue Offset = Load->getOffset();
1272
 // Effective byte address: base pointer plus the offset, when one is present.
1273 SDValue LoadPtr = BasePtr;
1274 if (!Offset.isUndef()) {
1275 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1276 }
1277
1278 // Get dword location
1279 // NOTE: this should be eliminated by the future SHR ptr, 2
1280 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1281 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1282
1283 // Load dword
1284 // TODO: can we be smarter about machine pointer info?
 // NOTE(review): PtrInfo is used below but its declaration is not visible in
 // this listing (source line 1285 is absent) — confirm against the original.
1286 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1287
1288 // Get offset within the register.
1289 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1290 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1291
1292 // Bit offset of target byte (byteIdx * 8).
1293 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1294 DAG.getConstant(3, DL, MVT::i32));
1295
1296 // Shift to the right.
1297 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1298
1299 // Eliminate the upper bits by setting them to ...
1300 EVT MemEltVT = MemVT.getScalarType();
1301
1302 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1303 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1304 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1305 } else { // ... or zeros.
1306 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1307 }
1308
1309 SDValue Ops[] = {
1310 Ret,
1311 Read.getValue(1) // This should be our output chain
1312 };
1313
1314 return DAG.getMergeValues(Ops, DL);
1315}
1316
/// Custom lowering for load nodes.
///
/// In order, the visible code:
///  - sends extending private loads narrower than 32 bits to
///    lowerPrivateExtLoad();
///  - scalarizes vector loads that satisfy the address-space condition;
///  - handles non-extending and zero-extending loads from an address space
///    for which ConstantAddressBlock() returns a value greater than -1;
///  - expands sign-extending loads into an any-extending load followed by
///    SIGN_EXTEND_INREG;
///  - returns an empty SDValue for every remaining non-private load;
///  - for private loads whose pointer is not yet an AMDGPUISD::DWORDADDR,
///    shifts the pointer right by two, tags it and emits an i32 load.
///
/// \returns the replacement value(s), or an empty SDValue when no custom
///          lowering is produced here.
1317SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1318 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1319 unsigned AS = LoadNode->getAddressSpace();
1320 EVT MemVT = LoadNode->getMemoryVT();
1321 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1322
1323 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1324 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1325 return lowerPrivateExtLoad(Op, DAG);
1326 }
1327
1328 SDLoc DL(Op);
1329 EVT VT = Op.getValueType();
1330 SDValue Chain = LoadNode->getChain();
1331 SDValue Ptr = LoadNode->getBasePtr();
1332
 // NOTE(review): the second half of this address-space test (source line
 // 1334) is absent from this listing.
1333 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1335 VT.isVector()) {
1336 SDValue Ops[2];
1337 std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1338 return DAG.getMergeValues(Ops, DL);
1339 }
1340
1341 // This is still used for explicit load from addrspace(8)
1342 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1343 if (ConstantBlock > -1 &&
1344 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1345 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
 // NOTE(review): `Result` is assigned below but its declaration (source line
 // 1346) is absent from this listing.
1347 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1348 isa<ConstantSDNode>(Ptr)) {
1349 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1350 }
1351 // TODO: Does this even work?
1352 // non-constant ptr can't be folded, keeps it as a v4f32 load
1353 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1354 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1355 DAG.getConstant(4, DL, MVT::i32)),
 // NOTE(review): the value subtracted from the address space (source line
 // 1357) is absent from this listing.
1356 DAG.getConstant(LoadNode->getAddressSpace() -
1358 DL, MVT::i32));
1359
 // Scalar results take element 0 of the v4i32 value.
1360 if (!VT.isVector()) {
1361 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1362 DAG.getConstant(0, DL, MVT::i32));
1363 }
1364
1365 SDValue MergedValues[2] = {
1366 Result,
1367 Chain
1368 };
1369 return DAG.getMergeValues(MergedValues, DL);
1370 }
1371
1372 // For most operations returning SDValue() will result in the node being
1373 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1374 // need to manually expand loads that may be legal in some address spaces and
1375 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1376 // compute shaders, since the data is sign extended when it is uploaded to the
1377 // buffer. However SEXT loads from other address spaces are not supported, so
1378 // we need to expand them here.
1379 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1380 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1381 SDValue NewLoad = DAG.getExtLoad(
1382 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1383 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1384 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1385 DAG.getValueType(MemVT));
1386
 // NOTE(review): the merged chain is the incoming Chain, not the chain output
 // of NewLoad — confirm this is intended.
1387 SDValue MergedValues[2] = { Res, Chain };
1388 return DAG.getMergeValues(MergedValues, DL);
1389 }
1390
1391 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1392 return SDValue();
1393 }
1394
1395 // DWORDADDR ISD marks already shifted address
1396 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1397 assert(VT == MVT::i32);
1398 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1399 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1400 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1401 }
 // The pointer is already tagged; nothing to rewrite here.
1402 return SDValue();
1403}
1404
1405SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1406 SDValue Chain = Op.getOperand(0);
1407 SDValue Cond = Op.getOperand(1);
1408 SDValue Jump = Op.getOperand(2);
1409
1410 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1411 Chain, Jump, Cond);
1412}
1413
/// Lower a frame-index node to a constant: the fixed part of the frame
/// index's reference offset, multiplied by 4 and by the stack width reported
/// by the frame lowering. The constant has the value type of \p Op.
1414SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1415 SelectionDAG &DAG) const {
 // NOTE(review): MF is used below but its declaration (source line 1416) is
 // absent from this listing.
1417 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1418
1419 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1420
1421 unsigned FrameIndex = FIN->getIndex();
 // Required by getFrameIndexReference(); the register it reports is unused.
1422 Register IgnoredFrameReg;
 // NOTE(review): the left-hand side of this statement (source line 1423,
 // presumably the declaration of Offset) is absent from this listing.
1424 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1425 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1426 SDLoc(Op), Op.getValueType());
1427}
1428
1430 bool IsVarArg) const {
 // Selects the calling-convention assignment function for CC.
 // NOTE(review): the first line of the signature and several `case` labels
 // are absent from this listing, so the exact set of conventions handled by
 // each branch cannot be confirmed here. Elsewhere in this file the function
 // is called as CCAssignFnForCall(CallConv, isVarArg).
1431 switch (CC) {
1434 case CallingConv::C:
1435 case CallingConv::Fast:
1436 case CallingConv::Cold:
 // The visible labels above end in this unreachable.
1437 llvm_unreachable("kernels should not be handled here");
 // A second group of labels (not visible here) selects CC_R600.
1445 return CC_R600;
1446 default:
1447 report_fatal_error("Unsupported calling convention.");
1448 }
1449}
1450
1451/// XXX Only kernel functions are supported, so we can assume for now that
1452/// every function is a kernel function, but in the future we should use
1453/// separate calling conventions for kernel and non-kernel functions.
///
/// For shader calling conventions each argument is read with a copy from a
/// live-in register of the R600_Reg128 class; otherwise each argument is
/// loaded from memory at an offset derived from its assigned location. The
/// resulting values are appended to \p InVals and the incoming \p Chain is
/// returned unchanged.
///
/// NOTE(review): the line carrying the function name (source line 1454) is
/// absent from this listing, as are the declarations of ArgLocs, MF, Ext and
/// PtrInfo and the end of the load's flag list.
1455 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1456 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1457 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1459 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1460 *DAG.getContext());
1463
 // Shaders use the regular calling-convention analysis; everything else goes
 // through the compute-specific analysis.
1464 if (AMDGPU::isShader(CallConv)) {
1465 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1466 } else {
1467 analyzeFormalArgumentsCompute(CCInfo, Ins);
1468 }
1469
1470 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1471 CCValAssign &VA = ArgLocs[i];
1472 const ISD::InputArg &In = Ins[i];
1473 EVT VT = In.VT;
1474 EVT MemVT = VA.getLocVT();
1475 if (!VT.isVector() && MemVT.isVector()) {
1476 // Get load source type if scalarized.
1477 MemVT = MemVT.getVectorElementType();
1478 }
1479
1480 if (AMDGPU::isShader(CallConv)) {
1481 Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1482 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1483 InVals.push_back(Register);
1484 continue;
1485 }
1486
1487 // i64 isn't a legal type, so the register type used ends up as i32, which
1488 // isn't expected here. It attempts to create this sextload, but it ends up
1489 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1490 // for <1 x i64>.
1491
1492 // The first 36 bytes of the input buffer contains information about
1493 // thread group and global sizes.
 // NOTE(review): Ext is assigned and used below but its declaration (source
 // line 1494) is absent from this listing.
1495 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1496 // FIXME: This should really check the extload type, but the handling of
1497 // extload vector parameters seems to be broken.
1498
1499 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1500 Ext = ISD::SEXTLOAD;
1501 }
1502
1503 // Compute the offset from the value.
1504 // XXX - I think PartOffset should give you this, but it seems to give the
1505 // size of the register which isn't useful.
1506
1507 unsigned PartOffset = VA.getLocMemOffset();
 // Largest alignment that both the value's store size and the offset allow.
1508 Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);
1509
 // NOTE(review): PtrInfo is used below but its declaration (source line 1510)
 // is absent, and the end of this call (source lines 1516-1517) is missing.
1511 SDValue Arg = DAG.getLoad(
1512 ISD::UNINDEXED, Ext, VT, DL, Chain,
1513 DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1514 PtrInfo,
1515 MemVT, Alignment, MachineMemOperand::MONonTemporal |
1518
1519 InVals.push_back(Arg);
1520 }
1521 return Chain;
1522}
1523
1525 EVT VT) const {
 // Non-vector types get MVT::i32.
 // NOTE(review): the first line of the signature and the return statement for
 // vector types (source line 1528) are absent from this listing; the function
 // name is therefore not confirmed here.
1526 if (!VT.isVector())
1527 return MVT::i32;
1529}
1530
1532 const MachineFunction &MF) const {
 // NOTE(review): the first line of the signature and the `if` condition that
 // guards the size check (source line 1534) are absent from this listing; per
 // the comment below it presumably tests for the local and private address
 // spaces — confirm against the original.
1533 // Local and Private addresses do not handle vectors. Limit to i32
1535 return (MemVT.getSizeInBits() <= 32);
1536 }
 // Anything not caught by the guarded branch is accepted.
1537 return true;
1538}
1539
1541 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1542 unsigned *IsFast) const {
 // Reports whether a misaligned access of type VT is acceptable.
 // Returns false for non-simple types, MVT::Other and types narrower than
 // i32; otherwise returns true only when VT is wider than i32 and Alignment
 // is at least 4. AddrSpace and Flags are not consulted by the visible code.
 // NOTE(review): the first line of the signature is absent from this listing;
 // the name is taken from the call in LowerSTORE.
 // Default the optional speed hint to "not fast" for the early exits.
1543 if (IsFast)
1544 *IsFast = 0;
1545
1546 if (!VT.isSimple() || VT == MVT::Other)
1547 return false;
1548
1549 if (VT.bitsLT(MVT::i32))
1550 return false;
1551
1552 // TODO: This is a rough estimate.
 // Note that the hint is set to 1 even when the final answer below is false.
1553 if (IsFast)
1554 *IsFast = 1;
1555
1556 return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1557}
1558
1560 SelectionDAG &DAG, SDValue VectorEntry,
1561 DenseMap<unsigned, unsigned> &RemapSwizzle) {
 // Rebuilds the four-element vector VectorEntry so that elements expressible
 // through a swizzle selector alone become undef, and records the selector
 // for each such element in RemapSwizzle (which must be empty on entry):
 //   7     - element is undef (SEL_MASK_WRITE)
 //   4 / 5 - floating-point constant zero / 1.0 (SEL_0 / SEL_1)
 //   j < i - element i duplicates the earlier element j
 // Returns the rebuilt build vector.
 // NOTE(review): the first line of the signature is absent from this listing;
 // the name is taken from the call in OptimizeSwizzle.
1562 assert(RemapSwizzle.empty());
1563
1564 SDLoc DL(VectorEntry);
1565 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1566
 // Split the vector into its four scalar elements.
1567 SDValue NewBldVec[4];
1568 for (unsigned i = 0; i < 4; i++)
1569 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1570 DAG.getIntPtrConstant(i, DL));
1571
1572 for (unsigned i = 0; i < 4; i++) {
1573 if (NewBldVec[i].isUndef())
1574 // We mask write here to teach later passes that the ith element of this
1575 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1576 // break false dependencies and additionally make assembly easier to read.
1577 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1578 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1579 if (C->isZero()) {
1580 RemapSwizzle[i] = 4; // SEL_0
1581 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1582 } else if (C->isExactlyValue(1.0)) {
1583 RemapSwizzle[i] = 5; // SEL_1
1584 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1585 }
1586 }
1587
 // Undef elements (original or just replaced) need no duplicate search.
1588 if (NewBldVec[i].isUndef())
1589 continue;
1590
 // If an earlier element holds the same value, reference it instead.
1591 for (unsigned j = 0; j < i; j++) {
1592 if (NewBldVec[i] == NewBldVec[j]) {
1593 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1594 RemapSwizzle[i] = j;
1595 break;
1596 }
1597 }
1598 }
1599
1600 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1601 NewBldVec);
1602}
1603
1605 DenseMap<unsigned, unsigned> &RemapSwizzle) {
 // Rebuilds the four-element vector VectorEntry with at most one pair of
 // lanes swapped, and fills RemapSwizzle (which must be empty on entry) with
 // the resulting lane mapping, starting from the identity.
 // A lane i that is an EXTRACT_VECTOR_ELT with constant index Idx is swapped
 // with lane Idx, unless lane Idx is itself an extract of its own index.
 // Only the first such lane is moved. Returns the rebuilt build vector.
 // NOTE(review): the first line of the signature is absent from this listing;
 // the name and the DAG / VectorEntry parameters are taken from the call in
 // OptimizeSwizzle and from their uses below.
1606 assert(RemapSwizzle.empty());
1607
1608 SDLoc DL(VectorEntry);
1609 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1610
1611 SDValue NewBldVec[4];
1612 bool isUnmovable[4] = {false, false, false, false};
1613 for (unsigned i = 0; i < 4; i++)
1614 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1615 DAG.getIntPtrConstant(i, DL));
1616
 // Start from the identity mapping and pin lanes that already extract their
 // own index.
1617 for (unsigned i = 0; i < 4; i++) {
1618 RemapSwizzle[i] = i;
1619 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1620 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1621 if (i == Idx)
1622 isUnmovable[Idx] = true;
1623 }
1624 }
1625
 // Move the first movable extract into the lane matching its index.
1626 for (unsigned i = 0; i < 4; i++) {
1627 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1628 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1629 if (isUnmovable[Idx])
1630 continue;
1631 // Swap i and Idx
1632 std::swap(NewBldVec[Idx], NewBldVec[i]);
1633 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1634 break;
1635 }
1636 }
1637
1638 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1639 NewBldVec);
1640}
1641
1642SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1643 SelectionDAG &DAG,
1644 const SDLoc &DL) const {
1645 // Old -> New swizzle values
1646 DenseMap<unsigned, unsigned> SwizzleRemap;
1647
1648 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1649 for (unsigned i = 0; i < 4; i++) {
1650 unsigned Idx = Swz[i]->getAsZExtVal();
1651 if (SwizzleRemap.contains(Idx))
1652 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1653 }
1654
1655 SwizzleRemap.clear();
1656 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1657 for (unsigned i = 0; i < 4; i++) {
1658 unsigned Idx = Swz[i]->getAsZExtVal();
1659 if (SwizzleRemap.contains(Idx))
1660 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1661 }
1662
1663 return BuildVector;
1664}
1665
1666SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1667 SelectionDAG &DAG) const {
1668 SDLoc DL(LoadNode);
1669 EVT VT = LoadNode->getValueType(0);
1670 SDValue Chain = LoadNode->getChain();
1671 SDValue Ptr = LoadNode->getBasePtr();
1672 assert (isa<ConstantSDNode>(Ptr));
1673
1674 //TODO: Support smaller loads
1675 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1676 return SDValue();
1677
1678 if (LoadNode->getAlign() < Align(4))
1679 return SDValue();
1680
1681 int ConstantBlock = ConstantAddressBlock(Block);
1682
1683 SDValue Slots[4];
1684 for (unsigned i = 0; i < 4; i++) {
1685 // We want Const position encoded with the following formula :
1686 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1687 // const_index is Ptr computed by llvm using an alignment of 16.
1688 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1689 // then div by 4 at the ISel step
1690 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1691 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1692 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1693 }
1694 EVT NewVT = MVT::v4i32;
1695 unsigned NumElements = 4;
1696 if (VT.isVector()) {
1697 NewVT = VT;
1698 NumElements = VT.getVectorNumElements();
1699 }
1700 SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
1701 if (!VT.isVector()) {
1702 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1703 DAG.getConstant(0, DL, MVT::i32));
1704 }
1705 SDValue MergedValues[2] = {
1706 Result,
1707 Chain
1708 };
1709 return DAG.getMergeValues(MergedValues, DL);
1710}
1711
1712//===----------------------------------------------------------------------===//
1713// Custom DAG Optimizations
1714//===----------------------------------------------------------------------===//
1715
1717 DAGCombinerInfo &DCI) const {
 // Target-specific DAG combines, dispatched on the opcode of N. Each case
 // either returns a replacement value, returns an empty SDValue, or breaks
 // out of the switch to the code that follows it.
 // NOTE(review): the first line of the signature, several `case` labels and
 // the statement after the switch are absent from this listing; the affected
 // places are marked below.
1718 SelectionDAG &DAG = DCI.DAG;
1719 SDLoc DL(N);
1720
1721 switch (N->getOpcode()) {
1722 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1723 case ISD::FP_ROUND: {
1724 SDValue Arg = N->getOperand(0);
1725 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1726 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1727 Arg.getOperand(0));
1728 }
1729 break;
1730 }
1731
1732 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1733 // (i32 select_cc f32, f32, -1, 0 cc)
1734 //
1735 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1736 // this to one of the SET*_DX10 instructions.
1737 case ISD::FP_TO_SINT: {
1738 SDValue FNeg = N->getOperand(0);
1739 if (FNeg.getOpcode() != ISD::FNEG) {
1740 return SDValue();
1741 }
1742 SDValue SelectCC = FNeg.getOperand(0);
1743 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1744 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1745 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1746 !isHWTrueValue(SelectCC.getOperand(2)) ||
1747 !isHWFalseValue(SelectCC.getOperand(3))) {
1748 return SDValue();
1749 }
1750
1751 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1752 SelectCC.getOperand(0), // LHS
1753 SelectCC.getOperand(1), // RHS
1754 DAG.getConstant(-1, DL, MVT::i32), // True
1755 DAG.getConstant(0, DL, MVT::i32), // False
1756 SelectCC.getOperand(4)); // CC
1757 }
1758
1759 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1760 // => build_vector elt0, ... , NewEltIdx, ... , eltN
 // NOTE(review): the `case` label for this block (source line 1761) is absent
 // from this listing.
1762 SDValue InVec = N->getOperand(0);
1763 SDValue InVal = N->getOperand(1);
1764 SDValue EltNo = N->getOperand(2);
1765
1766 // If the inserted element is an UNDEF, just use the input vector.
1767 if (InVal.isUndef())
1768 return InVec;
1769
1770 EVT VT = InVec.getValueType();
1771
1772 // If we can't generate a legal BUILD_VECTOR, exit
 // NOTE(review): the condition guarding this return (source line 1773) is
 // absent from this listing.
1774 return SDValue();
1775
1776 // Check that we know which element is being inserted
1777 if (!isa<ConstantSDNode>(EltNo))
1778 return SDValue();
1779 unsigned Elt = EltNo->getAsZExtVal();
1780
1781 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1782 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1783 // vector elements.
 // NOTE(review): the declaration of Ops (source line 1784) is absent from
 // this listing.
1785 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1786 Ops.append(InVec.getNode()->op_begin(),
1787 InVec.getNode()->op_end());
1788 } else if (InVec.isUndef()) {
1789 unsigned NElts = VT.getVectorNumElements();
1790 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1791 } else {
1792 return SDValue();
1793 }
1794
1795 // Insert the element
1796 if (Elt < Ops.size()) {
1797 // All the operands of BUILD_VECTOR must have the same type;
1798 // we enforce that here.
1799 EVT OpVT = Ops[0].getValueType();
1800 if (InVal.getValueType() != OpVT)
1801 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1802 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1803 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1804 Ops[Elt] = InVal;
1805 }
1806
1807 // Return the new vector
1808 return DAG.getBuildVector(VT, DL, Ops);
1809 }
1810
1811 // Extract_vec (Build_vector) generated by custom lowering
1812 // also needs to be customly combined
 // NOTE(review): the `case` label for this block (source line 1813) is absent
 // from this listing.
1814 SDValue Arg = N->getOperand(0);
1815 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1816 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1817 unsigned Element = Const->getZExtValue();
1818 return Arg->getOperand(Element);
1819 }
1820 }
 // NOTE(review): the remainder of this condition (source lines 1822-1824) is
 // absent from this listing.
1821 if (Arg.getOpcode() == ISD::BITCAST &&
1825 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1826 unsigned Element = Const->getZExtValue();
1827 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1828 Arg->getOperand(0).getOperand(Element));
1829 }
1830 }
1831 break;
1832 }
1833
1834 case ISD::SELECT_CC: {
1835 // Try common optimizations
 // NOTE(review): the statement that produces Ret (source line 1836) is absent
 // from this listing.
1837 return Ret;
1838
1839 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1840 // selectcc x, y, a, b, inv(cc)
1841 //
1842 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1843 // selectcc x, y, a, b, cc
1844 SDValue LHS = N->getOperand(0);
1845 if (LHS.getOpcode() != ISD::SELECT_CC) {
1846 return SDValue();
1847 }
1848
1849 SDValue RHS = N->getOperand(1);
1850 SDValue True = N->getOperand(2);
1851 SDValue False = N->getOperand(3);
1852 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1853
 // The inner and outer select must share their true/false values, and the
 // outer comparison must be against the false value.
1854 if (LHS.getOperand(2).getNode() != True.getNode() ||
1855 LHS.getOperand(3).getNode() != False.getNode() ||
1856 RHS.getNode() != False.getNode()) {
1857 return SDValue();
1858 }
1859
1860 switch (NCC) {
1861 default: return SDValue();
1862 case ISD::SETNE: return LHS;
1863 case ISD::SETEQ: {
1864 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1865 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
 // Only emit the inverted condition when it is still allowed at this stage.
1866 if (DCI.isBeforeLegalizeOps() ||
1867 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1868 return DAG.getSelectCC(DL,
1869 LHS.getOperand(0),
1870 LHS.getOperand(1),
1871 LHS.getOperand(2),
1872 LHS.getOperand(3),
1873 LHSCC);
1874 break;
1875 }
1876 }
1877 return SDValue();
1878 }
1879
 // NOTE(review): the `case` label for this block (source line 1880) is absent
 // from this listing; the node rebuilt below is AMDGPUISD::R600_EXPORT.
1881 SDValue Arg = N->getOperand(1);
1882 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1883 break;
1884
1885 SDValue NewArgs[8] = {
1886 N->getOperand(0), // Chain
1887 SDValue(),
1888 N->getOperand(2), // ArrayBase
1889 N->getOperand(3), // Type
1890 N->getOperand(4), // SWZ_X
1891 N->getOperand(5), // SWZ_Y
1892 N->getOperand(6), // SWZ_Z
1893 N->getOperand(7) // SWZ_W
1894 };
 // Slot 1 is filled with the optimized vector; the four swizzle operands
 // starting at index 4 are updated in place.
1895 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1896 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1897 }
 // NOTE(review): the `case` label for this block (source line 1898) is absent
 // from this listing; the node rebuilt below is AMDGPUISD::TEXTURE_FETCH.
1899 SDValue Arg = N->getOperand(1);
1900 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1901 break;
1902
1903 SDValue NewArgs[19] = {
1904 N->getOperand(0),
1905 N->getOperand(1),
1906 N->getOperand(2),
1907 N->getOperand(3),
1908 N->getOperand(4),
1909 N->getOperand(5),
1910 N->getOperand(6),
1911 N->getOperand(7),
1912 N->getOperand(8),
1913 N->getOperand(9),
1914 N->getOperand(10),
1915 N->getOperand(11),
1916 N->getOperand(12),
1917 N->getOperand(13),
1918 N->getOperand(14),
1919 N->getOperand(15),
1920 N->getOperand(16),
1921 N->getOperand(17),
1922 N->getOperand(18),
1923 };
 // Here the four swizzle operands start at index 2.
1924 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1925 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1926 }
1927
1928 case ISD::LOAD: {
1929 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1930 SDValue Ptr = LoadNode->getBasePtr();
 // Constant-pointer loads from PARAM_I_ADDRESS are rewritten as reads of
 // constant buffer 0.
1931 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1932 isa<ConstantSDNode>(Ptr))
1933 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1934 break;
1935 }
1936
1937 default: break;
1938 }
1939
 // NOTE(review): the statement executed after the switch (source line 1940)
 // is absent from this listing.
1941}
1942
1943bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1944 SDValue &Src, SDValue &Neg, SDValue &Abs,
1945 SDValue &Sel, SDValue &Imm,
1946 SelectionDAG &DAG) const {
1947 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1948 if (!Src.isMachineOpcode())
1949 return false;
1950
1951 switch (Src.getMachineOpcode()) {
1952 case R600::FNEG_R600:
1953 if (!Neg.getNode())
1954 return false;
1955 Src = Src.getOperand(0);
1956 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1957 return true;
1958 case R600::FABS_R600:
1959 if (!Abs.getNode())
1960 return false;
1961 Src = Src.getOperand(0);
1962 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1963 return true;
1964 case R600::CONST_COPY: {
1965 unsigned Opcode = ParentNode->getMachineOpcode();
1966 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1967
1968 if (!Sel.getNode())
1969 return false;
1970
1971 SDValue CstOffset = Src.getOperand(0);
1972 if (ParentNode->getValueType(0).isVector())
1973 return false;
1974
1975 // Gather constants values
1976 int SrcIndices[] = {
1977 TII->getOperandIdx(Opcode, R600::OpName::src0),
1978 TII->getOperandIdx(Opcode, R600::OpName::src1),
1979 TII->getOperandIdx(Opcode, R600::OpName::src2),
1980 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1981 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1982 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1983 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1984 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1985 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1986 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1987 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1988 };
1989 std::vector<unsigned> Consts;
1990 for (int OtherSrcIdx : SrcIndices) {
1991 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1992 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1993 continue;
1994 if (HasDst) {
1995 OtherSrcIdx--;
1996 OtherSelIdx--;
1997 }
1998 if (RegisterSDNode *Reg =
1999 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2000 if (Reg->getReg() == R600::ALU_CONST) {
2001 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2002 }
2003 }
2004 }
2005
2006 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2007 Consts.push_back(Cst->getZExtValue());
2008 if (!TII->fitsConstReadLimitations(Consts)) {
2009 return false;
2010 }
2011
2012 Sel = CstOffset;
2013 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2014 return true;
2015 }
2016 case R600::MOV_IMM_GLOBAL_ADDR:
2017 // Check if the Imm slot is used. Taken from below.
2018 if (Imm->getAsZExtVal())
2019 return false;
2020 Imm = Src.getOperand(0);
2021 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2022 return true;
2023 case R600::MOV_IMM_I32:
2024 case R600::MOV_IMM_F32: {
2025 unsigned ImmReg = R600::ALU_LITERAL_X;
2026 uint64_t ImmValue = 0;
2027
2028 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2029 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2030 float FloatValue = FPC->getValueAPF().convertToFloat();
2031 if (FloatValue == 0.0) {
2032 ImmReg = R600::ZERO;
2033 } else if (FloatValue == 0.5) {
2034 ImmReg = R600::HALF;
2035 } else if (FloatValue == 1.0) {
2036 ImmReg = R600::ONE;
2037 } else {
2038 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2039 }
2040 } else {
2041 uint64_t Value = Src.getConstantOperandVal(0);
2042 if (Value == 0) {
2043 ImmReg = R600::ZERO;
2044 } else if (Value == 1) {
2045 ImmReg = R600::ONE_INT;
2046 } else {
2047 ImmValue = Value;
2048 }
2049 }
2050
2051 // Check that we aren't already using an immediate.
2052 // XXX: It's possible for an instruction to have more than one
2053 // immediate operand, but this is not supported yet.
2054 if (ImmReg == R600::ALU_LITERAL_X) {
2055 if (!Imm.getNode())
2056 return false;
2057 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2058 if (C->getZExtValue())
2059 return false;
2060 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2061 }
2062 Src = DAG.getRegister(ImmReg, MVT::i32);
2063 return true;
2064 }
2065 default:
2066 return false;
2067 }
2068}
2069
2070/// Fold the instructions after selecting them
///
/// Walks the source operands of the already-selected machine node \p Node and
/// hands each one to FoldOperand(), which may rewrite the operand (and its
/// associated modifier / sel / literal slots) in place. The operands are
/// edited in a local copy of the operand list; as soon as one fold succeeds a
/// new machine node is built from that edited list and returned, so at most
/// one operand is folded per call.
///
/// \param Node The machine node to inspect.
/// \param DAG  The SelectionDAG used to create the replacement node.
/// \returns \p Node itself when nothing was folded (including when it is not
///          a machine opcode, or is a generic opcode without instruction
///          modifiers); otherwise a new node with the same opcode, location
///          and value types but the updated operand list.
2071SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2072 SelectionDAG &DAG) const {
2073 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2074 if (!Node->isMachineOpcode())
2075 return Node;
2076
2077 unsigned Opcode = Node->getMachineOpcode();
     // Empty placeholder bound to every FoldOperand() slot that has no real
     // operand behind it.
2078 SDValue FakeOp;
2079
     // Mutable copy of the operand list. The references bound below alias
     // entries of this vector, so FoldOperand() edits it in place.
2080 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2081
2082 if (Opcode == R600::DOT_4) {
     // DOT_4: eight scalar sources (src0_X..src1_W), each with its own neg and
     // abs modifier operand. The three tables are index-aligned.
2083 int OperandIdx[] = {
2084 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2085 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2086 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2087 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2088 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2089 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2090 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2091 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2092 };
2093 int NegIdx[] = {
2094 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2095 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2096 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2097 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2098 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2099 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2100 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2101 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2102 };
2103 int AbsIdx[] = {
2104 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2105 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2106 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2107 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2108 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2109 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2110 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2111 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2112 };
2113 for (unsigned i = 0; i < 8; i++) {
     // A missing source operand aborts the whole scan, not just this slot.
2114 if (OperandIdx[i] < 0)
2115 return Node;
     // The -1 converts the instruction operand index into an index into Ops;
     // presumably this skips the dst operand, which the node's operand list
     // does not contain - TODO confirm. Unlike SelIdx below, this adjustment
     // is applied unconditionally rather than only when HasDst is true.
2116 SDValue &Src = Ops[OperandIdx[i] - 1];
2117 SDValue &Neg = Ops[NegIdx[i] - 1];
2118 SDValue &Abs = Ops[AbsIdx[i] - 1];
2119 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2120 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2121 if (HasDst)
2122 SelIdx--;
     // Fall back to the placeholder when this source has no sel operand.
2123 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
     // No literal slot is offered here: the empty FakeOp is passed as Imm.
     // NOTE(review): FoldOperand's MOV_IMM_GLOBAL_ADDR case dereferences Imm
     // without a null check - confirm such a source cannot reach this path.
2124 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2125 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2126 }
2127 } else if (Opcode == R600::REG_SEQUENCE) {
     // Visit operands 1, 3, 5, ... only; presumably these are the values being
     // combined, with the even slots holding sub-register indices - TODO
     // confirm. Only Src is backed by a real operand; the neg, abs, sel and
     // imm slots all alias the same empty FakeOp.
2128 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2129 SDValue &Src = Ops[i];
2130 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2131 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2132 }
2133 } else {
     // Generic case: only opcodes that have instruction modifiers are
     // considered, with up to three sources (src0..src2).
2134 if (!TII->hasInstrModifiers(Opcode))
2135 return Node;
2136 int OperandIdx[] = {
2137 TII->getOperandIdx(Opcode, R600::OpName::src0),
2138 TII->getOperandIdx(Opcode, R600::OpName::src1),
2139 TII->getOperandIdx(Opcode, R600::OpName::src2)
2140 };
2141 int NegIdx[] = {
2142 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2143 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2144 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2145 };
     // No abs operand is looked up for src2; -1 marks the slot as absent.
2146 int AbsIdx[] = {
2147 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2148 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2149 -1
2150 };
2151 for (unsigned i = 0; i < 3; i++) {
     // Sources are probed in order; the first missing one ends the scan.
2152 if (OperandIdx[i] < 0)
2153 return Node;
     // Same unconditional -1 index adjustment as in the DOT_4 branch.
2154 SDValue &Src = Ops[OperandIdx[i] - 1];
2155 SDValue &Neg = Ops[NegIdx[i] - 1];
     // Per-iteration throwaway slot used when there is no abs operand.
2156 SDValue FakeAbs;
2157 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2158 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2159 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2160 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2161 if (HasDst) {
2162 SelIdx--;
2163 ImmIdx--;
2164 }
2165 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
     // NOTE(review): ImmIdx is used without a range check; this assumes every
     // opcode with instruction modifiers also has a literal operand - confirm.
2166 SDValue &Imm = Ops[ImmIdx];
2167 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2168 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2169 }
2170 }
2171
     // Nothing could be folded: keep the original node.
2172 return Node;
2173}
2174
2176R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2177 switch (RMW->getOperation()) {
2186 // FIXME: Cayman at least appears to have instructions for this, but the
2187 // instruction definitions appear to be missing.
2189 case AtomicRMWInst::Xchg: {
2190 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2191 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2192 if (ValSize == 32 || ValSize == 64)
2195 }
2196 default:
2197 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2198 unsigned Size = IntTy->getBitWidth();
2199 if (Size == 32 || Size == 64)
2201 }
2202
2204 }
2205
2206 llvm_unreachable("covered atomicrmw op switch");
2207}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
#define MO_FLAG_NEG
Definition: R600Defines.h:15
#define MO_FLAG_ABS
Definition: R600Defines.h:16
#define MO_FLAG_MASK
Definition: R600Defines.h:17
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
static bool isEOP(MachineBasicBlock::iterator I)
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
static int ConstantAddressBlock(unsigned AddressSpace)
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
R600 DAG Lowering interface definition.
Interface definition for R600InstrInfo.
Provides R600 specific target descriptions.
AMDGPU R600 specific subclass of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
unsigned getStackWidth(const MachineFunction &MF) const
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:5424
APInt bitcastToAPInt() const
Definition: APFloat.h:1266
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1500
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
@ FAdd
*p = old + v
Definition: Instructions.h:733
@ FSub
*p = old - v
Definition: Instructions.h:736
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:744
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:740
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
BinOp getOperation() const
Definition: Instructions.h:787
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
int64_t getLocMemOffset() const
const APFloat & getValueAPF() const
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1800
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool empty() const
Definition: DenseMap.h:98
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:146
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition: Function.cpp:384
unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Machine Value Type.
static auto integer_valuetypes()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
unsigned getTargetFlags() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
bool hasFFBH() const
bool hasFMA() const
bool hasBFI() const
Definition: R600Subtarget.h:88
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:52
bool hasCARRY() const
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:60
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:50
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:92
bool hasBORROW() const
Definition: R600Subtarget.h:99
bool hasFFBL() const
bool hasBFE() const
Definition: R600Subtarget.h:84
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:844
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:489
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:692
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:484
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:815
SDValue getCondCode(ISD::CondCode Cond)
LLVMContext * getContext() const
Definition: SelectionDAG.h:502
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:572
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:697
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const TargetMachine & getTargetMachine() const
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isShader(CallingConv::ID cc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:779
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:243
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:752
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1284
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:276
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:501
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1074
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:497
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:205
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:397
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:262
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:751
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:960
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1120
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:756
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1280
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:734
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:771
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:828
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:866
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:708
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:190
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:286
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:538
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:937
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1113
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:793
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:507
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified, possibly variable, elements.
Definition: ISDOpcodes.h:529
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out, when considering SETFALSE (something that never exists dynamically) as 0.
Definition: ISDOpcodes.h:1578
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1558
int getLDSNoRetOp(uint16_t Opcode)
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr float pif
Definition: MathExtras.h:68
constexpr double e
Definition: MathExtras.h:47
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
AddressSpace
Definition: NVPTXBaseInfo.h:21
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
DWARFExpression::Operation Op
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:290
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition: ValueTypes.h:282
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:203
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
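This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.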