LLVM 22.0.0git
WebAssemblyISelLowering.cpp
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
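// For illustration (not part of the original source): an IR access such as
//   %v = load i32, ptr addrspace(1) @g
// is custom-lowered (see LowerLoad/LowerStore below) to a GLOBAL_GET or
// GLOBAL_SET node, i.e. a wasm `global.get`/`global.set`, rather than a
// linear-memory load or store.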
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
91 setOperationAction(ISD::LOAD, T, Custom);
92 setOperationAction(ISD::STORE, T, Custom);
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
97 setOperationAction(ISD::LOAD, T, Custom);
98 setOperationAction(ISD::STORE, T, Custom);
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
103 setOperationAction(ISD::STORE, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // MVT::Other, which here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
109 setOperationAction(ISD::LOAD, T, Custom);
110 setOperationAction(ISD::STORE, T, Custom);
111 }
112 }
113
119 setOperationAction(ISD::BRIND, MVT::Other, Custom);
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction(ISD::VAARG, MVT::Other, Expand);
126 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
127 setOperationAction(ISD::VAEND, MVT::Other, Expand);
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
140 for (auto Op :
141 {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
143 // Mark as Legal the floating-point library function operators that
144 // otherwise default to expand.
145 for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
146 ISD::FRINT, ISD::FROUNDEVEN})
148 // Support minimum and maximum, which otherwise default to expand.
149 setOperationAction(ISD::FMINIMUM, T, Legal);
150 setOperationAction(ISD::FMAXIMUM, T, Legal);
151 // When experimental v8f16 support is enabled these instructions don't need
152 // to be expanded.
153 if (T != MVT::v8f16) {
154 setOperationAction(ISD::FP16_TO_FP, T, Expand);
155 setOperationAction(ISD::FP_TO_FP16, T, Expand);
156 }
158 setTruncStoreAction(T, MVT::f16, Expand);
159 }
160
161 // Expand unavailable integer operations.
162 for (auto Op :
166 for (auto T : {MVT::i32, MVT::i64})
168 if (Subtarget->hasSIMD128())
169 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
171 }
172
173 if (Subtarget->hasWideArithmetic()) {
179 }
180
181 if (Subtarget->hasNontrappingFPToInt())
183 for (auto T : {MVT::i32, MVT::i64})
185
186 // SIMD-specific configuration
187 if (Subtarget->hasSIMD128()) {
188
189 // Combine partial.reduce.add before legalization gets confused.
191
192 // Combine wide-vector muls with extended inputs into extmul_half.
194
195 // Combine vector mask reductions into alltrue/anytrue
197
198 // Convert vector-to-integer bitcasts to bitmask
199 setTargetDAGCombine(ISD::BITCAST);
200
201 // Hoist bitcasts out of shuffles
203
204 // Combine extends of extract_subvectors into widening ops
206
207 // Combine int_to_fp or fp_extend of extract_vectors (and vice versa) into
208 // conversion ops
211
212 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
213 // into conversion ops
216
218
219 // Support saturating add/sub for i8x16 and i16x8
221 for (auto T : {MVT::v16i8, MVT::v8i16})
223
224 // Support integer abs
225 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
227
228 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
229 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
230 MVT::v2f64})
232
233 if (Subtarget->hasFP16())
235
236 // We have custom shuffle lowering to expose the shuffle mask
237 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
238 MVT::v2f64})
240
241 if (Subtarget->hasFP16())
243
244 // Support splatting
245 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
246 MVT::v2f64})
248
249 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
250
251 // Custom lowering since wasm shifts must have a scalar shift amount
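// Illustrative note (an assumption for exposition, not from the original
// source): wasm SIMD shifts such as i32x4.shl take a scalar i32 shift amount,
// so a node like (shl v4i32:%v, (splat i32:%amt)) maps onto one instruction,
// whereas a truly per-lane shift amount cannot and is presumably scalarized.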
252 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
253 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
255
256 // Custom lower lane accesses to expand out variable indices
258 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
259 MVT::v2f64})
261
262 // There is no i8x16.mul instruction
263 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
264
265 // There is no vector conditional select instruction
266 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
267 MVT::v2f64})
269
270 // Expand integer operations supported for scalars but not SIMD
271 for (auto Op :
273 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
275
276 // But we do have integer min and max operations
277 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
278 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
280
281 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
282 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
283 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
284 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
285
286 // Custom lower bit counting operations for other types to scalarize them.
287 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
288 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
290
291 // Expand float operations supported for scalars but not SIMD
292 for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
293 ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
294 for (auto T : {MVT::v4f32, MVT::v2f64})
296
297 // Unsigned comparison operations are unavailable for i64x2 vectors.
299 setCondCodeAction(CC, MVT::v2i64, Custom);
300
301 // 64x2 conversions are not in the spec
302 for (auto Op :
304 for (auto T : {MVT::v2i64, MVT::v2f64})
306
307 // But saturating fp_to_int conversions are
309 setOperationAction(Op, MVT::v4i32, Custom);
310 if (Subtarget->hasFP16()) {
311 setOperationAction(Op, MVT::v8i16, Custom);
312 }
313 }
314
315 // Support vector extending
319 }
320 }
321
322 // As a special case, these operators use the type to mean the type to
323 // sign-extend from.
325 if (!Subtarget->hasSignExt()) {
326 // Sign extends are legal only when extending a vector extract
327 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
328 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
330 }
333
334 // Dynamic stack allocation: use the default expansion.
335 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
336 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
337 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
338
342
343 // Expand these forms; we pattern-match the forms that we can handle in isel.
344 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
345 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
347
348 // We have custom switch handling.
349 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
350
351 // WebAssembly doesn't have:
352 // - Floating-point extending loads.
353 // - Floating-point truncating stores.
354 // - i1 extending loads.
355 // - truncating SIMD stores and most extending loads
356 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
357 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
358 for (auto T : MVT::integer_valuetypes())
359 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
360 setLoadExtAction(Ext, T, MVT::i1, Promote);
361 if (Subtarget->hasSIMD128()) {
362 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
363 MVT::v2f64}) {
364 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
365 if (MVT(T) != MemT) {
367 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
368 setLoadExtAction(Ext, T, MemT, Expand);
369 }
370 }
371 }
372 // But some vector extending loads are legal
373 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
374 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
375 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
376 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
377 }
378 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
379 }
380
381 // Don't do anything clever with build_pairs
383
384 // Trap lowers to wasm unreachable
385 setOperationAction(ISD::TRAP, MVT::Other, Legal);
386 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
387
388 // Exception handling intrinsics
392
394
395 // Always convert switches to br_tables unless there is only one case, which
396 // is equivalent to a simple branch. This reduces code size for wasm, and we
397 // defer possible jump table optimizations to the VM.
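// Illustrative sketch (not part of the original source): under this policy a
// switch with more than one case becomes a single `br_table` dispatching on
// the selector, and the engine decides how to implement the jump.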
399}
400
409
418
419bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic(
420 const IntrinsicInst *I) const {
421 if (I->getIntrinsicID() != Intrinsic::vector_partial_reduce_add)
422 return true;
423
424 EVT VT = EVT::getEVT(I->getType());
425 if (VT.getSizeInBits() > 128)
426 return true;
427
428 auto Op1 = I->getOperand(1);
429
430 if (auto *InputInst = dyn_cast<Instruction>(Op1)) {
431 unsigned Opcode = InstructionOpcodeToISD(InputInst->getOpcode());
432 if (Opcode == ISD::MUL) {
433 if (isa<Instruction>(InputInst->getOperand(0)) &&
434 isa<Instruction>(InputInst->getOperand(1))) {
435 // dot only supports signed inputs, but we also support lowering the unsigned case.
436 if (cast<Instruction>(InputInst->getOperand(0))->getOpcode() !=
437 cast<Instruction>(InputInst->getOperand(1))->getOpcode())
438 return true;
439
440 EVT Op1VT = EVT::getEVT(Op1->getType());
441 if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
442 ((VT.getVectorElementCount() * 2 ==
443 Op1VT.getVectorElementCount()) ||
444 (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount())))
445 return false;
446 }
447 } else if (ISD::isExtOpcode(Opcode)) {
448 return false;
449 }
450 }
451 return true;
452}
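// Illustrative IR shape (an assumption for exposition, not from the original
// source) that this hook declines to expand, so ISel can form a dot-style
// partial reduction: a <4 x i32> partial.reduce.add whose second operand is a
// <16 x i32> mul of two values that are both sign-extended (or both
// zero-extended) from <16 x i8>.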
453
455WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
456 // We have wasm instructions for these
457 switch (AI->getOperation()) {
465 default:
466 break;
467 }
469}
470
471bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
472 // Implementation copied from X86TargetLowering.
473 unsigned Opc = VecOp.getOpcode();
474
475 // Assume target opcodes can't be scalarized.
476 // TODO - do we have any exceptions?
478 return false;
479
480 // If the vector op is not supported, try to convert to scalar.
481 EVT VecVT = VecOp.getValueType();
483 return true;
484
485 // If the vector op is supported, but the scalar op is not, the transform may
486 // not be worthwhile.
487 EVT ScalarVT = VecVT.getScalarType();
488 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
489}
490
491FastISel *WebAssemblyTargetLowering::createFastISel(
492 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
493 return WebAssembly::createFastISel(FuncInfo, LibInfo);
494}
495
496MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
497 EVT VT) const {
498 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
499 if (BitWidth > 1 && BitWidth < 8)
500 BitWidth = 8;
501
502 if (BitWidth > 64) {
503 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
504 // the count to be an i32.
505 BitWidth = 32;
507 "32-bit shift counts ought to be enough for anyone");
508 }
509
512 "Unable to represent scalar shift amount type");
513 return Result;
514}
515
516// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
517// undefined result on invalid/overflow, to the WebAssembly opcode, which
518// traps on invalid/overflow.
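// For illustration (not part of the original source), the emitted control flow
// is roughly:
//   if (x is NaN or out of range)   // tested via the LT/GE/EQZ sequence below
//     result = Substitute;          // 0 for unsigned, INT_MIN for signed
//   else
//     result = trunc(x);            // the trapping wasm truncation opcode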
521 const TargetInstrInfo &TII,
522 bool IsUnsigned, bool Int64,
523 bool Float64, unsigned LoweredOpcode) {
525
526 Register OutReg = MI.getOperand(0).getReg();
527 Register InReg = MI.getOperand(1).getReg();
528
529 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
530 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
531 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
532 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
533 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
534 unsigned Eqz = WebAssembly::EQZ_I32;
535 unsigned And = WebAssembly::AND_I32;
536 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
537 int64_t Substitute = IsUnsigned ? 0 : Limit;
538 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
539 auto &Context = BB->getParent()->getFunction().getContext();
540 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
541
542 const BasicBlock *LLVMBB = BB->getBasicBlock();
543 MachineFunction *F = BB->getParent();
544 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
545 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
546 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
547
549 F->insert(It, FalseMBB);
550 F->insert(It, TrueMBB);
551 F->insert(It, DoneMBB);
552
553 // Transfer the remainder of BB and its successor edges to DoneMBB.
554 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
556
557 BB->addSuccessor(TrueMBB);
558 BB->addSuccessor(FalseMBB);
559 TrueMBB->addSuccessor(DoneMBB);
560 FalseMBB->addSuccessor(DoneMBB);
561
562 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
563 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
564 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
565 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
566 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
567 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
568 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
569
570 MI.eraseFromParent();
571 // For signed numbers, we can do a single comparison to determine whether
572 // fabs(x) is within range.
573 if (IsUnsigned) {
574 Tmp0 = InReg;
575 } else {
576 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
577 }
578 BuildMI(BB, DL, TII.get(FConst), Tmp1)
579 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
580 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
581
582 // For unsigned numbers, we have to do a separate comparison with zero.
583 if (IsUnsigned) {
584 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
585 Register SecondCmpReg =
586 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
587 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
588 BuildMI(BB, DL, TII.get(FConst), Tmp1)
589 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
590 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
591 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
592 CmpReg = AndReg;
593 }
594
595 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
596
597 // Create the CFG diamond to select between doing the conversion or using
598 // the substitute value.
599 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
600 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
601 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
602 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
603 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
604 .addReg(FalseReg)
605 .addMBB(FalseMBB)
606 .addReg(TrueReg)
607 .addMBB(TrueMBB);
608
609 return DoneMBB;
610}
611
612// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
613// instruction to handle the zero-length case.
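// For illustration (not part of the original source), the resulting CFG is a
// triangle:
//   BB:      eqz = (len == 0); br_if DoneMBB, eqz
//   TrueMBB: memory.copy ...; br DoneMBB
//   DoneMBB: rest of the original block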
616 const TargetInstrInfo &TII, bool Int64) {
618
619 MachineOperand DstMem = MI.getOperand(0);
620 MachineOperand SrcMem = MI.getOperand(1);
621 MachineOperand Dst = MI.getOperand(2);
622 MachineOperand Src = MI.getOperand(3);
623 MachineOperand Len = MI.getOperand(4);
624
625 // We're going to add an extra use to `Len` to test if it's zero; that
626 // use shouldn't be a kill, even if the original use is.
627 MachineOperand NoKillLen = Len;
628 NoKillLen.setIsKill(false);
629
630 // Decide on which `MachineInstr` opcode we're going to use.
631 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
632 unsigned MemoryCopy =
633 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
634
635 // Create two new basic blocks; one for the new `memory.copy` that we can
636 // branch over, and one for the rest of the instructions after the original
637 // `memory.copy`.
638 const BasicBlock *LLVMBB = BB->getBasicBlock();
639 MachineFunction *F = BB->getParent();
640 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
641 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
642
644 F->insert(It, TrueMBB);
645 F->insert(It, DoneMBB);
646
647 // Transfer the remainder of BB and its successor edges to DoneMBB.
648 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
650
651 // Connect the CFG edges.
652 BB->addSuccessor(TrueMBB);
653 BB->addSuccessor(DoneMBB);
654 TrueMBB->addSuccessor(DoneMBB);
655
656 // Create a virtual register for the `Eqz` result.
657 unsigned EqzReg;
658 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
659
660 // Erase the original `memory.copy`.
661 MI.eraseFromParent();
662
663 // Test if `Len` is zero.
664 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
665
666 // Insert a new `memory.copy`.
667 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
668 .add(DstMem)
669 .add(SrcMem)
670 .add(Dst)
671 .add(Src)
672 .add(Len);
673
674 // Create the CFG triangle.
675 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
676 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
677
678 return DoneMBB;
679}
680
681// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
682// instruction to handle the zero-length case.
685 const TargetInstrInfo &TII, bool Int64) {
687
688 MachineOperand Mem = MI.getOperand(0);
689 MachineOperand Dst = MI.getOperand(1);
690 MachineOperand Val = MI.getOperand(2);
691 MachineOperand Len = MI.getOperand(3);
692
693 // We're going to add an extra use to `Len` to test if it's zero; that
694 // use shouldn't be a kill, even if the original use is.
695 MachineOperand NoKillLen = Len;
696 NoKillLen.setIsKill(false);
697
698 // Decide on which `MachineInstr` opcode we're going to use.
699 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
700 unsigned MemoryFill =
701 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
702
703 // Create two new basic blocks; one for the new `memory.fill` that we can
704 // branch over, and one for the rest of the instructions after the original
705 // `memory.fill`.
706 const BasicBlock *LLVMBB = BB->getBasicBlock();
707 MachineFunction *F = BB->getParent();
708 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
709 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
710
712 F->insert(It, TrueMBB);
713 F->insert(It, DoneMBB);
714
715 // Transfer the remainder of BB and its successor edges to DoneMBB.
716 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
718
719 // Connect the CFG edges.
720 BB->addSuccessor(TrueMBB);
721 BB->addSuccessor(DoneMBB);
722 TrueMBB->addSuccessor(DoneMBB);
723
724 // Create a virtual register for the `Eqz` result.
725 unsigned EqzReg;
726 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
727
728 // Erase the original `memory.fill`.
729 MI.eraseFromParent();
730
731 // Test if `Len` is zero.
732 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
733
734 // Insert a new `memory.fill`.
735 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
736
737 // Create the CFG triangle.
738 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
739 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
740
741 return DoneMBB;
742}
743
744static MachineBasicBlock *
746 const WebAssemblySubtarget *Subtarget,
747 const TargetInstrInfo &TII) {
748 MachineInstr &CallParams = *CallResults.getPrevNode();
749 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
750 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
751 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
752
753 bool IsIndirect =
754 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
755 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
756
757 bool IsFuncrefCall = false;
758 if (IsIndirect && CallParams.getOperand(0).isReg()) {
759 Register Reg = CallParams.getOperand(0).getReg();
760 const MachineFunction *MF = BB->getParent();
761 const MachineRegisterInfo &MRI = MF->getRegInfo();
762 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
763 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
764 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
765 }
766
767 unsigned CallOp;
768 if (IsIndirect && IsRetCall) {
769 CallOp = WebAssembly::RET_CALL_INDIRECT;
770 } else if (IsIndirect) {
771 CallOp = WebAssembly::CALL_INDIRECT;
772 } else if (IsRetCall) {
773 CallOp = WebAssembly::RET_CALL;
774 } else {
775 CallOp = WebAssembly::CALL;
776 }
777
778 MachineFunction &MF = *BB->getParent();
779 const MCInstrDesc &MCID = TII.get(CallOp);
780 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
781
782 // Move the function pointer to the end of the arguments for indirect calls
783 if (IsIndirect) {
784 auto FnPtr = CallParams.getOperand(0);
785 CallParams.removeOperand(0);
786
787 // For funcrefs, call_indirect is done through __funcref_call_table, and the
788 // funcref is always installed in slot 0 of that table. Therefore, instead of
789 // appending the function pointer to the end of the params list, a zero (the
790 // index into __funcref_call_table) is added.
792 if (IsFuncrefCall) {
793 Register RegZero =
794 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
795 MachineInstrBuilder MIBC0 =
796 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
797
798 BB->insert(CallResults.getIterator(), MIBC0);
799 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
800 } else
801 CallParams.addOperand(FnPtr);
802 }
803
804 for (auto Def : CallResults.defs())
805 MIB.add(Def);
806
807 if (IsIndirect) {
808 // Placeholder for the type index.
809 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
810 MIB.addImm(0);
811 // The table into which this call_indirect indexes.
812 MCSymbolWasm *Table = IsFuncrefCall
814 MF.getContext(), Subtarget)
816 MF.getContext(), Subtarget);
817 if (Subtarget->hasCallIndirectOverlong()) {
818 MIB.addSym(Table);
819 } else {
820 // For the MVP there is at most one table whose number is 0, but we can't
821 // write a table symbol or issue relocations. Instead we just ensure the
822 // table is live and write a zero.
823 Table->setNoStrip();
824 MIB.addImm(0);
825 }
826 }
827
828 for (auto Use : CallParams.uses())
829 MIB.add(Use);
830
831 BB->insert(CallResults.getIterator(), MIB);
832 CallParams.eraseFromParent();
833 CallResults.eraseFromParent();
834
835 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
836 // table slot with ref.null upon call_indirect return.
837 //
838 // This generates the following code, which comes right after a call_indirect
839 // of a funcref:
840 //
841 // i32.const 0
842 // ref.null func
843 // table.set __funcref_call_table
844 if (IsIndirect && IsFuncrefCall) {
846 MF.getContext(), Subtarget);
847 Register RegZero =
848 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
849 MachineInstr *Const0 =
850 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
851 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
852
853 Register RegFuncref =
854 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
855 MachineInstr *RefNull =
856 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
857 BB->insertAfter(Const0->getIterator(), RefNull);
858
859 MachineInstr *TableSet =
860 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
861 .addSym(Table)
862 .addReg(RegZero)
863 .addReg(RegFuncref);
864 BB->insertAfter(RefNull->getIterator(), TableSet);
865 }
866
867 return BB;
868}
869
870MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
871 MachineInstr &MI, MachineBasicBlock *BB) const {
872 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
873 DebugLoc DL = MI.getDebugLoc();
874
875 switch (MI.getOpcode()) {
876 default:
877 llvm_unreachable("Unexpected instr type to insert");
878 case WebAssembly::FP_TO_SINT_I32_F32:
879 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
880 WebAssembly::I32_TRUNC_S_F32);
881 case WebAssembly::FP_TO_UINT_I32_F32:
882 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
883 WebAssembly::I32_TRUNC_U_F32);
884 case WebAssembly::FP_TO_SINT_I64_F32:
885 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
886 WebAssembly::I64_TRUNC_S_F32);
887 case WebAssembly::FP_TO_UINT_I64_F32:
888 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
889 WebAssembly::I64_TRUNC_U_F32);
890 case WebAssembly::FP_TO_SINT_I32_F64:
891 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
892 WebAssembly::I32_TRUNC_S_F64);
893 case WebAssembly::FP_TO_UINT_I32_F64:
894 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
895 WebAssembly::I32_TRUNC_U_F64);
896 case WebAssembly::FP_TO_SINT_I64_F64:
897 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
898 WebAssembly::I64_TRUNC_S_F64);
899 case WebAssembly::FP_TO_UINT_I64_F64:
900 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
901 WebAssembly::I64_TRUNC_U_F64);
902 case WebAssembly::MEMCPY_A32:
903 return LowerMemcpy(MI, DL, BB, TII, false);
904 case WebAssembly::MEMCPY_A64:
905 return LowerMemcpy(MI, DL, BB, TII, true);
906 case WebAssembly::MEMSET_A32:
907 return LowerMemset(MI, DL, BB, TII, false);
908 case WebAssembly::MEMSET_A64:
909 return LowerMemset(MI, DL, BB, TII, true);
910 case WebAssembly::CALL_RESULTS:
911 case WebAssembly::RET_CALL_RESULTS:
912 return LowerCallResults(MI, DL, BB, Subtarget, TII);
913 }
914}
915
916const char *
917WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
918 switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
920 break;
921#define HANDLE_NODETYPE(NODE) \
922 case WebAssemblyISD::NODE: \
923 return "WebAssemblyISD::" #NODE;
924#include "WebAssemblyISD.def"
925#undef HANDLE_NODETYPE
926 }
927 return nullptr;
928}
929
930std::pair<unsigned, const TargetRegisterClass *>
931WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
932 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
933 // First, see if this is a constraint that directly corresponds to a
934 // WebAssembly register class.
935 if (Constraint.size() == 1) {
936 switch (Constraint[0]) {
937 case 'r':
938 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
939 if (Subtarget->hasSIMD128() && VT.isVector()) {
940 if (VT.getSizeInBits() == 128)
941 return std::make_pair(0U, &WebAssembly::V128RegClass);
942 }
943 if (VT.isInteger() && !VT.isVector()) {
944 if (VT.getSizeInBits() <= 32)
945 return std::make_pair(0U, &WebAssembly::I32RegClass);
946 if (VT.getSizeInBits() <= 64)
947 return std::make_pair(0U, &WebAssembly::I64RegClass);
948 }
949 if (VT.isFloatingPoint() && !VT.isVector()) {
950 switch (VT.getSizeInBits()) {
951 case 32:
952 return std::make_pair(0U, &WebAssembly::F32RegClass);
953 case 64:
954 return std::make_pair(0U, &WebAssembly::F64RegClass);
955 default:
956 break;
957 }
958 }
959 break;
960 default:
961 break;
962 }
963 }
964
966}
967
968bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
969 // Assume ctz is a relatively cheap operation.
970 return true;
971}
972
973bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
974 // Assume clz is a relatively cheap operation.
975 return true;
976}
977
978bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
979 const AddrMode &AM,
980 Type *Ty, unsigned AS,
981 Instruction *I) const {
982 // WebAssembly offsets are added as unsigned without wrapping. The
983 // isLegalAddressingMode hook gives us no way to determine whether wrapping
984 // could occur, so we approximate this by accepting only non-negative offsets.
985 if (AM.BaseOffs < 0)
986 return false;
987
988 // WebAssembly has no scale register operands.
989 if (AM.Scale != 0)
990 return false;
991
992 // Everything else is legal.
993 return true;
994}
995
996bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
997 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
998 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
999 // WebAssembly supports unaligned accesses, though they should be declared
1000 // with the p2align attribute on the loads and stores that perform them, and
1001 // there may be a performance impact. We tell LLVM they're "fast" because
1002 // for the kinds of things that LLVM uses this for (merging adjacent stores
1003 // of constants, etc.), WebAssembly implementations will either want the
1004 // unaligned access or they'll split anyway.
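// Illustrative note (an assumption, not from the original source): such an
// access is still emitted as a normal load/store, just with a reduced
// alignment hint, e.g. `i32.load align=1` for a 4-byte load that is only
// known to be byte-aligned.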
1005 if (Fast)
1006 *Fast = 1;
1007 return true;
1008}
1009
1010bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1011 AttributeList Attr) const {
1012 // The current thinking is that wasm engines will perform this optimization,
1013 // so we can save on code size.
1014 return true;
1015}
1016
1017bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1018 EVT ExtT = ExtVal.getValueType();
1019 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1020 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1021 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1022 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1023}
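// Illustrative note (not part of the original source): these combinations
// correspond to the wasm extending loads v128.load8x8_{s,u},
// v128.load16x4_{s,u}, and v128.load32x2_{s,u}.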
1024
1025bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1026 const GlobalAddressSDNode *GA) const {
1027 // Wasm doesn't support function addresses with offsets
1028 const GlobalValue *GV = GA->getGlobal();
1030}
1031
1032EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1033 LLVMContext &C,
1034 EVT VT) const {
1035 if (VT.isVector())
1037
1038 // So far, all branch instructions in Wasm take an I32 condition.
1039 // The default TargetLowering::getSetCCResultType returns the pointer size,
1040 // which would be useful to reduce instruction counts when testing
1041 // against 64-bit pointers/values if at some point Wasm supports that.
1042 return EVT::getIntegerVT(C, 32);
1043}
1044
1045bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1046 const CallInst &I,
1047 MachineFunction &MF,
1048 unsigned Intrinsic) const {
1049 switch (Intrinsic) {
1050 case Intrinsic::wasm_memory_atomic_notify:
1052 Info.memVT = MVT::i32;
1053 Info.ptrVal = I.getArgOperand(0);
1054 Info.offset = 0;
1055 Info.align = Align(4);
1056 // The atomic.notify instruction does not really load the memory specified by
1057 // this argument, but a MachineMemOperand must be either a load or a store, so
1058 // we set this to a load.
1059 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1060 // instructions are treated as volatiles in the backend, so we should be
1061 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1063 return true;
1064 case Intrinsic::wasm_memory_atomic_wait32:
1066 Info.memVT = MVT::i32;
1067 Info.ptrVal = I.getArgOperand(0);
1068 Info.offset = 0;
1069 Info.align = Align(4);
1071 return true;
1072 case Intrinsic::wasm_memory_atomic_wait64:
1074 Info.memVT = MVT::i64;
1075 Info.ptrVal = I.getArgOperand(0);
1076 Info.offset = 0;
1077 Info.align = Align(8);
1079 return true;
1080 case Intrinsic::wasm_loadf16_f32:
1082 Info.memVT = MVT::f16;
1083 Info.ptrVal = I.getArgOperand(0);
1084 Info.offset = 0;
1085 Info.align = Align(2);
1087 return true;
1088 case Intrinsic::wasm_storef16_f32:
1090 Info.memVT = MVT::f16;
1091 Info.ptrVal = I.getArgOperand(1);
1092 Info.offset = 0;
1093 Info.align = Align(2);
1095 return true;
1096 default:
1097 return false;
1098 }
1099}
1100
1101void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1102 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1103 const SelectionDAG &DAG, unsigned Depth) const {
1104 switch (Op.getOpcode()) {
1105 default:
1106 break;
1108 unsigned IntNo = Op.getConstantOperandVal(0);
1109 switch (IntNo) {
1110 default:
1111 break;
1112 case Intrinsic::wasm_bitmask: {
1113 unsigned BitWidth = Known.getBitWidth();
1114 EVT VT = Op.getOperand(1).getSimpleValueType();
1115 unsigned PossibleBits = VT.getVectorNumElements();
1116 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1117 Known.Zero |= ZeroMask;
1118 break;
1119 }
1120 }
1121 break;
1122 }
1123
1124 // For 128-bit addition, if the upper 64 bits of both operands are zero, then
1125 // the upper 64 bits of the result are known to be zero except for the lowest
1126 // bit, which may hold the carry out of the low half.
1127 case WebAssemblyISD::I64_ADD128:
1128 if (Op.getResNo() == 1) {
1129 SDValue LHS_HI = Op.getOperand(1);
1130 SDValue RHS_HI = Op.getOperand(3);
1131 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1132 Known.Zero.setBitsFrom(1);
1133 }
1134 break;
1135 }
1136}
1137
1139WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1140 if (VT.isFixedLengthVector()) {
1141 MVT EltVT = VT.getVectorElementType();
1142 // We have legal vector types with these lane types, so widening the
1143 // vector would let us use some of the lanes directly without having to
1144 // extend or truncate values.
1145 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1146 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1147 return TypeWidenVector;
1148 }
1149
1151}
1152
1153bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1154 SDValue Op, const TargetLoweringOpt &TLO) const {
1155 // The ISel process runs DAGCombiner after legalization; this step is called
1156 // the SelectionDAG optimization phase. This post-legalization combining process
1157 // runs DAGCombiner on each node, and if there was a change to be made,
1158 // re-runs legalization on it and its user nodes to make sure
1159 // everything is in a legalized state.
1160 //
1161 // The legalization calls lowering routines, and we do our custom lowering for
1162 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1163 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1164 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1165 // turns unused vector elements into undefs. But this routine does not work
1166 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1167 // combination can result in an infinite loop, in which undefs are converted
1168 // to zeros in legalization and back to undefs in combining.
1169 //
1170 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1171 // running for build_vectors.
1172 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1173 return false;
1174 return true;
1175}
1176
1177//===----------------------------------------------------------------------===//
1178// WebAssembly Lowering private implementation.
1179//===----------------------------------------------------------------------===//
1180
1181//===----------------------------------------------------------------------===//
1182// Lowering Code
1183//===----------------------------------------------------------------------===//
1184
1185static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1187 DAG.getContext()->diagnose(
1188 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1189}
1190
1191// Test whether the given calling convention is supported.
1193 // We currently support the language-independent target-independent
1194 // conventions. We don't yet have a way to annotate calls with properties like
1195 // "cold", and we don't have any call-clobbered registers, so these are mostly
1196 // all handled the same.
1197 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1198 CallConv == CallingConv::Cold ||
1199 CallConv == CallingConv::PreserveMost ||
1200 CallConv == CallingConv::PreserveAll ||
1201 CallConv == CallingConv::CXX_FAST_TLS ||
1203 CallConv == CallingConv::Swift;
1204}
1205
1206SDValue
1207WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1208 SmallVectorImpl<SDValue> &InVals) const {
1209 SelectionDAG &DAG = CLI.DAG;
1210 SDLoc DL = CLI.DL;
1211 SDValue Chain = CLI.Chain;
1212 SDValue Callee = CLI.Callee;
1213 MachineFunction &MF = DAG.getMachineFunction();
1214 auto Layout = MF.getDataLayout();
1215
1216 CallingConv::ID CallConv = CLI.CallConv;
1217 if (!callingConvSupported(CallConv))
1218 fail(DL, DAG,
1219 "WebAssembly doesn't support language-specific or target-specific "
1220 "calling conventions yet");
1221 if (CLI.IsPatchPoint)
1222 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1223
1224 if (CLI.IsTailCall) {
1225 auto NoTail = [&](const char *Msg) {
1226 if (CLI.CB && CLI.CB->isMustTailCall())
1227 fail(DL, DAG, Msg);
1228 CLI.IsTailCall = false;
1229 };
1230
1231 if (!Subtarget->hasTailCall())
1232 NoTail("WebAssembly 'tail-call' feature not enabled");
1233
1234 // Varargs calls cannot be tail calls because the buffer is on the stack
1235 if (CLI.IsVarArg)
1236 NoTail("WebAssembly does not support varargs tail calls");
1237
1238 // Do not tail call unless caller and callee return types match
1239 const Function &F = MF.getFunction();
1240 const TargetMachine &TM = getTargetMachine();
1241 Type *RetTy = F.getReturnType();
1242 SmallVector<MVT, 4> CallerRetTys;
1243 SmallVector<MVT, 4> CalleeRetTys;
1244 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1245 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1246 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1247 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1248 CalleeRetTys.begin());
1249 if (!TypesMatch)
1250 NoTail("WebAssembly tail call requires caller and callee return types to "
1251 "match");
1252
1253 // If pointers to local stack values are passed, we cannot tail call
1254 if (CLI.CB) {
1255 for (auto &Arg : CLI.CB->args()) {
1256 Value *Val = Arg.get();
1257 // Trace the value back through pointer operations
1258 while (true) {
1259 Value *Src = Val->stripPointerCastsAndAliases();
1260 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1261 Src = GEP->getPointerOperand();
1262 if (Val == Src)
1263 break;
1264 Val = Src;
1265 }
1266 if (isa<AllocaInst>(Val)) {
1267 NoTail(
1268 "WebAssembly does not support tail calling with stack arguments");
1269 break;
1270 }
1271 }
1272 }
1273 }
1274
1275 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1276 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1277 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1278
1279 // The generic code may have added an sret argument. If we're lowering an
1280 // invoke function, the ABI requires that the function pointer be the first
1281 // argument, so we may have to swap the arguments.
1282 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1283 Outs[0].Flags.isSRet()) {
1284 std::swap(Outs[0], Outs[1]);
1285 std::swap(OutVals[0], OutVals[1]);
1286 }
1287
1288 bool HasSwiftSelfArg = false;
1289 bool HasSwiftErrorArg = false;
1290 unsigned NumFixedArgs = 0;
1291 for (unsigned I = 0; I < Outs.size(); ++I) {
1292 const ISD::OutputArg &Out = Outs[I];
1293 SDValue &OutVal = OutVals[I];
1294 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1295 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1296 if (Out.Flags.isNest())
1297 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1298 if (Out.Flags.isInAlloca())
1299 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1300 if (Out.Flags.isInConsecutiveRegs())
1301 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1303 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1304 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1305 auto &MFI = MF.getFrameInfo();
1306 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1308 /*isSS=*/false);
1309 SDValue SizeNode =
1310 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1311 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1312 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1314 /*isVolatile*/ false, /*AlwaysInline=*/false,
1315 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1316 MachinePointerInfo());
1317 OutVal = FINode;
1318 }
1319 // Count the number of fixed args *after* legalization.
1320 NumFixedArgs += !Out.Flags.isVarArg();
1321 }
1322
1323 bool IsVarArg = CLI.IsVarArg;
1324 auto PtrVT = getPointerTy(Layout);
1325
1326 // For swiftcc, emit additional swiftself and swifterror arguments if they
1327 // aren't already present. These additional arguments are also added to the
1328 // callee signature; they are necessary so that caller and callee signatures
1329 // match for indirect calls.
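// Illustrative note (an assumption, not from the original source): a swiftcc
// callee that does not use swiftself/swifterror still has those pointer
// parameters appended to its wasm signature, and call sites pass undef
// placeholders for them, so direct and indirect calls agree on the signature.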
1330 if (CallConv == CallingConv::Swift) {
1331 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1332 if (!HasSwiftSelfArg) {
1333 NumFixedArgs++;
1334 ISD::ArgFlagsTy Flags;
1335 Flags.setSwiftSelf();
1336 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1337 CLI.Outs.push_back(Arg);
1338 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1339 CLI.OutVals.push_back(ArgVal);
1340 }
1341 if (!HasSwiftErrorArg) {
1342 NumFixedArgs++;
1343 ISD::ArgFlagsTy Flags;
1344 Flags.setSwiftError();
1345 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1346 CLI.Outs.push_back(Arg);
1347 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1348 CLI.OutVals.push_back(ArgVal);
1349 }
1350 }
1351
1352 // Analyze operands of the call, assigning locations to each operand.
1354 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1355
1356 if (IsVarArg) {
1357 // Outgoing non-fixed arguments are placed in a buffer. First
1358 // compute their offsets and the total amount of buffer space needed.
1359 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1360 const ISD::OutputArg &Out = Outs[I];
1361 SDValue &Arg = OutVals[I];
1362 EVT VT = Arg.getValueType();
1363 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1364 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1365 Align Alignment =
1366 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1367 unsigned Offset =
1368 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1369 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1370 Offset, VT.getSimpleVT(),
1372 }
1373 }
1374
1375 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1376
1377 SDValue FINode;
1378 if (IsVarArg && NumBytes) {
1379 // For non-fixed arguments, next emit stores to store the argument values
1380 // to the stack buffer at the offsets computed above.
1381 MaybeAlign StackAlign = Layout.getStackAlignment();
1382 assert(StackAlign && "data layout string is missing stack alignment");
1383 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1384 /*isSS=*/false);
1385 unsigned ValNo = 0;
1387 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1388 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1389 "ArgLocs should remain in order and only hold varargs args");
1390 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1391 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1392 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1393 DAG.getConstant(Offset, DL, PtrVT));
1394 Chains.push_back(
1395 DAG.getStore(Chain, DL, Arg, Add,
1397 }
1398 if (!Chains.empty())
1399 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1400 } else if (IsVarArg) {
1401 FINode = DAG.getIntPtrConstant(0, DL);
1402 }
1403
1404 if (Callee->getOpcode() == ISD::GlobalAddress) {
1405 // If the callee is a GlobalAddress node (quite common, since every direct
1406 // call is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1407 // doesn't add MO_GOT, which is not needed for direct calls.
1408 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1411 GA->getOffset());
1412 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1413 getPointerTy(DAG.getDataLayout()), Callee);
1414 }
1415
1416 // Compute the operands for the CALLn node.
1418 Ops.push_back(Chain);
1419 Ops.push_back(Callee);
1420
1421 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1422 // isn't reliable.
1423 Ops.append(OutVals.begin(),
1424 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1425 // Add a pointer to the vararg buffer.
1426 if (IsVarArg)
1427 Ops.push_back(FINode);
1428
1429 SmallVector<EVT, 8> InTys;
1430 for (const auto &In : Ins) {
1431 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1432 assert(!In.Flags.isNest() && "nest is not valid for return values");
1433 if (In.Flags.isInAlloca())
1434 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1435 if (In.Flags.isInConsecutiveRegs())
1436 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1437 if (In.Flags.isInConsecutiveRegsLast())
1438 fail(DL, DAG,
1439 "WebAssembly hasn't implemented cons regs last return values");
1440 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1441 // registers.
1442 InTys.push_back(In.VT);
1443 }
1444
1445 // Lastly, if this is a call to a funcref, we need to add a table.set
1446 // instruction to the chain and transform the call.
1448 CLI.CB->getCalledOperand()->getType())) {
1449 // In the absence of the function-references proposal, where a funcref call
1450 // would be lowered to call_ref, we use reference types: we generate a
1451 // table.set to store the funcref in a special table used solely for this
1452 // purpose, followed by a call_indirect. Here we just generate the table.set
1453 // and return its SDValue so that LowerCall can finalize the lowering by
1454 // generating the call_indirect.
1455 SDValue Chain = Ops[0];
1456
1458 MF.getContext(), Subtarget);
1459 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1460 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1461 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1462 SDValue TableSet = DAG.getMemIntrinsicNode(
1463 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1464 MVT::funcref,
1465 // Machine Mem Operand args
1466 MachinePointerInfo(
1468 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1470
1471 Ops[0] = TableSet; // The new chain is the TableSet itself
1472 }
1473
1474 if (CLI.IsTailCall) {
1475 // ret_calls do not return values to the current frame
1476 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1477 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1478 }
1479
1480 InTys.push_back(MVT::Other);
1481 SDVTList InTyList = DAG.getVTList(InTys);
1482 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1483
1484 for (size_t I = 0; I < Ins.size(); ++I)
1485 InVals.push_back(Res.getValue(I));
1486
1487 // Return the chain
1488 return Res.getValue(Ins.size());
1489}
1490
1491bool WebAssemblyTargetLowering::CanLowerReturn(
1492 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1493 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1494 const Type *RetTy) const {
1495 // WebAssembly can only handle returning tuples with multivalue enabled
1496 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1497}
1498
1499SDValue WebAssemblyTargetLowering::LowerReturn(
1500 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1502 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1503 SelectionDAG &DAG) const {
1504 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1505 "MVP WebAssembly can only return up to one value");
1506 if (!callingConvSupported(CallConv))
1507 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1508
1509 SmallVector<SDValue, 4> RetOps(1, Chain);
1510 RetOps.append(OutVals.begin(), OutVals.end());
1511 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1512
1513 // Record the number and types of the return values.
1514 for (const ISD::OutputArg &Out : Outs) {
1515 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1516 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1517 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1518 if (Out.Flags.isInAlloca())
1519 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1520 if (Out.Flags.isInConsecutiveRegs())
1521 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1523 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1524 }
1525
1526 return Chain;
1527}
1528
1529SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1530 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1531 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1532 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1533 if (!callingConvSupported(CallConv))
1534 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1535
1536 MachineFunction &MF = DAG.getMachineFunction();
1537 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1538
1539 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1540 // of the incoming values before they're represented by virtual registers.
1541 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1542
1543 bool HasSwiftErrorArg = false;
1544 bool HasSwiftSelfArg = false;
1545 for (const ISD::InputArg &In : Ins) {
1546 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1547 HasSwiftErrorArg |= In.Flags.isSwiftError();
1548 if (In.Flags.isInAlloca())
1549 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1550 if (In.Flags.isNest())
1551 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1552 if (In.Flags.isInConsecutiveRegs())
1553 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1554 if (In.Flags.isInConsecutiveRegsLast())
1555 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1556 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1557 // registers.
1558 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1559 DAG.getTargetConstant(InVals.size(),
1560 DL, MVT::i32))
1561 : DAG.getUNDEF(In.VT));
1562
1563 // Record the number and types of arguments.
1564 MFI->addParam(In.VT);
1565 }
1566
1567 // For swiftcc, emit additional swiftself and swifterror arguments if they
1568 // aren't already present. These additional arguments are also added to the
1569 // callee signature; they are necessary so that caller and callee signatures
1570 // match for indirect calls.
1571 auto PtrVT = getPointerTy(MF.getDataLayout());
1572 if (CallConv == CallingConv::Swift) {
1573 if (!HasSwiftSelfArg) {
1574 MFI->addParam(PtrVT);
1575 }
1576 if (!HasSwiftErrorArg) {
1577 MFI->addParam(PtrVT);
1578 }
1579 }
1580 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1581 // the buffer is passed as an argument.
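// Illustrative note (an assumption, not from the original source): the extra
// trailing pointer parameter created here is what LowerVASTART stores into
// the va_list, and the expanded va_arg then walks that caller-allocated
// buffer.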
1582 if (IsVarArg) {
1583 MVT PtrVT = getPointerTy(MF.getDataLayout());
1584 Register VarargVreg =
1586 MFI->setVarargBufferVreg(VarargVreg);
1587 Chain = DAG.getCopyToReg(
1588 Chain, DL, VarargVreg,
1589 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1590 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1591 MFI->addParam(PtrVT);
1592 }
1593
1594 // Record the number and types of arguments and results.
1595 SmallVector<MVT, 4> Params;
1598 MF.getFunction(), DAG.getTarget(), Params, Results);
1599 for (MVT VT : Results)
1600 MFI->addResult(VT);
1601 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1602 // the param logic here with ComputeSignatureVTs
1603 assert(MFI->getParams().size() == Params.size() &&
1604 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1605 Params.begin()));
1606
1607 return Chain;
1608}
1609
1610void WebAssemblyTargetLowering::ReplaceNodeResults(
1612 switch (N->getOpcode()) {
1614 // Do not add any results, signifying that N should not be custom lowered
1615 // after all. This happens because simd128 turns on custom lowering for
1616 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1617 // illegal type.
1618 break;
1621 // Do not add any results, signifying that N should not be custom lowered.
1622 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1623 break;
1624 case ISD::ADD:
1625 case ISD::SUB:
1626 Results.push_back(Replace128Op(N, DAG));
1627 break;
1628 default:
1630 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1631 }
1632}
1633
1634//===----------------------------------------------------------------------===//
1635// Custom lowering hooks.
1636//===----------------------------------------------------------------------===//
1637
1638SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1639 SelectionDAG &DAG) const {
1640 SDLoc DL(Op);
1641 switch (Op.getOpcode()) {
1642 default:
1643 llvm_unreachable("unimplemented operation lowering");
1644 return SDValue();
1645 case ISD::FrameIndex:
1646 return LowerFrameIndex(Op, DAG);
1647 case ISD::GlobalAddress:
1648 return LowerGlobalAddress(Op, DAG);
1650 return LowerGlobalTLSAddress(Op, DAG);
1652 return LowerExternalSymbol(Op, DAG);
1653 case ISD::JumpTable:
1654 return LowerJumpTable(Op, DAG);
1655 case ISD::BR_JT:
1656 return LowerBR_JT(Op, DAG);
1657 case ISD::VASTART:
1658 return LowerVASTART(Op, DAG);
1659 case ISD::BlockAddress:
1660 case ISD::BRIND:
1661 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1662 return SDValue();
1663 case ISD::RETURNADDR:
1664 return LowerRETURNADDR(Op, DAG);
1665 case ISD::FRAMEADDR:
1666 return LowerFRAMEADDR(Op, DAG);
1667 case ISD::CopyToReg:
1668 return LowerCopyToReg(Op, DAG);
1669 case ISD::EXTRACT_VECTOR_ELT:
1670 case ISD::INSERT_VECTOR_ELT:
1671 return LowerAccessVectorElement(Op, DAG);
1672 case ISD::INTRINSIC_VOID:
1673 case ISD::INTRINSIC_WO_CHAIN:
1674 case ISD::INTRINSIC_W_CHAIN:
1675 return LowerIntrinsic(Op, DAG);
1676 case ISD::SIGN_EXTEND_INREG:
1677 return LowerSIGN_EXTEND_INREG(Op, DAG);
1678 case ISD::SIGN_EXTEND_VECTOR_INREG:
1679 case ISD::ZERO_EXTEND_VECTOR_INREG:
1680 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1681 case ISD::BUILD_VECTOR:
1682 return LowerBUILD_VECTOR(Op, DAG);
1683 case ISD::VECTOR_SHUFFLE:
1684 return LowerVECTOR_SHUFFLE(Op, DAG);
1685 case ISD::SETCC:
1686 return LowerSETCC(Op, DAG);
1687 case ISD::SHL:
1688 case ISD::SRA:
1689 case ISD::SRL:
1690 return LowerShift(Op, DAG);
1691 case ISD::FP_TO_SINT_SAT:
1692 case ISD::FP_TO_UINT_SAT:
1693 return LowerFP_TO_INT_SAT(Op, DAG);
1694 case ISD::LOAD:
1695 return LowerLoad(Op, DAG);
1696 case ISD::STORE:
1697 return LowerStore(Op, DAG);
1698 case ISD::CTPOP:
1699 case ISD::CTLZ:
1700 case ISD::CTTZ:
1701 return DAG.UnrollVectorOp(Op.getNode());
1702 case ISD::CLEAR_CACHE:
1703 report_fatal_error("llvm.clear_cache is not supported on wasm");
1704 case ISD::SMUL_LOHI:
1705 case ISD::UMUL_LOHI:
1706 return LowerMUL_LOHI(Op, DAG);
1707 case ISD::UADDO:
1708 return LowerUADDO(Op, DAG);
1709 }
1710}
1711
1712 static bool IsWebAssemblyGlobal(SDValue Op) {
1713 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1714 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1715
1716 return false;
1717}
1718
1719static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1720 SelectionDAG &DAG) {
1721 const auto *FI = dyn_cast<FrameIndexSDNode>(Op);
1722 if (!FI)
1723 return std::nullopt;
1724
1725 auto &MF = DAG.getMachineFunction();
1726 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1727}
1728
1729SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1730 SelectionDAG &DAG) const {
1731 SDLoc DL(Op);
1732 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1733 const SDValue &Value = SN->getValue();
1734 const SDValue &Base = SN->getBasePtr();
1735 const SDValue &Offset = SN->getOffset();
1736
1737 if (IsWebAssemblyGlobal(Base)) {
1738 if (!Offset->isUndef())
1739 report_fatal_error("unexpected offset when storing to webassembly global",
1740 false);
1741
1742 SDVTList Tys = DAG.getVTList(MVT::Other);
1743 SDValue Ops[] = {SN->getChain(), Value, Base};
1744 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1745 SN->getMemoryVT(), SN->getMemOperand());
1746 }
1747
1748 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1749 if (!Offset->isUndef())
1750 report_fatal_error("unexpected offset when storing to webassembly local",
1751 false);
1752
1753 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1754 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1755 SDValue Ops[] = {SN->getChain(), Idx, Value};
1756 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1757 }
1758
1761 "Encountered an unlowerable store to the wasm_var address space",
1762 false);
1763
1764 return Op;
1765}
1766
1767SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1768 SelectionDAG &DAG) const {
1769 SDLoc DL(Op);
1770 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1771 const SDValue &Base = LN->getBasePtr();
1772 const SDValue &Offset = LN->getOffset();
1773
1774 if (IsWebAssemblyGlobal(Base)) {
1775 if (!Offset->isUndef())
1777 "unexpected offset when loading from webassembly global", false);
1778
1779 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1780 SDValue Ops[] = {LN->getChain(), Base};
1781 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1782 LN->getMemoryVT(), LN->getMemOperand());
1783 }
1784
1785 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1786 if (!Offset->isUndef())
1788 "unexpected offset when loading from webassembly local", false);
1789
1790 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1791 EVT LocalVT = LN->getValueType(0);
1792 SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
1793 {LN->getChain(), Idx});
1794 SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
1795 assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
1796 return Result;
1797 }
1798
1801 "Encountered an unlowerable load from the wasm_var address space",
1802 false);
1803
1804 return Op;
1805}
1806
1807SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1808 SelectionDAG &DAG) const {
1809 assert(Subtarget->hasWideArithmetic());
1810 assert(Op.getValueType() == MVT::i64);
1811 SDLoc DL(Op);
1812 unsigned Opcode;
1813 switch (Op.getOpcode()) {
1814 case ISD::UMUL_LOHI:
1815 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1816 break;
1817 case ISD::SMUL_LOHI:
1818 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1819 break;
1820 default:
1821 llvm_unreachable("unexpected opcode");
1822 }
1823 SDValue LHS = Op.getOperand(0);
1824 SDValue RHS = Op.getOperand(1);
1825 SDValue Lo =
1826 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1827 SDValue Hi(Lo.getNode(), 1);
1828 SDValue Ops[] = {Lo, Hi};
1829 return DAG.getMergeValues(Ops, DL);
1830}
1831
1832 // Lowers `UADDO` nodes to an `i64.add128` instruction when wide arithmetic is enabled.
1833//
1834// This enables generating a single wasm instruction for this operation where
1835// the upper half of both operands are constant zeros. The upper half of the
1836// result is then whether the overflow happened.
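// For example, with wide arithmetic enabled, (uaddo x, y) becomes roughly:
//   {lo, hi} = I64_ADD128 x, 0, y, 0   ; 128-bit add of the zero-extended operands
//   sum      = lo
//   overflow = truncate hi to i32      ; hi is 1 exactly when the addition carried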
1837SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1838 SelectionDAG &DAG) const {
1839 assert(Subtarget->hasWideArithmetic());
1840 assert(Op.getValueType() == MVT::i64);
1841 assert(Op.getOpcode() == ISD::UADDO);
1842 SDLoc DL(Op);
1843 SDValue LHS = Op.getOperand(0);
1844 SDValue RHS = Op.getOperand(1);
1845 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1846 SDValue Result =
1847 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1848 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1849 SDValue CarryI64(Result.getNode(), 1);
1850 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1851 SDValue Ops[] = {Result, CarryI32};
1852 return DAG.getMergeValues(Ops, DL);
1853}
1854
1855SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1856 SelectionDAG &DAG) const {
1857 assert(Subtarget->hasWideArithmetic());
1858 assert(N->getValueType(0) == MVT::i128);
1859 SDLoc DL(N);
1860 unsigned Opcode;
1861 switch (N->getOpcode()) {
1862 case ISD::ADD:
1863 Opcode = WebAssemblyISD::I64_ADD128;
1864 break;
1865 case ISD::SUB:
1866 Opcode = WebAssemblyISD::I64_SUB128;
1867 break;
1868 default:
1869 llvm_unreachable("unexpected opcode");
1870 }
1871 SDValue LHS = N->getOperand(0);
1872 SDValue RHS = N->getOperand(1);
1873
1874 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1875 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1876 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1877 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1878 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1879 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1880 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1881 LHS_0, LHS_1, RHS_0, RHS_1);
1882 SDValue Result_HI(Result_LO.getNode(), 1);
1883 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1884}
1885
1886SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1887 SelectionDAG &DAG) const {
1888 SDValue Src = Op.getOperand(2);
1889 if (isa<FrameIndexSDNode>(Src.getNode())) {
1890 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1891 // the FI to some LEA-like instruction, but since we don't have that, we
1892 // need to insert some kind of instruction that can take an FI operand and
1893 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1894 // local.copy between Op and its FI operand.
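// Roughly: (CopyToReg chain, reg, (FrameIndex fi)) is rewritten as
// (CopyToReg chain, reg, (COPY_I32/I64 (FrameIndex fi))), so the frame index is
// consumed by the inserted copy instruction rather than by CopyToReg itself.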
1895 SDValue Chain = Op.getOperand(0);
1896 SDLoc DL(Op);
1897 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1898 EVT VT = Src.getValueType();
1899 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1900 : WebAssembly::COPY_I64,
1901 DL, VT, Src),
1902 0);
1903 return Op.getNode()->getNumValues() == 1
1904 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1905 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1906 Op.getNumOperands() == 4 ? Op.getOperand(3)
1907 : SDValue());
1908 }
1909 return SDValue();
1910}
1911
1912SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1913 SelectionDAG &DAG) const {
1914 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1915 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1916}
1917
1918SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1919 SelectionDAG &DAG) const {
1920 SDLoc DL(Op);
1921
1922 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1923 fail(DL, DAG,
1924 "Non-Emscripten WebAssembly hasn't implemented "
1925 "__builtin_return_address");
1926 return SDValue();
1927 }
1928
1929 unsigned Depth = Op.getConstantOperandVal(0);
1930 MakeLibCallOptions CallOptions;
1931 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1932 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1933 .first;
1934}
1935
1936SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1937 SelectionDAG &DAG) const {
1938 // Non-zero depths are not supported by WebAssembly currently. Use the
1939 // legalizer's default expansion, which is to return 0 (what this function is
1940 // documented to do).
1941 if (Op.getConstantOperandVal(0) > 0)
1942 return SDValue();
1943
1944 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1945 EVT VT = Op.getValueType();
1946 Register FP =
1947 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1948 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1949}
1950
1951SDValue
1952WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1953 SelectionDAG &DAG) const {
1954 SDLoc DL(Op);
1955 const auto *GA = cast<GlobalAddressSDNode>(Op);
1956
1957 MachineFunction &MF = DAG.getMachineFunction();
1958 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1959 report_fatal_error("cannot use thread-local storage without bulk memory",
1960 false);
1961
1962 const GlobalValue *GV = GA->getGlobal();
1963
1964 // Currently only Emscripten supports dynamic linking with threads. Therefore,
1965 // on other targets, if we have thread-local storage, only the local-exec
1966 // model is possible.
1967 auto model = Subtarget->getTargetTriple().isOSEmscripten()
1968 ? GV->getThreadLocalMode()
1969 : GlobalValue::LocalExecTLSModel;
1970
1971 // Unsupported TLS modes
1972 assert(model != GlobalValue::NotThreadLocal);
1973 assert(model != GlobalValue::InitialExecTLSModel);
1974
1975 if (model == GlobalValue::LocalExecTLSModel ||
1976 model == GlobalValue::LocalDynamicTLSModel ||
1977 (model == GlobalValue::GeneralDynamicTLSModel &&
1978 getTargetMachine().shouldAssumeDSOLocal(GV))) {
1979 // For DSO-local TLS variables we use offset from __tls_base
1980
1981 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1982 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1983 : WebAssembly::GLOBAL_GET_I32;
1984 const char *BaseName = MF.createExternalSymbolName("__tls_base");
1985
1986 SDValue BaseAddr(
1987 DAG.getMachineNode(GlobalGet, DL, PtrVT,
1988 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
1989 0);
1990
1991 SDValue TLSOffset = DAG.getTargetGlobalAddress(
1992 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
1993 SDValue SymOffset =
1994 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
1995
1996 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
1997 }
1998
2000
2001 EVT VT = Op.getValueType();
2002 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2003 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2004 GA->getOffset(),
2005 WebAssemblyII::MO_GOT_TLS));
2006}
2007
2008SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2009 SelectionDAG &DAG) const {
2010 SDLoc DL(Op);
2011 const auto *GA = cast<GlobalAddressSDNode>(Op);
2012 EVT VT = Op.getValueType();
2013 assert(GA->getTargetFlags() == 0 &&
2014 "Unexpected target flags on generic GlobalAddressSDNode");
2016 fail(DL, DAG, "Invalid address space for WebAssembly target");
2017
2018 unsigned OperandFlags = 0;
2019 const GlobalValue *GV = GA->getGlobal();
2020 // Since WebAssembly tables cannot yet be shared across modules, we don't
2021 // need special treatment for tables in PIC mode.
2022 if (isPositionIndependent() &&
2024 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2025 MachineFunction &MF = DAG.getMachineFunction();
2026 MVT PtrVT = getPointerTy(MF.getDataLayout());
2027 const char *BaseName;
2028 if (GV->getValueType()->isFunctionTy()) {
2029 BaseName = MF.createExternalSymbolName("__table_base");
2030 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2031 } else {
2032 BaseName = MF.createExternalSymbolName("__memory_base");
2033 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2034 }
2035 SDValue BaseAddr =
2036 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2037 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2038
2039 SDValue SymAddr = DAG.getNode(
2040 WebAssemblyISD::WrapperREL, DL, VT,
2041 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2042 OperandFlags));
2043
2044 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2045 }
2046 OperandFlags = WebAssemblyII::MO_GOT;
2047 }
2048
2049 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2050 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2051 GA->getOffset(), OperandFlags));
2052}
2053
2054SDValue
2055WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2056 SelectionDAG &DAG) const {
2057 SDLoc DL(Op);
2058 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2059 EVT VT = Op.getValueType();
2060 assert(ES->getTargetFlags() == 0 &&
2061 "Unexpected target flags on generic ExternalSymbolSDNode");
2062 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2063 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2064}
2065
2066SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2067 SelectionDAG &DAG) const {
2068 // There's no need for a Wrapper node because we always incorporate a jump
2069 // table operand into a BR_TABLE instruction, rather than ever
2070 // materializing it in a register.
2071 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2072 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2073 JT->getTargetFlags());
2074}
2075
2076SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2077 SelectionDAG &DAG) const {
2078 SDLoc DL(Op);
2079 SDValue Chain = Op.getOperand(0);
2080 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2081 SDValue Index = Op.getOperand(2);
2082 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2083
2084 SmallVector<SDValue, 8> Ops;
2085 Ops.push_back(Chain);
2086 Ops.push_back(Index);
2087
2088 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2089 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2090
2091 // Add an operand for each case.
2092 for (auto *MBB : MBBs)
2093 Ops.push_back(DAG.getBasicBlock(MBB));
2094
2095 // Add the first MBB as a dummy default target for now. This will be replaced
2096 // with the proper default target (and the preceding range check eliminated)
2097 // if possible by WebAssemblyFixBrTableDefaults.
2098 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2099 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2100}
2101
2102SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2103 SelectionDAG &DAG) const {
2104 SDLoc DL(Op);
2105 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2106
2107 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2108 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2109
2110 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2111 MFI->getVarargBufferVreg(), PtrVT);
2112 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2113 MachinePointerInfo(SV));
2114}
2115
2116// Try to lower partial.reduce.add to a dot or fallback to a sequence with
2117// extmul and adds.
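// For example, a v4i32 partial.reduce.add of (mul (sext v8i16 a), (sext v8i16 b))
// can use a single i32x4.dot_i16x8_s plus one add of the accumulator, while the
// unsigned and v16i8 cases fall back to extend_low/extend_high multiplies (and
// extadd_pairwise) followed by adds.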
2118 static SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
2119 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
2120 if (N->getConstantOperandVal(0) != Intrinsic::vector_partial_reduce_add)
2121 return SDValue();
2122
2123 assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32");
2124 SDLoc DL(N);
2125
2126 SDValue Input = N->getOperand(2);
2127 if (Input->getOpcode() == ISD::MUL) {
2128 SDValue ExtendLHS = Input->getOperand(0);
2129 SDValue ExtendRHS = Input->getOperand(1);
2130 assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) &&
2131 ISD::isExtOpcode(ExtendRHS.getOpcode())) &&
2132 "expected widening mul or add");
2133 assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() &&
2134 "expected binop to use the same extend for both operands");
2135
2136 SDValue ExtendInLHS = ExtendLHS->getOperand(0);
2137 SDValue ExtendInRHS = ExtendRHS->getOperand(0);
2138 bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND;
2139 unsigned LowOpc =
2140 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
2141 unsigned HighOpc = IsSigned ? WebAssemblyISD::EXTEND_HIGH_S
2142 : WebAssemblyISD::EXTEND_HIGH_U;
2143 SDValue LowLHS;
2144 SDValue LowRHS;
2145 SDValue HighLHS;
2146 SDValue HighRHS;
2147
2148 auto AssignInputs = [&](MVT VT) {
2149 LowLHS = DAG.getNode(LowOpc, DL, VT, ExtendInLHS);
2150 LowRHS = DAG.getNode(LowOpc, DL, VT, ExtendInRHS);
2151 HighLHS = DAG.getNode(HighOpc, DL, VT, ExtendInLHS);
2152 HighRHS = DAG.getNode(HighOpc, DL, VT, ExtendInRHS);
2153 };
2154
2155 if (ExtendInLHS->getValueType(0) == MVT::v8i16) {
2156 if (IsSigned) {
2157 // i32x4.dot_i16x8_s
2158 SDValue Dot = DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32,
2159 ExtendInLHS, ExtendInRHS);
2160 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Dot);
2161 }
2162
2163 // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs)))
2164 MVT VT = MVT::v4i32;
2165 AssignInputs(VT);
2166 SDValue MulLow = DAG.getNode(ISD::MUL, DL, VT, LowLHS, LowRHS);
2167 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, VT, HighLHS, HighRHS);
2168 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, MulLow, MulHigh);
2169 return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(1), Add);
2170 } else {
2171 assert(ExtendInLHS->getValueType(0) == MVT::v16i8 &&
2172 "expected v16i8 input types");
2173 AssignInputs(MVT::v8i16);
2174 // Lower to a wider tree, using twice the operations compared to above.
2175 if (IsSigned) {
2176 // Use two dots
2177 SDValue DotLHS =
2178 DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, LowLHS, LowRHS);
2179 SDValue DotRHS =
2180 DAG.getNode(WebAssemblyISD::DOT, DL, MVT::v4i32, HighLHS, HighRHS);
2181 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, DotLHS, DotRHS);
2182 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2183 }
2184
2185 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
2186 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
2187
2188 SDValue AddLow = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL,
2189 MVT::v4i32, MulLow);
2190 SDValue AddHigh = DAG.getNode(WebAssemblyISD::EXT_ADD_PAIRWISE_U, DL,
2191 MVT::v4i32, MulHigh);
2192 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::v4i32, AddLow, AddHigh);
2193 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2194 }
2195 } else {
2196 // Accumulate the input using extadd_pairwise.
2197 assert(ISD::isExtOpcode(Input.getOpcode()) && "expected extend");
2198 bool IsSigned = Input->getOpcode() == ISD::SIGN_EXTEND;
2199 unsigned PairwiseOpc = IsSigned ? WebAssemblyISD::EXT_ADD_PAIRWISE_S
2200 : WebAssemblyISD::EXT_ADD_PAIRWISE_U;
2201 SDValue ExtendIn = Input->getOperand(0);
2202 if (ExtendIn->getValueType(0) == MVT::v8i16) {
2203 SDValue Add = DAG.getNode(PairwiseOpc, DL, MVT::v4i32, ExtendIn);
2204 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2205 }
2206
2207 assert(ExtendIn->getValueType(0) == MVT::v16i8 &&
2208 "expected v16i8 input types");
2209 SDValue Add =
2210 DAG.getNode(PairwiseOpc, DL, MVT::v4i32,
2211 DAG.getNode(PairwiseOpc, DL, MVT::v8i16, ExtendIn));
2212 return DAG.getNode(ISD::ADD, DL, MVT::v4i32, N->getOperand(1), Add);
2213 }
2214}
2215
2216SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2217 SelectionDAG &DAG) const {
2218 MachineFunction &MF = DAG.getMachineFunction();
2219 unsigned IntNo;
2220 switch (Op.getOpcode()) {
2221 case ISD::INTRINSIC_VOID:
2222 case ISD::INTRINSIC_W_CHAIN:
2223 IntNo = Op.getConstantOperandVal(1);
2224 break;
2225 case ISD::INTRINSIC_WO_CHAIN:
2226 IntNo = Op.getConstantOperandVal(0);
2227 break;
2228 default:
2229 llvm_unreachable("Invalid intrinsic");
2230 }
2231 SDLoc DL(Op);
2232
2233 switch (IntNo) {
2234 default:
2235 return SDValue(); // Don't custom lower most intrinsics.
2236
2237 case Intrinsic::wasm_lsda: {
2238 auto PtrVT = getPointerTy(MF.getDataLayout());
2239 const char *SymName = MF.createExternalSymbolName(
2240 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2241 if (isPositionIndependent()) {
2242 SDValue Node = DAG.getTargetExternalSymbol(
2243 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2244 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2245 SDValue BaseAddr =
2246 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2247 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2248 SDValue SymAddr =
2249 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2250 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2251 }
2252 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2253 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2254 }
2255
2256 case Intrinsic::wasm_shuffle: {
2257 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2258 SDValue Ops[18];
2259 size_t OpIdx = 0;
2260 Ops[OpIdx++] = Op.getOperand(1);
2261 Ops[OpIdx++] = Op.getOperand(2);
2262 while (OpIdx < 18) {
2263 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2264 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2265 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2266 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2267 } else {
2268 Ops[OpIdx++] = MaskIdx;
2269 }
2270 }
2271 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2272 }
2273
2274 case Intrinsic::thread_pointer: {
2275 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2276 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2277 : WebAssembly::GLOBAL_GET_I32;
2278 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2279 return SDValue(
2280 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2281 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2282 0);
2283 }
2284 }
2285}
2286
2287SDValue
2288WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2289 SelectionDAG &DAG) const {
2290 SDLoc DL(Op);
2291 // If sign extension operations are disabled, allow sext_inreg only if operand
2292 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2293 // extension operations, but allowing sext_inreg in this context lets us have
2294 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2295 // everywhere would be simpler in this file, but would necessitate large and
2296 // brittle patterns to undo the expansion and select extract_lane_s
2297 // instructions.
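// For example, (sext_inreg (extract_vector_elt v16i8 $v, $i), i8) can then be
// selected as a single i8x16.extract_lane_s, which already sign-extends the
// lane, instead of a zero-extending extract followed by shift operations.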
2298 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2299 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2300 return SDValue();
2301
2302 const SDValue &Extract = Op.getOperand(0);
2303 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2304 if (VecT.getVectorElementType().getSizeInBits() > 32)
2305 return SDValue();
2306 MVT ExtractedLaneT =
2307 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2308 MVT ExtractedVecT =
2309 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2310 if (ExtractedVecT == VecT)
2311 return Op;
2312
2313 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2314 const SDNode *Index = Extract.getOperand(1).getNode();
2315 if (!isa<ConstantSDNode>(Index))
2316 return SDValue();
2317 unsigned IndexVal = Index->getAsZExtVal();
2318 unsigned Scale =
2319 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2320 assert(Scale > 1);
2321 SDValue NewIndex =
2322 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2323 SDValue NewExtract = DAG.getNode(
2324 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
2325 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2326 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2327 Op.getOperand(1));
2328}
2329
2330static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2331 SelectionDAG &DAG) {
2332 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2333 return SDValue();
2334
2335 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2336 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2337 "expected extend_low");
2338 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2339
2340 ArrayRef<int> Mask = Shuffle->getMask();
2341 // Look for a shuffle which moves from the high half to the low half.
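// E.g. for a v8i16 input, a mask beginning <4, 5, 6, 7, ...> places the high
// half in the low lanes, so extend_low(shuffle(x)) is equivalent to
// extend_high(x) and the shuffle can be dropped.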
2342 size_t FirstIdx = Mask.size() / 2;
2343 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2344 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2345 return SDValue();
2346 }
2347 }
2348
2349 SDLoc DL(Op);
2350 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2351 ? WebAssemblyISD::EXTEND_HIGH_S
2352 : WebAssemblyISD::EXTEND_HIGH_U;
2353 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2354}
2355
2356SDValue
2357WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2358 SelectionDAG &DAG) const {
2359 SDLoc DL(Op);
2360 EVT VT = Op.getValueType();
2361 SDValue Src = Op.getOperand(0);
2362 EVT SrcVT = Src.getValueType();
2363
2364 if (SrcVT.getVectorElementType() == MVT::i1 ||
2365 SrcVT.getVectorElementType() == MVT::i64)
2366 return SDValue();
2367
2368 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2369 "Unexpected extension factor.");
2370 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2371
2372 if (Scale != 2 && Scale != 4 && Scale != 8)
2373 return SDValue();
2374
2375 unsigned Ext;
2376 switch (Op.getOpcode()) {
2377 case ISD::ZERO_EXTEND_VECTOR_INREG:
2378 Ext = WebAssemblyISD::EXTEND_LOW_U;
2379 break;
2380 case ISD::SIGN_EXTEND_VECTOR_INREG:
2381 Ext = WebAssemblyISD::EXTEND_LOW_S;
2382 break;
2383 }
2384
2385 if (Scale == 2) {
2386 // See if we can use EXTEND_HIGH.
2387 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2388 return ExtendHigh;
2389 }
2390
2391 SDValue Ret = Src;
2392 while (Scale != 1) {
2393 Ret = DAG.getNode(Ext, DL,
2394 Ret.getValueType()
2395 .widenIntegerVectorElementType(*DAG.getContext())
2396 .getHalfNumVectorElementsVT(*DAG.getContext()),
2397 Ret);
2398 Scale /= 2;
2399 }
2400 assert(Ret.getValueType() == VT);
2401 return Ret;
2402}
2403
2404 static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2405 SDLoc DL(Op);
2406 if (Op.getValueType() != MVT::v2f64)
2407 return SDValue();
2408
2409 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2410 unsigned &Index) -> bool {
2411 switch (Op.getOpcode()) {
2412 case ISD::SINT_TO_FP:
2413 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2414 break;
2415 case ISD::UINT_TO_FP:
2416 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2417 break;
2418 case ISD::FP_EXTEND:
2419 Opcode = WebAssemblyISD::PROMOTE_LOW;
2420 break;
2421 default:
2422 return false;
2423 }
2424
2425 auto ExtractVector = Op.getOperand(0);
2426 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2427 return false;
2428
2429 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2430 return false;
2431
2432 SrcVec = ExtractVector.getOperand(0);
2433 Index = ExtractVector.getConstantOperandVal(1);
2434 return true;
2435 };
2436
2437 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2438 SDValue LHSSrcVec, RHSSrcVec;
2439 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2440 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2441 return SDValue();
2442
2443 if (LHSOpcode != RHSOpcode)
2444 return SDValue();
2445
2446 MVT ExpectedSrcVT;
2447 switch (LHSOpcode) {
2448 case WebAssemblyISD::CONVERT_LOW_S:
2449 case WebAssemblyISD::CONVERT_LOW_U:
2450 ExpectedSrcVT = MVT::v4i32;
2451 break;
2452 case WebAssemblyISD::PROMOTE_LOW:
2453 ExpectedSrcVT = MVT::v4f32;
2454 break;
2455 }
2456 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2457 return SDValue();
2458
2459 auto Src = LHSSrcVec;
2460 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2461 // Shuffle the source vector so that the converted lanes are the low lanes.
2462 Src = DAG.getVectorShuffle(
2463 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2464 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2465 }
2466 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2467}
2468
2469SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2470 SelectionDAG &DAG) const {
2471 MVT VT = Op.getSimpleValueType();
2472 if (VT == MVT::v8f16) {
2473 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2474 // FP16 type, so cast them to I16s.
2475 MVT IVT = VT.changeVectorElementType(MVT::i16);
2476 SmallVector<SDValue, 8> NewOps;
2477 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2478 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2479 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2480 return DAG.getBitcast(VT, Res);
2481 }
2482
2483 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2484 return ConvertLow;
2485
2486 SDLoc DL(Op);
2487 const EVT VecT = Op.getValueType();
2488 const EVT LaneT = Op.getOperand(0).getValueType();
2489 const size_t Lanes = Op.getNumOperands();
2490 bool CanSwizzle = VecT == MVT::v16i8;
2491
2492 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2493 // possible number of lanes at once followed by a sequence of replace_lane
2494 // instructions to individually initialize any remaining lanes.
2495
2496 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2497 // swizzled lanes should be given greater weight.
2498
2499 // TODO: Investigate looping rather than always extracting/replacing specific
2500 // lanes to fill gaps.
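// For example, a v4i32 build_vector (C0, C1, C2, $x) is emitted here roughly as
// a constant vector for the three constant lanes (with lane 3 zeroed) followed
// by a single i32x4.replace_lane inserting $x.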
2501
2502 auto IsConstant = [](const SDValue &V) {
2503 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2504 };
2505
2506 // Returns the source vector and index vector pair if they exist. Checks for:
2507 // (extract_vector_elt
2508 // $src,
2509 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2510 // )
2511 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2512 auto Bail = std::make_pair(SDValue(), SDValue());
2513 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2514 return Bail;
2515 const SDValue &SwizzleSrc = Lane->getOperand(0);
2516 const SDValue &IndexExt = Lane->getOperand(1);
2517 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2518 return Bail;
2519 const SDValue &Index = IndexExt->getOperand(0);
2520 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2521 return Bail;
2522 const SDValue &SwizzleIndices = Index->getOperand(0);
2523 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2524 SwizzleIndices.getValueType() != MVT::v16i8 ||
2525 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2526 Index->getConstantOperandVal(1) != I)
2527 return Bail;
2528 return std::make_pair(SwizzleSrc, SwizzleIndices);
2529 };
2530
2531 // If the lane is extracted from another vector at a constant index, return
2532 // that vector. The source vector must not have more lanes than the dest
2533 // because the shufflevector indices are in terms of the destination lanes and
2534 // would not be able to address the smaller individual source lanes.
2535 auto GetShuffleSrc = [&](const SDValue &Lane) {
2536 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2537 return SDValue();
2538 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2539 return SDValue();
2540 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2541 VecT.getVectorNumElements())
2542 return SDValue();
2543 return Lane->getOperand(0);
2544 };
2545
2546 using ValueEntry = std::pair<SDValue, size_t>;
2547 SmallVector<ValueEntry, 16> SplatValueCounts;
2548
2549 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2550 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2551
2552 using ShuffleEntry = std::pair<SDValue, size_t>;
2553 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2554
2555 auto AddCount = [](auto &Counts, const auto &Val) {
2556 auto CountIt =
2557 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2558 if (CountIt == Counts.end()) {
2559 Counts.emplace_back(Val, 1);
2560 } else {
2561 CountIt->second++;
2562 }
2563 };
2564
2565 auto GetMostCommon = [](auto &Counts) {
2566 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2567 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2568 return *CommonIt;
2569 };
2570
2571 size_t NumConstantLanes = 0;
2572
2573 // Count eligible lanes for each type of vector creation op
2574 for (size_t I = 0; I < Lanes; ++I) {
2575 const SDValue &Lane = Op->getOperand(I);
2576 if (Lane.isUndef())
2577 continue;
2578
2579 AddCount(SplatValueCounts, Lane);
2580
2581 if (IsConstant(Lane))
2582 NumConstantLanes++;
2583 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2584 AddCount(ShuffleCounts, ShuffleSrc);
2585 if (CanSwizzle) {
2586 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2587 if (SwizzleSrcs.first)
2588 AddCount(SwizzleCounts, SwizzleSrcs);
2589 }
2590 }
2591
2592 SDValue SplatValue;
2593 size_t NumSplatLanes;
2594 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2595
2596 SDValue SwizzleSrc;
2597 SDValue SwizzleIndices;
2598 size_t NumSwizzleLanes = 0;
2599 if (SwizzleCounts.size())
2600 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2601 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2602
2603 // Shuffles can draw from up to two vectors, so find the two most common
2604 // sources.
2605 SDValue ShuffleSrc1, ShuffleSrc2;
2606 size_t NumShuffleLanes = 0;
2607 if (ShuffleCounts.size()) {
2608 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2609 llvm::erase_if(ShuffleCounts,
2610 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2611 }
2612 if (ShuffleCounts.size()) {
2613 size_t AdditionalShuffleLanes;
2614 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2615 GetMostCommon(ShuffleCounts);
2616 NumShuffleLanes += AdditionalShuffleLanes;
2617 }
2618
2619 // Predicate returning true if the lane is properly initialized by the
2620 // original instruction
2621 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2622 SDValue Result;
2623 // Prefer swizzles over shuffles over vector consts over splats
2624 if (NumSwizzleLanes >= NumShuffleLanes &&
2625 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2626 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2627 SwizzleIndices);
2628 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2629 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2630 return Swizzled == GetSwizzleSrcs(I, Lane);
2631 };
2632 } else if (NumShuffleLanes >= NumConstantLanes &&
2633 NumShuffleLanes >= NumSplatLanes) {
2634 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2635 size_t DestLaneCount = VecT.getVectorNumElements();
2636 size_t Scale1 = 1;
2637 size_t Scale2 = 1;
2638 SDValue Src1 = ShuffleSrc1;
2639 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2640 if (Src1.getValueType() != VecT) {
2641 size_t LaneSize =
2642 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2643 assert(LaneSize > DestLaneSize);
2644 Scale1 = LaneSize / DestLaneSize;
2645 Src1 = DAG.getBitcast(VecT, Src1);
2646 }
2647 if (Src2.getValueType() != VecT) {
2648 size_t LaneSize =
2649 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2650 assert(LaneSize > DestLaneSize);
2651 Scale2 = LaneSize / DestLaneSize;
2652 Src2 = DAG.getBitcast(VecT, Src2);
2653 }
2654
2655 int Mask[16];
2656 assert(DestLaneCount <= 16);
2657 for (size_t I = 0; I < DestLaneCount; ++I) {
2658 const SDValue &Lane = Op->getOperand(I);
2659 SDValue Src = GetShuffleSrc(Lane);
2660 if (Src == ShuffleSrc1) {
2661 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2662 } else if (Src && Src == ShuffleSrc2) {
2663 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2664 } else {
2665 Mask[I] = -1;
2666 }
2667 }
2668 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2669 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2670 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2671 auto Src = GetShuffleSrc(Lane);
2672 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2673 };
2674 } else if (NumConstantLanes >= NumSplatLanes) {
2675 SmallVector<SDValue, 16> ConstLanes;
2676 for (const SDValue &Lane : Op->op_values()) {
2677 if (IsConstant(Lane)) {
2678 // Values may need to be fixed so that they will sign extend to be
2679 // within the expected range during ISel. Check whether the value is in
2680 // bounds based on the lane bit width and if it is out of bounds, lop
2681 // off the extra bits and subtract 2^n to reflect giving the high bit
2682 // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
2683 // cannot possibly be out of range.
2684 auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
2685 int64_t Val = Const ? Const->getSExtValue() : 0;
2686 uint64_t LaneBits = 128 / Lanes;
2687 assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
2688 "Unexpected out of bounds negative value");
2689 if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
2690 uint64_t Mask = (1ll << LaneBits) - 1;
2691 auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
2692 ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
2693 } else {
2694 ConstLanes.push_back(Lane);
2695 }
2696 } else if (LaneT.isFloatingPoint()) {
2697 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2698 } else {
2699 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2700 }
2701 }
2702 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2703 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2704 return IsConstant(Lane);
2705 };
2706 } else {
2707 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2708 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2709 (DestLaneSize == 32 || DestLaneSize == 64)) {
2710 // Could be selected to load_zero.
2711 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2712 } else {
2713 // Use a splat (which might be selected as a load splat)
2714 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2715 }
2716 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2717 return Lane == SplatValue;
2718 };
2719 }
2720
2721 assert(Result);
2722 assert(IsLaneConstructed);
2723
2724 // Add replace_lane instructions for any unhandled values
2725 for (size_t I = 0; I < Lanes; ++I) {
2726 const SDValue &Lane = Op->getOperand(I);
2727 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2728 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2729 DAG.getConstant(I, DL, MVT::i32));
2730 }
2731
2732 return Result;
2733}
2734
2735SDValue
2736WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2737 SelectionDAG &DAG) const {
2738 SDLoc DL(Op);
2739 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2740 MVT VecType = Op.getOperand(0).getSimpleValueType();
2741 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2742 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2743
2744 // Space for two vector args and sixteen mask indices
2745 SDValue Ops[18];
2746 size_t OpIdx = 0;
2747 Ops[OpIdx++] = Op.getOperand(0);
2748 Ops[OpIdx++] = Op.getOperand(1);
2749
2750 // Expand mask indices to byte indices and materialize them as operands
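// E.g. for a v4i32 shuffle (LaneBytes == 4), mask element 1 expands to byte
// indices 4, 5, 6, 7 of the i8x16.shuffle immediate; an undef element (-1)
// expands to 0..3 so the whole lane stays addressable as one unit.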
2751 for (int M : Mask) {
2752 for (size_t J = 0; J < LaneBytes; ++J) {
2753 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2754 // whole lane of vector input, to allow further reduction at VM. E.g.
2755 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2756 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2757 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2758 }
2759 }
2760
2761 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2762}
2763
2764SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2765 SelectionDAG &DAG) const {
2766 SDLoc DL(Op);
2767 // The legalizer does not know how to expand the unsupported comparison modes
2768 // of i64x2 vectors, so we manually unroll them here.
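// E.g. a v2i64 setcc with an unsupported condition is rebuilt as two scalar
// select_cc nodes producing all-ones or all-zeros per lane, then recombined
// with a build_vector.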
2769 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2770 SmallVector<SDValue, 2> LHS, RHS;
2771 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2772 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2773 const SDValue &CC = Op->getOperand(2);
2774 auto MakeLane = [&](unsigned I) {
2775 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2776 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2777 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2778 };
2779 return DAG.getBuildVector(Op->getValueType(0), DL,
2780 {MakeLane(0), MakeLane(1)});
2781}
2782
2783SDValue
2784WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2785 SelectionDAG &DAG) const {
2786 // Allow constant lane indices, expand variable lane indices
2787 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2788 if (isa<ConstantSDNode>(IdxNode)) {
2789 // Ensure the index type is i32 to match the tablegen patterns
2790 uint64_t Idx = IdxNode->getAsZExtVal();
2791 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2792 Ops[Op.getNumOperands() - 1] =
2793 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2794 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2795 }
2796 // Perform default expansion
2797 return SDValue();
2798}
2799
2800 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2801 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2802 // 32-bit and 64-bit unrolled shifts will have proper semantics
2803 if (LaneT.bitsGE(MVT::i32))
2804 return DAG.UnrollVectorOp(Op.getNode());
2805 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2806 SDLoc DL(Op);
2807 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2808 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2809 unsigned ShiftOpcode = Op.getOpcode();
2810 SmallVector<SDValue, 16> ShiftedElements;
2811 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2812 SmallVector<SDValue, 16> ShiftElements;
2813 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2814 SmallVector<SDValue, 16> UnrolledOps;
2815 for (size_t i = 0; i < NumLanes; ++i) {
2816 SDValue MaskedShiftValue =
2817 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2818 SDValue ShiftedValue = ShiftedElements[i];
2819 if (ShiftOpcode == ISD::SRA)
2820 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2821 ShiftedValue, DAG.getValueType(LaneT));
2822 UnrolledOps.push_back(
2823 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2824 }
2825 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2826}
2827
2828SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2829 SelectionDAG &DAG) const {
2830 SDLoc DL(Op);
2831
2832 // Only manually lower vector shifts
2833 assert(Op.getSimpleValueType().isVector());
2834
2835 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2836 auto ShiftVal = Op.getOperand(1);
2837
2838 // Try to skip bitmask operation since it is implied inside shift instruction
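// E.g. (shl $v, (splat (and $x, 7))) on an i8x16 vector can drop the "and",
// because WebAssembly shift instructions already take the shift count modulo
// the lane width.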
2839 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2840 if (MaskOp.getOpcode() != ISD::AND)
2841 return MaskOp;
2842 SDValue LHS = MaskOp.getOperand(0);
2843 SDValue RHS = MaskOp.getOperand(1);
2844 if (MaskOp.getValueType().isVector()) {
2845 APInt MaskVal;
2846 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2847 std::swap(LHS, RHS);
2848
2849 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2850 MaskVal == MaskBits)
2851 MaskOp = LHS;
2852 } else {
2853 if (!isa<ConstantSDNode>(RHS.getNode()))
2854 std::swap(LHS, RHS);
2855
2856 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2857 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2858 MaskOp = LHS;
2859 }
2860
2861 return MaskOp;
2862 };
2863
2864 // Skip vector and operation
2865 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2866 ShiftVal = DAG.getSplatValue(ShiftVal);
2867 if (!ShiftVal)
2868 return unrollVectorShift(Op, DAG);
2869
2870 // Skip scalar and operation
2871 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2872 // Use anyext because none of the high bits can affect the shift
2873 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2874
2875 unsigned Opcode;
2876 switch (Op.getOpcode()) {
2877 case ISD::SHL:
2878 Opcode = WebAssemblyISD::VEC_SHL;
2879 break;
2880 case ISD::SRA:
2881 Opcode = WebAssemblyISD::VEC_SHR_S;
2882 break;
2883 case ISD::SRL:
2884 Opcode = WebAssemblyISD::VEC_SHR_U;
2885 break;
2886 default:
2887 llvm_unreachable("unexpected opcode");
2888 }
2889
2890 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2891}
2892
2893SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2894 SelectionDAG &DAG) const {
2895 EVT ResT = Op.getValueType();
2896 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2897
2898 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2899 (SatVT == MVT::i32 || SatVT == MVT::i64))
2900 return Op;
2901
2902 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2903 return Op;
2904
2905 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2906 return Op;
2907
2908 return SDValue();
2909}
2910
2911//===----------------------------------------------------------------------===//
2912// Custom DAG combine hooks
2913//===----------------------------------------------------------------------===//
2914static SDValue
2915 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2916 auto &DAG = DCI.DAG;
2917 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2918
2919 // Hoist vector bitcasts that don't change the number of lanes out of unary
2920 // shuffles, where they are less likely to get in the way of other combines.
2921 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2922 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2923 SDValue Bitcast = N->getOperand(0);
2924 if (Bitcast.getOpcode() != ISD::BITCAST)
2925 return SDValue();
2926 if (!N->getOperand(1).isUndef())
2927 return SDValue();
2928 SDValue CastOp = Bitcast.getOperand(0);
2929 EVT SrcType = CastOp.getValueType();
2930 EVT DstType = Bitcast.getValueType();
2931 if (!SrcType.is128BitVector() ||
2932 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2933 return SDValue();
2934 SDValue NewShuffle = DAG.getVectorShuffle(
2935 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2936 return DAG.getBitcast(DstType, NewShuffle);
2937}
2938
2939/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2940/// split up into scalar instructions during legalization, and the vector
2941/// extending instructions are selected in performVectorExtendCombine below.
2942static SDValue
2943 performVectorExtendToFPCombine(SDNode *N,
2944 TargetLowering::DAGCombinerInfo &DCI) {
2945 auto &DAG = DCI.DAG;
2946 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2947 N->getOpcode() == ISD::SINT_TO_FP);
2948
2949 EVT InVT = N->getOperand(0)->getValueType(0);
2950 EVT ResVT = N->getValueType(0);
2951 MVT ExtVT;
2952 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2953 ExtVT = MVT::v4i32;
2954 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2955 ExtVT = MVT::v2i32;
2956 else
2957 return SDValue();
2958
2959 unsigned Op =
2960 N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
2961 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2962 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2963}
2964
2965static SDValue
2966 performVectorNonNegToFPCombine(SDNode *N,
2967 TargetLowering::DAGCombinerInfo &DCI) {
2968 auto &DAG = DCI.DAG;
2969
2970 SDNodeFlags Flags = N->getFlags();
2971 SDValue Op0 = N->getOperand(0);
2972 EVT VT = N->getValueType(0);
2973
2974 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2975 // Depending on the target (runtime) backend, this might be performance
2976 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2977 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2978 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2979 }
2980
2981 return SDValue();
2982}
2983
2984static SDValue
2985 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2986 auto &DAG = DCI.DAG;
2987 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2988 N->getOpcode() == ISD::ZERO_EXTEND);
2989
2990 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2991 // possible before the extract_subvector can be expanded.
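// E.g. (zero_extend (v8i8 extract_subvector (v16i8 $x), 8)) becomes a single
// extend_high_u of $x, and index 0 maps to the corresponding extend_low form.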
2992 auto Extract = N->getOperand(0);
2993 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2994 return SDValue();
2995 auto Source = Extract.getOperand(0);
2996 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2997 if (IndexNode == nullptr)
2998 return SDValue();
2999 auto Index = IndexNode->getZExtValue();
3000
3001 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3002 // extracted subvector is the low or high half of its source.
3003 EVT ResVT = N->getValueType(0);
3004 if (ResVT == MVT::v8i16) {
3005 if (Extract.getValueType() != MVT::v8i8 ||
3006 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3007 return SDValue();
3008 } else if (ResVT == MVT::v4i32) {
3009 if (Extract.getValueType() != MVT::v4i16 ||
3010 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3011 return SDValue();
3012 } else if (ResVT == MVT::v2i64) {
3013 if (Extract.getValueType() != MVT::v2i32 ||
3014 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3015 return SDValue();
3016 } else {
3017 return SDValue();
3018 }
3019
3020 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3021 bool IsLow = Index == 0;
3022
3023 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3024 : WebAssemblyISD::EXTEND_HIGH_S)
3025 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3026 : WebAssemblyISD::EXTEND_HIGH_U);
3027
3028 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3029}
3030
3031static SDValue
3032 performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3033 auto &DAG = DCI.DAG;
3034
3035 auto GetWasmConversionOp = [](unsigned Op) {
3036 switch (Op) {
3037 case ISD::FP_TO_SINT_SAT:
3038 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3039 case ISD::FP_TO_UINT_SAT:
3040 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3041 case ISD::FP_ROUND:
3042 return WebAssemblyISD::DEMOTE_ZERO;
3043 }
3044 llvm_unreachable("unexpected op");
3045 };
3046
3047 auto IsZeroSplat = [](SDValue SplatVal) {
3048 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3049 APInt SplatValue, SplatUndef;
3050 unsigned SplatBitSize;
3051 bool HasAnyUndefs;
3052 // Endianness doesn't matter in this context because we are looking for
3053 // an all-zero value.
3054 return Splat &&
3055 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3056 HasAnyUndefs) &&
3057 SplatValue == 0;
3058 };
3059
3060 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3061 // Combine this:
3062 //
3063 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3064 //
3065 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3066 //
3067 // Or this:
3068 //
3069 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3070 //
3071 // into (f32x4.demote_zero_f64x2 $x).
3072 EVT ResVT;
3073 EVT ExpectedConversionType;
3074 auto Conversion = N->getOperand(0);
3075 auto ConversionOp = Conversion.getOpcode();
3076 switch (ConversionOp) {
3077 case ISD::FP_TO_SINT_SAT:
3078 case ISD::FP_TO_UINT_SAT:
3079 ResVT = MVT::v4i32;
3080 ExpectedConversionType = MVT::v2i32;
3081 break;
3082 case ISD::FP_ROUND:
3083 ResVT = MVT::v4f32;
3084 ExpectedConversionType = MVT::v2f32;
3085 break;
3086 default:
3087 return SDValue();
3088 }
3089
3090 if (N->getValueType(0) != ResVT)
3091 return SDValue();
3092
3093 if (Conversion.getValueType() != ExpectedConversionType)
3094 return SDValue();
3095
3096 auto Source = Conversion.getOperand(0);
3097 if (Source.getValueType() != MVT::v2f64)
3098 return SDValue();
3099
3100 if (!IsZeroSplat(N->getOperand(1)) ||
3101 N->getOperand(1).getValueType() != ExpectedConversionType)
3102 return SDValue();
3103
3104 unsigned Op = GetWasmConversionOp(ConversionOp);
3105 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3106 }
3107
3108 // Combine this:
3109 //
3110 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3111 //
3112 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3113 //
3114 // Or this:
3115 //
3116 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3117 //
3118 // into (f32x4.demote_zero_f64x2 $x).
3119 EVT ResVT;
3120 auto ConversionOp = N->getOpcode();
3121 switch (ConversionOp) {
3122 case ISD::FP_TO_SINT_SAT:
3123 case ISD::FP_TO_UINT_SAT:
3124 ResVT = MVT::v4i32;
3125 break;
3126 case ISD::FP_ROUND:
3127 ResVT = MVT::v4f32;
3128 break;
3129 default:
3130 llvm_unreachable("unexpected op");
3131 }
3132
3133 if (N->getValueType(0) != ResVT)
3134 return SDValue();
3135
3136 auto Concat = N->getOperand(0);
3137 if (Concat.getValueType() != MVT::v4f64)
3138 return SDValue();
3139
3140 auto Source = Concat.getOperand(0);
3141 if (Source.getValueType() != MVT::v2f64)
3142 return SDValue();
3143
3144 if (!IsZeroSplat(Concat.getOperand(1)) ||
3145 Concat.getOperand(1).getValueType() != MVT::v2f64)
3146 return SDValue();
3147
3148 unsigned Op = GetWasmConversionOp(ConversionOp);
3149 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3150}
3151
3152// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3153static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3154 const SDLoc &DL, unsigned VectorWidth) {
3155 EVT VT = Vec.getValueType();
3156 EVT ElVT = VT.getVectorElementType();
3157 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3158 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3159 VT.getVectorNumElements() / Factor);
3160
3161 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3162 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3163 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3164
3165 // This is the index of the first element of the VectorWidth-bit chunk
3166 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3167 IdxVal &= ~(ElemsPerChunk - 1);
3168
3169 // If the input is a buildvector just emit a smaller one.
3170 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3171 return DAG.getBuildVector(ResultVT, DL,
3172 Vec->ops().slice(IdxVal, ElemsPerChunk));
3173
3174 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3175 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3176}
3177
3178// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3179// is the expected destination value type after recursion. In is the initial
3180// input. Note that the input should have enough leading zero bits to prevent
3181// NARROW_U from saturating results.
3182 static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3183 SelectionDAG &DAG) {
3184 EVT SrcVT = In.getValueType();
3185
3186 // No truncation required, we might get here due to recursive calls.
3187 if (SrcVT == DstVT)
3188 return In;
3189
3190 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3191 unsigned NumElems = SrcVT.getVectorNumElements();
3192 if (!isPowerOf2_32(NumElems))
3193 return SDValue();
3194 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3195 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3196
3197 LLVMContext &Ctx = *DAG.getContext();
3198 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3199
3200 // Narrow to the largest type possible:
3201 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3202 EVT InVT = MVT::i16, OutVT = MVT::i8;
3203 if (SrcVT.getScalarSizeInBits() > 16) {
3204 InVT = MVT::i32;
3205 OutVT = MVT::i16;
3206 }
3207 unsigned SubSizeInBits = SrcSizeInBits / 2;
3208 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3209 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3210
3211 // Split lower/upper subvectors.
3212 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3213 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3214
3215 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3216 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3217 Lo = DAG.getBitcast(InVT, Lo);
3218 Hi = DAG.getBitcast(InVT, Hi);
3219 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3220 return DAG.getBitcast(DstVT, Res);
3221 }
3222
3223 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3224 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3225 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3226 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3227
3228 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3229 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3230 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3231}
3232
3233 static SDValue performTruncateCombine(SDNode *N,
3234 TargetLowering::DAGCombinerInfo &DCI) {
3235 auto &DAG = DCI.DAG;
3236
3237 SDValue In = N->getOperand(0);
3238 EVT InVT = In.getValueType();
3239 if (!InVT.isSimple())
3240 return SDValue();
3241
3242 EVT OutVT = N->getValueType(0);
3243 if (!OutVT.isVector())
3244 return SDValue();
3245
3246 EVT OutSVT = OutVT.getVectorElementType();
3247 EVT InSVT = InVT.getVectorElementType();
3248 // Currently only cover truncate to v16i8 or v8i16.
3249 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3250 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3251 return SDValue();
3252
3253 SDLoc DL(N);
3254 APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3255 OutVT.getScalarSizeInBits());
3256 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3257 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3258}
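// [Editorial note, not part of the upstream listing] The AND mask above is
// what lets the saturating NARROW_U nodes implement a plain truncate. For
// example, the i32 lane value 0x300 must truncate to the i8 value 0x00, but
// narrowing it unmasked would saturate to 0xFF; masking to the low 8 bits
// first (0x300 & 0xFF == 0x00) keeps every lane in range, so the narrows can
// never saturate.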
3259
3260static SDValue performBitcastCombine(SDNode *N,
3261                                     TargetLowering::DAGCombinerInfo &DCI) {
3262  using namespace llvm::SDPatternMatch;
3263 auto &DAG = DCI.DAG;
3264 SDLoc DL(N);
3265 SDValue Src = N->getOperand(0);
3266 EVT VT = N->getValueType(0);
3267 EVT SrcVT = Src.getValueType();
3268
3269 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3270 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3271 return SDValue();
3272
3273 unsigned NumElts = SrcVT.getVectorNumElements();
3274 EVT Width = MVT::getIntegerVT(128 / NumElts);
3275
3276 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3277 // ==> bitmask
3278 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3279 return DAG.getZExtOrTrunc(
3280 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3281 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3282 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3283 SrcVT.changeVectorElementType(Width))}),
3284 DL, VT);
3285 }
3286
3287 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3288 if (NumElts == 32 || NumElts == 64) {
3289    // Strategy: We will setcc them separately in v16i8 -> v16i1
3290    // Bitcast them to i16, extend them to either i32 or i64.
3291    // Add them together, shifting left one 16-bit chunk at a time.
3292 SDValue Concat, SetCCVector;
3293 ISD::CondCode SetCond;
3294
3295 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3296 m_CondCode(SetCond)))))
3297 return SDValue();
3298 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3299 return SDValue();
3300
3301 uint64_t ElementWidth =
3302        SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3303
3304 SmallVector<SDValue> VectorsToShuffle;
3305 for (size_t I = 0; I < Concat->ops().size(); I++) {
3306 VectorsToShuffle.push_back(DAG.getBitcast(
3307 MVT::i16,
3308 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3309 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3310 DAG, DL, 128),
3311 SetCond)));
3312 }
3313
3314 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3315 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3316
3317 for (SDValue V : VectorsToShuffle) {
3318 ReturningInteger = DAG.getNode(
3319 ISD::SHL, DL, ReturnType,
3320 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3321
3322 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3323 ReturningInteger =
3324 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3325 }
3326
3327 return ReturningInteger;
3328 }
3329
3330 return SDValue();
3331}
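// [Editorial note, not part of the upstream listing] Illustration of the
// combine above: for a legal-width mask such as (i16 (bitcast (v16i1 X))), X
// is sign-extended to v16i8 and a single i8x16.bitmask supplies the result.
// For the v32i1/v64i1 setcc-on-concat form, each 128-bit chunk is compared
// separately, its v16i1 result is bitcast to a 16-bit mask, and the masks are
// accumulated into an i32 or i64 with the shift-by-16 and add loop.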
3332
3333static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3334  // any_true (setcc <X>, 0, eq) => (not (all_true X))
3335 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3336 // any_true (setcc <X>, 0, ne) => (any_true X)
3337 // all_true (setcc <X>, 0, ne) => (all_true X)
3338 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3339 using namespace llvm::SDPatternMatch;
3340
3341 SDValue LHS;
3342 if (!sd_match(N->getOperand(1),
3343                m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
3344    return SDValue();
3345 EVT LT = LHS.getValueType();
3346 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3347 return SDValue();
3348
3349 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3350 ISD::CondCode SetType,
3351 Intrinsic::WASMIntrinsics InPost) {
3352 if (N->getConstantOperandVal(0) != InPre)
3353 return SDValue();
3354
3355 SDValue LHS;
3356 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3357 m_SpecificCondCode(SetType))))
3358 return SDValue();
3359
3360 SDLoc DL(N);
3361 SDValue Ret = DAG.getZExtOrTrunc(
3362 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3363 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3364 DL, MVT::i1);
3365 if (SetType == ISD::SETEQ)
3366 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3367 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3368 };
3369
3370 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3371 Intrinsic::wasm_alltrue))
3372 return AnyTrueEQ;
3373 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3374 Intrinsic::wasm_anytrue))
3375 return AllTrueEQ;
3376 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3377 Intrinsic::wasm_anytrue))
3378 return AnyTrueNE;
3379 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3380 Intrinsic::wasm_alltrue))
3381 return AllTrueNE;
3382
3383 return SDValue();
3384}
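// [Editorial note, not part of the upstream listing] Intuition for the
// rewrites above: "some lane equals zero" is the negation of "all lanes are
// non-zero", so any_true (setcc X, 0, eq) becomes (not (all_true X)); and a
// lane's setcc-ne result is non-zero exactly when the lane itself is non-zero,
// so all_true (setcc X, 0, ne) is simply all_true X.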
3385
3386template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3387 Intrinsic::ID Intrin>
3388static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3389  SDValue LHS = N->getOperand(0);
3390 SDValue RHS = N->getOperand(1);
3391 SDValue Cond = N->getOperand(2);
3392 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3393 return SDValue();
3394
3395 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3396 return SDValue();
3397
3398 SDLoc DL(N);
3399 SDValue Ret = DAG.getZExtOrTrunc(
3400 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3401 {DAG.getConstant(Intrin, DL, MVT::i32),
3402 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3403 DL, MVT::i1);
3404 if (RequiresNegate)
3405 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3406 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3407}
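// [Editorial note, not part of the upstream listing] TryMatchTrue is
// instantiated from performSETCCCombine below; for example
// TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue> recognizes
// (setcc (iN (bitcast (vNi1 X))), 0, ne) and rewrites it as a single any_true
// of X sign-extended to VecVT, while RequiresNegate adds the trailing NOT for
// the inverted forms.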
3408
3409/// Try to convert an i128 comparison to a v16i8 comparison before type
3410/// legalization splits it up into chunks.
3411static SDValue
3412combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3413                                const WebAssemblySubtarget *Subtarget) {
3414
3415 SDLoc DL(N);
3416 SDValue X = N->getOperand(0);
3417 SDValue Y = N->getOperand(1);
3418 EVT VT = N->getValueType(0);
3419 EVT OpVT = X.getValueType();
3420
3421 SelectionDAG &DAG = DCI.DAG;
3422  if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3423          Attribute::NoImplicitFloat))
3424 return SDValue();
3425
3426 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3427 // We're looking for an oversized integer equality comparison with SIMD
3428 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3429 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3430 return SDValue();
3431
3432 // Don't perform this combine if constructing the vector will be expensive.
3433 auto IsVectorBitCastCheap = [](SDValue X) {
3434    X = peekThroughBitcasts(X);
3435    return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3436 };
3437
3438 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3439 return SDValue();
3440
3441 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3442 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3443 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3444
3445 SDValue Intr =
3446 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3447 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3448 : Intrinsic::wasm_anytrue,
3449 DL, MVT::i32),
3450 Cmp});
3451
3452 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3453 ISD::SETNE);
3454}
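// [Editorial note, not part of the upstream listing] Concretely, when both
// operands are cheap to vectorize (constants or loads), (i128 X) == (i128 Y)
// becomes: bitcast both sides to v16i8, compare lane-wise for equality, and
// test the comparison with all_true != 0; the inequality form compares with
// "ne" and uses any_true instead, so a single differing byte makes it true.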
3455
3456static SDValue performSETCCCombine(SDNode *N,
3457                                   TargetLowering::DAGCombinerInfo &DCI,
3458                                   const WebAssemblySubtarget *Subtarget) {
3459 if (!DCI.isBeforeLegalize())
3460 return SDValue();
3461
3462 EVT VT = N->getValueType(0);
3463 if (!VT.isScalarInteger())
3464 return SDValue();
3465
3466 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3467 return V;
3468
3469 SDValue LHS = N->getOperand(0);
3470 if (LHS->getOpcode() != ISD::BITCAST)
3471 return SDValue();
3472
3473 EVT FromVT = LHS->getOperand(0).getValueType();
3474 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3475 return SDValue();
3476
3477 unsigned NumElts = FromVT.getVectorNumElements();
3478 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3479 return SDValue();
3480
3481 if (!cast<ConstantSDNode>(N->getOperand(1)))
3482 return SDValue();
3483
3484 EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
3485 auto &DAG = DCI.DAG;
3486 // setcc (iN (bitcast (vNi1 X))), 0, ne
3487 // ==> any_true (vNi1 X)
3488  if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3489          N, VecVT, DAG)) {
3490 return Match;
3491 }
3492 // setcc (iN (bitcast (vNi1 X))), 0, eq
3493 // ==> xor (any_true (vNi1 X)), -1
3494  if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3495          N, VecVT, DAG)) {
3496 return Match;
3497 }
3498 // setcc (iN (bitcast (vNi1 X))), -1, eq
3499 // ==> all_true (vNi1 X)
3500  if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3501          N, VecVT, DAG)) {
3502 return Match;
3503 }
3504 // setcc (iN (bitcast (vNi1 X))), -1, ne
3505 // ==> xor (all_true (vNi1 X)), -1
3506  if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3507          N, VecVT, DAG)) {
3508 return Match;
3509 }
3510 return SDValue();
3511}
3512
3513static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3514  EVT VT = N->getValueType(0);
3515 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3516 return SDValue();
3517
3518 // Mul with extending inputs.
3519 SDValue LHS = N->getOperand(0);
3520 SDValue RHS = N->getOperand(1);
3521 if (LHS.getOpcode() != RHS.getOpcode())
3522 return SDValue();
3523
3524 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3525 LHS.getOpcode() != ISD::ZERO_EXTEND)
3526 return SDValue();
3527
3528 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3529 return SDValue();
3530
3531 EVT FromVT = LHS->getOperand(0).getValueType();
3532 EVT EltTy = FromVT.getVectorElementType();
3533 if (EltTy != MVT::i8)
3534 return SDValue();
3535
3536 // For an input DAG that looks like this
3537 // %a = input_type
3538 // %b = input_type
3539 // %lhs = extend %a to output_type
3540 // %rhs = extend %b to output_type
3541 // %mul = mul %lhs, %rhs
3542
3543 // input_type | output_type | instructions
3544 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3545 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3546 // | | %low_low = i32x4.ext_low_i16x8_ %low
3547 // | | %low_high = i32x4.ext_high_i16x8_ %low
3548 // | | %high_low = i32x4.ext_low_i16x8_ %high
3549 // | | %high_high = i32x4.ext_high_i16x8_ %high
3550 // | | %res = concat_vector(...)
3551 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3552 // | | %low_low = i32x4.ext_low_i16x8_ %low
3553 // | | %low_high = i32x4.ext_high_i16x8_ %low
3554 // | | %res = concat_vector(%low_low, %low_high)
3555
3556 SDLoc DL(N);
3557 unsigned NumElts = VT.getVectorNumElements();
3558 SDValue ExtendInLHS = LHS->getOperand(0);
3559 SDValue ExtendInRHS = RHS->getOperand(0);
3560 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3561 unsigned ExtendLowOpc =
3562 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3563 unsigned ExtendHighOpc =
3564 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3565
3566 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3567 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3568 };
3569 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3570 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3571 };
3572
3573 if (NumElts == 16) {
3574 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3575 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3576 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3577 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3578 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3579 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3580 SDValue SubVectors[] = {
3581 GetExtendLow(MVT::v4i32, MulLow),
3582 GetExtendHigh(MVT::v4i32, MulLow),
3583 GetExtendLow(MVT::v4i32, MulHigh),
3584 GetExtendHigh(MVT::v4i32, MulHigh),
3585 };
3586 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3587 } else {
3588 assert(NumElts == 8);
3589 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3590 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3591 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3592 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3593 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3594 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3595 }
3596 return SDValue();
3597}
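// [Editorial note, not part of the upstream listing] Worked example for the
// table above: for (mul (zext v16i8 %a to v16i32), (zext v16i8 %b to v16i32))
// the combine multiplies the extend_low/extend_high halves of %a and %b in
// v8i16, then extends each product's low and high halves again to v4i32 and
// concatenates the four pieces, so the multiply itself never has to be done in
// a type wider than 16 bits per lane.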
3598
3599static SDValue performMulCombine(SDNode *N,
3600                                 TargetLowering::DAGCombinerInfo &DCI) {
3601  assert(N->getOpcode() == ISD::MUL);
3602 EVT VT = N->getValueType(0);
3603 if (!VT.isVector())
3604 return SDValue();
3605
3606 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3607 return Res;
3608
3609 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3610 // extend them to v8i16. Only do this before legalization in case a narrow
3611 // vector is widened and may be simplified later.
3612 if (!DCI.isBeforeLegalize() || (VT != MVT::v8i8 && VT != MVT::v16i8))
3613 return SDValue();
3614
3615 SDLoc DL(N);
3616 SelectionDAG &DAG = DCI.DAG;
3617 SDValue LHS = N->getOperand(0);
3618 SDValue RHS = N->getOperand(1);
3619 EVT MulVT = MVT::v8i16;
3620
3621 if (VT == MVT::v8i8) {
3622 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3623 DAG.getUNDEF(MVT::v8i8));
3624 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3625 DAG.getUNDEF(MVT::v8i8));
3626 SDValue LowLHS =
3627 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3628 SDValue LowRHS =
3629 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3630 SDValue MulLow = DAG.getBitcast(
3631 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3632 // Take the low byte of each lane.
3633 SDValue Shuffle = DAG.getVectorShuffle(
3634 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3635 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3636 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3637 } else {
3638 assert(VT == MVT::v16i8 && "Expected v16i8");
3639 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3640 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3641 SDValue HighLHS =
3642 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3643 SDValue HighRHS =
3644 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3645
3646 SDValue MulLow =
3647 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3648 SDValue MulHigh =
3649 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3650
3651 // Take the low byte of each lane.
3652 return DAG.getVectorShuffle(
3653 VT, DL, MulLow, MulHigh,
3654 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3655 }
3656}
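// [Editorial note, not part of the upstream listing] For the v16i8 case above:
// WebAssembly has no 8-bit SIMD multiply, so each operand is zero-extended
// into two v8i16 halves, multiplied there, and the low byte of every 16-bit
// product lane is picked back out with the byte shuffle {0, 2, ..., 30}. The
// v8i8 path widens to a single v8i16 multiply and then extracts the low 64
// bits of the shuffled result.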
3657
3658SDValue
3659WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3660 DAGCombinerInfo &DCI) const {
3661 switch (N->getOpcode()) {
3662 default:
3663 return SDValue();
3664 case ISD::BITCAST:
3665 return performBitcastCombine(N, DCI);
3666 case ISD::SETCC:
3667 return performSETCCCombine(N, DCI, Subtarget);
3668  case ISD::VECTOR_SHUFFLE:
3669    return performVECTOR_SHUFFLECombine(N, DCI);
3670 case ISD::SIGN_EXTEND:
3671 case ISD::ZERO_EXTEND:
3672 return performVectorExtendCombine(N, DCI);
3673 case ISD::UINT_TO_FP:
3674 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3675 return ExtCombine;
3676 return performVectorNonNegToFPCombine(N, DCI);
3677 case ISD::SINT_TO_FP:
3678 return performVectorExtendToFPCombine(N, DCI);
3679  case ISD::FP_TO_SINT_SAT:
3680  case ISD::FP_TO_UINT_SAT:
3681  case ISD::FP_ROUND:
3682  case ISD::CONCAT_VECTORS:
3683    return performVectorTruncZeroCombine(N, DCI);
3684 case ISD::TRUNCATE:
3685 return performTruncateCombine(N, DCI);
3686  case ISD::INTRINSIC_WO_CHAIN: {
3687    if (auto AnyAllCombine = performAnyAllCombine(N, DCI.DAG))
3688 return AnyAllCombine;
3689 return performLowerPartialReduction(N, DCI.DAG);
3690 }
3691 case ISD::MUL:
3692 return performMulCombine(N, DCI);
3693 }
3694}