1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
91 setOperationAction(ISD::LOAD, T, Custom);
92 setOperationAction(ISD::STORE, T, Custom);
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
97 setOperationAction(ISD::LOAD, T, Custom);
98 setOperationAction(ISD::STORE, T, Custom);
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
103 setOperationAction(ISD::STORE, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
109 setOperationAction(ISD::LOAD, T, Custom);
110 setOperationAction(ISD::STORE, T, Custom);
111 }
112 }
113
119 setOperationAction(ISD::BRIND, MVT::Other, Custom);
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction(ISD::VAARG, MVT::Other, Expand);
126 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
127 setOperationAction(ISD::VAEND, MVT::Other, Expand);
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
140 for (auto Op :
141 {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
143 // Mark as legal the floating-point library function operators that otherwise
144 // default to expand.
145 for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
146 ISD::FRINT, ISD::FROUNDEVEN})
148 // Support minimum and maximum, which otherwise default to expand.
149 setOperationAction(ISD::FMINIMUM, T, Legal);
150 setOperationAction(ISD::FMAXIMUM, T, Legal);
151 // When experimental v8f16 support is enabled these instructions don't need
152 // to be expanded.
153 if (T != MVT::v8f16) {
154 setOperationAction(ISD::FP16_TO_FP, T, Expand);
155 setOperationAction(ISD::FP_TO_FP16, T, Expand);
156 }
158 setTruncStoreAction(T, MVT::f16, Expand);
159 }
160
161 // Expand unavailable integer operations.
162 for (auto Op :
166 for (auto T : {MVT::i32, MVT::i64})
168 if (Subtarget->hasSIMD128())
169 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
171 }
172
173 if (Subtarget->hasWideArithmetic()) {
179 }
180
181 if (Subtarget->hasNontrappingFPToInt())
183 for (auto T : {MVT::i32, MVT::i64})
185
186 if (Subtarget->hasRelaxedSIMD()) {
188 {ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM},
189 {MVT::v4f32, MVT::v2f64}, Legal);
190 }
191 // SIMD-specific configuration
192 if (Subtarget->hasSIMD128()) {
193
195
196 // Combine wide-vector muls with extended inputs into extmul_half.
198
199 // Combine vector mask reductions into alltrue/anytrue
201
202 // Convert vector-to-integer bitcasts to bitmask
203 setTargetDAGCombine(ISD::BITCAST);
204
205 // Hoist bitcasts out of shuffles
207
208 // Combine extends of extract_subvectors into widening ops
210
211 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
212 // conversion ops
215
216 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
217 // into conversion ops
220
222
223 // Support saturating add/sub for i8x16 and i16x8
225 for (auto T : {MVT::v16i8, MVT::v8i16})
227
228 // Support integer abs
229 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
231
232 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
233 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
234 MVT::v2f64})
236
237 if (Subtarget->hasFP16())
239
240 // We have custom shuffle lowering to expose the shuffle mask
241 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
242 MVT::v2f64})
244
245 if (Subtarget->hasFP16())
247
248 // Support splatting
249 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
250 MVT::v2f64})
252
253 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
254
255 // Custom lowering since wasm shifts must have a scalar shift amount
256 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
257 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
259
260 // Custom lower lane accesses to expand out variable indices
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
263 MVT::v2f64})
265
266 // There is no i8x16.mul instruction
267 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
268
269 // There is no vector conditional select instruction
270 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
271 MVT::v2f64})
273
274 // Expand integer operations supported for scalars but not SIMD
275 for (auto Op :
277 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
279
280 // But we do have integer min and max operations
281 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
282 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
284
285 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
286 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
287 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
288 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
289
290 // Custom lower bit counting operations for other types to scalarize them.
291 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
292 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
294
295 // Expand float operations supported for scalars but not SIMD
296 for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
297 ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
298 for (auto T : {MVT::v4f32, MVT::v2f64})
300
301 // Unsigned comparison operations are unavailable for i64x2 vectors.
303 setCondCodeAction(CC, MVT::v2i64, Custom);
304
305 // 64x2 conversions are not in the spec
306 for (auto Op :
308 for (auto T : {MVT::v2i64, MVT::v2f64})
310
311 // But saturating fp_to_int conversions are
313 setOperationAction(Op, MVT::v4i32, Custom);
314 if (Subtarget->hasFP16()) {
315 setOperationAction(Op, MVT::v8i16, Custom);
316 }
317 }
318
319 // Support vector extending
323 }
324
325 if (Subtarget->hasFP16()) {
326 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
327 }
328
329 if (Subtarget->hasRelaxedSIMD()) {
332 }
333
334 // Partial MLA reductions.
335 for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
336 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
337 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
338 }
339 }
340
341 // As a special case, these operators use the type to mean the type to
342 // sign-extend from.
344 if (!Subtarget->hasSignExt()) {
345 // Sign extends are legal only when extending a vector extract
346 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
347 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
349 }
352
353 // Dynamic stack allocation: use the default expansion.
354 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
355 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
356 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
357
361
362 // Expand these forms; we pattern-match the forms that we can handle in isel.
363 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
364 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
366
367 // We have custom switch handling.
368 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
369
370 // WebAssembly doesn't have:
371 // - Floating-point extending loads.
372 // - Floating-point truncating stores.
373 // - i1 extending loads.
374 // - truncating SIMD stores and most extending loads
375 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
376 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
377 for (auto T : MVT::integer_valuetypes())
378 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
379 setLoadExtAction(Ext, T, MVT::i1, Promote);
380 if (Subtarget->hasSIMD128()) {
381 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
382 MVT::v2f64}) {
383 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
384 if (MVT(T) != MemT) {
386 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
387 setLoadExtAction(Ext, T, MemT, Expand);
388 }
389 }
390 }
391 // But some vector extending loads are legal
392 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
393 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
394 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
395 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
396 }
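    // These correspond to the SIMD extending load instructions
    // (e.g. v128.load8x8_s/u, v128.load16x4_s/u, v128.load32x2_s/u).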
397 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
398 }
399
400 // Don't do anything clever with build_pairs
402
403 // Trap lowers to wasm unreachable
404 setOperationAction(ISD::TRAP, MVT::Other, Legal);
405 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
406
407 // Exception handling intrinsics
411
413
414 // Always convert switches to br_tables unless there is only one case, which
415 // is equivalent to a simple branch. This reduces code size for wasm, and we
416 // defer possible jump table optimizations to the VM.
418}
419
428
437
439WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
440 // We have wasm instructions for these
441 switch (AI->getOperation()) {
449 default:
450 break;
451 }
453}
454
455bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
456 // Implementation copied from X86TargetLowering.
457 unsigned Opc = VecOp.getOpcode();
458
459 // Assume target opcodes can't be scalarized.
460 // TODO - do we have any exceptions?
462 return false;
463
464 // If the vector op is not supported, try to convert to scalar.
465 EVT VecVT = VecOp.getValueType();
467 return true;
468
469 // If the vector op is supported, but the scalar op is not, the transform may
470 // not be worthwhile.
471 EVT ScalarVT = VecVT.getScalarType();
472 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
473}
474
475FastISel *WebAssemblyTargetLowering::createFastISel(
476 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
477 return WebAssembly::createFastISel(FuncInfo, LibInfo);
478}
479
480MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
481 EVT VT) const {
482 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
483 if (BitWidth > 1 && BitWidth < 8)
484 BitWidth = 8;
485
486 if (BitWidth > 64) {
487 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
488 // the count to be an i32.
489 BitWidth = 32;
491 "32-bit shift counts ought to be enough for anyone");
492 }
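  // E.g. this yields i32 for i32 shift amounts, i64 for i64 shifts, and i32
  // for i128 shifts, which are lowered to libcalls.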
493
496 "Unable to represent scalar shift amount type");
497 return Result;
498}
499
500// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
501// undefined result on invalid/overflow, to the WebAssembly opcode, which
502// traps on invalid/overflow.
505 const TargetInstrInfo &TII,
506 bool IsUnsigned, bool Int64,
507 bool Float64, unsigned LoweredOpcode) {
509
510 Register OutReg = MI.getOperand(0).getReg();
511 Register InReg = MI.getOperand(1).getReg();
512
513 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
514 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
515 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
516 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
517 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
518 unsigned Eqz = WebAssembly::EQZ_I32;
519 unsigned And = WebAssembly::AND_I32;
520 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
521 int64_t Substitute = IsUnsigned ? 0 : Limit;
522 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
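  // E.g. for an i32 result, the signed path checks fabs(x) < 2^31, while the
  // unsigned path checks x < 2^32 (combined with the x >= 0 test below).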
523 auto &Context = BB->getParent()->getFunction().getContext();
524 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
525
526 const BasicBlock *LLVMBB = BB->getBasicBlock();
527 MachineFunction *F = BB->getParent();
528 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
529 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
530 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
531
533 F->insert(It, FalseMBB);
534 F->insert(It, TrueMBB);
535 F->insert(It, DoneMBB);
536
537 // Transfer the remainder of BB and its successor edges to DoneMBB.
538 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
540
541 BB->addSuccessor(TrueMBB);
542 BB->addSuccessor(FalseMBB);
543 TrueMBB->addSuccessor(DoneMBB);
544 FalseMBB->addSuccessor(DoneMBB);
545
546 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
547 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
548 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
549 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
550 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
551 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
552 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
553
554 MI.eraseFromParent();
555 // For signed numbers, we can do a single comparison to determine whether
556 // fabs(x) is within range.
557 if (IsUnsigned) {
558 Tmp0 = InReg;
559 } else {
560 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
561 }
562 BuildMI(BB, DL, TII.get(FConst), Tmp1)
563 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
564 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
565
566 // For unsigned numbers, we have to do a separate comparison with zero.
567 if (IsUnsigned) {
568 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
569 Register SecondCmpReg =
570 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
571 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
572 BuildMI(BB, DL, TII.get(FConst), Tmp1)
573 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
574 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
575 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
576 CmpReg = AndReg;
577 }
578
579 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
580
581 // Create the CFG diamond to select between doing the conversion or using
582 // the substitute value.
583 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
584 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
585 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
586 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
587 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
588 .addReg(FalseReg)
589 .addMBB(FalseMBB)
590 .addReg(TrueReg)
591 .addMBB(TrueMBB);
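  // The emitted control flow is roughly (sketch, signed f32 -> i32 case):
  //   BB:       %t0  = abs %in
  //             %cmp = %t0 < 0x1p31
  //             %eqz = eqz %cmp
  //             br_if TrueMBB, %eqz         ; out of range -> substitute value
  //   FalseMBB: %false = i32.trunc_f32_s %in
  //             br DoneMBB
  //   TrueMBB:  %true = INT32_MIN           ; 0 in the unsigned case
  //   DoneMBB:  %out = phi [%false, FalseMBB], [%true, TrueMBB]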
592
593 return DoneMBB;
594}
595
596// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
597// instruction to handle the zero-length case.
600 const TargetInstrInfo &TII, bool Int64) {
602
603 MachineOperand DstMem = MI.getOperand(0);
604 MachineOperand SrcMem = MI.getOperand(1);
605 MachineOperand Dst = MI.getOperand(2);
606 MachineOperand Src = MI.getOperand(3);
607 MachineOperand Len = MI.getOperand(4);
608
609 // If the length is a constant, we don't actually need the check.
610 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
611 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
612 Def->getOpcode() == WebAssembly::CONST_I64) {
613 if (Def->getOperand(1).getImm() == 0) {
614 // A zero-length memcpy is a no-op.
615 MI.eraseFromParent();
616 return BB;
617 }
618 // A non-zero-length memcpy doesn't need a zero check.
619 unsigned MemoryCopy =
620 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
621 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
622 .add(DstMem)
623 .add(SrcMem)
624 .add(Dst)
625 .add(Src)
626 .add(Len);
627 MI.eraseFromParent();
628 return BB;
629 }
630 }
631
632 // We're going to add an extra use to `Len` to test if it's zero; that
633 // use shouldn't be a kill, even if the original use is.
634 MachineOperand NoKillLen = Len;
635 NoKillLen.setIsKill(false);
636
637 // Decide on which `MachineInstr` opcode we're going to use.
638 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
639 unsigned MemoryCopy =
640 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
641
642 // Create two new basic blocks; one for the new `memory.copy` that we can
643 // branch over, and one for the rest of the instructions after the original
644 // `memory.copy`.
645 const BasicBlock *LLVMBB = BB->getBasicBlock();
646 MachineFunction *F = BB->getParent();
647 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
648 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
649
651 F->insert(It, TrueMBB);
652 F->insert(It, DoneMBB);
653
654 // Transfer the remainder of BB and its successor edges to DoneMBB.
655 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
657
658 // Connect the CFG edges.
659 BB->addSuccessor(TrueMBB);
660 BB->addSuccessor(DoneMBB);
661 TrueMBB->addSuccessor(DoneMBB);
662
663 // Create a virtual register for the `Eqz` result.
664 unsigned EqzReg;
665 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
666
667 // Erase the original `memory.copy`.
668 MI.eraseFromParent();
669
670 // Test if `Len` is zero.
671 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
672
673 // Insert a new `memory.copy`.
674 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
675 .add(DstMem)
676 .add(SrcMem)
677 .add(Dst)
678 .add(Src)
679 .add(Len);
680
681 // Create the CFG triangle.
682 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
683 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
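  // The resulting CFG triangle is roughly:
  //   BB:      %eqz = eqz %len ; br_if DoneMBB, %eqz
  //   TrueMBB: memory.copy dstmem, srcmem, %dst, %src, %len ; br DoneMBB
  //   DoneMBB: (rest of the original block)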
684
685 return DoneMBB;
686}
687
688// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
689// instruction to handle the zero-length case.
692 const TargetInstrInfo &TII, bool Int64) {
694
695 MachineOperand Mem = MI.getOperand(0);
696 MachineOperand Dst = MI.getOperand(1);
697 MachineOperand Val = MI.getOperand(2);
698 MachineOperand Len = MI.getOperand(3);
699
700 // If the length is a constant, we don't actually need the check.
701 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
702 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
703 Def->getOpcode() == WebAssembly::CONST_I64) {
704 if (Def->getOperand(1).getImm() == 0) {
705 // A zero-length memset is a no-op.
706 MI.eraseFromParent();
707 return BB;
708 }
709 // A non-zero-length memset doesn't need a zero check.
710 unsigned MemoryFill =
711 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
712 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
713 .add(Mem)
714 .add(Dst)
715 .add(Val)
716 .add(Len);
717 MI.eraseFromParent();
718 return BB;
719 }
720 }
721
722 // We're going to add an extra use to `Len` to test if it's zero; that
723 // use shouldn't be a kill, even if the original use is.
724 MachineOperand NoKillLen = Len;
725 NoKillLen.setIsKill(false);
726
727 // Decide on which `MachineInstr` opcode we're going to use.
728 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
729 unsigned MemoryFill =
730 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
731
732 // Create two new basic blocks; one for the new `memory.fill` that we can
733 // branch over, and one for the rest of the instructions after the original
734 // `memory.fill`.
735 const BasicBlock *LLVMBB = BB->getBasicBlock();
736 MachineFunction *F = BB->getParent();
737 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
738 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
739
741 F->insert(It, TrueMBB);
742 F->insert(It, DoneMBB);
743
744 // Transfer the remainder of BB and its successor edges to DoneMBB.
745 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
747
748 // Connect the CFG edges.
749 BB->addSuccessor(TrueMBB);
750 BB->addSuccessor(DoneMBB);
751 TrueMBB->addSuccessor(DoneMBB);
752
753 // Create a virtual register for the `Eqz` result.
754 unsigned EqzReg;
755 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
756
757 // Erase the original `memory.fill`.
758 MI.eraseFromParent();
759
760 // Test if `Len` is zero.
761 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
762
763 // Insert a new `memory.fill`.
764 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
765
766 // Create the CFG triangle.
767 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
768 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
769
770 return DoneMBB;
771}
772
773static MachineBasicBlock *
775 const WebAssemblySubtarget *Subtarget,
776 const TargetInstrInfo &TII) {
777 MachineInstr &CallParams = *CallResults.getPrevNode();
778 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
779 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
780 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
781
782 bool IsIndirect =
783 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
784 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
785
786 bool IsFuncrefCall = false;
787 if (IsIndirect && CallParams.getOperand(0).isReg()) {
788 Register Reg = CallParams.getOperand(0).getReg();
789 const MachineFunction *MF = BB->getParent();
790 const MachineRegisterInfo &MRI = MF->getRegInfo();
791 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
792 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
793 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
794 }
795
796 unsigned CallOp;
797 if (IsIndirect && IsRetCall) {
798 CallOp = WebAssembly::RET_CALL_INDIRECT;
799 } else if (IsIndirect) {
800 CallOp = WebAssembly::CALL_INDIRECT;
801 } else if (IsRetCall) {
802 CallOp = WebAssembly::RET_CALL;
803 } else {
804 CallOp = WebAssembly::CALL;
805 }
806
807 MachineFunction &MF = *BB->getParent();
808 const MCInstrDesc &MCID = TII.get(CallOp);
809 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
810
811 // Move the function pointer to the end of the arguments for indirect calls
812 if (IsIndirect) {
813 auto FnPtr = CallParams.getOperand(0);
814 CallParams.removeOperand(0);
815
816 // For funcrefs, call_indirect is done through __funcref_call_table, and the
817 // funcref is always installed in slot 0 of the table. Therefore, instead of
818 // appending the function pointer to the end of the params list, a zero (its
819 // index in __funcref_call_table) is appended.
821 if (IsFuncrefCall) {
822 Register RegZero =
823 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
824 MachineInstrBuilder MIBC0 =
825 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
826
827 BB->insert(CallResults.getIterator(), MIBC0);
828 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
829 } else
830 CallParams.addOperand(FnPtr);
831 }
832
833 for (auto Def : CallResults.defs())
834 MIB.add(Def);
835
836 if (IsIndirect) {
837 // Placeholder for the type index.
838 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
839 MIB.addImm(0);
840 // The table into which this call_indirect indexes.
841 MCSymbolWasm *Table = IsFuncrefCall
843 MF.getContext(), Subtarget)
845 MF.getContext(), Subtarget);
846 if (Subtarget->hasCallIndirectOverlong()) {
847 MIB.addSym(Table);
848 } else {
849 // For the MVP there is at most one table whose number is 0, but we can't
850 // write a table symbol or issue relocations. Instead we just ensure the
851 // table is live and write a zero.
852 Table->setNoStrip();
853 MIB.addImm(0);
854 }
855 }
856
857 for (auto Use : CallParams.uses())
858 MIB.add(Use);
859
860 BB->insert(CallResults.getIterator(), MIB);
861 CallParams.eraseFromParent();
862 CallResults.eraseFromParent();
863
864 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
865 // table slot with ref.null upon call_indirect return.
866 //
867 // This generates the following code, which comes right after a call_indirect
868 // of a funcref:
869 //
870 // i32.const 0
871 // ref.null func
872 // table.set __funcref_call_table
873 if (IsIndirect && IsFuncrefCall) {
875 MF.getContext(), Subtarget);
876 Register RegZero =
877 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
878 MachineInstr *Const0 =
879 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
880 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
881
882 Register RegFuncref =
883 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
884 MachineInstr *RefNull =
885 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
886 BB->insertAfter(Const0->getIterator(), RefNull);
887
888 MachineInstr *TableSet =
889 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
890 .addSym(Table)
891 .addReg(RegZero)
892 .addReg(RegFuncref);
893 BB->insertAfter(RefNull->getIterator(), TableSet);
894 }
895
896 return BB;
897}
898
899MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
900 MachineInstr &MI, MachineBasicBlock *BB) const {
901 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
902 DebugLoc DL = MI.getDebugLoc();
903
904 switch (MI.getOpcode()) {
905 default:
906 llvm_unreachable("Unexpected instr type to insert");
907 case WebAssembly::FP_TO_SINT_I32_F32:
908 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
909 WebAssembly::I32_TRUNC_S_F32);
910 case WebAssembly::FP_TO_UINT_I32_F32:
911 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
912 WebAssembly::I32_TRUNC_U_F32);
913 case WebAssembly::FP_TO_SINT_I64_F32:
914 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
915 WebAssembly::I64_TRUNC_S_F32);
916 case WebAssembly::FP_TO_UINT_I64_F32:
917 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
918 WebAssembly::I64_TRUNC_U_F32);
919 case WebAssembly::FP_TO_SINT_I32_F64:
920 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
921 WebAssembly::I32_TRUNC_S_F64);
922 case WebAssembly::FP_TO_UINT_I32_F64:
923 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
924 WebAssembly::I32_TRUNC_U_F64);
925 case WebAssembly::FP_TO_SINT_I64_F64:
926 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
927 WebAssembly::I64_TRUNC_S_F64);
928 case WebAssembly::FP_TO_UINT_I64_F64:
929 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
930 WebAssembly::I64_TRUNC_U_F64);
931 case WebAssembly::MEMCPY_A32:
932 return LowerMemcpy(MI, DL, BB, TII, false);
933 case WebAssembly::MEMCPY_A64:
934 return LowerMemcpy(MI, DL, BB, TII, true);
935 case WebAssembly::MEMSET_A32:
936 return LowerMemset(MI, DL, BB, TII, false);
937 case WebAssembly::MEMSET_A64:
938 return LowerMemset(MI, DL, BB, TII, true);
939 case WebAssembly::CALL_RESULTS:
940 case WebAssembly::RET_CALL_RESULTS:
941 return LowerCallResults(MI, DL, BB, Subtarget, TII);
942 }
943}
944
945const char *
946WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
947 switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
949 break;
950#define HANDLE_NODETYPE(NODE) \
951 case WebAssemblyISD::NODE: \
952 return "WebAssemblyISD::" #NODE;
953#include "WebAssemblyISD.def"
954#undef HANDLE_NODETYPE
955 }
956 return nullptr;
957}
958
959std::pair<unsigned, const TargetRegisterClass *>
960WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
961 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
962 // First, see if this is a constraint that directly corresponds to a
963 // WebAssembly register class.
964 if (Constraint.size() == 1) {
965 switch (Constraint[0]) {
966 case 'r':
967 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
968 if (Subtarget->hasSIMD128() && VT.isVector()) {
969 if (VT.getSizeInBits() == 128)
970 return std::make_pair(0U, &WebAssembly::V128RegClass);
971 }
972 if (VT.isInteger() && !VT.isVector()) {
973 if (VT.getSizeInBits() <= 32)
974 return std::make_pair(0U, &WebAssembly::I32RegClass);
975 if (VT.getSizeInBits() <= 64)
976 return std::make_pair(0U, &WebAssembly::I64RegClass);
977 }
978 if (VT.isFloatingPoint() && !VT.isVector()) {
979 switch (VT.getSizeInBits()) {
980 case 32:
981 return std::make_pair(0U, &WebAssembly::F32RegClass);
982 case 64:
983 return std::make_pair(0U, &WebAssembly::F64RegClass);
984 default:
985 break;
986 }
987 }
988 break;
989 default:
990 break;
991 }
992 }
993
995}
996
997bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
998 // Assume ctz is a relatively cheap operation.
999 return true;
1000}
1001
1002bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1003 // Assume clz is a relatively cheap operation.
1004 return true;
1005}
1006
1007bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1008 const AddrMode &AM,
1009 Type *Ty, unsigned AS,
1010 Instruction *I) const {
1011 // WebAssembly offsets are added as unsigned without wrapping. The
1012 // isLegalAddressingMode gives us no way to determine if wrapping could be
1013 // happening, so we approximate this by accepting only non-negative offsets.
1014 if (AM.BaseOffs < 0)
1015 return false;
1016
1017 // WebAssembly has no scale register operands.
1018 if (AM.Scale != 0)
1019 return false;
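  // For example, an address of the form "base + 16" is accepted, while
  // "base - 8" or "base + 4 * index" is rejected and must be computed
  // separately.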
1020
1021 // Everything else is legal.
1022 return true;
1023}
1024
1025bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1026 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1027 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1028 // WebAssembly supports unaligned accesses, though they should be declared
1029 // with the p2align attribute on the loads and stores that do so, and there
1030 // may be a performance impact. We tell LLVM they're "fast" because
1031 // for the kinds of things that LLVM uses this for (merging adjacent stores
1032 // of constants, etc.), WebAssembly implementations will either want the
1033 // unaligned access or they'll split anyway.
1034 if (Fast)
1035 *Fast = 1;
1036 return true;
1037}
1038
1039bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1040 AttributeList Attr) const {
1041 // The current thinking is that wasm engines will perform this optimization,
1042 // so we can save on code size.
1043 return true;
1044}
1045
1046bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1047 EVT ExtT = ExtVal.getValueType();
1048 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1049 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1050 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1051 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1052}
1053
1054bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1055 const GlobalAddressSDNode *GA) const {
1056 // Wasm doesn't support function addresses with offsets
1057 const GlobalValue *GV = GA->getGlobal();
1059}
1060
1061EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1062 LLVMContext &C,
1063 EVT VT) const {
1064 if (VT.isVector())
1066
1067 // So far, all branch instructions in Wasm take an I32 condition.
1068 // The default TargetLowering::getSetCCResultType returns the pointer size,
1069 // which would be useful to reduce instruction counts when testing
1070 // against 64-bit pointers/values if at some point Wasm supports that.
1071 return EVT::getIntegerVT(C, 32);
1072}
1073
1074bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1075 const CallInst &I,
1076 MachineFunction &MF,
1077 unsigned Intrinsic) const {
1078 switch (Intrinsic) {
1079 case Intrinsic::wasm_memory_atomic_notify:
1081 Info.memVT = MVT::i32;
1082 Info.ptrVal = I.getArgOperand(0);
1083 Info.offset = 0;
1084 Info.align = Align(4);
1085 // The atomic.notify instruction does not really load from the memory
1086 // specified by this argument, but a MachineMemOperand must be either a load
1087 // or a store, so we mark this as a load.
1088 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1089 // instructions are treated as volatiles in the backend, so we should be
1090 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1092 return true;
1093 case Intrinsic::wasm_memory_atomic_wait32:
1095 Info.memVT = MVT::i32;
1096 Info.ptrVal = I.getArgOperand(0);
1097 Info.offset = 0;
1098 Info.align = Align(4);
1100 return true;
1101 case Intrinsic::wasm_memory_atomic_wait64:
1103 Info.memVT = MVT::i64;
1104 Info.ptrVal = I.getArgOperand(0);
1105 Info.offset = 0;
1106 Info.align = Align(8);
1108 return true;
1109 case Intrinsic::wasm_loadf16_f32:
1111 Info.memVT = MVT::f16;
1112 Info.ptrVal = I.getArgOperand(0);
1113 Info.offset = 0;
1114 Info.align = Align(2);
1116 return true;
1117 case Intrinsic::wasm_storef16_f32:
1119 Info.memVT = MVT::f16;
1120 Info.ptrVal = I.getArgOperand(1);
1121 Info.offset = 0;
1122 Info.align = Align(2);
1124 return true;
1125 default:
1126 return false;
1127 }
1128}
1129
1130void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1131 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1132 const SelectionDAG &DAG, unsigned Depth) const {
1133 switch (Op.getOpcode()) {
1134 default:
1135 break;
1137 unsigned IntNo = Op.getConstantOperandVal(0);
1138 switch (IntNo) {
1139 default:
1140 break;
1141 case Intrinsic::wasm_bitmask: {
1142 unsigned BitWidth = Known.getBitWidth();
1143 EVT VT = Op.getOperand(1).getSimpleValueType();
1144 unsigned PossibleBits = VT.getVectorNumElements();
1145 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
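      // E.g. a bitmask of a v16i8 vector can set at most the low 16 bits, so
      // the remaining high bits of the i32 result are known to be zero.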
1146 Known.Zero |= ZeroMask;
1147 break;
1148 }
1149 }
1150 break;
1151 }
1152
1153 // For 128-bit addition if the upper bits are all zero then it's known that
1154 // the upper bits of the result will have all bits guaranteed zero except the
1155 // first.
1156 case WebAssemblyISD::I64_ADD128:
1157 if (Op.getResNo() == 1) {
1158 SDValue LHS_HI = Op.getOperand(1);
1159 SDValue RHS_HI = Op.getOperand(3);
1160 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1161 Known.Zero.setBitsFrom(1);
1162 }
1163 break;
1164 }
1165}
1166
1168WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1169 if (VT.isFixedLengthVector()) {
1170 MVT EltVT = VT.getVectorElementType();
1171 // We have legal vector types with these lane types, so widening the
1172 // vector would let us use some of the lanes directly without having to
1173 // extend or truncate values.
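    // For example, v2i32 is widened to v4i32 rather than being split or
    // promoted.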
1174 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1175 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1176 return TypeWidenVector;
1177 }
1178
1180}
1181
1182bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1183 const MachineFunction &MF, EVT VT) const {
1184 if (!Subtarget->hasFP16() || !VT.isVector())
1185 return false;
1186
1187 EVT ScalarVT = VT.getScalarType();
1188 if (!ScalarVT.isSimple())
1189 return false;
1190
1191 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1192}
1193
1194bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1195 SDValue Op, const TargetLoweringOpt &TLO) const {
1196 // The ISel process runs DAGCombiner after legalization; this step is called
1197 // the SelectionDAG optimization phase. This post-legalization combining
1198 // process runs DAGCombiner on each node and, if there was a change to be
1199 // made, re-runs legalization on it and its user nodes to make sure
1200 // everything is in a legalized state.
1201 //
1202 // The legalization calls lowering routines, and we do our custom lowering for
1203 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1204 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1205 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1206 // turns unused vector elements into undefs. This routine does not work
1207 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. The
1208 // combination can result in an infinite loop, in which undefs are converted to
1209 // zeros in legalization and back to undefs in combining.
1210 //
1211 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1212 // running for build_vectors.
1213 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1214 return false;
1215 return true;
1216}
1217
1218//===----------------------------------------------------------------------===//
1219// WebAssembly Lowering private implementation.
1220//===----------------------------------------------------------------------===//
1221
1222//===----------------------------------------------------------------------===//
1223// Lowering Code
1224//===----------------------------------------------------------------------===//
1225
1226static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1228 DAG.getContext()->diagnose(
1229 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1230}
1231
1232// Test whether the given calling convention is supported.
1234 // We currently support the language-independent target-independent
1235 // conventions. We don't yet have a way to annotate calls with properties like
1236 // "cold", and we don't have any call-clobbered registers, so these are mostly
1237 // all handled the same.
1238 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1239 CallConv == CallingConv::Cold ||
1240 CallConv == CallingConv::PreserveMost ||
1241 CallConv == CallingConv::PreserveAll ||
1242 CallConv == CallingConv::CXX_FAST_TLS ||
1244 CallConv == CallingConv::Swift;
1245}
1246
1247SDValue
1248WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1249 SmallVectorImpl<SDValue> &InVals) const {
1250 SelectionDAG &DAG = CLI.DAG;
1251 SDLoc DL = CLI.DL;
1252 SDValue Chain = CLI.Chain;
1253 SDValue Callee = CLI.Callee;
1254 MachineFunction &MF = DAG.getMachineFunction();
1255 auto Layout = MF.getDataLayout();
1256
1257 CallingConv::ID CallConv = CLI.CallConv;
1258 if (!callingConvSupported(CallConv))
1259 fail(DL, DAG,
1260 "WebAssembly doesn't support language-specific or target-specific "
1261 "calling conventions yet");
1262 if (CLI.IsPatchPoint)
1263 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1264
1265 if (CLI.IsTailCall) {
1266 auto NoTail = [&](const char *Msg) {
1267 if (CLI.CB && CLI.CB->isMustTailCall())
1268 fail(DL, DAG, Msg);
1269 CLI.IsTailCall = false;
1270 };
1271
1272 if (!Subtarget->hasTailCall())
1273 NoTail("WebAssembly 'tail-call' feature not enabled");
1274
1275 // Varargs calls cannot be tail calls because the buffer is on the stack
1276 if (CLI.IsVarArg)
1277 NoTail("WebAssembly does not support varargs tail calls");
1278
1279 // Do not tail call unless caller and callee return types match
1280 const Function &F = MF.getFunction();
1281 const TargetMachine &TM = getTargetMachine();
1282 Type *RetTy = F.getReturnType();
1283 SmallVector<MVT, 4> CallerRetTys;
1284 SmallVector<MVT, 4> CalleeRetTys;
1285 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1286 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1287 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1288 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1289 CalleeRetTys.begin());
1290 if (!TypesMatch)
1291 NoTail("WebAssembly tail call requires caller and callee return types to "
1292 "match");
1293
1294 // If pointers to local stack values are passed, we cannot tail call
1295 if (CLI.CB) {
1296 for (auto &Arg : CLI.CB->args()) {
1297 Value *Val = Arg.get();
1298 // Trace the value back through pointer operations
1299 while (true) {
1300 Value *Src = Val->stripPointerCastsAndAliases();
1301 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1302 Src = GEP->getPointerOperand();
1303 if (Val == Src)
1304 break;
1305 Val = Src;
1306 }
1307 if (isa<AllocaInst>(Val)) {
1308 NoTail(
1309 "WebAssembly does not support tail calling with stack arguments");
1310 break;
1311 }
1312 }
1313 }
1314 }
1315
1316 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1317 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1318 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1319
1320 // The generic code may have added an sret argument. If we're lowering an
1321 // invoke function, the ABI requires that the function pointer be the first
1322 // argument, so we may have to swap the arguments.
1323 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1324 Outs[0].Flags.isSRet()) {
1325 std::swap(Outs[0], Outs[1]);
1326 std::swap(OutVals[0], OutVals[1]);
1327 }
1328
1329 bool HasSwiftSelfArg = false;
1330 bool HasSwiftErrorArg = false;
1331 unsigned NumFixedArgs = 0;
1332 for (unsigned I = 0; I < Outs.size(); ++I) {
1333 const ISD::OutputArg &Out = Outs[I];
1334 SDValue &OutVal = OutVals[I];
1335 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1336 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1337 if (Out.Flags.isNest())
1338 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1339 if (Out.Flags.isInAlloca())
1340 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1341 if (Out.Flags.isInConsecutiveRegs())
1342 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1344 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1345 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1346 auto &MFI = MF.getFrameInfo();
1347 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1349 /*isSS=*/false);
1350 SDValue SizeNode =
1351 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1352 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1353 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1355 /*isVolatile*/ false, /*AlwaysInline=*/false,
1356 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1357 MachinePointerInfo());
1358 OutVal = FINode;
1359 }
1360 // Count the number of fixed args *after* legalization.
1361 NumFixedArgs += !Out.Flags.isVarArg();
1362 }
1363
1364 bool IsVarArg = CLI.IsVarArg;
1365 auto PtrVT = getPointerTy(Layout);
1366
1367 // For swiftcc, emit additional swiftself and swifterror arguments if they
1368 // are not already present. These additional arguments are also added to the
1369 // callee signature; they are necessary to match callee and caller signatures
1370 // for indirect calls.
1371 if (CallConv == CallingConv::Swift) {
1372 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1373 if (!HasSwiftSelfArg) {
1374 NumFixedArgs++;
1375 ISD::ArgFlagsTy Flags;
1376 Flags.setSwiftSelf();
1377 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1378 CLI.Outs.push_back(Arg);
1379 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1380 CLI.OutVals.push_back(ArgVal);
1381 }
1382 if (!HasSwiftErrorArg) {
1383 NumFixedArgs++;
1384 ISD::ArgFlagsTy Flags;
1385 Flags.setSwiftError();
1386 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1387 CLI.Outs.push_back(Arg);
1388 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1389 CLI.OutVals.push_back(ArgVal);
1390 }
1391 }
1392
1393 // Analyze operands of the call, assigning locations to each operand.
1395 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1396
1397 if (IsVarArg) {
1398 // Outgoing non-fixed arguments are placed in a buffer. First
1399 // compute their offsets and the total amount of buffer space needed.
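    // For example, a call to a function declared as f(int, ...) that passes one
    // extra double allocates an 8-byte buffer, stores the double at offset 0,
    // and passes a pointer to the buffer as an additional final operand.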
1400 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1401 const ISD::OutputArg &Out = Outs[I];
1402 SDValue &Arg = OutVals[I];
1403 EVT VT = Arg.getValueType();
1404 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1405 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1406 Align Alignment =
1407 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1408 unsigned Offset =
1409 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1410 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1411 Offset, VT.getSimpleVT(),
1413 }
1414 }
1415
1416 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1417
1418 SDValue FINode;
1419 if (IsVarArg && NumBytes) {
1420 // For non-fixed arguments, next emit stores to store the argument values
1421 // to the stack buffer at the offsets computed above.
1422 MaybeAlign StackAlign = Layout.getStackAlignment();
1423 assert(StackAlign && "data layout string is missing stack alignment");
1424 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1425 /*isSS=*/false);
1426 unsigned ValNo = 0;
1428 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1429 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1430 "ArgLocs should remain in order and only hold varargs args");
1431 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1432 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1433 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1434 DAG.getConstant(Offset, DL, PtrVT));
1435 Chains.push_back(
1436 DAG.getStore(Chain, DL, Arg, Add,
1438 }
1439 if (!Chains.empty())
1440 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1441 } else if (IsVarArg) {
1442 FINode = DAG.getIntPtrConstant(0, DL);
1443 }
1444
1445 if (Callee->getOpcode() == ISD::GlobalAddress) {
1446 // If the callee is a GlobalAddress node (quite common, every direct call
1447 // is), turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1448 // doesn't add MO_GOT, which is not needed for direct calls.
1449 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1452 GA->getOffset());
1453 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1454 getPointerTy(DAG.getDataLayout()), Callee);
1455 }
1456
1457 // Compute the operands for the CALLn node.
1459 Ops.push_back(Chain);
1460 Ops.push_back(Callee);
1461
1462 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1463 // isn't reliable.
1464 Ops.append(OutVals.begin(),
1465 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1466 // Add a pointer to the vararg buffer.
1467 if (IsVarArg)
1468 Ops.push_back(FINode);
1469
1470 SmallVector<EVT, 8> InTys;
1471 for (const auto &In : Ins) {
1472 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1473 assert(!In.Flags.isNest() && "nest is not valid for return values");
1474 if (In.Flags.isInAlloca())
1475 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1476 if (In.Flags.isInConsecutiveRegs())
1477 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1478 if (In.Flags.isInConsecutiveRegsLast())
1479 fail(DL, DAG,
1480 "WebAssembly hasn't implemented cons regs last return values");
1481 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1482 // registers.
1483 InTys.push_back(In.VT);
1484 }
1485
1486 // Lastly, if this is a call to a funcref, we need to add a table.set
1487 // instruction to the chain and transform the call.
1489 CLI.CB->getCalledOperand()->getType())) {
1490 // In the absence of the function references proposal (where a funcref call
1491 // would be lowered to call_ref), with reference types we generate a table.set
1492 // that installs the funcref in a special table used solely for this purpose,
1493 // followed by a call_indirect. Here we just generate the table.set and return the
1494 // SDValue of the table.set so that LowerCall can finalize the lowering by
1495 // generating the call_indirect.
1496 SDValue Chain = Ops[0];
1497
1499 MF.getContext(), Subtarget);
1500 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1501 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1502 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1503 SDValue TableSet = DAG.getMemIntrinsicNode(
1504 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1505 MVT::funcref,
1506 // Machine Mem Operand args
1507 MachinePointerInfo(
1509 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1511
1512 Ops[0] = TableSet; // The new chain is the TableSet itself
1513 }
1514
1515 if (CLI.IsTailCall) {
1516 // ret_calls do not return values to the current frame
1517 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1518 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1519 }
1520
1521 InTys.push_back(MVT::Other);
1522 SDVTList InTyList = DAG.getVTList(InTys);
1523 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1524
1525 for (size_t I = 0; I < Ins.size(); ++I)
1526 InVals.push_back(Res.getValue(I));
1527
1528 // Return the chain
1529 return Res.getValue(Ins.size());
1530}
1531
1532bool WebAssemblyTargetLowering::CanLowerReturn(
1533 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1534 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1535 const Type *RetTy) const {
1536 // WebAssembly can only handle returning tuples with multivalue enabled
1537 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1538}
1539
1540SDValue WebAssemblyTargetLowering::LowerReturn(
1541 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1543 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1544 SelectionDAG &DAG) const {
1545 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1546 "MVP WebAssembly can only return up to one value");
1547 if (!callingConvSupported(CallConv))
1548 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1549
1550 SmallVector<SDValue, 4> RetOps(1, Chain);
1551 RetOps.append(OutVals.begin(), OutVals.end());
1552 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1553
1554 // Record the number and types of the return values.
1555 for (const ISD::OutputArg &Out : Outs) {
1556 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1557 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1558 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1559 if (Out.Flags.isInAlloca())
1560 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1561 if (Out.Flags.isInConsecutiveRegs())
1562 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1564 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1565 }
1566
1567 return Chain;
1568}
1569
1570SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1571 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1572 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1573 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1574 if (!callingConvSupported(CallConv))
1575 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1576
1577 MachineFunction &MF = DAG.getMachineFunction();
1578 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1579
1580 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1581 // of the incoming values before they're represented by virtual registers.
1582 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1583
1584 bool HasSwiftErrorArg = false;
1585 bool HasSwiftSelfArg = false;
1586 for (const ISD::InputArg &In : Ins) {
1587 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1588 HasSwiftErrorArg |= In.Flags.isSwiftError();
1589 if (In.Flags.isInAlloca())
1590 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1591 if (In.Flags.isNest())
1592 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1593 if (In.Flags.isInConsecutiveRegs())
1594 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1595 if (In.Flags.isInConsecutiveRegsLast())
1596 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1597 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1598 // registers.
1599 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1600 DAG.getTargetConstant(InVals.size(),
1601 DL, MVT::i32))
1602 : DAG.getUNDEF(In.VT));
1603
1604 // Record the number and types of arguments.
1605 MFI->addParam(In.VT);
1606 }
1607
1608 // For swiftcc, emit additional swiftself and swifterror arguments if they
1609 // are not already present. These additional arguments are also added to the
1610 // callee signature; they are necessary to match callee and caller signatures
1611 // for indirect calls.
1612 auto PtrVT = getPointerTy(MF.getDataLayout());
1613 if (CallConv == CallingConv::Swift) {
1614 if (!HasSwiftSelfArg) {
1615 MFI->addParam(PtrVT);
1616 }
1617 if (!HasSwiftErrorArg) {
1618 MFI->addParam(PtrVT);
1619 }
1620 }
1621 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1622 // the buffer is passed as an argument.
1623 if (IsVarArg) {
1624 MVT PtrVT = getPointerTy(MF.getDataLayout());
1625 Register VarargVreg =
1627 MFI->setVarargBufferVreg(VarargVreg);
1628 Chain = DAG.getCopyToReg(
1629 Chain, DL, VarargVreg,
1630 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1631 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1632 MFI->addParam(PtrVT);
1633 }
1634
1635 // Record the number and types of arguments and results.
1636 SmallVector<MVT, 4> Params;
1639 MF.getFunction(), DAG.getTarget(), Params, Results);
1640 for (MVT VT : Results)
1641 MFI->addResult(VT);
1642 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1643 // the param logic here with ComputeSignatureVTs
1644 assert(MFI->getParams().size() == Params.size() &&
1645 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1646 Params.begin()));
1647
1648 return Chain;
1649}
1650
1651void WebAssemblyTargetLowering::ReplaceNodeResults(
1653 switch (N->getOpcode()) {
1655 // Do not add any results, signifying that N should not be custom lowered
1656 // after all. This happens because simd128 turns on custom lowering for
1657 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1658 // illegal type.
1659 break;
1662 // Do not add any results, signifying that N should not be custom lowered.
1663 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1664 break;
1665 case ISD::ADD:
1666 case ISD::SUB:
1667 Results.push_back(Replace128Op(N, DAG));
1668 break;
1669 default:
1671 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1672 }
1673}
1674
1675//===----------------------------------------------------------------------===//
1676// Custom lowering hooks.
1677//===----------------------------------------------------------------------===//
1678
1679SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1680 SelectionDAG &DAG) const {
1681 SDLoc DL(Op);
1682 switch (Op.getOpcode()) {
1683 default:
1684 llvm_unreachable("unimplemented operation lowering");
1685 return SDValue();
1686 case ISD::FrameIndex:
1687 return LowerFrameIndex(Op, DAG);
1688 case ISD::GlobalAddress:
1689 return LowerGlobalAddress(Op, DAG);
1690 case ISD::GlobalTLSAddress:
1691 return LowerGlobalTLSAddress(Op, DAG);
1692 case ISD::ExternalSymbol:
1693 return LowerExternalSymbol(Op, DAG);
1694 case ISD::JumpTable:
1695 return LowerJumpTable(Op, DAG);
1696 case ISD::BR_JT:
1697 return LowerBR_JT(Op, DAG);
1698 case ISD::VASTART:
1699 return LowerVASTART(Op, DAG);
1700 case ISD::BlockAddress:
1701 case ISD::BRIND:
1702 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1703 return SDValue();
1704 case ISD::RETURNADDR:
1705 return LowerRETURNADDR(Op, DAG);
1706 case ISD::FRAMEADDR:
1707 return LowerFRAMEADDR(Op, DAG);
1708 case ISD::CopyToReg:
1709 return LowerCopyToReg(Op, DAG);
1710 case ISD::EXTRACT_VECTOR_ELT:
1711 case ISD::INSERT_VECTOR_ELT:
1712 return LowerAccessVectorElement(Op, DAG);
1713 case ISD::INTRINSIC_VOID:
1714 case ISD::INTRINSIC_WO_CHAIN:
1715 case ISD::INTRINSIC_W_CHAIN:
1716 return LowerIntrinsic(Op, DAG);
1717 case ISD::SIGN_EXTEND_INREG:
1718 return LowerSIGN_EXTEND_INREG(Op, DAG);
1719 case ISD::ZERO_EXTEND_VECTOR_INREG:
1720 case ISD::SIGN_EXTEND_VECTOR_INREG:
1721 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1722 case ISD::BUILD_VECTOR:
1723 return LowerBUILD_VECTOR(Op, DAG);
1724 case ISD::VECTOR_SHUFFLE:
1725 return LowerVECTOR_SHUFFLE(Op, DAG);
1726 case ISD::SETCC:
1727 return LowerSETCC(Op, DAG);
1728 case ISD::SHL:
1729 case ISD::SRA:
1730 case ISD::SRL:
1731 return LowerShift(Op, DAG);
1732 case ISD::FP_TO_SINT_SAT:
1733 case ISD::FP_TO_UINT_SAT:
1734 return LowerFP_TO_INT_SAT(Op, DAG);
1735 case ISD::LOAD:
1736 return LowerLoad(Op, DAG);
1737 case ISD::STORE:
1738 return LowerStore(Op, DAG);
1739 case ISD::CTPOP:
1740 case ISD::CTLZ:
1741 case ISD::CTTZ:
1742 return DAG.UnrollVectorOp(Op.getNode());
1743 case ISD::CLEAR_CACHE:
1744 report_fatal_error("llvm.clear_cache is not supported on wasm");
1745 case ISD::SMUL_LOHI:
1746 case ISD::UMUL_LOHI:
1747 return LowerMUL_LOHI(Op, DAG);
1748 case ISD::UADDO:
1749 return LowerUADDO(Op, DAG);
1750 }
1751}
1752
1753 static bool IsWebAssemblyGlobal(SDValue Op) {
1754 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1755 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1756
1757 return false;
1758}
1759
1760static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1761 SelectionDAG &DAG) {
1762 const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
1763 if (!FI)
1764 return std::nullopt;
1765
1766 auto &MF = DAG.getMachineFunction();
1767 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1768}
1769
1770SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1771 SelectionDAG &DAG) const {
1772 SDLoc DL(Op);
1773 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1774 const SDValue &Value = SN->getValue();
1775 const SDValue &Base = SN->getBasePtr();
1776 const SDValue &Offset = SN->getOffset();
1777
1778 if (IsWebAssemblyGlobal(Base)) {
1779 if (!Offset->isUndef())
1780 report_fatal_error("unexpected offset when storing to webassembly global",
1781 false);
1782
1783 SDVTList Tys = DAG.getVTList(MVT::Other);
1784 SDValue Ops[] = {SN->getChain(), Value, Base};
1785 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1786 SN->getMemoryVT(), SN->getMemOperand());
1787 }
1788
1789 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1790 if (!Offset->isUndef())
1791 report_fatal_error("unexpected offset when storing to webassembly local",
1792 false);
1793
1794 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1795 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1796 SDValue Ops[] = {SN->getChain(), Idx, Value};
1797 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1798 }
1799
1802 "Encountered an unlowerable store to the wasm_var address space",
1803 false);
1804
1805 return Op;
1806}
1807
1808SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1809 SelectionDAG &DAG) const {
1810 SDLoc DL(Op);
1811 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1812 const SDValue &Base = LN->getBasePtr();
1813 const SDValue &Offset = LN->getOffset();
1814
1815 if (IsWebAssemblyGlobal(Base)) {
1816 if (!Offset->isUndef())
1817 report_fatal_error(
1818 "unexpected offset when loading from webassembly global", false);
1819
1820 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1821 SDValue Ops[] = {LN->getChain(), Base};
1822 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1823 LN->getMemoryVT(), LN->getMemOperand());
1824 }
1825
1826 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1827 if (!Offset->isUndef())
1829 "unexpected offset when loading from webassembly local", false);
1830
1831 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1832 EVT LocalVT = LN->getValueType(0);
1833 SDValue LocalGet = DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, LocalVT,
1834 {LN->getChain(), Idx});
1835 SDValue Result = DAG.getMergeValues({LocalGet, LN->getChain()}, DL);
1836 assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
1837 return Result;
1838 }
1839
1842 "Encountered an unlowerable load from the wasm_var address space",
1843 false);
1844
1845 return Op;
1846}
1847
1848SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1849 SelectionDAG &DAG) const {
1850 assert(Subtarget->hasWideArithmetic());
1851 assert(Op.getValueType() == MVT::i64);
1852 SDLoc DL(Op);
1853 unsigned Opcode;
1854 switch (Op.getOpcode()) {
1855 case ISD::UMUL_LOHI:
1856 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1857 break;
1858 case ISD::SMUL_LOHI:
1859 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1860 break;
1861 default:
1862 llvm_unreachable("unexpected opcode");
1863 }
1864 SDValue LHS = Op.getOperand(0);
1865 SDValue RHS = Op.getOperand(1);
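// The wide-multiply node yields two i64 results: result 0 is the low half
// and result 1 is the high half of the full 128-bit product.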
1866 SDValue Lo =
1867 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1868 SDValue Hi(Lo.getNode(), 1);
1869 SDValue Ops[] = {Lo, Hi};
1870 return DAG.getMergeValues(Ops, DL);
1871}
1872
1873// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1874//
1875// This enables generating a single wasm instruction for this operation where
1876// the upper half of both operands are constant zeros. The upper half of the
1877// result is then whether the overflow happened.
1878SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1879 SelectionDAG &DAG) const {
1880 assert(Subtarget->hasWideArithmetic());
1881 assert(Op.getValueType() == MVT::i64);
1882 assert(Op.getOpcode() == ISD::UADDO);
1883 SDLoc DL(Op);
1884 SDValue LHS = Op.getOperand(0);
1885 SDValue RHS = Op.getOperand(1);
1886 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
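// Add the operands as 128-bit values whose upper halves are zero; the upper
// half of the 128-bit sum is then exactly the carry-out of the 64-bit add.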
1887 SDValue Result =
1888 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1889 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1890 SDValue CarryI64(Result.getNode(), 1);
1891 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1892 SDValue Ops[] = {Result, CarryI32};
1893 return DAG.getMergeValues(Ops, DL);
1894}
1895
1896SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1897 SelectionDAG &DAG) const {
1898 assert(Subtarget->hasWideArithmetic());
1899 assert(N->getValueType(0) == MVT::i128);
1900 SDLoc DL(N);
1901 unsigned Opcode;
1902 switch (N->getOpcode()) {
1903 case ISD::ADD:
1904 Opcode = WebAssemblyISD::I64_ADD128;
1905 break;
1906 case ISD::SUB:
1907 Opcode = WebAssemblyISD::I64_SUB128;
1908 break;
1909 default:
1910 llvm_unreachable("unexpected opcode");
1911 }
1912 SDValue LHS = N->getOperand(0);
1913 SDValue RHS = N->getOperand(1);
1914
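// Split each i128 operand into its low (index 0) and high (index 1) i64
// halves, feed them to the 128-bit add/sub node, and reassemble the result.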
1915 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1916 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1917 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1918 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1919 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1920 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1921 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1922 LHS_0, LHS_1, RHS_0, RHS_1);
1923 SDValue Result_HI(Result_LO.getNode(), 1);
1924 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1925}
1926
1927SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1928 SelectionDAG &DAG) const {
1929 SDValue Src = Op.getOperand(2);
1930 if (isa<FrameIndexSDNode>(Src.getNode())) {
1931 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1932 // the FI to some LEA-like instruction, but since we don't have that, we
1933 // need to insert some kind of instruction that can take an FI operand and
1934 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1935 // local.copy between Op and its FI operand.
1936 SDValue Chain = Op.getOperand(0);
1937 SDLoc DL(Op);
1938 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1939 EVT VT = Src.getValueType();
1940 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1941 : WebAssembly::COPY_I64,
1942 DL, VT, Src),
1943 0);
1944 return Op.getNode()->getNumValues() == 1
1945 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1946 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1947 Op.getNumOperands() == 4 ? Op.getOperand(3)
1948 : SDValue());
1949 }
1950 return SDValue();
1951}
1952
1953SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1954 SelectionDAG &DAG) const {
1955 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1956 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1957}
1958
1959SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1960 SelectionDAG &DAG) const {
1961 SDLoc DL(Op);
1962
1963 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1964 fail(DL, DAG,
1965 "Non-Emscripten WebAssembly hasn't implemented "
1966 "__builtin_return_address");
1967 return SDValue();
1968 }
1969
1970 unsigned Depth = Op.getConstantOperandVal(0);
1971 MakeLibCallOptions CallOptions;
1972 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1973 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1974 .first;
1975}
1976
1977SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1978 SelectionDAG &DAG) const {
1979 // Non-zero depths are not supported by WebAssembly currently. Use the
1980 // legalizer's default expansion, which is to return 0 (what this function is
1981 // documented to do).
1982 if (Op.getConstantOperandVal(0) > 0)
1983 return SDValue();
1984
1986 EVT VT = Op.getValueType();
1987 Register FP =
1988 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1989 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1990}
1991
1992SDValue
1993WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1994 SelectionDAG &DAG) const {
1995 SDLoc DL(Op);
1996 const auto *GA = cast<GlobalAddressSDNode>(Op);
1997
1998 MachineFunction &MF = DAG.getMachineFunction();
1999 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2000 report_fatal_error("cannot use thread-local storage without bulk memory",
2001 false);
2002
2003 const GlobalValue *GV = GA->getGlobal();
2004
2005 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2006 // on other targets, if we have thread-local storage, only the local-exec
2007 // model is possible.
2008 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2009 ? GV->getThreadLocalMode()
2010 : GlobalValue::LocalExecTLSModel;
2011
2012 // Unsupported TLS modes
2013 assert(model != GlobalValue::NotThreadLocal);
2014 assert(model != GlobalValue::InitialExecTLSModel);
2015
2016 if (model == GlobalValue::LocalExecTLSModel ||
2017 model == GlobalValue::LocalDynamicTLSModel ||
2018 (model == GlobalValue::GeneralDynamicTLSModel &&
2019 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2020 // For DSO-local TLS variables we use offset from __tls_base
2021
2022 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2023 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2024 : WebAssembly::GLOBAL_GET_I32;
2025 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2026
2027 SDValue BaseAddr(
2028 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2029 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2030 0);
2031
2032 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2033 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2034 SDValue SymOffset =
2035 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2036
2037 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2038 }
2039
2041
2042 EVT VT = Op.getValueType();
2043 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2044 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2045 GA->getOffset(),
2046 WebAssemblyII::MO_GOT_TLS));
2047}
2048
2049SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2050 SelectionDAG &DAG) const {
2051 SDLoc DL(Op);
2052 const auto *GA = cast<GlobalAddressSDNode>(Op);
2053 EVT VT = Op.getValueType();
2054 assert(GA->getTargetFlags() == 0 &&
2055 "Unexpected target flags on generic GlobalAddressSDNode");
2056 if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
2057 fail(DL, DAG, "Invalid address space for WebAssembly target");
2058
2059 unsigned OperandFlags = 0;
2060 const GlobalValue *GV = GA->getGlobal();
2061 // Since WebAssembly tables cannot yet be shared across modules, we don't
2062 // need special treatment for tables in PIC mode.
2063 if (isPositionIndependent() &&
2064 !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
2065 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2066 MachineFunction &MF = DAG.getMachineFunction();
2067 MVT PtrVT = getPointerTy(MF.getDataLayout());
2068 const char *BaseName;
2069 if (GV->getValueType()->isFunctionTy()) {
2070 BaseName = MF.createExternalSymbolName("__table_base");
2072 } else {
2073 BaseName = MF.createExternalSymbolName("__memory_base");
2075 }
2077 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2078 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2079
2080 SDValue SymAddr = DAG.getNode(
2081 WebAssemblyISD::WrapperREL, DL, VT,
2082 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2083 OperandFlags));
2084
2085 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2086 }
2087 OperandFlags = WebAssemblyII::MO_GOT;
2088 }
2089
2090 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2091 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2092 GA->getOffset(), OperandFlags));
2093}
2094
2095SDValue
2096WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2097 SelectionDAG &DAG) const {
2098 SDLoc DL(Op);
2099 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2100 EVT VT = Op.getValueType();
2101 assert(ES->getTargetFlags() == 0 &&
2102 "Unexpected target flags on generic ExternalSymbolSDNode");
2103 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2104 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2105}
2106
2107SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2108 SelectionDAG &DAG) const {
2109 // There's no need for a Wrapper node because we always incorporate a jump
2110 // table operand into a BR_TABLE instruction, rather than ever
2111 // materializing it in a register.
2112 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2113 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2114 JT->getTargetFlags());
2115}
2116
2117SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2118 SelectionDAG &DAG) const {
2119 SDLoc DL(Op);
2120 SDValue Chain = Op.getOperand(0);
2121 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2122 SDValue Index = Op.getOperand(2);
2123 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2124
2125 SmallVector<SDValue, 8> Ops;
2126 Ops.push_back(Chain);
2127 Ops.push_back(Index);
2128
2129 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2130 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2131
2132 // Add an operand for each case.
2133 for (auto *MBB : MBBs)
2134 Ops.push_back(DAG.getBasicBlock(MBB));
2135
2136 // Add the first MBB as a dummy default target for now. This will be replaced
2137 // with the proper default target (and the preceding range check eliminated)
2138 // if possible by WebAssemblyFixBrTableDefaults.
2139 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2140 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2141}
2142
2143SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2144 SelectionDAG &DAG) const {
2145 SDLoc DL(Op);
2146 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2147
2148 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2149 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2150
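// va_start just stores the address of the caller-allocated vararg buffer
// (recorded in the vararg buffer vreg by LowerFormalArguments) into the
// va_list object.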
2151 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2152 MFI->getVarargBufferVreg(), PtrVT);
2153 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2154 MachinePointerInfo(SV));
2155}
2156
2157SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2158 SelectionDAG &DAG) const {
2159 MachineFunction &MF = DAG.getMachineFunction();
2160 unsigned IntNo;
2161 switch (Op.getOpcode()) {
2162 case ISD::INTRINSIC_VOID:
2163 case ISD::INTRINSIC_W_CHAIN:
2164 IntNo = Op.getConstantOperandVal(1);
2165 break;
2166 case ISD::INTRINSIC_WO_CHAIN:
2167 IntNo = Op.getConstantOperandVal(0);
2168 break;
2169 default:
2170 llvm_unreachable("Invalid intrinsic");
2171 }
2172 SDLoc DL(Op);
2173
2174 switch (IntNo) {
2175 default:
2176 return SDValue(); // Don't custom lower most intrinsics.
2177
2178 case Intrinsic::wasm_lsda: {
2179 auto PtrVT = getPointerTy(MF.getDataLayout());
2180 const char *SymName = MF.createExternalSymbolName(
2181 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2182 if (isPositionIndependent()) {
2183 SDValue Node = DAG.getTargetExternalSymbol(
2184 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2185 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2186 SDValue BaseAddr =
2187 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2188 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2189 SDValue SymAddr =
2190 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2191 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2192 }
2193 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2194 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2195 }
2196
2197 case Intrinsic::wasm_shuffle: {
2198 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2199 SDValue Ops[18];
2200 size_t OpIdx = 0;
2201 Ops[OpIdx++] = Op.getOperand(1);
2202 Ops[OpIdx++] = Op.getOperand(2);
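// Copy the 16 mask operands, canonicalizing undef or out-of-range (>= 32)
// indices to 0 so the shuffle node always carries valid immediates.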
2203 while (OpIdx < 18) {
2204 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2205 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2206 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2207 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2208 } else {
2209 Ops[OpIdx++] = MaskIdx;
2210 }
2211 }
2212 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2213 }
2214
2215 case Intrinsic::thread_pointer: {
2216 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2217 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2218 : WebAssembly::GLOBAL_GET_I32;
2219 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2220 return SDValue(
2221 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2222 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2223 0);
2224 }
2225 }
2226}
2227
2228SDValue
2229WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2230 SelectionDAG &DAG) const {
2231 SDLoc DL(Op);
2232 // If sign extension operations are disabled, allow sext_inreg only if operand
2233 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2234 // extension operations, but allowing sext_inreg in this context lets us have
2235 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2236 // everywhere would be simpler in this file, but would necessitate large and
2237 // brittle patterns to undo the expansion and select extract_lane_s
2238 // instructions.
2239 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2240 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2241 return SDValue();
2242
2243 const SDValue &Extract = Op.getOperand(0);
2244 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2245 if (VecT.getVectorElementType().getSizeInBits() > 32)
2246 return SDValue();
2247 MVT ExtractedLaneT =
2248 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2249 MVT ExtractedVecT =
2250 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2251 if (ExtractedVecT == VecT)
2252 return Op;
2253
2254 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2255 const SDNode *Index = Extract.getOperand(1).getNode();
2256 if (!isa<ConstantSDNode>(Index))
2257 return SDValue();
2258 unsigned IndexVal = Index->getAsZExtVal();
2259 unsigned Scale =
2260 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2261 assert(Scale > 1);
2262 SDValue NewIndex =
2263 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2264 SDValue NewExtract = DAG.getNode(
2265 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
2266 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2267 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2268 Op.getOperand(1));
2269}
2270
2271static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2272 SelectionDAG &DAG) {
2273 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2274 return SDValue();
2275
2276 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2277 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2278 "expected extend_low");
2279 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2280
2281 ArrayRef<int> Mask = Shuffle->getMask();
2282 // Look for a shuffle which moves from the high half to the low half.
2283 size_t FirstIdx = Mask.size() / 2;
2284 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2285 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2286 return SDValue();
2287 }
2288 }
2289
2290 SDLoc DL(Op);
2291 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2292 ? WebAssemblyISD::EXTEND_HIGH_S
2293 : WebAssemblyISD::EXTEND_HIGH_U;
2294 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2295}
2296
2297SDValue
2298WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2299 SelectionDAG &DAG) const {
2300 SDLoc DL(Op);
2301 EVT VT = Op.getValueType();
2302 SDValue Src = Op.getOperand(0);
2303 EVT SrcVT = Src.getValueType();
2304
2305 if (SrcVT.getVectorElementType() == MVT::i1 ||
2306 SrcVT.getVectorElementType() == MVT::i64)
2307 return SDValue();
2308
2309 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2310 "Unexpected extension factor.");
2311 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2312
2313 if (Scale != 2 && Scale != 4 && Scale != 8)
2314 return SDValue();
2315
2316 unsigned Ext;
2317 switch (Op.getOpcode()) {
2318 case ISD::ZERO_EXTEND_VECTOR_INREG:
2319 Ext = WebAssemblyISD::EXTEND_LOW_U;
2320 break;
2321 case ISD::SIGN_EXTEND_VECTOR_INREG:
2322 Ext = WebAssemblyISD::EXTEND_LOW_S;
2323 break;
2324 }
2325
2326 if (Scale == 2) {
2327 // See if we can use EXTEND_HIGH.
2328 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2329 return ExtendHigh;
2330 }
2331
2332 SDValue Ret = Src;
2333 while (Scale != 1) {
2334 Ret = DAG.getNode(Ext, DL,
2335 Ret.getValueType()
2336 .widenIntegerVectorElementType(*DAG.getContext())
2337 .getHalfNumVectorElementsVT(*DAG.getContext()),
2338 Ret);
2339 Scale /= 2;
2340 }
2341 assert(Ret.getValueType() == VT);
2342 return Ret;
2343}
2344
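// Lower a v2f64 BUILD_VECTOR whose two lanes are converted or promoted from
// lanes of v4i32/v4f32 sources into a single convert_low/promote_low of those
// lanes.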
2345 static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2346 SDLoc DL(Op);
2347 if (Op.getValueType() != MVT::v2f64)
2348 return SDValue();
2349
2350 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2351 unsigned &Index) -> bool {
2352 switch (Op.getOpcode()) {
2353 case ISD::SINT_TO_FP:
2354 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2355 break;
2356 case ISD::UINT_TO_FP:
2357 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2358 break;
2359 case ISD::FP_EXTEND:
2360 Opcode = WebAssemblyISD::PROMOTE_LOW;
2361 break;
2362 default:
2363 return false;
2364 }
2365
2366 auto ExtractVector = Op.getOperand(0);
2367 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2368 return false;
2369
2370 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2371 return false;
2372
2373 SrcVec = ExtractVector.getOperand(0);
2374 Index = ExtractVector.getConstantOperandVal(1);
2375 return true;
2376 };
2377
2378 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2379 SDValue LHSSrcVec, RHSSrcVec;
2380 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2381 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2382 return SDValue();
2383
2384 if (LHSOpcode != RHSOpcode)
2385 return SDValue();
2386
2387 MVT ExpectedSrcVT;
2388 switch (LHSOpcode) {
2389 case WebAssemblyISD::CONVERT_LOW_S:
2390 case WebAssemblyISD::CONVERT_LOW_U:
2391 ExpectedSrcVT = MVT::v4i32;
2392 break;
2393 case WebAssemblyISD::PROMOTE_LOW:
2394 ExpectedSrcVT = MVT::v4f32;
2395 break;
2396 }
2397 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2398 return SDValue();
2399
2400 auto Src = LHSSrcVec;
2401 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2402 // Shuffle the source vector so that the converted lanes are the low lanes.
2403 Src = DAG.getVectorShuffle(
2404 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2405 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2406 }
2407 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2408}
2409
2410SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2411 SelectionDAG &DAG) const {
2412 MVT VT = Op.getSimpleValueType();
2413 if (VT == MVT::v8f16) {
2414 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2415 // FP16 type, so cast them to I16s.
2416 MVT IVT = VT.changeVectorElementType(MVT::i16);
2417 SmallVector<SDValue, 8> NewOps;
2418 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2419 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2420 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2421 return DAG.getBitcast(VT, Res);
2422 }
2423
2424 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2425 return ConvertLow;
2426
2427 SDLoc DL(Op);
2428 const EVT VecT = Op.getValueType();
2429 const EVT LaneT = Op.getOperand(0).getValueType();
2430 const size_t Lanes = Op.getNumOperands();
2431 bool CanSwizzle = VecT == MVT::v16i8;
2432
2433 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2434 // possible number of lanes at once followed by a sequence of replace_lane
2435 // instructions to individually initialize any remaining lanes.
2436
2437 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2438 // swizzled lanes should be given greater weight.
2439
2440 // TODO: Investigate looping rather than always extracting/replacing specific
2441 // lanes to fill gaps.
2442
2443 auto IsConstant = [](const SDValue &V) {
2444 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2445 };
2446
2447 // Returns the source vector and index vector pair if they exist. Checks for:
2448 // (extract_vector_elt
2449 // $src,
2450 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2451 // )
2452 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2453 auto Bail = std::make_pair(SDValue(), SDValue());
2454 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2455 return Bail;
2456 const SDValue &SwizzleSrc = Lane->getOperand(0);
2457 const SDValue &IndexExt = Lane->getOperand(1);
2458 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2459 return Bail;
2460 const SDValue &Index = IndexExt->getOperand(0);
2461 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2462 return Bail;
2463 const SDValue &SwizzleIndices = Index->getOperand(0);
2464 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2465 SwizzleIndices.getValueType() != MVT::v16i8 ||
2466 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2467 Index->getConstantOperandVal(1) != I)
2468 return Bail;
2469 return std::make_pair(SwizzleSrc, SwizzleIndices);
2470 };
2471
2472 // If the lane is extracted from another vector at a constant index, return
2473 // that vector. The source vector must not have more lanes than the dest
2474 // because the shufflevector indices are in terms of the destination lanes and
2475 // would not be able to address the smaller individual source lanes.
2476 auto GetShuffleSrc = [&](const SDValue &Lane) {
2477 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2478 return SDValue();
2479 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2480 return SDValue();
2481 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2482 VecT.getVectorNumElements())
2483 return SDValue();
2484 return Lane->getOperand(0);
2485 };
2486
2487 using ValueEntry = std::pair<SDValue, size_t>;
2488 SmallVector<ValueEntry, 16> SplatValueCounts;
2489
2490 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2491 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2492
2493 using ShuffleEntry = std::pair<SDValue, size_t>;
2494 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2495
2496 auto AddCount = [](auto &Counts, const auto &Val) {
2497 auto CountIt =
2498 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2499 if (CountIt == Counts.end()) {
2500 Counts.emplace_back(Val, 1);
2501 } else {
2502 CountIt->second++;
2503 }
2504 };
2505
2506 auto GetMostCommon = [](auto &Counts) {
2507 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2508 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2509 return *CommonIt;
2510 };
2511
2512 size_t NumConstantLanes = 0;
2513
2514 // Count eligible lanes for each type of vector creation op
2515 for (size_t I = 0; I < Lanes; ++I) {
2516 const SDValue &Lane = Op->getOperand(I);
2517 if (Lane.isUndef())
2518 continue;
2519
2520 AddCount(SplatValueCounts, Lane);
2521
2522 if (IsConstant(Lane))
2523 NumConstantLanes++;
2524 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2525 AddCount(ShuffleCounts, ShuffleSrc);
2526 if (CanSwizzle) {
2527 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2528 if (SwizzleSrcs.first)
2529 AddCount(SwizzleCounts, SwizzleSrcs);
2530 }
2531 }
2532
2533 SDValue SplatValue;
2534 size_t NumSplatLanes;
2535 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2536
2537 SDValue SwizzleSrc;
2538 SDValue SwizzleIndices;
2539 size_t NumSwizzleLanes = 0;
2540 if (SwizzleCounts.size())
2541 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2542 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2543
2544 // Shuffles can draw from up to two vectors, so find the two most common
2545 // sources.
2546 SDValue ShuffleSrc1, ShuffleSrc2;
2547 size_t NumShuffleLanes = 0;
2548 if (ShuffleCounts.size()) {
2549 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2550 llvm::erase_if(ShuffleCounts,
2551 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2552 }
2553 if (ShuffleCounts.size()) {
2554 size_t AdditionalShuffleLanes;
2555 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2556 GetMostCommon(ShuffleCounts);
2557 NumShuffleLanes += AdditionalShuffleLanes;
2558 }
2559
2560 // Predicate returning true if the lane is properly initialized by the
2561 // original instruction
2562 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2563 SDValue Result;
2564 // Prefer swizzles over shuffles over vector consts over splats
2565 if (NumSwizzleLanes >= NumShuffleLanes &&
2566 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2567 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2568 SwizzleIndices);
2569 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2570 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2571 return Swizzled == GetSwizzleSrcs(I, Lane);
2572 };
2573 } else if (NumShuffleLanes >= NumConstantLanes &&
2574 NumShuffleLanes >= NumSplatLanes) {
2575 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2576 size_t DestLaneCount = VecT.getVectorNumElements();
2577 size_t Scale1 = 1;
2578 size_t Scale2 = 1;
2579 SDValue Src1 = ShuffleSrc1;
2580 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2581 if (Src1.getValueType() != VecT) {
2582 size_t LaneSize =
2583 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2584 assert(LaneSize > DestLaneSize);
2585 Scale1 = LaneSize / DestLaneSize;
2586 Src1 = DAG.getBitcast(VecT, Src1);
2587 }
2588 if (Src2.getValueType() != VecT) {
2589 size_t LaneSize =
2590 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2591 assert(LaneSize > DestLaneSize);
2592 Scale2 = LaneSize / DestLaneSize;
2593 Src2 = DAG.getBitcast(VecT, Src2);
2594 }
2595
2596 int Mask[16];
2597 assert(DestLaneCount <= 16);
2598 for (size_t I = 0; I < DestLaneCount; ++I) {
2599 const SDValue &Lane = Op->getOperand(I);
2600 SDValue Src = GetShuffleSrc(Lane);
2601 if (Src == ShuffleSrc1) {
2602 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2603 } else if (Src && Src == ShuffleSrc2) {
2604 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2605 } else {
2606 Mask[I] = -1;
2607 }
2608 }
2609 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2610 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2611 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2612 auto Src = GetShuffleSrc(Lane);
2613 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2614 };
2615 } else if (NumConstantLanes >= NumSplatLanes) {
2616 SmallVector<SDValue, 16> ConstLanes;
2617 for (const SDValue &Lane : Op->op_values()) {
2618 if (IsConstant(Lane)) {
2619 // Values may need to be fixed so that they will sign extend to be
2620 // within the expected range during ISel. Check whether the value is in
2621 // bounds based on the lane bit width and if it is out of bounds, lop
2622 // off the extra bits and subtract 2^n to reflect giving the high bit
2623 // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
2624 // cannot possibly be out of range.
2625 auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode());
2626 int64_t Val = Const ? Const->getSExtValue() : 0;
2627 uint64_t LaneBits = 128 / Lanes;
2628 assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
2629 "Unexpected out of bounds negative value");
2630 if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
2631 uint64_t Mask = (1ll << LaneBits) - 1;
2632 auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
2633 ConstLanes.push_back(DAG.getConstant(NewVal, SDLoc(Lane), LaneT));
2634 } else {
2635 ConstLanes.push_back(Lane);
2636 }
2637 } else if (LaneT.isFloatingPoint()) {
2638 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2639 } else {
2640 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2641 }
2642 }
2643 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2644 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2645 return IsConstant(Lane);
2646 };
2647 } else {
2648 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2649 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2650 (DestLaneSize == 32 || DestLaneSize == 64)) {
2651 // Could be selected to load_zero.
2652 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2653 } else {
2654 // Use a splat (which might be selected as a load splat)
2655 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2656 }
2657 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2658 return Lane == SplatValue;
2659 };
2660 }
2661
2662 assert(Result);
2663 assert(IsLaneConstructed);
2664
2665 // Add replace_lane instructions for any unhandled values
2666 for (size_t I = 0; I < Lanes; ++I) {
2667 const SDValue &Lane = Op->getOperand(I);
2668 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2669 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2670 DAG.getConstant(I, DL, MVT::i32));
2671 }
2672
2673 return Result;
2674}
2675
2676SDValue
2677WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2678 SelectionDAG &DAG) const {
2679 SDLoc DL(Op);
2680 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2681 MVT VecType = Op.getOperand(0).getSimpleValueType();
2682 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2683 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2684
2685 // Space for two vector args and sixteen mask indices
2686 SDValue Ops[18];
2687 size_t OpIdx = 0;
2688 Ops[OpIdx++] = Op.getOperand(0);
2689 Ops[OpIdx++] = Op.getOperand(1);
2690
2691 // Expand mask indices to byte indices and materialize them as operands
2692 for (int M : Mask) {
2693 for (size_t J = 0; J < LaneBytes; ++J) {
2694 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2695 // whole lane of vector input, to allow further reduction at VM. E.g.
2696 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2697 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2698 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2699 }
2700 }
2701
2702 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2703}
2704
2705SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2706 SelectionDAG &DAG) const {
2707 SDLoc DL(Op);
2708 // The legalizer does not know how to expand the unsupported comparison modes
2709 // of i64x2 vectors, so we manually unroll them here.
2710 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2711 SmallVector<SDValue, 2> LHS, RHS;
2712 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2713 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2714 const SDValue &CC = Op->getOperand(2);
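// Compare each lane with a scalar select_cc that produces all-ones on true
// and zero on false, matching vector comparison semantics.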
2715 auto MakeLane = [&](unsigned I) {
2716 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2717 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2718 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2719 };
2720 return DAG.getBuildVector(Op->getValueType(0), DL,
2721 {MakeLane(0), MakeLane(1)});
2722}
2723
2724SDValue
2725WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2726 SelectionDAG &DAG) const {
2727 // Allow constant lane indices, expand variable lane indices
2728 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2729 if (isa<ConstantSDNode>(IdxNode)) {
2730 // Ensure the index type is i32 to match the tablegen patterns
2731 uint64_t Idx = IdxNode->getAsZExtVal();
2732 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2733 Ops[Op.getNumOperands() - 1] =
2734 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2735 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2736 }
2737 // Perform default expansion
2738 return SDValue();
2739}
2740
2741 static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2742 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2743 // 32-bit and 64-bit unrolled shifts will have proper semantics
2744 if (LaneT.bitsGE(MVT::i32))
2745 return DAG.UnrollVectorOp(Op.getNode());
2746 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2747 SDLoc DL(Op);
2748 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2749 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2750 unsigned ShiftOpcode = Op.getOpcode();
2751 SmallVector<SDValue, 16> ShiftedElements;
2752 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2753 SmallVector<SDValue, 16> ShiftElements;
2754 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2755 SmallVector<SDValue, 16> UnrolledOps;
2756 for (size_t i = 0; i < NumLanes; ++i) {
2757 SDValue MaskedShiftValue =
2758 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2759 SDValue ShiftedValue = ShiftedElements[i];
2760 if (ShiftOpcode == ISD::SRA)
2761 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2762 ShiftedValue, DAG.getValueType(LaneT));
2763 UnrolledOps.push_back(
2764 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2765 }
2766 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2767}
2768
2769SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2770 SelectionDAG &DAG) const {
2771 SDLoc DL(Op);
2772
2773 // Only manually lower vector shifts
2774 assert(Op.getSimpleValueType().isVector());
2775
2776 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2777 auto ShiftVal = Op.getOperand(1);
2778
2779 // Try to skip bitmask operation since it is implied inside shift instruction
2780 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2781 if (MaskOp.getOpcode() != ISD::AND)
2782 return MaskOp;
2783 SDValue LHS = MaskOp.getOperand(0);
2784 SDValue RHS = MaskOp.getOperand(1);
2785 if (MaskOp.getValueType().isVector()) {
2786 APInt MaskVal;
2787 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2788 std::swap(LHS, RHS);
2789
2790 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2791 MaskVal == MaskBits)
2792 MaskOp = LHS;
2793 } else {
2794 if (!isa<ConstantSDNode>(RHS.getNode()))
2795 std::swap(LHS, RHS);
2796
2797 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2798 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2799 MaskOp = LHS;
2800 }
2801
2802 return MaskOp;
2803 };
2804
2805 // Skip vector and operation
2806 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2807 ShiftVal = DAG.getSplatValue(ShiftVal);
2808 if (!ShiftVal)
2809 return unrollVectorShift(Op, DAG);
2810
2811 // Skip scalar and operation
2812 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2813 // Use anyext because none of the high bits can affect the shift
2814 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2815
2816 unsigned Opcode;
2817 switch (Op.getOpcode()) {
2818 case ISD::SHL:
2819 Opcode = WebAssemblyISD::VEC_SHL;
2820 break;
2821 case ISD::SRA:
2822 Opcode = WebAssemblyISD::VEC_SHR_S;
2823 break;
2824 case ISD::SRL:
2825 Opcode = WebAssemblyISD::VEC_SHR_U;
2826 break;
2827 default:
2828 llvm_unreachable("unexpected opcode");
2829 }
2830
2831 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2832}
2833
2834SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2835 SelectionDAG &DAG) const {
2836 EVT ResT = Op.getValueType();
2837 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2838
2839 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2840 (SatVT == MVT::i32 || SatVT == MVT::i64))
2841 return Op;
2842
2843 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2844 return Op;
2845
2846 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2847 return Op;
2848
2849 return SDValue();
2850}
2851
2852//===----------------------------------------------------------------------===//
2853// Custom DAG combine hooks
2854//===----------------------------------------------------------------------===//
2855static SDValue
2856 performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2857 auto &DAG = DCI.DAG;
2858 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2859
2860 // Hoist vector bitcasts that don't change the number of lanes out of unary
2861 // shuffles, where they are less likely to get in the way of other combines.
2862 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2863 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2864 SDValue Bitcast = N->getOperand(0);
2865 if (Bitcast.getOpcode() != ISD::BITCAST)
2866 return SDValue();
2867 if (!N->getOperand(1).isUndef())
2868 return SDValue();
2869 SDValue CastOp = Bitcast.getOperand(0);
2870 EVT SrcType = CastOp.getValueType();
2871 EVT DstType = Bitcast.getValueType();
2872 if (!SrcType.is128BitVector() ||
2873 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2874 return SDValue();
2875 SDValue NewShuffle = DAG.getVectorShuffle(
2876 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2877 return DAG.getBitcast(DstType, NewShuffle);
2878}
2879
2880/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2881/// split up into scalar instructions during legalization, and the vector
2882/// extending instructions are selected in performVectorExtendCombine below.
2883static SDValue
2884 performVectorExtendToFPCombine(SDNode *N,
2885 TargetLowering::DAGCombinerInfo &DCI) {
2886 auto &DAG = DCI.DAG;
2887 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2888 N->getOpcode() == ISD::SINT_TO_FP);
2889
2890 EVT InVT = N->getOperand(0)->getValueType(0);
2891 EVT ResVT = N->getValueType(0);
2892 MVT ExtVT;
2893 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2894 ExtVT = MVT::v4i32;
2895 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2896 ExtVT = MVT::v2i32;
2897 else
2898 return SDValue();
2899
2900 unsigned Op =
2901 N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
2902 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2903 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2904}
2905
2906static SDValue
2907 performVectorNonNegToFPCombine(SDNode *N,
2908 TargetLowering::DAGCombinerInfo &DCI) {
2909 auto &DAG = DCI.DAG;
2910
2911 SDNodeFlags Flags = N->getFlags();
2912 SDValue Op0 = N->getOperand(0);
2913 EVT VT = N->getValueType(0);
2914
2915 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2916 // Depending on the target (runtime) backend, this might be performance
2917 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2918 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2919 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2920 }
2921
2922 return SDValue();
2923}
2924
2925static SDValue
2926 performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2927 auto &DAG = DCI.DAG;
2928 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2929 N->getOpcode() == ISD::ZERO_EXTEND);
2930
2931 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2932 // possible before the extract_subvector can be expanded.
2933 auto Extract = N->getOperand(0);
2934 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2935 return SDValue();
2936 auto Source = Extract.getOperand(0);
2937 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2938 if (IndexNode == nullptr)
2939 return SDValue();
2940 auto Index = IndexNode->getZExtValue();
2941
2942 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2943 // extracted subvector is the low or high half of its source.
2944 EVT ResVT = N->getValueType(0);
2945 if (ResVT == MVT::v8i16) {
2946 if (Extract.getValueType() != MVT::v8i8 ||
2947 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2948 return SDValue();
2949 } else if (ResVT == MVT::v4i32) {
2950 if (Extract.getValueType() != MVT::v4i16 ||
2951 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2952 return SDValue();
2953 } else if (ResVT == MVT::v2i64) {
2954 if (Extract.getValueType() != MVT::v2i32 ||
2955 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2956 return SDValue();
2957 } else {
2958 return SDValue();
2959 }
2960
2961 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2962 bool IsLow = Index == 0;
2963
2964 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2965 : WebAssemblyISD::EXTEND_HIGH_S)
2966 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2967 : WebAssemblyISD::EXTEND_HIGH_U);
2968
2969 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2970}
2971
2972static SDValue
2973 performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2974 auto &DAG = DCI.DAG;
2975
2976 auto GetWasmConversionOp = [](unsigned Op) {
2977 switch (Op) {
2978 case ISD::FP_TO_SINT_SAT:
2979 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
2980 case ISD::FP_TO_UINT_SAT:
2981 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2982 case ISD::FP_ROUND:
2983 return WebAssemblyISD::DEMOTE_ZERO;
2984 }
2985 llvm_unreachable("unexpected op");
2986 };
2987
2988 auto IsZeroSplat = [](SDValue SplatVal) {
2989 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
2990 APInt SplatValue, SplatUndef;
2991 unsigned SplatBitSize;
2992 bool HasAnyUndefs;
2993 // Endianness doesn't matter in this context because we are looking for
2994 // an all-zero value.
2995 return Splat &&
2996 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
2997 HasAnyUndefs) &&
2998 SplatValue == 0;
2999 };
3000
3001 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3002 // Combine this:
3003 //
3004 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3005 //
3006 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3007 //
3008 // Or this:
3009 //
3010 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3011 //
3012 // into (f32x4.demote_zero_f64x2 $x).
3013 EVT ResVT;
3014 EVT ExpectedConversionType;
3015 auto Conversion = N->getOperand(0);
3016 auto ConversionOp = Conversion.getOpcode();
3017 switch (ConversionOp) {
3018 case ISD::FP_TO_SINT_SAT:
3019 case ISD::FP_TO_UINT_SAT:
3020 ResVT = MVT::v4i32;
3021 ExpectedConversionType = MVT::v2i32;
3022 break;
3023 case ISD::FP_ROUND:
3024 ResVT = MVT::v4f32;
3025 ExpectedConversionType = MVT::v2f32;
3026 break;
3027 default:
3028 return SDValue();
3029 }
3030
3031 if (N->getValueType(0) != ResVT)
3032 return SDValue();
3033
3034 if (Conversion.getValueType() != ExpectedConversionType)
3035 return SDValue();
3036
3037 auto Source = Conversion.getOperand(0);
3038 if (Source.getValueType() != MVT::v2f64)
3039 return SDValue();
3040
3041 if (!IsZeroSplat(N->getOperand(1)) ||
3042 N->getOperand(1).getValueType() != ExpectedConversionType)
3043 return SDValue();
3044
3045 unsigned Op = GetWasmConversionOp(ConversionOp);
3046 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3047 }
3048
3049 // Combine this:
3050 //
3051 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3052 //
3053 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3054 //
3055 // Or this:
3056 //
3057 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3058 //
3059 // into (f32x4.demote_zero_f64x2 $x).
3060 EVT ResVT;
3061 auto ConversionOp = N->getOpcode();
3062 switch (ConversionOp) {
3063 case ISD::FP_TO_SINT_SAT:
3064 case ISD::FP_TO_UINT_SAT:
3065 ResVT = MVT::v4i32;
3066 break;
3067 case ISD::FP_ROUND:
3068 ResVT = MVT::v4f32;
3069 break;
3070 default:
3071 llvm_unreachable("unexpected op");
3072 }
3073
3074 if (N->getValueType(0) != ResVT)
3075 return SDValue();
3076
3077 auto Concat = N->getOperand(0);
3078 if (Concat.getValueType() != MVT::v4f64)
3079 return SDValue();
3080
3081 auto Source = Concat.getOperand(0);
3082 if (Source.getValueType() != MVT::v2f64)
3083 return SDValue();
3084
3085 if (!IsZeroSplat(Concat.getOperand(1)) ||
3086 Concat.getOperand(1).getValueType() != MVT::v2f64)
3087 return SDValue();
3088
3089 unsigned Op = GetWasmConversionOp(ConversionOp);
3090 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3091}
3092
3093// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3094static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3095 const SDLoc &DL, unsigned VectorWidth) {
3096 EVT VT = Vec.getValueType();
3097 EVT ElVT = VT.getVectorElementType();
3098 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3099 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3100 VT.getVectorNumElements() / Factor);
3101
3102 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3103 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3104 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3105
3106 // This is the index of the first element of the VectorWidth-bit chunk
3107 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3108 IdxVal &= ~(ElemsPerChunk - 1);
3109
3110 // If the input is a buildvector just emit a smaller one.
3111 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3112 return DAG.getBuildVector(ResultVT, DL,
3113 Vec->ops().slice(IdxVal, ElemsPerChunk));
3114
3115 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3116 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3117}
3118
3119// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3120// is the expected destination value type after recursion. In is the initial
3121// input. Note that the input should have enough leading zero bits to prevent
3122// NARROW_U from saturating results.
3123 static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3124 SelectionDAG &DAG) {
3125 EVT SrcVT = In.getValueType();
3126
3127 // No truncation required, we might get here due to recursive calls.
3128 if (SrcVT == DstVT)
3129 return In;
3130
3131 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3132 unsigned NumElems = SrcVT.getVectorNumElements();
3133 if (!isPowerOf2_32(NumElems))
3134 return SDValue();
3135 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3136 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3137
3138 LLVMContext &Ctx = *DAG.getContext();
3139 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3140
3141 // Narrow to the largest type possible:
3142 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3143 EVT InVT = MVT::i16, OutVT = MVT::i8;
3144 if (SrcVT.getScalarSizeInBits() > 16) {
3145 InVT = MVT::i32;
3146 OutVT = MVT::i16;
3147 }
3148 unsigned SubSizeInBits = SrcSizeInBits / 2;
3149 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3150 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3151
3152 // Split lower/upper subvectors.
3153 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3154 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3155
3156 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3157 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3158 Lo = DAG.getBitcast(InVT, Lo);
3159 Hi = DAG.getBitcast(InVT, Hi);
3160 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3161 return DAG.getBitcast(DstVT, Res);
3162 }
3163
3164 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3165 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3166 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3167 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3168
3169 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3170 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3171 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3172}
3173
3174 static SDValue
3175 performTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3176 auto &DAG = DCI.DAG;
3177
3178 SDValue In = N->getOperand(0);
3179 EVT InVT = In.getValueType();
3180 if (!InVT.isSimple())
3181 return SDValue();
3182
3183 EVT OutVT = N->getValueType(0);
3184 if (!OutVT.isVector())
3185 return SDValue();
3186
3187 EVT OutSVT = OutVT.getVectorElementType();
3188 EVT InSVT = InVT.getVectorElementType();
3189 // Currently only cover truncate to v16i8 or v8i16.
3190 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3191 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3192 return SDValue();
3193
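// Mask off the bits above the destination element width first so NARROW_U's
// unsigned saturation cannot clamp values that are actually in range.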
3194 SDLoc DL(N);
3195 APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3196 OutVT.getScalarSizeInBits());
3197 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3198 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3199}
3200
3201 static SDValue
3202 performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3203 using namespace llvm::SDPatternMatch;
3204 auto &DAG = DCI.DAG;
3205 SDLoc DL(N);
3206 SDValue Src = N->getOperand(0);
3207 EVT VT = N->getValueType(0);
3208 EVT SrcVT = Src.getValueType();
3209
3210 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3211 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3212 return SDValue();
3213
3214 unsigned NumElts = SrcVT.getVectorNumElements();
3215 EVT Width = MVT::getIntegerVT(128 / NumElts);
3216
3217 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3218 // ==> bitmask
3219 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3220 return DAG.getZExtOrTrunc(
3221 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3222 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3223 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3224 SrcVT.changeVectorElementType(Width))}),
3225 DL, VT);
3226 }
3227
3228 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3229 if (NumElts == 32 || NumElts == 64) {
3230 // Strategy: We will setcc them separately in v16i8 -> v16i1
3231 // Bitcast them to i16, extend them to either i32 or i64.
3232 // Add them together, shifting left 1 by 1.
3233 SDValue Concat, SetCCVector;
3234 ISD::CondCode SetCond;
3235
3236 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3237 m_CondCode(SetCond)))))
3238 return SDValue();
3239 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3240 return SDValue();
3241
3242 uint64_t ElementWidth =
3243 SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3244
3245 SmallVector<SDValue> VectorsToShuffle;
3246 for (size_t I = 0; I < Concat->ops().size(); I++) {
3247 VectorsToShuffle.push_back(DAG.getBitcast(
3248 MVT::i16,
3249 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3250 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3251 DAG, DL, 128),
3252 SetCond)));
3253 }
3254
3255 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3256 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3257
3258 for (SDValue V : VectorsToShuffle) {
3259 ReturningInteger = DAG.getNode(
3260 ISD::SHL, DL, ReturnType,
3261 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3262
3263 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3264 ReturningInteger =
3265 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3266 }
3267
3268 return ReturningInteger;
3269 }
3270
3271 return SDValue();
3272}
3273
3274 static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3275 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3276 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3277 // any_true (setcc <X>, 0, ne) => (any_true X)
3278 // all_true (setcc <X>, 0, ne) => (all_true X)
3279 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3280 using namespace llvm::SDPatternMatch;
3281
3282 SDValue LHS;
3283 if (N->getNumOperands() < 2 ||
3284 !sd_match(N->getOperand(1),
3286 return SDValue();
3287 EVT LT = LHS.getValueType();
3288 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3289 return SDValue();
3290
3291 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3292 ISD::CondCode SetType,
3293 Intrinsic::WASMIntrinsics InPost) {
3294 if (N->getConstantOperandVal(0) != InPre)
3295 return SDValue();
3296
3297 SDValue LHS;
3298 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3299 m_SpecificCondCode(SetType))))
3300 return SDValue();
3301
3302 SDLoc DL(N);
3303 SDValue Ret = DAG.getZExtOrTrunc(
3304 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3305 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3306 DL, MVT::i1);
3307 if (SetType == ISD::SETEQ)
3308 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3309 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3310 };
3311
3312 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3313 Intrinsic::wasm_alltrue))
3314 return AnyTrueEQ;
3315 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3316 Intrinsic::wasm_anytrue))
3317 return AllTrueEQ;
3318 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3319 Intrinsic::wasm_anytrue))
3320 return AnyTrueNE;
3321 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3322 Intrinsic::wasm_alltrue))
3323 return AllTrueNE;
3324
3325 return SDValue();
3326}
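// Illustrative example (a sketch, not part of the upstream source): for
//   %r = @llvm.wasm.anytrue.v4i32(setcc <4 x i32> %v, zeroinitializer, eq)
// the combine above drops the vector compare and emits
//   %t = @llvm.wasm.alltrue.v4i32(%v)
//   %r = zext (not (trunc %t to i1)) to i32
// "some lane compares equal to zero" is the same as "not every lane is
// non-zero", so the setcc result never has to be materialized.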
3327
3328template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3329 Intrinsic::ID Intrin>
3330static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3331 SDValue LHS = N->getOperand(0);
3332 SDValue RHS = N->getOperand(1);
3333 SDValue Cond = N->getOperand(2);
3334 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3335 return SDValue();
3336
3337 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3338 return SDValue();
3339
3340 SDLoc DL(N);
3341 SDValue Ret = DAG.getZExtOrTrunc(
3342 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3343 {DAG.getConstant(Intrin, DL, MVT::i32),
3344 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3345 DL, MVT::i1);
3346 if (RequiresNegate)
3347 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3348 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3349}
3350
3351/// Try to convert an i128 comparison to a v16i8 comparison before type
3352/// legalization splits it up into chunks
3353static SDValue
3354combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3355 const WebAssemblySubtarget *Subtarget) {
3356
3357 SDLoc DL(N);
3358 SDValue X = N->getOperand(0);
3359 SDValue Y = N->getOperand(1);
3360 EVT VT = N->getValueType(0);
3361 EVT OpVT = X.getValueType();
3362
3363 SelectionDAG &DAG = DCI.DAG;
3364 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3365 Attribute::NoImplicitFloat))
3366 return SDValue();
3367
3368 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3369 // We're looking for an oversized integer equality comparison with SIMD
3370 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3371 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3372 return SDValue();
3373
3374 // Don't perform this combine if constructing the vector will be expensive.
3375 auto IsVectorBitCastCheap = [](SDValue X) {
3376 X = peekThroughBitcasts(X);
3377 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3378 };
3379
3380 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3381 return SDValue();
3382
3383 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3384 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3385 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3386
3387 SDValue Intr =
3388 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3389 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3390 : Intrinsic::wasm_anytrue,
3391 DL, MVT::i32),
3392 Cmp});
3393
3394 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3395 ISD::SETNE);
3396}
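// Illustrative example (a sketch, not part of the upstream source): assuming
// %x and %y are loads, the combine above turns
//   %eq = icmp eq i128 %x, %y
// into roughly
//   %cmp = i8x16.eq (bitcast %x to v16i8), (bitcast %y to v16i8)
//   %all = i8x16.all_true %cmp
//   %eq  = %all != 0
// instead of letting type legalization split the comparison into two i64
// compares. The SETNE form uses any_true on the lane-wise NE compare instead.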
3397
3398static SDValue performSETCCCombine(SDNode *N,
3399 TargetLowering::DAGCombinerInfo &DCI,
3400 const WebAssemblySubtarget *Subtarget) {
3401 if (!DCI.isBeforeLegalize())
3402 return SDValue();
3403
3404 EVT VT = N->getValueType(0);
3405 if (!VT.isScalarInteger())
3406 return SDValue();
3407
3408 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3409 return V;
3410
3411 SDValue LHS = N->getOperand(0);
3412 if (LHS->getOpcode() != ISD::BITCAST)
3413 return SDValue();
3414
3415 EVT FromVT = LHS->getOperand(0).getValueType();
3416 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3417 return SDValue();
3418
3419 unsigned NumElts = FromVT.getVectorNumElements();
3420 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3421 return SDValue();
3422
3423 if (!cast<ConstantSDNode>(N->getOperand(1)))
3424 return SDValue();
3425
3426 EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
3427 auto &DAG = DCI.DAG;
3428 // setcc (iN (bitcast (vNi1 X))), 0, ne
3429 // ==> any_true (vNi1 X)
3430 if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3431 N, VecVT, DAG)) {
3432 return Match;
3433 }
3434 // setcc (iN (bitcast (vNi1 X))), 0, eq
3435 // ==> xor (any_true (vNi1 X)), -1
3436 if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3437 N, VecVT, DAG)) {
3438 return Match;
3439 }
3440 // setcc (iN (bitcast (vNi1 X))), -1, eq
3441 // ==> all_true (vNi1 X)
3442 if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3443 N, VecVT, DAG)) {
3444 return Match;
3445 }
3446 // setcc (iN (bitcast (vNi1 X))), -1, ne
3447 // ==> xor (all_true (vNi1 X)), -1
3448 if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3449 N, VecVT, DAG)) {
3450 return Match;
3451 }
3452 return SDValue();
3453}
3454
3455static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3456 EVT VT = N->getValueType(0);
3457 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3458 return SDValue();
3459
3460 // Mul with extending inputs.
3461 SDValue LHS = N->getOperand(0);
3462 SDValue RHS = N->getOperand(1);
3463 if (LHS.getOpcode() != RHS.getOpcode())
3464 return SDValue();
3465
3466 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3467 LHS.getOpcode() != ISD::ZERO_EXTEND)
3468 return SDValue();
3469
3470 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3471 return SDValue();
3472
3473 EVT FromVT = LHS->getOperand(0).getValueType();
3474 EVT EltTy = FromVT.getVectorElementType();
3475 if (EltTy != MVT::i8)
3476 return SDValue();
3477
3478 // For an input DAG that looks like this
3479 // %a = input_type
3480 // %b = input_type
3481 // %lhs = extend %a to output_type
3482 // %rhs = extend %b to output_type
3483 // %mul = mul %lhs, %rhs
3484
3485 // input_type | output_type | instructions
3486 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3487 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3488 // | | %low_low = i32x4.ext_low_i16x8_ %low
3489 // | | %low_high = i32x4.ext_high_i16x8_ %low
3490 // | | %high_low = i32x4.ext_low_i16x8_ %high
3491 // | | %high_high = i32x4.ext_high_i16x8_ %high
3492 // | | %res = concat_vector(...)
3493 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3494 // | | %low_low = i32x4.ext_low_i16x8_ %low
3495 // | | %low_high = i32x4.ext_high_i16x8_ %low
3496 // | | %res = concat_vector(%low_low, %low_high)
3497
3498 SDLoc DL(N);
3499 unsigned NumElts = VT.getVectorNumElements();
3500 SDValue ExtendInLHS = LHS->getOperand(0);
3501 SDValue ExtendInRHS = RHS->getOperand(0);
3502 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3503 unsigned ExtendLowOpc =
3504 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3505 unsigned ExtendHighOpc =
3506 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3507
3508 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3509 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3510 };
3511 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3512 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3513 };
3514
3515 if (NumElts == 16) {
3516 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3517 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3518 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3519 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3520 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3521 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3522 SDValue SubVectors[] = {
3523 GetExtendLow(MVT::v4i32, MulLow),
3524 GetExtendHigh(MVT::v4i32, MulLow),
3525 GetExtendLow(MVT::v4i32, MulHigh),
3526 GetExtendHigh(MVT::v4i32, MulHigh),
3527 };
3528 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3529 } else {
3530 assert(NumElts == 8);
3531 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3532 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3533 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3534 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3535 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3536 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3537 }
3538 return SDValue();
3539}
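// Illustrative example (a sketch, not part of the upstream source): the
// combine above fires on a pattern such as
//   %m = mul <16 x i32> (sext <16 x i8> %a to <16 x i32>),
//                       (sext <16 x i8> %b to <16 x i32>)
// and rebuilds it from v8i16 multiplies of the extended halves plus
// v4i32-sized extensions of the products (the extmul/extend forms listed in
// the table above), so the wide multiply can use the SIMD extending-multiply
// instructions instead of four plain v4i32 multiplies of pre-extended inputs.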
3540
3541static SDValue performMulCombine(SDNode *N,
3542 TargetLowering::DAGCombinerInfo &DCI) {
3543 assert(N->getOpcode() == ISD::MUL);
3544 EVT VT = N->getValueType(0);
3545 if (!VT.isVector())
3546 return SDValue();
3547
3548 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3549 return Res;
3550
3551 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3552 // extend them to v8i16. Only do this before legalization in case a narrow
3553 // vector is widened and may be simplified later.
3554 if (!DCI.isBeforeLegalize() || (VT != MVT::v8i8 && VT != MVT::v16i8))
3555 return SDValue();
3556
3557 SDLoc DL(N);
3558 SelectionDAG &DAG = DCI.DAG;
3559 SDValue LHS = N->getOperand(0);
3560 SDValue RHS = N->getOperand(1);
3561 EVT MulVT = MVT::v8i16;
3562
3563 if (VT == MVT::v8i8) {
3564 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3565 DAG.getUNDEF(MVT::v8i8));
3566 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3567 DAG.getUNDEF(MVT::v8i8));
3568 SDValue LowLHS =
3569 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3570 SDValue LowRHS =
3571 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3572 SDValue MulLow = DAG.getBitcast(
3573 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3574 // Take the low byte of each lane.
3575 SDValue Shuffle = DAG.getVectorShuffle(
3576 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3577 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3578 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3579 } else {
3580 assert(VT == MVT::v16i8 && "Expected v16i8");
3581 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3582 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3583 SDValue HighLHS =
3584 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3585 SDValue HighRHS =
3586 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3587
3588 SDValue MulLow =
3589 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3590 SDValue MulHigh =
3591 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3592
3593 // Take the low byte of each lane.
3594 return DAG.getVectorShuffle(
3595 VT, DL, MulLow, MulHigh,
3596 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3597 }
3598}
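// Illustrative note (a sketch, not part of the upstream source): WebAssembly
// SIMD has no i8x16.mul instruction, so the combine above widens the i8 lanes
// to i16, multiplies in v8i16, and shuffles the low byte of each 16-bit
// product back into a v16i8 result. The v8i8 path first pads its operands to
// v16i8 with undef lanes so the same trick applies, then extracts the low
// 64 bits of the shuffled result.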
3599
3600SDValue
3601WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3602 DAGCombinerInfo &DCI) const {
3603 switch (N->getOpcode()) {
3604 default:
3605 return SDValue();
3606 case ISD::BITCAST:
3607 return performBitcastCombine(N, DCI);
3608 case ISD::SETCC:
3609 return performSETCCCombine(N, DCI, Subtarget);
3610 case ISD::VECTOR_SHUFFLE:
3611 return performVECTOR_SHUFFLECombine(N, DCI);
3612 case ISD::SIGN_EXTEND:
3613 case ISD::ZERO_EXTEND:
3614 return performVectorExtendCombine(N, DCI);
3615 case ISD::UINT_TO_FP:
3616 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3617 return ExtCombine;
3618 return performVectorNonNegToFPCombine(N, DCI);
3619 case ISD::SINT_TO_FP:
3620 return performVectorExtendToFPCombine(N, DCI);
3621 case ISD::FP_TO_SINT_SAT:
3622 case ISD::FP_TO_UINT_SAT:
3623 case ISD::FP_ROUND:
3624 case ISD::CONCAT_VECTORS:
3625 return performVectorTruncZeroCombine(N, DCI);
3626 case ISD::TRUNCATE:
3627 return performTruncateCombine(N, DCI);
3628 case ISD::INTRINSIC_WO_CHAIN:
3629 return performAnyAllCombine(N, DCI.DAG);
3630 case ISD::MUL:
3631 return performMulCombine(N, DCI);
3632 }
3633}