1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
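// As an illustration (a sketch, not code from this file): an IR access such as
//   %v = load i32, ptr addrspace(1) @g
// is lowered to a wasm `global.get $g` instead of an `i32.load` from linear
// memory; the Custom LOAD/STORE actions registered here route such accesses to
// GLOBAL_GET/GLOBAL_SET nodes (see LowerLoad/LowerStore below).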
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
162 setTruncStoreAction(T, MVT::f16, Expand);
163 }
164
165 // Expand unavailable integer operations.
166 for (auto Op :
170 for (auto T : {MVT::i32, MVT::i64})
172 if (Subtarget->hasSIMD128())
173 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175 }
176
177 if (Subtarget->hasWideArithmetic()) {
183 }
184
185 if (Subtarget->hasNontrappingFPToInt())
187 for (auto T : {MVT::i32, MVT::i64})
189
190 if (Subtarget->hasRelaxedSIMD()) {
193 {MVT::v4f32, MVT::v2f64}, Legal);
194 }
195 // SIMD-specific configuration
196 if (Subtarget->hasSIMD128()) {
197
199
200 // Combine wide-vector muls, with extend inputs, to extmul_half.
202
203 // Combine vector mask reductions into alltrue/anytrue
205
206 // Convert vector to integer bitcasts to bitmask
208
209 // Hoist bitcasts out of shuffles
211
212 // Combine extends of extract_subvectors into widening ops
214
215 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
216 // conversion ops
219
220 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
221 // into conversion ops
225
227
228 // Support saturating add/sub for i8x16 and i16x8
230 for (auto T : {MVT::v16i8, MVT::v8i16})
232
233 // Support integer abs
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
236
237 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
238 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
239 MVT::v2f64})
241
242 if (Subtarget->hasFP16())
244
245 // We have custom shuffle lowering to expose the shuffle mask
246 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
247 MVT::v2f64})
249
250 if (Subtarget->hasFP16())
252
253 // Support splatting
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
258 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
259
260 // Custom lowering since wasm shifts must have a scalar shift amount
261 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
264
265 // Custom lower lane accesses to expand out variable indices
267 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
268 MVT::v2f64})
270
271 // There is no i8x16.mul instruction
272 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
273
274 // There is no vector conditional select instruction
275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
276 MVT::v2f64})
278
279 // Expand integer operations supported for scalars but not SIMD
280 for (auto Op :
282 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
284
285 // But we do have integer min and max operations
286 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
287 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
289
290 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
291 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
292 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
293 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
294
295 // Custom lower bit counting operations for other types to scalarize them.
296 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
297 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
299
300 // Expand float operations supported for scalars but not SIMD
303 for (auto T : {MVT::v4f32, MVT::v2f64})
305
306 // Unsigned comparison operations are unavailable for i64x2 vectors.
308 setCondCodeAction(CC, MVT::v2i64, Custom);
309
310 // 64x2 conversions are not in the spec
311 for (auto Op :
313 for (auto T : {MVT::v2i64, MVT::v2f64})
315
316 // But saturating fp_to_int conversions are
318 setOperationAction(Op, MVT::v4i32, Custom);
319 if (Subtarget->hasFP16()) {
320 setOperationAction(Op, MVT::v8i16, Custom);
321 }
322 }
323
324 // Support vector extending
329 }
330
331 if (Subtarget->hasFP16()) {
332 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
333 }
334
335 if (Subtarget->hasRelaxedSIMD()) {
338 }
339
340 // Partial MLA reductions.
342 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
343 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
344 }
345 }
346
347 // As a special case, these operators use the type to mean the type to
348 // sign-extend from.
350 if (!Subtarget->hasSignExt()) {
351 // Sign extends are legal only when extending a vector extract
352 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
353 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
355 }
358
359 // Dynamic stack allocation: use the default expansion.
363
367
368 // Expand these forms; we pattern-match the forms that we can handle in isel.
369 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
370 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
372
373 // We have custom switch handling.
375
376 // WebAssembly doesn't have:
377 // - Floating-point extending loads.
378 // - Floating-point truncating stores.
379 // - i1 extending loads.
380 // - truncating SIMD stores and most extending loads
381 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
382 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
383 for (auto T : MVT::integer_valuetypes())
384 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
385 setLoadExtAction(Ext, T, MVT::i1, Promote);
386 if (Subtarget->hasSIMD128()) {
387 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
388 MVT::v2f64}) {
389 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
390 if (MVT(T) != MemT) {
392 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
393 setLoadExtAction(Ext, T, MemT, Expand);
394 }
395 }
396 }
397 // But some vector extending loads are legal
398 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
399 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
400 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
401 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
402 }
403 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
404 }
405
406 // Don't do anything clever with build_pairs
408
409 // Trap lowers to wasm unreachable
410 setOperationAction(ISD::TRAP, MVT::Other, Legal);
412
413 // Exception handling intrinsics
417
419
420 // Always convert switches to br_tables unless there is only one case, which
421 // is equivalent to a simple branch. This reduces code size for wasm, and we
422 // defer possible jump table optimizations to the VM.
424}
425
434
443
445WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
446 // We have wasm instructions for these
447 switch (AI->getOperation()) {
455 default:
456 break;
457 }
459}
460
461bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
462 // Implementation copied from X86TargetLowering.
463 unsigned Opc = VecOp.getOpcode();
464
465 // Assume target opcodes can't be scalarized.
466 // TODO - do we have any exceptions?
468 return false;
469
470 // If the vector op is not supported, try to convert to scalar.
471 EVT VecVT = VecOp.getValueType();
473 return true;
474
475 // If the vector op is supported, but the scalar op is not, the transform may
476 // not be worthwhile.
477 EVT ScalarVT = VecVT.getScalarType();
478 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
479}
480
481FastISel *WebAssemblyTargetLowering::createFastISel(
482 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
483 return WebAssembly::createFastISel(FuncInfo, LibInfo);
484}
485
486MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
487 EVT VT) const {
488 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
489 if (BitWidth > 1 && BitWidth < 8)
490 BitWidth = 8;
491
492 if (BitWidth > 64) {
493 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
494 // the count to be an i32.
495 BitWidth = 32;
497 "32-bit shift counts ought to be enough for anyone");
498 }
499
502 "Unable to represent scalar shift amount type");
503 return Result;
504}
505
506// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
507// undefined result on invalid/overflow, to the WebAssembly opcode, which
508// traps on invalid/overflow.
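// A rough sketch of the structure emitted below (signed f32 -> i32 case;
// register names follow the local variables in this function):
//
//   BB:       Tmp0   = f32.abs(In)              ; skipped when IsUnsigned
//             CmpReg = f32.lt(Tmp0, 0x1p31)     ; is |In| within range?
//             EqzReg = i32.eqz(CmpReg)
//             br_if TrueMBB, EqzReg             ; out of range -> substitute
//   FalseMBB: FalseReg = i32.trunc_f32_s(In)
//             br DoneMBB
//   TrueMBB:  TrueReg  = INT32_MIN              ; the substitute value
//   DoneMBB:  OutReg   = phi [FalseReg, FalseMBB], [TrueReg, TrueMBB]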
511 const TargetInstrInfo &TII,
512 bool IsUnsigned, bool Int64,
513 bool Float64, unsigned LoweredOpcode) {
515
516 Register OutReg = MI.getOperand(0).getReg();
517 Register InReg = MI.getOperand(1).getReg();
518
519 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
520 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
521 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
522 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
523 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
524 unsigned Eqz = WebAssembly::EQZ_I32;
525 unsigned And = WebAssembly::AND_I32;
526 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
527 int64_t Substitute = IsUnsigned ? 0 : Limit;
528 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
529 auto &Context = BB->getParent()->getFunction().getContext();
530 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
531
532 const BasicBlock *LLVMBB = BB->getBasicBlock();
533 MachineFunction *F = BB->getParent();
534 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
535 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
536 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
537
539 F->insert(It, FalseMBB);
540 F->insert(It, TrueMBB);
541 F->insert(It, DoneMBB);
542
543 // Transfer the remainder of BB and its successor edges to DoneMBB.
544 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
546
547 BB->addSuccessor(TrueMBB);
548 BB->addSuccessor(FalseMBB);
549 TrueMBB->addSuccessor(DoneMBB);
550 FalseMBB->addSuccessor(DoneMBB);
551
552 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
553 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
554 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
555 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
556 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
557 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
558 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
559
560 MI.eraseFromParent();
561 // For signed numbers, we can do a single comparison to determine whether
562 // fabs(x) is within range.
563 if (IsUnsigned) {
564 Tmp0 = InReg;
565 } else {
566 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
567 }
568 BuildMI(BB, DL, TII.get(FConst), Tmp1)
569 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
570 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
571
572 // For unsigned numbers, we have to do a separate comparison with zero.
573 if (IsUnsigned) {
574 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
575 Register SecondCmpReg =
576 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
577 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
578 BuildMI(BB, DL, TII.get(FConst), Tmp1)
579 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
580 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
581 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
582 CmpReg = AndReg;
583 }
584
585 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
586
587 // Create the CFG diamond to select between doing the conversion or using
588 // the substitute value.
589 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
590 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
591 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
592 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
593 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
594 .addReg(FalseReg)
595 .addMBB(FalseMBB)
596 .addReg(TrueReg)
597 .addMBB(TrueMBB);
598
599 return DoneMBB;
600}
601
602// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
603// instruction to handle the zero-length case.
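// Roughly, the emitted control flow looks like this (a sketch, not verbatim
// output):
//
//   BB:       EqzReg = eqz(Len)
//             br_if DoneMBB, EqzReg        ; skip the copy when Len == 0
//   TrueMBB:  memory.copy Dst, Src, Len
//             br DoneMBB
//   DoneMBB:  (remainder of the original block)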
606 const TargetInstrInfo &TII, bool Int64) {
608
609 MachineOperand DstMem = MI.getOperand(0);
610 MachineOperand SrcMem = MI.getOperand(1);
611 MachineOperand Dst = MI.getOperand(2);
612 MachineOperand Src = MI.getOperand(3);
613 MachineOperand Len = MI.getOperand(4);
614
615 // If the length is a constant, we don't actually need the check.
616 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
617 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
618 Def->getOpcode() == WebAssembly::CONST_I64) {
619 if (Def->getOperand(1).getImm() == 0) {
620 // A zero-length memcpy is a no-op.
621 MI.eraseFromParent();
622 return BB;
623 }
624 // A non-zero-length memcpy doesn't need a zero check.
625 unsigned MemoryCopy =
626 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
627 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
628 .add(DstMem)
629 .add(SrcMem)
630 .add(Dst)
631 .add(Src)
632 .add(Len);
633 MI.eraseFromParent();
634 return BB;
635 }
636 }
637
638 // We're going to add an extra use to `Len` to test if it's zero; that
639 // use shouldn't be a kill, even if the original use is.
640 MachineOperand NoKillLen = Len;
641 NoKillLen.setIsKill(false);
642
643 // Decide on which `MachineInstr` opcode we're going to use.
644 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
645 unsigned MemoryCopy =
646 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
647
648 // Create two new basic blocks; one for the new `memory.copy` that we can
649 // branch over, and one for the rest of the instructions after the original
650 // `memory.copy`.
651 const BasicBlock *LLVMBB = BB->getBasicBlock();
652 MachineFunction *F = BB->getParent();
653 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
654 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
655
657 F->insert(It, TrueMBB);
658 F->insert(It, DoneMBB);
659
660 // Transfer the remainder of BB and its successor edges to DoneMBB.
661 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
663
664 // Connect the CFG edges.
665 BB->addSuccessor(TrueMBB);
666 BB->addSuccessor(DoneMBB);
667 TrueMBB->addSuccessor(DoneMBB);
668
669 // Create a virtual register for the `Eqz` result.
670 unsigned EqzReg;
671 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
672
673 // Erase the original `memory.copy`.
674 MI.eraseFromParent();
675
676 // Test if `Len` is zero.
677 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
678
679 // Insert a new `memory.copy`.
680 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
681 .add(DstMem)
682 .add(SrcMem)
683 .add(Dst)
684 .add(Src)
685 .add(Len);
686
687 // Create the CFG triangle.
688 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
689 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
690
691 return DoneMBB;
692}
693
694// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
695// instruction to handle the zero-length case.
698 const TargetInstrInfo &TII, bool Int64) {
700
701 MachineOperand Mem = MI.getOperand(0);
702 MachineOperand Dst = MI.getOperand(1);
703 MachineOperand Val = MI.getOperand(2);
704 MachineOperand Len = MI.getOperand(3);
705
706 // If the length is a constant, we don't actually need the check.
707 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
708 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
709 Def->getOpcode() == WebAssembly::CONST_I64) {
710 if (Def->getOperand(1).getImm() == 0) {
711 // A zero-length memset is a no-op.
712 MI.eraseFromParent();
713 return BB;
714 }
715 // A non-zero-length memset doesn't need a zero check.
716 unsigned MemoryFill =
717 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
718 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
719 .add(Mem)
720 .add(Dst)
721 .add(Val)
722 .add(Len);
723 MI.eraseFromParent();
724 return BB;
725 }
726 }
727
728 // We're going to add an extra use to `Len` to test if it's zero; that
729 // use shouldn't be a kill, even if the original use is.
730 MachineOperand NoKillLen = Len;
731 NoKillLen.setIsKill(false);
732
733 // Decide on which `MachineInstr` opcode we're going to use.
734 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
735 unsigned MemoryFill =
736 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
737
738 // Create two new basic blocks; one for the new `memory.fill` that we can
739 // branch over, and one for the rest of the instructions after the original
740 // `memory.fill`.
741 const BasicBlock *LLVMBB = BB->getBasicBlock();
742 MachineFunction *F = BB->getParent();
743 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
744 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
745
747 F->insert(It, TrueMBB);
748 F->insert(It, DoneMBB);
749
750 // Transfer the remainder of BB and its successor edges to DoneMBB.
751 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
753
754 // Connect the CFG edges.
755 BB->addSuccessor(TrueMBB);
756 BB->addSuccessor(DoneMBB);
757 TrueMBB->addSuccessor(DoneMBB);
758
759 // Create a virtual register for the `Eqz` result.
760 unsigned EqzReg;
761 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
762
763 // Erase the original `memory.fill`.
764 MI.eraseFromParent();
765
766 // Test if `Len` is zero.
767 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
768
769 // Insert a new `memory.fill`.
770 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
771
772 // Create the CFG triangle.
773 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
774 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
775
776 return DoneMBB;
777}
778
779static MachineBasicBlock *
781 const WebAssemblySubtarget *Subtarget,
782 const TargetInstrInfo &TII) {
783 MachineInstr &CallParams = *CallResults.getPrevNode();
784 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
785 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
786 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
787
788 bool IsIndirect =
789 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
790 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
791
792 bool IsFuncrefCall = false;
793 if (IsIndirect && CallParams.getOperand(0).isReg()) {
794 Register Reg = CallParams.getOperand(0).getReg();
795 const MachineFunction *MF = BB->getParent();
796 const MachineRegisterInfo &MRI = MF->getRegInfo();
797 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
798 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
799 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
800 }
801
802 unsigned CallOp;
803 if (IsIndirect && IsRetCall) {
804 CallOp = WebAssembly::RET_CALL_INDIRECT;
805 } else if (IsIndirect) {
806 CallOp = WebAssembly::CALL_INDIRECT;
807 } else if (IsRetCall) {
808 CallOp = WebAssembly::RET_CALL;
809 } else {
810 CallOp = WebAssembly::CALL;
811 }
812
813 MachineFunction &MF = *BB->getParent();
814 const MCInstrDesc &MCID = TII.get(CallOp);
815 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
816
817 // Move the function pointer to the end of the arguments for indirect calls
818 if (IsIndirect) {
819 auto FnPtr = CallParams.getOperand(0);
820 CallParams.removeOperand(0);
821
822 // For funcrefs, call_indirect goes through __funcref_call_table, and the
823 // funcref is always installed in slot 0 of that table. So instead of
824 // appending the function pointer at the end of the params list, we append
825 // a zero, the funcref's index in
826 // __funcref_call_table.
827 if (IsFuncrefCall) {
828 Register RegZero =
829 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
830 MachineInstrBuilder MIBC0 =
831 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
832
833 BB->insert(CallResults.getIterator(), MIBC0);
834 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
835 } else
836 CallParams.addOperand(FnPtr);
837 }
838
839 for (auto Def : CallResults.defs())
840 MIB.add(Def);
841
842 if (IsIndirect) {
843 // Placeholder for the type index.
844 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
845 MIB.addImm(0);
846 // The table into which this call_indirect indexes.
847 MCSymbolWasm *Table = IsFuncrefCall
849 MF.getContext(), Subtarget)
851 MF.getContext(), Subtarget);
852 if (Subtarget->hasCallIndirectOverlong()) {
853 MIB.addSym(Table);
854 } else {
855 // For the MVP there is at most one table whose number is 0, but we can't
856 // write a table symbol or issue relocations. Instead we just ensure the
857 // table is live and write a zero.
858 Table->setNoStrip();
859 MIB.addImm(0);
860 }
861 }
862
863 for (auto Use : CallParams.uses())
864 MIB.add(Use);
865
866 BB->insert(CallResults.getIterator(), MIB);
867 CallParams.eraseFromParent();
868 CallResults.eraseFromParent();
869
870 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
871 // table slot with ref.null upon call_indirect return.
872 //
873 // This generates the following code, which comes right after a call_indirect
874 // of a funcref:
875 //
876 // i32.const 0
877 // ref.null func
878 // table.set __funcref_call_table
879 if (IsIndirect && IsFuncrefCall) {
881 MF.getContext(), Subtarget);
882 Register RegZero =
883 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
884 MachineInstr *Const0 =
885 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
886 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
887
888 Register RegFuncref =
889 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
890 MachineInstr *RefNull =
891 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
892 BB->insertAfter(Const0->getIterator(), RefNull);
893
894 MachineInstr *TableSet =
895 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
896 .addSym(Table)
897 .addReg(RegZero)
898 .addReg(RegFuncref);
899 BB->insertAfter(RefNull->getIterator(), TableSet);
900 }
901
902 return BB;
903}
904
905MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
906 MachineInstr &MI, MachineBasicBlock *BB) const {
907 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
908 DebugLoc DL = MI.getDebugLoc();
909
910 switch (MI.getOpcode()) {
911 default:
912 llvm_unreachable("Unexpected instr type to insert");
913 case WebAssembly::FP_TO_SINT_I32_F32:
914 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
915 WebAssembly::I32_TRUNC_S_F32);
916 case WebAssembly::FP_TO_UINT_I32_F32:
917 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
918 WebAssembly::I32_TRUNC_U_F32);
919 case WebAssembly::FP_TO_SINT_I64_F32:
920 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
921 WebAssembly::I64_TRUNC_S_F32);
922 case WebAssembly::FP_TO_UINT_I64_F32:
923 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
924 WebAssembly::I64_TRUNC_U_F32);
925 case WebAssembly::FP_TO_SINT_I32_F64:
926 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
927 WebAssembly::I32_TRUNC_S_F64);
928 case WebAssembly::FP_TO_UINT_I32_F64:
929 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
930 WebAssembly::I32_TRUNC_U_F64);
931 case WebAssembly::FP_TO_SINT_I64_F64:
932 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
933 WebAssembly::I64_TRUNC_S_F64);
934 case WebAssembly::FP_TO_UINT_I64_F64:
935 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
936 WebAssembly::I64_TRUNC_U_F64);
937 case WebAssembly::MEMCPY_A32:
938 return LowerMemcpy(MI, DL, BB, TII, false);
939 case WebAssembly::MEMCPY_A64:
940 return LowerMemcpy(MI, DL, BB, TII, true);
941 case WebAssembly::MEMSET_A32:
942 return LowerMemset(MI, DL, BB, TII, false);
943 case WebAssembly::MEMSET_A64:
944 return LowerMemset(MI, DL, BB, TII, true);
945 case WebAssembly::CALL_RESULTS:
946 case WebAssembly::RET_CALL_RESULTS:
947 return LowerCallResults(MI, DL, BB, Subtarget, TII);
948 }
949}
950
951std::pair<unsigned, const TargetRegisterClass *>
952WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
953 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
954 // First, see if this is a constraint that directly corresponds to a
955 // WebAssembly register class.
956 if (Constraint.size() == 1) {
957 switch (Constraint[0]) {
958 case 'r':
959 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
960 if (Subtarget->hasSIMD128() && VT.isVector()) {
961 if (VT.getSizeInBits() == 128)
962 return std::make_pair(0U, &WebAssembly::V128RegClass);
963 }
964 if (VT.isInteger() && !VT.isVector()) {
965 if (VT.getSizeInBits() <= 32)
966 return std::make_pair(0U, &WebAssembly::I32RegClass);
967 if (VT.getSizeInBits() <= 64)
968 return std::make_pair(0U, &WebAssembly::I64RegClass);
969 }
970 if (VT.isFloatingPoint() && !VT.isVector()) {
971 switch (VT.getSizeInBits()) {
972 case 32:
973 return std::make_pair(0U, &WebAssembly::F32RegClass);
974 case 64:
975 return std::make_pair(0U, &WebAssembly::F64RegClass);
976 default:
977 break;
978 }
979 }
980 break;
981 default:
982 break;
983 }
984 }
985
987}
988
989bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
990 // Assume ctz is a relatively cheap operation.
991 return true;
992}
993
994bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
995 // Assume clz is a relatively cheap operation.
996 return true;
997}
998
999bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1000 const AddrMode &AM,
1001 Type *Ty, unsigned AS,
1002 Instruction *I) const {
1003 // WebAssembly offsets are added as unsigned without wrapping. The
1004 // isLegalAddressingMode gives us no way to determine if wrapping could be
1005 // happening, so we approximate this by accepting only non-negative offsets.
1006 if (AM.BaseOffs < 0)
1007 return false;
1008
1009 // WebAssembly has no scale register operands.
1010 if (AM.Scale != 0)
1011 return false;
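  // For example (illustrative only): a constant offset folded into a load, as
  // in
  //   (i32.load offset=16 (local.get $base))
  // is accepted, while a negative offset such as `base - 8` or a scaled index
  // such as `base + 4*i` must remain explicit address arithmetic.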
1012
1013 // Everything else is legal.
1014 return true;
1015}
1016
1017bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1018 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1019 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1020 // WebAssembly supports unaligned accesses, though they should be declared
1021 // with the p2align attribute on the loads and stores that perform them, and
1022 // there may be a performance impact. We tell LLVM they're "fast" because
1023 // for the kinds of things that LLVM uses this for (merging adjacent stores
1024 // of constants, etc.), WebAssembly implementations will either want the
1025 // unaligned access or they'll split anyway.
1026 if (Fast)
1027 *Fast = 1;
1028 return true;
1029}
1030
1031bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1032 AttributeList Attr) const {
1033 // The current thinking is that wasm engines will perform this optimization,
1034 // so we can save on code size.
1035 return true;
1036}
1037
1038bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1039 EVT ExtT = ExtVal.getValueType();
1040 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1041 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1042 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1043 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1044}
1045
1046bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1047 const GlobalAddressSDNode *GA) const {
1048 // Wasm doesn't support function addresses with offsets
1049 const GlobalValue *GV = GA->getGlobal();
1051}
1052
1053EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1054 LLVMContext &C,
1055 EVT VT) const {
1056 if (VT.isVector())
1058
1059 // So far, all branch instructions in Wasm take an I32 condition.
1060 // The default TargetLowering::getSetCCResultType returns the pointer size,
1061 // which would be useful to reduce instruction counts when testing
1062 // against 64-bit pointers/values if at some point Wasm supports that.
1063 return EVT::getIntegerVT(C, 32);
1064}
1065
1066bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1067 const CallBase &I,
1068 MachineFunction &MF,
1069 unsigned Intrinsic) const {
1070 switch (Intrinsic) {
1071 case Intrinsic::wasm_memory_atomic_notify:
1073 Info.memVT = MVT::i32;
1074 Info.ptrVal = I.getArgOperand(0);
1075 Info.offset = 0;
1076 Info.align = Align(4);
1077 // The atomic.notify instruction does not really load the memory specified by
1078 // this argument, but a MachineMemOperand must be either a load or a store, so
1079 // we set this to a load.
1080 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1081 // instructions are treated as volatiles in the backend, so we should be
1082 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1084 return true;
1085 case Intrinsic::wasm_memory_atomic_wait32:
1087 Info.memVT = MVT::i32;
1088 Info.ptrVal = I.getArgOperand(0);
1089 Info.offset = 0;
1090 Info.align = Align(4);
1092 return true;
1093 case Intrinsic::wasm_memory_atomic_wait64:
1095 Info.memVT = MVT::i64;
1096 Info.ptrVal = I.getArgOperand(0);
1097 Info.offset = 0;
1098 Info.align = Align(8);
1100 return true;
1101 case Intrinsic::wasm_loadf16_f32:
1103 Info.memVT = MVT::f16;
1104 Info.ptrVal = I.getArgOperand(0);
1105 Info.offset = 0;
1106 Info.align = Align(2);
1108 return true;
1109 case Intrinsic::wasm_storef16_f32:
1111 Info.memVT = MVT::f16;
1112 Info.ptrVal = I.getArgOperand(1);
1113 Info.offset = 0;
1114 Info.align = Align(2);
1116 return true;
1117 default:
1118 return false;
1119 }
1120}
1121
1122void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1123 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1124 const SelectionDAG &DAG, unsigned Depth) const {
1125 switch (Op.getOpcode()) {
1126 default:
1127 break;
1129 unsigned IntNo = Op.getConstantOperandVal(0);
1130 switch (IntNo) {
1131 default:
1132 break;
1133 case Intrinsic::wasm_bitmask: {
1134 unsigned BitWidth = Known.getBitWidth();
1135 EVT VT = Op.getOperand(1).getSimpleValueType();
1136 unsigned PossibleBits = VT.getVectorNumElements();
1137 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1138 Known.Zero |= ZeroMask;
1139 break;
1140 }
1141 }
1142 break;
1143 }
1144 case WebAssemblyISD::EXTEND_LOW_U:
1145 case WebAssemblyISD::EXTEND_HIGH_U: {
1146 // We know the high half of each destination vector element will be zero.
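    // For example (assuming a v16i8 input): i16x8.extend_low_i8x16_u produces
    // v8i16 lanes whose bits [8, 16) are known to be zero.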
1147 SDValue SrcOp = Op.getOperand(0);
1148 EVT VT = SrcOp.getSimpleValueType();
1149 unsigned BitWidth = Known.getBitWidth();
1150 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1151 assert(BitWidth >= 8 && "Unexpected width!");
1153 Known.Zero |= Mask;
1154 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1155 assert(BitWidth >= 16 && "Unexpected width!");
1157 Known.Zero |= Mask;
1158 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1159 assert(BitWidth >= 32 && "Unexpected width!");
1161 Known.Zero |= Mask;
1162 }
1163 break;
1164 }
1165 // For 128-bit addition, if the upper halves of both operands are zero, the
1166 // upper half of the result is just the carry out of the low-half addition,
1167 // so all of its bits except bit 0 are known to be zero.
1168 case WebAssemblyISD::I64_ADD128:
1169 if (Op.getResNo() == 1) {
1170 SDValue LHS_HI = Op.getOperand(1);
1171 SDValue RHS_HI = Op.getOperand(3);
1172 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1173 Known.Zero.setBitsFrom(1);
1174 }
1175 break;
1176 }
1177}
1178
1180WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1181 if (VT.isFixedLengthVector()) {
1182 MVT EltVT = VT.getVectorElementType();
1183 // We have legal vector types with these lane types, so widening the
1184 // vector would let us use some of the lanes directly without having to
1185 // extend or truncate values.
1186 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1187 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1188 return TypeWidenVector;
1189 }
1190
1192}
1193
1194bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1195 const MachineFunction &MF, EVT VT) const {
1196 if (!Subtarget->hasFP16() || !VT.isVector())
1197 return false;
1198
1199 EVT ScalarVT = VT.getScalarType();
1200 if (!ScalarVT.isSimple())
1201 return false;
1202
1203 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1204}
1205
1206bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1207 SDValue Op, const TargetLoweringOpt &TLO) const {
1208 // The ISel process runs DAGCombiner after legalization; this step is called
1209 // the SelectionDAG optimization phase. This post-legalization combining
1210 // process runs DAGCombiner on each node, and if there was a change to be
1211 // made, re-runs legalization on the node and its users to make sure
1212 // everything is in a legalized state.
1213 //
1214 // The legalization calls lowering routines, and we do our custom lowering for
1215 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1216 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1217 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1218 // turns unused vector elements into undefs. But this routine does not work
1219 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1220 // combination can result in an infinite loop, in which undefs are converted to
1221 // zeros in legalization and back to undefs in combining.
1222 //
1223 // So after the DAG is legalized, we prevent SimplifyDemandedVectorElts from
1224 // running for build_vectors.
1225 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1226 return false;
1227 return true;
1228}
1229
1230//===----------------------------------------------------------------------===//
1231// WebAssembly Lowering private implementation.
1232//===----------------------------------------------------------------------===//
1233
1234//===----------------------------------------------------------------------===//
1235// Lowering Code
1236//===----------------------------------------------------------------------===//
1237
1238static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1240 DAG.getContext()->diagnose(
1241 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1242}
1243
1244// Test whether the given calling convention is supported.
1246 // We currently support the language-independent target-independent
1247 // conventions. We don't yet have a way to annotate calls with properties like
1248 // "cold", and we don't have any call-clobbered registers, so these are mostly
1249 // all handled the same.
1250 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1251 CallConv == CallingConv::Cold ||
1252 CallConv == CallingConv::PreserveMost ||
1253 CallConv == CallingConv::PreserveAll ||
1254 CallConv == CallingConv::CXX_FAST_TLS ||
1256 CallConv == CallingConv::Swift;
1257}
1258
1259SDValue
1260WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1261 SmallVectorImpl<SDValue> &InVals) const {
1262 SelectionDAG &DAG = CLI.DAG;
1263 SDLoc DL = CLI.DL;
1264 SDValue Chain = CLI.Chain;
1265 SDValue Callee = CLI.Callee;
1266 MachineFunction &MF = DAG.getMachineFunction();
1267 auto Layout = MF.getDataLayout();
1268
1269 CallingConv::ID CallConv = CLI.CallConv;
1270 if (!callingConvSupported(CallConv))
1271 fail(DL, DAG,
1272 "WebAssembly doesn't support language-specific or target-specific "
1273 "calling conventions yet");
1274 if (CLI.IsPatchPoint)
1275 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1276
1277 if (CLI.IsTailCall) {
1278 auto NoTail = [&](const char *Msg) {
1279 if (CLI.CB && CLI.CB->isMustTailCall())
1280 fail(DL, DAG, Msg);
1281 CLI.IsTailCall = false;
1282 };
1283
1284 if (!Subtarget->hasTailCall())
1285 NoTail("WebAssembly 'tail-call' feature not enabled");
1286
1287 // Varargs calls cannot be tail calls because the buffer is on the stack
1288 if (CLI.IsVarArg)
1289 NoTail("WebAssembly does not support varargs tail calls");
1290
1291 // Do not tail call unless caller and callee return types match
1292 const Function &F = MF.getFunction();
1293 const TargetMachine &TM = getTargetMachine();
1294 Type *RetTy = F.getReturnType();
1295 SmallVector<MVT, 4> CallerRetTys;
1296 SmallVector<MVT, 4> CalleeRetTys;
1297 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1298 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1299 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1300 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1301 CalleeRetTys.begin());
1302 if (!TypesMatch)
1303 NoTail("WebAssembly tail call requires caller and callee return types to "
1304 "match");
1305
1306 // If pointers to local stack values are passed, we cannot tail call
1307 if (CLI.CB) {
1308 for (auto &Arg : CLI.CB->args()) {
1309 Value *Val = Arg.get();
1310 // Trace the value back through pointer operations
1311 while (true) {
1312 Value *Src = Val->stripPointerCastsAndAliases();
1313 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1314 Src = GEP->getPointerOperand();
1315 if (Val == Src)
1316 break;
1317 Val = Src;
1318 }
1319 if (isa<AllocaInst>(Val)) {
1320 NoTail(
1321 "WebAssembly does not support tail calling with stack arguments");
1322 break;
1323 }
1324 }
1325 }
1326 }
1327
1328 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1329 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1330 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1331
1332 // The generic code may have added an sret argument. If we're lowering an
1333 // invoke function, the ABI requires that the function pointer be the first
1334 // argument, so we may have to swap the arguments.
1335 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1336 Outs[0].Flags.isSRet()) {
1337 std::swap(Outs[0], Outs[1]);
1338 std::swap(OutVals[0], OutVals[1]);
1339 }
1340
1341 bool HasSwiftSelfArg = false;
1342 bool HasSwiftErrorArg = false;
1343 unsigned NumFixedArgs = 0;
1344 for (unsigned I = 0; I < Outs.size(); ++I) {
1345 const ISD::OutputArg &Out = Outs[I];
1346 SDValue &OutVal = OutVals[I];
1347 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1348 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1349 if (Out.Flags.isNest())
1350 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1351 if (Out.Flags.isInAlloca())
1352 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1353 if (Out.Flags.isInConsecutiveRegs())
1354 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1356 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1357 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1358 auto &MFI = MF.getFrameInfo();
1359 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1361 /*isSS=*/false);
1362 SDValue SizeNode =
1363 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1364 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1365 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1367 /*isVolatile*/ false, /*AlwaysInline=*/false,
1368 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1369 MachinePointerInfo());
1370 OutVal = FINode;
1371 }
1372 // Count the number of fixed args *after* legalization.
1373 NumFixedArgs += !Out.Flags.isVarArg();
1374 }
1375
1376 bool IsVarArg = CLI.IsVarArg;
1377 auto PtrVT = getPointerTy(Layout);
1378
1379 // For swiftcc, emit additional swiftself and swifterror arguments
1380 // if they are not present. These additional arguments are also added to the
1381 // callee signature; they are necessary to match the caller and callee
1382 // signatures for indirect calls.
1383 if (CallConv == CallingConv::Swift) {
1384 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1385 if (!HasSwiftSelfArg) {
1386 NumFixedArgs++;
1387 ISD::ArgFlagsTy Flags;
1388 Flags.setSwiftSelf();
1389 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1390 CLI.Outs.push_back(Arg);
1391 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1392 CLI.OutVals.push_back(ArgVal);
1393 }
1394 if (!HasSwiftErrorArg) {
1395 NumFixedArgs++;
1396 ISD::ArgFlagsTy Flags;
1397 Flags.setSwiftError();
1398 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1399 CLI.Outs.push_back(Arg);
1400 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1401 CLI.OutVals.push_back(ArgVal);
1402 }
1403 }
1404
1405 // Analyze operands of the call, assigning locations to each operand.
1407 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1408
1409 if (IsVarArg) {
1410 // Outgoing non-fixed arguments are placed in a buffer. First
1411 // compute their offsets and the total amount of buffer space needed.
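    // For example (illustrative only): for a call like printf(fmt, (i32)a,
    // (f64)b), the two variadic operands would get offsets 0 and 8 in a
    // 16-byte stack buffer, and a pointer to that buffer is passed as the last
    // call operand.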
1412 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1413 const ISD::OutputArg &Out = Outs[I];
1414 SDValue &Arg = OutVals[I];
1415 EVT VT = Arg.getValueType();
1416 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1417 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1418 Align Alignment =
1419 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1420 unsigned Offset =
1421 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1422 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1423 Offset, VT.getSimpleVT(),
1425 }
1426 }
1427
1428 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1429
1430 SDValue FINode;
1431 if (IsVarArg && NumBytes) {
1432 // For non-fixed arguments, next emit stores to store the argument values
1433 // to the stack buffer at the offsets computed above.
1434 MaybeAlign StackAlign = Layout.getStackAlignment();
1435 assert(StackAlign && "data layout string is missing stack alignment");
1436 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1437 /*isSS=*/false);
1438 unsigned ValNo = 0;
1440 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1441 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1442 "ArgLocs should remain in order and only hold varargs args");
1443 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1444 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1445 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1446 DAG.getConstant(Offset, DL, PtrVT));
1447 Chains.push_back(
1448 DAG.getStore(Chain, DL, Arg, Add,
1450 }
1451 if (!Chains.empty())
1452 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1453 } else if (IsVarArg) {
1454 FINode = DAG.getIntPtrConstant(0, DL);
1455 }
1456
1457 if (Callee->getOpcode() == ISD::GlobalAddress) {
1458 // If the callee is a GlobalAddress node (quite common, since every direct
1459 // call is), turn it into a TargetGlobalAddress node so that
1460 // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct calls.
1461 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1464 GA->getOffset());
1465 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1466 getPointerTy(DAG.getDataLayout()), Callee);
1467 }
1468
1469 // Compute the operands for the CALLn node.
1471 Ops.push_back(Chain);
1472 Ops.push_back(Callee);
1473
1474 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1475 // isn't reliable.
1476 Ops.append(OutVals.begin(),
1477 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1478 // Add a pointer to the vararg buffer.
1479 if (IsVarArg)
1480 Ops.push_back(FINode);
1481
1482 SmallVector<EVT, 8> InTys;
1483 for (const auto &In : Ins) {
1484 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1485 assert(!In.Flags.isNest() && "nest is not valid for return values");
1486 if (In.Flags.isInAlloca())
1487 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1488 if (In.Flags.isInConsecutiveRegs())
1489 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1490 if (In.Flags.isInConsecutiveRegsLast())
1491 fail(DL, DAG,
1492 "WebAssembly hasn't implemented cons regs last return values");
1493 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1494 // registers.
1495 InTys.push_back(In.VT);
1496 }
1497
1498 // Lastly, if this is a call to a funcref, we need to add a table.set
1499 // instruction to the chain and transform the call.
1501 CLI.CB->getCalledOperand()->getType())) {
1502 // In the absence of the function-references proposal, where a funcref call
1503 // would be lowered to call_ref, we use reference types instead: we generate
1504 // a table.set that installs the funcref into a special table used solely for
1505 // this purpose, followed by a call_indirect. Here we just generate the
1506 // table.set and return its SDValue so that LowerCall can finalize the
1507 // lowering by generating the call_indirect.
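    // The resulting wasm is roughly (a sketch; __funcref_call_table is the
    // dedicated table mentioned above):
    //
    //   i32.const 0            ;; slot index
    //   <funcref callee>
    //   table.set __funcref_call_table
    //   call_indirect ...      ;; emitted later, indexing slot 0 of that table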
1508 SDValue Chain = Ops[0];
1509
1511 MF.getContext(), Subtarget);
1512 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1513 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1514 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1515 SDValue TableSet = DAG.getMemIntrinsicNode(
1516 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1517 MVT::funcref,
1518 // Machine Mem Operand args
1519 MachinePointerInfo(
1521 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1523
1524 Ops[0] = TableSet; // The new chain is the TableSet itself
1525 }
1526
1527 if (CLI.IsTailCall) {
1528 // ret_calls do not return values to the current frame
1529 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1530 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1531 }
1532
1533 InTys.push_back(MVT::Other);
1534 SDVTList InTyList = DAG.getVTList(InTys);
1535 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1536
1537 for (size_t I = 0; I < Ins.size(); ++I)
1538 InVals.push_back(Res.getValue(I));
1539
1540 // Return the chain
1541 return Res.getValue(Ins.size());
1542}
1543
1544bool WebAssemblyTargetLowering::CanLowerReturn(
1545 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1546 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1547 const Type *RetTy) const {
1548 // WebAssembly can only handle returning tuples with multivalue enabled
1549 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1550}
1551
1552SDValue WebAssemblyTargetLowering::LowerReturn(
1553 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1555 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1556 SelectionDAG &DAG) const {
1557 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1558 "MVP WebAssembly can only return up to one value");
1559 if (!callingConvSupported(CallConv))
1560 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1561
1562 SmallVector<SDValue, 4> RetOps(1, Chain);
1563 RetOps.append(OutVals.begin(), OutVals.end());
1564 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1565
1566 // Record the number and types of the return values.
1567 for (const ISD::OutputArg &Out : Outs) {
1568 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1569 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1570 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1571 if (Out.Flags.isInAlloca())
1572 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1573 if (Out.Flags.isInConsecutiveRegs())
1574 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1576 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1577 }
1578
1579 return Chain;
1580}
1581
1582SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1583 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1584 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1585 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1586 if (!callingConvSupported(CallConv))
1587 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1588
1589 MachineFunction &MF = DAG.getMachineFunction();
1590 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1591
1592 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1593 // of the incoming values before they're represented by virtual registers.
1594 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1595
1596 bool HasSwiftErrorArg = false;
1597 bool HasSwiftSelfArg = false;
1598 for (const ISD::InputArg &In : Ins) {
1599 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1600 HasSwiftErrorArg |= In.Flags.isSwiftError();
1601 if (In.Flags.isInAlloca())
1602 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1603 if (In.Flags.isNest())
1604 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1605 if (In.Flags.isInConsecutiveRegs())
1606 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1607 if (In.Flags.isInConsecutiveRegsLast())
1608 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1609 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1610 // registers.
1611 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1612 DAG.getTargetConstant(InVals.size(),
1613 DL, MVT::i32))
1614 : DAG.getUNDEF(In.VT));
1615
1616 // Record the number and types of arguments.
1617 MFI->addParam(In.VT);
1618 }
1619
1620 // For swiftcc, emit additional swiftself and swifterror arguments
1621 // if they are not present. These additional arguments are also added to the
1622 // callee signature; they are necessary to match the caller and callee
1623 // signatures for indirect calls.
1624 auto PtrVT = getPointerTy(MF.getDataLayout());
1625 if (CallConv == CallingConv::Swift) {
1626 if (!HasSwiftSelfArg) {
1627 MFI->addParam(PtrVT);
1628 }
1629 if (!HasSwiftErrorArg) {
1630 MFI->addParam(PtrVT);
1631 }
1632 }
1633 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1634 // the buffer is passed as an argument.
1635 if (IsVarArg) {
1636 MVT PtrVT = getPointerTy(MF.getDataLayout());
1637 Register VarargVreg =
1639 MFI->setVarargBufferVreg(VarargVreg);
1640 Chain = DAG.getCopyToReg(
1641 Chain, DL, VarargVreg,
1642 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1643 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1644 MFI->addParam(PtrVT);
1645 }
1646
1647 // Record the number and types of arguments and results.
1648 SmallVector<MVT, 4> Params;
1651 MF.getFunction(), DAG.getTarget(), Params, Results);
1652 for (MVT VT : Results)
1653 MFI->addResult(VT);
1654 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1655 // the param logic here with ComputeSignatureVTs
1656 assert(MFI->getParams().size() == Params.size() &&
1657 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1658 Params.begin()));
1659
1660 return Chain;
1661}
1662
1663void WebAssemblyTargetLowering::ReplaceNodeResults(
1665 switch (N->getOpcode()) {
1667 // Do not add any results, signifying that N should not be custom lowered
1668 // after all. This happens because simd128 turns on custom lowering for
1669 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1670 // illegal type.
1671 break;
1674 // Do not add any results, signifying that N should not be custom lowered.
1675 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1676 break;
1677 case ISD::ADD:
1678 case ISD::SUB:
1679 Results.push_back(Replace128Op(N, DAG));
1680 break;
1681 default:
1683 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1684 }
1685}
1686
1687//===----------------------------------------------------------------------===//
1688// Custom lowering hooks.
1689//===----------------------------------------------------------------------===//
1690
1691SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1692 SelectionDAG &DAG) const {
1693 SDLoc DL(Op);
1694 switch (Op.getOpcode()) {
1695 default:
1696 llvm_unreachable("unimplemented operation lowering");
1697 return SDValue();
1698 case ISD::FrameIndex:
1699 return LowerFrameIndex(Op, DAG);
1700 case ISD::GlobalAddress:
1701 return LowerGlobalAddress(Op, DAG);
1703 return LowerGlobalTLSAddress(Op, DAG);
1705 return LowerExternalSymbol(Op, DAG);
1706 case ISD::JumpTable:
1707 return LowerJumpTable(Op, DAG);
1708 case ISD::BR_JT:
1709 return LowerBR_JT(Op, DAG);
1710 case ISD::VASTART:
1711 return LowerVASTART(Op, DAG);
1712 case ISD::BlockAddress:
1713 case ISD::BRIND:
1714 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1715 return SDValue();
1716 case ISD::RETURNADDR:
1717 return LowerRETURNADDR(Op, DAG);
1718 case ISD::FRAMEADDR:
1719 return LowerFRAMEADDR(Op, DAG);
1720 case ISD::CopyToReg:
1721 return LowerCopyToReg(Op, DAG);
1722 case ISD::EXTRACT_VECTOR_ELT:
1723 case ISD::INSERT_VECTOR_ELT:
1724 return LowerAccessVectorElement(Op, DAG);
1725 case ISD::INTRINSIC_VOID:
1726 case ISD::INTRINSIC_WO_CHAIN:
1727 case ISD::INTRINSIC_W_CHAIN:
1728 return LowerIntrinsic(Op, DAG);
1729 case ISD::SIGN_EXTEND_INREG:
1730 return LowerSIGN_EXTEND_INREG(Op, DAG);
1731 case ISD::ZERO_EXTEND_VECTOR_INREG:
1732 case ISD::SIGN_EXTEND_VECTOR_INREG:
1733 case ISD::ANY_EXTEND_VECTOR_INREG:
1734 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1735 case ISD::BUILD_VECTOR:
1736 return LowerBUILD_VECTOR(Op, DAG);
1737 case ISD::VECTOR_SHUFFLE:
1738 return LowerVECTOR_SHUFFLE(Op, DAG);
1739 case ISD::SETCC:
1740 return LowerSETCC(Op, DAG);
1741 case ISD::SHL:
1742 case ISD::SRA:
1743 case ISD::SRL:
1744 return LowerShift(Op, DAG);
1745 case ISD::FP_TO_SINT_SAT:
1746 case ISD::FP_TO_UINT_SAT:
1747 return LowerFP_TO_INT_SAT(Op, DAG);
1748 case ISD::LOAD:
1749 return LowerLoad(Op, DAG);
1750 case ISD::STORE:
1751 return LowerStore(Op, DAG);
1752 case ISD::CTPOP:
1753 case ISD::CTLZ:
1754 case ISD::CTTZ:
1755 return DAG.UnrollVectorOp(Op.getNode());
1756 case ISD::CLEAR_CACHE:
1757 report_fatal_error("llvm.clear_cache is not supported on wasm");
1758 case ISD::SMUL_LOHI:
1759 case ISD::UMUL_LOHI:
1760 return LowerMUL_LOHI(Op, DAG);
1761 case ISD::UADDO:
1762 return LowerUADDO(Op, DAG);
1763 }
1764}
1765
1766static bool IsWebAssemblyGlobal(SDValue Op) {
1767 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1768 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1769
1770 return false;
1771}
1772
1773static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1774 SelectionDAG &DAG) {
1775 const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op);
1776 if (!FI)
1777 return std::nullopt;
1778
1779 auto &MF = DAG.getMachineFunction();
1780 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1781}
1782
1783SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1784 SelectionDAG &DAG) const {
1785 SDLoc DL(Op);
1786 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1787 const SDValue &Value = SN->getValue();
1788 const SDValue &Base = SN->getBasePtr();
1789 const SDValue &Offset = SN->getOffset();
1790
1791 if (IsWebAssemblyGlobal(Base)) {
1792 if (!Offset->isUndef())
1793 report_fatal_error("unexpected offset when storing to webassembly global",
1794 false);
1795
1796 SDVTList Tys = DAG.getVTList(MVT::Other);
1797 SDValue Ops[] = {SN->getChain(), Value, Base};
1798 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1799 SN->getMemoryVT(), SN->getMemOperand());
1800 }
1801
1802 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1803 if (!Offset->isUndef())
1804 report_fatal_error("unexpected offset when storing to webassembly local",
1805 false);
1806
1807 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1808 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1809 SDValue Ops[] = {SN->getChain(), Idx, Value};
1810 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1811 }
1812
1815 "Encountered an unlowerable store to the wasm_var address space",
1816 false);
1817
1818 return Op;
1819}
1820
1821SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1822 SelectionDAG &DAG) const {
1823 SDLoc DL(Op);
1824 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1825 const SDValue &Base = LN->getBasePtr();
1826 const SDValue &Offset = LN->getOffset();
1827
1828 if (IsWebAssemblyGlobal(Base)) {
1829 if (!Offset->isUndef())
1830 report_fatal_error(
1831 "unexpected offset when loading from webassembly global", false);
1832
1833 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1834 SDValue Ops[] = {LN->getChain(), Base};
1835 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1836 LN->getMemoryVT(), LN->getMemOperand());
1837 }
1838
1839 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1840 if (!Offset->isUndef())
1842 "unexpected offset when loading from webassembly local", false);
1843
1844 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1845 EVT LocalVT = LN->getValueType(0);
1846 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1847 {LN->getChain(), Idx});
1848 }
1849
1852 "Encountered an unlowerable load from the wasm_var address space",
1853 false);
1854
1855 return Op;
1856}
1857
1858SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1859 SelectionDAG &DAG) const {
1860 assert(Subtarget->hasWideArithmetic());
1861 assert(Op.getValueType() == MVT::i64);
1862 SDLoc DL(Op);
1863 unsigned Opcode;
1864 switch (Op.getOpcode()) {
1865 case ISD::UMUL_LOHI:
1866 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1867 break;
1868 case ISD::SMUL_LOHI:
1869 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1870 break;
1871 default:
1872 llvm_unreachable("unexpected opcode");
1873 }
1874 SDValue LHS = Op.getOperand(0);
1875 SDValue RHS = Op.getOperand(1);
1876 SDValue Lo =
1877 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1878 SDValue Hi(Lo.getNode(), 1);
1879 SDValue Ops[] = {Lo, Hi};
1880 return DAG.getMergeValues(Ops, DL);
1881}
1882
1883// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1884//
1885// This enables generating a single wasm instruction for this operation where
1886// the upper half of both operands are constant zeros. The upper half of the
1887// result is then whether the overflow happened.
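// As a rough sketch, (uaddo i64 %a, %b) is lowered here to
//   {%lo, %hi} = I64_ADD128 %a, 0, %b, 0
//   %carry     = (i32 (truncate %hi))
// so a single add128 yields both the sum and the carry bit.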
1888SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1889 SelectionDAG &DAG) const {
1890 assert(Subtarget->hasWideArithmetic());
1891 assert(Op.getValueType() == MVT::i64);
1892 assert(Op.getOpcode() == ISD::UADDO);
1893 SDLoc DL(Op);
1894 SDValue LHS = Op.getOperand(0);
1895 SDValue RHS = Op.getOperand(1);
1896 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1897 SDValue Result =
1898 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1899 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1900 SDValue CarryI64(Result.getNode(), 1);
1901 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1902 SDValue Ops[] = {Result, CarryI32};
1903 return DAG.getMergeValues(Ops, DL);
1904}
1905
1906SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1907 SelectionDAG &DAG) const {
1908 assert(Subtarget->hasWideArithmetic());
1909 assert(N->getValueType(0) == MVT::i128);
1910 SDLoc DL(N);
1911 unsigned Opcode;
1912 switch (N->getOpcode()) {
1913 case ISD::ADD:
1914 Opcode = WebAssemblyISD::I64_ADD128;
1915 break;
1916 case ISD::SUB:
1917 Opcode = WebAssemblyISD::I64_SUB128;
1918 break;
1919 default:
1920 llvm_unreachable("unexpected opcode");
1921 }
1922 SDValue LHS = N->getOperand(0);
1923 SDValue RHS = N->getOperand(1);
1924
1925 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1926 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1927 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1928 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1929 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1930 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1931 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1932 LHS_0, LHS_1, RHS_0, RHS_1);
1933 SDValue Result_HI(Result_LO.getNode(), 1);
1934 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1935}
1936
1937SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1938 SelectionDAG &DAG) const {
1939 SDValue Src = Op.getOperand(2);
1940 if (isa<FrameIndexSDNode>(Src.getNode())) {
1941 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1942 // the FI to some LEA-like instruction, but since we don't have that, we
1943 // need to insert some kind of instruction that can take an FI operand and
1944 // produce a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1945 // local.copy between Op and its FI operand.
1946 SDValue Chain = Op.getOperand(0);
1947 SDLoc DL(Op);
1948 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1949 EVT VT = Src.getValueType();
1950 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1951 : WebAssembly::COPY_I64,
1952 DL, VT, Src),
1953 0);
1954 return Op.getNode()->getNumValues() == 1
1955 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1956 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1957 Op.getNumOperands() == 4 ? Op.getOperand(3)
1958 : SDValue());
1959 }
1960 return SDValue();
1961}
1962
1963SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1964 SelectionDAG &DAG) const {
1965 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1966 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1967}
1968
1969SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1970 SelectionDAG &DAG) const {
1971 SDLoc DL(Op);
1972
1973 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1974 fail(DL, DAG,
1975 "Non-Emscripten WebAssembly hasn't implemented "
1976 "__builtin_return_address");
1977 return SDValue();
1978 }
1979
1980 unsigned Depth = Op.getConstantOperandVal(0);
1981 MakeLibCallOptions CallOptions;
1982 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1983 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1984 .first;
1985}
1986
1987SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1988 SelectionDAG &DAG) const {
1989 // Non-zero depths are not supported by WebAssembly currently. Use the
1990 // legalizer's default expansion, which is to return 0 (what this function is
1991 // documented to do).
1992 if (Op.getConstantOperandVal(0) > 0)
1993 return SDValue();
1994
1996 EVT VT = Op.getValueType();
1997 Register FP =
1998 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1999 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2000}
2001
2002SDValue
2003WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2004 SelectionDAG &DAG) const {
2005 SDLoc DL(Op);
2006 const auto *GA = cast<GlobalAddressSDNode>(Op);
2007
2008 MachineFunction &MF = DAG.getMachineFunction();
2009 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2010 report_fatal_error("cannot use thread-local storage without bulk memory",
2011 false);
2012
2013 const GlobalValue *GV = GA->getGlobal();
2014
2015 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2016 // on other targets, if we have thread-local storage, only the local-exec
2017 // model is possible.
2018 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2019 ? GV->getThreadLocalMode()
2020 : GlobalValue::LocalExecTLSModel;
2021
2022 // Unsupported TLS modes
2023 assert(model != GlobalValue::NotThreadLocal);
2024 assert(model != GlobalValue::InitialExecTLSModel);
2025
2026 if (model == GlobalValue::LocalExecTLSModel ||
2027 model == GlobalValue::LocalDynamicTLSModel ||
2028 (model == GlobalValue::GeneralDynamicTLSModel &&
2029 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2030 // For DSO-local TLS variables we use offset from __tls_base
2031
2032 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2033 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2034 : WebAssembly::GLOBAL_GET_I32;
2035 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2036
2037 SDValue BaseAddr(
2038 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2039 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2040 0);
2041
2042 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2043 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2044 SDValue SymOffset =
2045 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2046
2047 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2048 }
2049
2051
2052 EVT VT = Op.getValueType();
2053 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2054 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2055 GA->getOffset(),
2056 WebAssemblyII::MO_GOT_TLS));
2057}
2058
2059SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2060 SelectionDAG &DAG) const {
2061 SDLoc DL(Op);
2062 const auto *GA = cast<GlobalAddressSDNode>(Op);
2063 EVT VT = Op.getValueType();
2064 assert(GA->getTargetFlags() == 0 &&
2065 "Unexpected target flags on generic GlobalAddressSDNode");
2067 fail(DL, DAG, "Invalid address space for WebAssembly target");
2068
2069 unsigned OperandFlags = 0;
2070 const GlobalValue *GV = GA->getGlobal();
2071 // Since WebAssembly tables cannot yet be shared across modules, we don't
2072 // need special treatment for tables in PIC mode.
2073 if (isPositionIndependent() &&
2074 !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
2075 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2076 MachineFunction &MF = DAG.getMachineFunction();
2077 MVT PtrVT = getPointerTy(MF.getDataLayout());
2078 const char *BaseName;
2079 if (GV->getValueType()->isFunctionTy()) {
2080 BaseName = MF.createExternalSymbolName("__table_base");
2081 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2082 } else {
2083 BaseName = MF.createExternalSymbolName("__memory_base");
2084 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2085 }
2086 SDValue BaseAddr =
2087 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2088 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2089
2090 SDValue SymAddr = DAG.getNode(
2091 WebAssemblyISD::WrapperREL, DL, VT,
2092 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2093 OperandFlags));
2094
2095 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2096 }
2097 OperandFlags = WebAssemblyII::MO_GOT;
2098 }
2099
2100 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2101 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2102 GA->getOffset(), OperandFlags));
2103}
2104
2105SDValue
2106WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2107 SelectionDAG &DAG) const {
2108 SDLoc DL(Op);
2109 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2110 EVT VT = Op.getValueType();
2111 assert(ES->getTargetFlags() == 0 &&
2112 "Unexpected target flags on generic ExternalSymbolSDNode");
2113 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2114 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2115}
2116
2117SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2118 SelectionDAG &DAG) const {
2119 // There's no need for a Wrapper node because we always incorporate a jump
2120 // table operand into a BR_TABLE instruction, rather than ever
2121 // materializing it in a register.
2122 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2123 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2124 JT->getTargetFlags());
2125}
2126
2127SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2128 SelectionDAG &DAG) const {
2129 SDLoc DL(Op);
2130 SDValue Chain = Op.getOperand(0);
2131 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2132 SDValue Index = Op.getOperand(2);
2133 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2134
2135 SmallVector<SDValue, 8> Ops;
2136 Ops.push_back(Chain);
2137 Ops.push_back(Index);
2138
2139 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2140 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2141
2142 // Add an operand for each case.
2143 for (auto *MBB : MBBs)
2144 Ops.push_back(DAG.getBasicBlock(MBB));
2145
2146 // Add the first MBB as a dummy default target for now. This will be replaced
2147 // with the proper default target (and the preceding range check eliminated)
2148 // if possible by WebAssemblyFixBrTableDefaults.
2149 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2150 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2151}
2152
2153SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2154 SelectionDAG &DAG) const {
2155 SDLoc DL(Op);
2156 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2157
2158 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2159 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2160
2161 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2162 MFI->getVarargBufferVreg(), PtrVT);
2163 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2164 MachinePointerInfo(SV));
2165}
2166
2167SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2168 SelectionDAG &DAG) const {
2169 MachineFunction &MF = DAG.getMachineFunction();
2170 unsigned IntNo;
2171 switch (Op.getOpcode()) {
2172 case ISD::INTRINSIC_VOID:
2173 case ISD::INTRINSIC_W_CHAIN:
2174 IntNo = Op.getConstantOperandVal(1);
2175 break;
2176 case ISD::INTRINSIC_WO_CHAIN:
2177 IntNo = Op.getConstantOperandVal(0);
2178 break;
2179 default:
2180 llvm_unreachable("Invalid intrinsic");
2181 }
2182 SDLoc DL(Op);
2183
2184 switch (IntNo) {
2185 default:
2186 return SDValue(); // Don't custom lower most intrinsics.
2187
2188 case Intrinsic::wasm_lsda: {
2189 auto PtrVT = getPointerTy(MF.getDataLayout());
2190 const char *SymName = MF.createExternalSymbolName(
2191 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2192 if (isPositionIndependent()) {
2193 SDValue Node = DAG.getTargetExternalSymbol(
2194 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2195 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2196 SDValue BaseAddr =
2197 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2198 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2199 SDValue SymAddr =
2200 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2201 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2202 }
2203 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2204 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2205 }
2206
2207 case Intrinsic::wasm_shuffle: {
2208 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2209 SDValue Ops[18];
2210 size_t OpIdx = 0;
2211 Ops[OpIdx++] = Op.getOperand(1);
2212 Ops[OpIdx++] = Op.getOperand(2);
2213 while (OpIdx < 18) {
2214 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2215 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2216 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2217 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2218 } else {
2219 Ops[OpIdx++] = MaskIdx;
2220 }
2221 }
2222 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2223 }
2224
2225 case Intrinsic::thread_pointer: {
2226 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2227 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2228 : WebAssembly::GLOBAL_GET_I32;
2229 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2230 return SDValue(
2231 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2232 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2233 0);
2234 }
2235 }
2236}
2237
2238SDValue
2239WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2240 SelectionDAG &DAG) const {
2241 SDLoc DL(Op);
2242 // If sign extension operations are disabled, allow sext_inreg only if operand
2243 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2244 // extension operations, but allowing sext_inreg in this context lets us have
2245 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2246 // everywhere would be simpler in this file, but would necessitate large and
2247 // brittle patterns to undo the expansion and select extract_lane_s
2248 // instructions.
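// For example, (sext_inreg (extract_vector_elt (v16i8 $v), $i), i8) can then
// be selected as a single i8x16.extract_lane_s rather than an extract plus a
// separate extend.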
2249 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2250 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2251 return SDValue();
2252
2253 const SDValue &Extract = Op.getOperand(0);
2254 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2255 if (VecT.getVectorElementType().getSizeInBits() > 32)
2256 return SDValue();
2257 MVT ExtractedLaneT =
2258 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2259 MVT ExtractedVecT =
2260 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2261 if (ExtractedVecT == VecT)
2262 return Op;
2263
2264 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2265 const SDNode *Index = Extract.getOperand(1).getNode();
2266 if (!isa<ConstantSDNode>(Index))
2267 return SDValue();
2268 unsigned IndexVal = Index->getAsZExtVal();
2269 unsigned Scale =
2270 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2271 assert(Scale > 1);
2272 SDValue NewIndex =
2273 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2274 SDValue NewExtract = DAG.getNode(
2275 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
2276 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2277 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2278 Op.getOperand(1));
2279}
2280
2281static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2282 SelectionDAG &DAG) {
2283 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2284 return SDValue();
2285
2286 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2287 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2288 "expected extend_low");
2289 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2290
2291 ArrayRef<int> Mask = Shuffle->getMask();
2292 // Look for a shuffle which moves from the high half to the low half.
2293 size_t FirstIdx = Mask.size() / 2;
2294 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2295 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2296 return SDValue();
2297 }
2298 }
2299
2300 SDLoc DL(Op);
2301 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2302 ? WebAssemblyISD::EXTEND_HIGH_S
2303 : WebAssemblyISD::EXTEND_HIGH_U;
2304 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2305}
2306
2307SDValue
2308WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2309 SelectionDAG &DAG) const {
2310 SDLoc DL(Op);
2311 EVT VT = Op.getValueType();
2312 SDValue Src = Op.getOperand(0);
2313 EVT SrcVT = Src.getValueType();
2314
2315 if (SrcVT.getVectorElementType() == MVT::i1 ||
2316 SrcVT.getVectorElementType() == MVT::i64)
2317 return SDValue();
2318
2319 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2320 "Unexpected extension factor.");
2321 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2322
2323 if (Scale != 2 && Scale != 4 && Scale != 8)
2324 return SDValue();
2325
2326 unsigned Ext;
2327 switch (Op.getOpcode()) {
2328 default:
2329 llvm_unreachable("unexpected opcode");
2330 case ISD::ANY_EXTEND_VECTOR_INREG:
2331 case ISD::ZERO_EXTEND_VECTOR_INREG:
2332 Ext = WebAssemblyISD::EXTEND_LOW_U;
2333 break;
2334 case ISD::SIGN_EXTEND_VECTOR_INREG:
2335 Ext = WebAssemblyISD::EXTEND_LOW_S;
2336 break;
2337 }
2338
2339 if (Scale == 2) {
2340 // See if we can use EXTEND_HIGH.
2341 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2342 return ExtendHigh;
2343 }
2344
2345 SDValue Ret = Src;
2346 while (Scale != 1) {
2347 Ret = DAG.getNode(Ext, DL,
2348 Ret.getValueType()
2349 .widenIntegerVectorElementType(*DAG.getContext())
2350 .getHalfNumVectorElementsVT(*DAG.getContext()),
2351 Ret);
2352 Scale /= 2;
2353 }
2354 assert(Ret.getValueType() == VT);
2355 return Ret;
2356}
2357
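// Lower a v2f64 BUILD_VECTOR whose lanes convert or promote the two low lanes
// of another vector into a single convert_low/promote_low operation. Roughly:
//   (build_vector (f64 (sint_to_fp (extract_elt (v4i32 $v), 0))),
//                 (f64 (sint_to_fp (extract_elt (v4i32 $v), 1))))
// becomes (f64x2.convert_low_i32x4_s $v).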
2358static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2359 SDLoc DL(Op);
2360 if (Op.getValueType() != MVT::v2f64)
2361 return SDValue();
2362
2363 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2364 unsigned &Index) -> bool {
2365 switch (Op.getOpcode()) {
2366 case ISD::SINT_TO_FP:
2367 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2368 break;
2369 case ISD::UINT_TO_FP:
2370 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2371 break;
2372 case ISD::FP_EXTEND:
2373 Opcode = WebAssemblyISD::PROMOTE_LOW;
2374 break;
2375 default:
2376 return false;
2377 }
2378
2379 auto ExtractVector = Op.getOperand(0);
2380 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2381 return false;
2382
2383 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2384 return false;
2385
2386 SrcVec = ExtractVector.getOperand(0);
2387 Index = ExtractVector.getConstantOperandVal(1);
2388 return true;
2389 };
2390
2391 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2392 SDValue LHSSrcVec, RHSSrcVec;
2393 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2394 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2395 return SDValue();
2396
2397 if (LHSOpcode != RHSOpcode)
2398 return SDValue();
2399
2400 MVT ExpectedSrcVT;
2401 switch (LHSOpcode) {
2402 case WebAssemblyISD::CONVERT_LOW_S:
2403 case WebAssemblyISD::CONVERT_LOW_U:
2404 ExpectedSrcVT = MVT::v4i32;
2405 break;
2406 case WebAssemblyISD::PROMOTE_LOW:
2407 ExpectedSrcVT = MVT::v4f32;
2408 break;
2409 }
2410 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2411 return SDValue();
2412
2413 auto Src = LHSSrcVec;
2414 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2415 // Shuffle the source vector so that the converted lanes are the low lanes.
2416 Src = DAG.getVectorShuffle(
2417 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2418 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2419 }
2420 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2421}
2422
2423SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2424 SelectionDAG &DAG) const {
2425 MVT VT = Op.getSimpleValueType();
2426 if (VT == MVT::v8f16) {
2427 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2428 // FP16 type, so cast them to I16s.
2429 MVT IVT = VT.changeVectorElementType(MVT::i16);
2430 SmallVector<SDValue, 8> NewOps;
2431 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2432 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2433 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2434 return DAG.getBitcast(VT, Res);
2435 }
2436
2437 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2438 return ConvertLow;
2439
2440 SDLoc DL(Op);
2441 const EVT VecT = Op.getValueType();
2442 const EVT LaneT = Op.getOperand(0).getValueType();
2443 const size_t Lanes = Op.getNumOperands();
2444 bool CanSwizzle = VecT == MVT::v16i8;
2445
2446 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2447 // possible number of lanes at once followed by a sequence of replace_lane
2448 // instructions to individually initialize any remaining lanes.
2449
2450 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2451 // swizzled lanes should be given greater weight.
2452
2453 // TODO: Investigate looping rather than always extracting/replacing specific
2454 // lanes to fill gaps.
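// For example, a v4i32 build_vector with three identical lanes and one
// differing lane will typically become an i32x4.splat followed by a single
// i32x4.replace_lane for the remaining lane.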
2455
2456 auto IsConstant = [](const SDValue &V) {
2457 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2458 };
2459
2460 // Returns the source vector and index vector pair if they exist. Checks for:
2461 // (extract_vector_elt
2462 // $src,
2463 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2464 // )
2465 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2466 auto Bail = std::make_pair(SDValue(), SDValue());
2467 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2468 return Bail;
2469 const SDValue &SwizzleSrc = Lane->getOperand(0);
2470 const SDValue &IndexExt = Lane->getOperand(1);
2471 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2472 return Bail;
2473 const SDValue &Index = IndexExt->getOperand(0);
2474 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2475 return Bail;
2476 const SDValue &SwizzleIndices = Index->getOperand(0);
2477 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2478 SwizzleIndices.getValueType() != MVT::v16i8 ||
2479 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2480 Index->getConstantOperandVal(1) != I)
2481 return Bail;
2482 return std::make_pair(SwizzleSrc, SwizzleIndices);
2483 };
2484
2485 // If the lane is extracted from another vector at a constant index, return
2486 // that vector. The source vector must not have more lanes than the dest
2487 // because the shufflevector indices are in terms of the destination lanes and
2488 // would not be able to address the smaller individual source lanes.
2489 auto GetShuffleSrc = [&](const SDValue &Lane) {
2490 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2491 return SDValue();
2492 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2493 return SDValue();
2494 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2495 VecT.getVectorNumElements())
2496 return SDValue();
2497 return Lane->getOperand(0);
2498 };
2499
2500 using ValueEntry = std::pair<SDValue, size_t>;
2501 SmallVector<ValueEntry, 16> SplatValueCounts;
2502
2503 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2504 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2505
2506 using ShuffleEntry = std::pair<SDValue, size_t>;
2507 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2508
2509 auto AddCount = [](auto &Counts, const auto &Val) {
2510 auto CountIt =
2511 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2512 if (CountIt == Counts.end()) {
2513 Counts.emplace_back(Val, 1);
2514 } else {
2515 CountIt->second++;
2516 }
2517 };
2518
2519 auto GetMostCommon = [](auto &Counts) {
2520 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2521 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2522 return *CommonIt;
2523 };
2524
2525 size_t NumConstantLanes = 0;
2526
2527 // Count eligible lanes for each type of vector creation op
2528 for (size_t I = 0; I < Lanes; ++I) {
2529 const SDValue &Lane = Op->getOperand(I);
2530 if (Lane.isUndef())
2531 continue;
2532
2533 AddCount(SplatValueCounts, Lane);
2534
2535 if (IsConstant(Lane))
2536 NumConstantLanes++;
2537 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2538 AddCount(ShuffleCounts, ShuffleSrc);
2539 if (CanSwizzle) {
2540 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2541 if (SwizzleSrcs.first)
2542 AddCount(SwizzleCounts, SwizzleSrcs);
2543 }
2544 }
2545
2546 SDValue SplatValue;
2547 size_t NumSplatLanes;
2548 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2549
2550 SDValue SwizzleSrc;
2551 SDValue SwizzleIndices;
2552 size_t NumSwizzleLanes = 0;
2553 if (SwizzleCounts.size())
2554 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2555 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2556
2557 // Shuffles can draw from up to two vectors, so find the two most common
2558 // sources.
2559 SDValue ShuffleSrc1, ShuffleSrc2;
2560 size_t NumShuffleLanes = 0;
2561 if (ShuffleCounts.size()) {
2562 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2563 llvm::erase_if(ShuffleCounts,
2564 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2565 }
2566 if (ShuffleCounts.size()) {
2567 size_t AdditionalShuffleLanes;
2568 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2569 GetMostCommon(ShuffleCounts);
2570 NumShuffleLanes += AdditionalShuffleLanes;
2571 }
2572
2573 // Predicate returning true if the lane is properly initialized by the
2574 // original instruction
2575 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2576 SDValue Result;
2577 // Prefer swizzles over shuffles over vector consts over splats
2578 if (NumSwizzleLanes >= NumShuffleLanes &&
2579 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2580 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2581 SwizzleIndices);
2582 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2583 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2584 return Swizzled == GetSwizzleSrcs(I, Lane);
2585 };
2586 } else if (NumShuffleLanes >= NumConstantLanes &&
2587 NumShuffleLanes >= NumSplatLanes) {
2588 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2589 size_t DestLaneCount = VecT.getVectorNumElements();
2590 size_t Scale1 = 1;
2591 size_t Scale2 = 1;
2592 SDValue Src1 = ShuffleSrc1;
2593 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2594 if (Src1.getValueType() != VecT) {
2595 size_t LaneSize =
2596 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2597 assert(LaneSize > DestLaneSize);
2598 Scale1 = LaneSize / DestLaneSize;
2599 Src1 = DAG.getBitcast(VecT, Src1);
2600 }
2601 if (Src2.getValueType() != VecT) {
2602 size_t LaneSize =
2604 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2605 assert(LaneSize > DestLaneSize);
2605 Scale2 = LaneSize / DestLaneSize;
2606 Src2 = DAG.getBitcast(VecT, Src2);
2607 }
2608
2609 int Mask[16];
2610 assert(DestLaneCount <= 16);
2611 for (size_t I = 0; I < DestLaneCount; ++I) {
2612 const SDValue &Lane = Op->getOperand(I);
2613 SDValue Src = GetShuffleSrc(Lane);
2614 if (Src == ShuffleSrc1) {
2615 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2616 } else if (Src && Src == ShuffleSrc2) {
2617 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2618 } else {
2619 Mask[I] = -1;
2620 }
2621 }
2622 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2623 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2624 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2625 auto Src = GetShuffleSrc(Lane);
2626 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2627 };
2628 } else if (NumConstantLanes >= NumSplatLanes) {
2629 SmallVector<SDValue, 16> ConstLanes;
2630 for (const SDValue &Lane : Op->op_values()) {
2631 if (IsConstant(Lane)) {
2632 // Values may need to be fixed so that they will sign extend to be
2633 // within the expected range during ISel. Check whether the value is in
2634 // bounds based on the lane bit width and if it is out of bounds, lop
2635 // off the extra bits.
2636 uint64_t LaneBits = 128 / Lanes;
2637 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2638 ConstLanes.push_back(DAG.getConstant(
2639 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2640 SDLoc(Lane), LaneT));
2641 } else {
2642 ConstLanes.push_back(Lane);
2643 }
2644 } else if (LaneT.isFloatingPoint()) {
2645 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2646 } else {
2647 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2648 }
2649 }
2650 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2651 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2652 return IsConstant(Lane);
2653 };
2654 } else {
2655 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2656 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2657 (DestLaneSize == 32 || DestLaneSize == 64)) {
2658 // Could be selected to load_zero.
2659 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2660 } else {
2661 // Use a splat (which might be selected as a load splat)
2662 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2663 }
2664 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2665 return Lane == SplatValue;
2666 };
2667 }
2668
2669 assert(Result);
2670 assert(IsLaneConstructed);
2671
2672 // Add replace_lane instructions for any unhandled values
2673 for (size_t I = 0; I < Lanes; ++I) {
2674 const SDValue &Lane = Op->getOperand(I);
2675 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2676 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2677 DAG.getConstant(I, DL, MVT::i32));
2678 }
2679
2680 return Result;
2681}
2682
2683SDValue
2684WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2685 SelectionDAG &DAG) const {
2686 SDLoc DL(Op);
2687 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2688 MVT VecType = Op.getOperand(0).getSimpleValueType();
2689 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2690 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2691
2692 // Space for two vector args and sixteen mask indices
2693 SDValue Ops[18];
2694 size_t OpIdx = 0;
2695 Ops[OpIdx++] = Op.getOperand(0);
2696 Ops[OpIdx++] = Op.getOperand(1);
2697
2698 // Expand mask indices to byte indices and materialize them as operands
2699 for (int M : Mask) {
2700 for (size_t J = 0; J < LaneBytes; ++J) {
2701 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2702 // whole lane of vector input, to allow further reduction by the VM, e.g.
2703 // matching an 8x16 byte shuffle to an equivalent, cheaper 32x4 shuffle.
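// For example, with 4-byte lanes a mask entry of 2 expands to the byte
// indices {8, 9, 10, 11}, while an undef entry expands to {0, 1, 2, 3}.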
2704 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2705 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2706 }
2707 }
2708
2709 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2710}
2711
2712SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2713 SelectionDAG &DAG) const {
2714 SDLoc DL(Op);
2715 // The legalizer does not know how to expand the unsupported comparison modes
2716 // of i64x2 vectors, so we manually unroll them here.
2717 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2718 SmallVector<SDValue, 2> LHS, RHS;
2719 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2720 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2721 const SDValue &CC = Op->getOperand(2);
2722 auto MakeLane = [&](unsigned I) {
2723 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2724 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2725 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2726 };
2727 return DAG.getBuildVector(Op->getValueType(0), DL,
2728 {MakeLane(0), MakeLane(1)});
2729}
2730
2731SDValue
2732WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2733 SelectionDAG &DAG) const {
2734 // Allow constant lane indices, expand variable lane indices
2735 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2736 if (isa<ConstantSDNode>(IdxNode)) {
2737 // Ensure the index type is i32 to match the tablegen patterns
2738 uint64_t Idx = IdxNode->getAsZExtVal();
2739 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2740 Ops[Op.getNumOperands() - 1] =
2741 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2742 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2743 }
2744 // Perform default expansion
2745 return SDValue();
2746}
2747
2748static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2749 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2750 // 32-bit and 64-bit unrolled shifts will have proper semantics
2751 if (LaneT.bitsGE(MVT::i32))
2752 return DAG.UnrollVectorOp(Op.getNode());
2753 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2754 SDLoc DL(Op);
2755 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2756 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2757 unsigned ShiftOpcode = Op.getOpcode();
2758 SmallVector<SDValue, 16> ShiftedElements;
2759 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2760 SmallVector<SDValue, 16> ShiftElements;
2761 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2762 SmallVector<SDValue, 16> UnrolledOps;
2763 for (size_t i = 0; i < NumLanes; ++i) {
2764 SDValue MaskedShiftValue =
2765 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2766 SDValue ShiftedValue = ShiftedElements[i];
2767 if (ShiftOpcode == ISD::SRA)
2768 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2769 ShiftedValue, DAG.getValueType(LaneT));
2770 UnrolledOps.push_back(
2771 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2772 }
2773 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2774}
2775
2776SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2777 SelectionDAG &DAG) const {
2778 SDLoc DL(Op);
2779
2780 // Only manually lower vector shifts
2781 assert(Op.getSimpleValueType().isVector());
2782
2783 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2784 auto ShiftVal = Op.getOperand(1);
2785
2786 // Try to skip bitmask operation since it is implied inside shift instruction
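// For example, in (shl (v4i32 $v), (and (splat $x), (splat 31))) the `and`
// can be dropped, since the Wasm shift already uses the shift amount modulo
// the lane width.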
2787 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2788 if (MaskOp.getOpcode() != ISD::AND)
2789 return MaskOp;
2790 SDValue LHS = MaskOp.getOperand(0);
2791 SDValue RHS = MaskOp.getOperand(1);
2792 if (MaskOp.getValueType().isVector()) {
2793 APInt MaskVal;
2794 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2795 std::swap(LHS, RHS);
2796
2797 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2798 MaskVal == MaskBits)
2799 MaskOp = LHS;
2800 } else {
2801 if (!isa<ConstantSDNode>(RHS.getNode()))
2802 std::swap(LHS, RHS);
2803
2804 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2805 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2806 MaskOp = LHS;
2807 }
2808
2809 return MaskOp;
2810 };
2811
2812 // Skip vector and operation
2813 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2814 ShiftVal = DAG.getSplatValue(ShiftVal);
2815 if (!ShiftVal)
2816 return unrollVectorShift(Op, DAG);
2817
2818 // Skip scalar and operation
2819 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2820 // Use anyext because none of the high bits can affect the shift
2821 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2822
2823 unsigned Opcode;
2824 switch (Op.getOpcode()) {
2825 case ISD::SHL:
2826 Opcode = WebAssemblyISD::VEC_SHL;
2827 break;
2828 case ISD::SRA:
2829 Opcode = WebAssemblyISD::VEC_SHR_S;
2830 break;
2831 case ISD::SRL:
2832 Opcode = WebAssemblyISD::VEC_SHR_U;
2833 break;
2834 default:
2835 llvm_unreachable("unexpected opcode");
2836 }
2837
2838 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2839}
2840
2841SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2842 SelectionDAG &DAG) const {
2843 EVT ResT = Op.getValueType();
2844 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2845
2846 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2847 (SatVT == MVT::i32 || SatVT == MVT::i64))
2848 return Op;
2849
2850 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2851 return Op;
2852
2853 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2854 return Op;
2855
2856 return SDValue();
2857}
2858
2859//===----------------------------------------------------------------------===//
2860// Custom DAG combine hooks
2861//===----------------------------------------------------------------------===//
2862static SDValue
2863performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2864 auto &DAG = DCI.DAG;
2865 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2866
2867 // Hoist vector bitcasts that don't change the number of lanes out of unary
2868 // shuffles, where they are less likely to get in the way of other combines.
2869 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2870 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2871 SDValue Bitcast = N->getOperand(0);
2872 if (Bitcast.getOpcode() != ISD::BITCAST)
2873 return SDValue();
2874 if (!N->getOperand(1).isUndef())
2875 return SDValue();
2876 SDValue CastOp = Bitcast.getOperand(0);
2877 EVT SrcType = CastOp.getValueType();
2878 EVT DstType = Bitcast.getValueType();
2879 if (!SrcType.is128BitVector() ||
2880 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2881 return SDValue();
2882 SDValue NewShuffle = DAG.getVectorShuffle(
2883 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2884 return DAG.getBitcast(DstType, NewShuffle);
2885}
2886
2887/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2888/// split up into scalar instructions during legalization, and the vector
2889/// extending instructions are selected in performVectorExtendCombine below.
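/// For instance, (v4f32 (uint_to_fp (v4i8 $v))) is rewritten here as
/// (v4f32 (uint_to_fp (v4i32 (zero_extend $v)))) so that the extend can later
/// be matched to the SIMD extend_low/extend_high instructions.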
2890static SDValue
2891performVectorExtendToFPCombine(SDNode *N,
2892 TargetLowering::DAGCombinerInfo &DCI) {
2893 auto &DAG = DCI.DAG;
2894 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2895 N->getOpcode() == ISD::SINT_TO_FP);
2896
2897 EVT InVT = N->getOperand(0)->getValueType(0);
2898 EVT ResVT = N->getValueType(0);
2899 MVT ExtVT;
2900 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2901 ExtVT = MVT::v4i32;
2902 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2903 ExtVT = MVT::v2i32;
2904 else
2905 return SDValue();
2906
2907 unsigned Op =
2908 N->getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2909 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2910 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2911}
2912
2913static SDValue
2914performVectorNonNegToFPCombine(SDNode *N,
2915 TargetLowering::DAGCombinerInfo &DCI) {
2916 auto &DAG = DCI.DAG;
2917
2918 SDNodeFlags Flags = N->getFlags();
2919 SDValue Op0 = N->getOperand(0);
2920 EVT VT = N->getValueType(0);
2921
2922 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2923 // Depending on the target (runtime) backend, this might be performance
2924 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2925 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2926 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2927 }
2928
2929 return SDValue();
2930}
2931
2932static SDValue
2933performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2934 auto &DAG = DCI.DAG;
2935 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2936 N->getOpcode() == ISD::ZERO_EXTEND);
2937
2938 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2939 // possible before the extract_subvector can be expanded.
2940 auto Extract = N->getOperand(0);
2941 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2942 return SDValue();
2943 auto Source = Extract.getOperand(0);
2944 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2945 if (IndexNode == nullptr)
2946 return SDValue();
2947 auto Index = IndexNode->getZExtValue();
2948
2949 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2950 // extracted subvector is the low or high half of its source.
2951 EVT ResVT = N->getValueType(0);
2952 if (ResVT == MVT::v8i16) {
2953 if (Extract.getValueType() != MVT::v8i8 ||
2954 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2955 return SDValue();
2956 } else if (ResVT == MVT::v4i32) {
2957 if (Extract.getValueType() != MVT::v4i16 ||
2958 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2959 return SDValue();
2960 } else if (ResVT == MVT::v2i64) {
2961 if (Extract.getValueType() != MVT::v2i32 ||
2962 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2963 return SDValue();
2964 } else {
2965 return SDValue();
2966 }
2967
2968 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2969 bool IsLow = Index == 0;
2970
2971 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2972 : WebAssemblyISD::EXTEND_HIGH_S)
2973 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2974 : WebAssemblyISD::EXTEND_HIGH_U);
2975
2976 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2977}
2978
2979static SDValue
2980performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2981 auto &DAG = DCI.DAG;
2982
2983 auto GetWasmConversionOp = [](unsigned Op) {
2984 switch (Op) {
2985 case ISD::FP_TO_SINT_SAT:
2986 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
2987 case ISD::FP_TO_UINT_SAT:
2988 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2989 case ISD::FP_ROUND:
2990 return WebAssemblyISD::DEMOTE_ZERO;
2991 }
2992 llvm_unreachable("unexpected op");
2993 };
2994
2995 auto IsZeroSplat = [](SDValue SplatVal) {
2996 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
2997 APInt SplatValue, SplatUndef;
2998 unsigned SplatBitSize;
2999 bool HasAnyUndefs;
3000 // Endianness doesn't matter in this context because we are looking for
3001 // an all-zero value.
3002 return Splat &&
3003 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3004 HasAnyUndefs) &&
3005 SplatValue == 0;
3006 };
3007
3008 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3009 // Combine this:
3010 //
3011 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3012 //
3013 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3014 //
3015 // Or this:
3016 //
3017 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3018 //
3019 // into (f32x4.demote_zero_f64x2 $x).
3020 EVT ResVT;
3021 EVT ExpectedConversionType;
3022 auto Conversion = N->getOperand(0);
3023 auto ConversionOp = Conversion.getOpcode();
3024 switch (ConversionOp) {
3025 case ISD::FP_TO_SINT_SAT:
3026 case ISD::FP_TO_UINT_SAT:
3027 ResVT = MVT::v4i32;
3028 ExpectedConversionType = MVT::v2i32;
3029 break;
3030 case ISD::FP_ROUND:
3031 ResVT = MVT::v4f32;
3032 ExpectedConversionType = MVT::v2f32;
3033 break;
3034 default:
3035 return SDValue();
3036 }
3037
3038 if (N->getValueType(0) != ResVT)
3039 return SDValue();
3040
3041 if (Conversion.getValueType() != ExpectedConversionType)
3042 return SDValue();
3043
3044 auto Source = Conversion.getOperand(0);
3045 if (Source.getValueType() != MVT::v2f64)
3046 return SDValue();
3047
3048 if (!IsZeroSplat(N->getOperand(1)) ||
3049 N->getOperand(1).getValueType() != ExpectedConversionType)
3050 return SDValue();
3051
3052 unsigned Op = GetWasmConversionOp(ConversionOp);
3053 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3054 }
3055
3056 // Combine this:
3057 //
3058 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3059 //
3060 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3061 //
3062 // Or this:
3063 //
3064 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3065 //
3066 // into (f32x4.demote_zero_f64x2 $x).
3067 EVT ResVT;
3068 auto ConversionOp = N->getOpcode();
3069 switch (ConversionOp) {
3070 case ISD::FP_TO_SINT_SAT:
3071 case ISD::FP_TO_UINT_SAT:
3072 ResVT = MVT::v4i32;
3073 break;
3074 case ISD::FP_ROUND:
3075 ResVT = MVT::v4f32;
3076 break;
3077 default:
3078 llvm_unreachable("unexpected op");
3079 }
3080
3081 if (N->getValueType(0) != ResVT)
3082 return SDValue();
3083
3084 auto Concat = N->getOperand(0);
3085 if (Concat.getValueType() != MVT::v4f64)
3086 return SDValue();
3087
3088 auto Source = Concat.getOperand(0);
3089 if (Source.getValueType() != MVT::v2f64)
3090 return SDValue();
3091
3092 if (!IsZeroSplat(Concat.getOperand(1)) ||
3093 Concat.getOperand(1).getValueType() != MVT::v2f64)
3094 return SDValue();
3095
3096 unsigned Op = GetWasmConversionOp(ConversionOp);
3097 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3098}
3099
3100// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3101static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3102 const SDLoc &DL, unsigned VectorWidth) {
3103 EVT VT = Vec.getValueType();
3104 EVT ElVT = VT.getVectorElementType();
3105 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3106 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3107 VT.getVectorNumElements() / Factor);
3108
3109 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3110 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3111 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3112
3113 // This is the index of the first element of the VectorWidth-bit chunk
3114 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3115 IdxVal &= ~(ElemsPerChunk - 1);
3116
3117 // If the input is a buildvector just emit a smaller one.
3118 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3119 return DAG.getBuildVector(ResultVT, DL,
3120 Vec->ops().slice(IdxVal, ElemsPerChunk));
3121
3122 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3123 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3124}
3125
3126// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3127// is the expected destination value type after recursion. In is the initial
3128// input. Note that the input should have enough leading zero bits to prevent
3129// NARROW_U from saturating results.
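// For example, truncating v8i32 to v8i16 narrows the two v4i32 halves with a
// single i16x8.narrow_i32x4_u; wider sources (e.g. v16i32 to v16i8) recurse,
// narrowing each half first and then narrowing the concatenated result again.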
3130static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3131 SelectionDAG &DAG) {
3132 EVT SrcVT = In.getValueType();
3133
3134 // No truncation required, we might get here due to recursive calls.
3135 if (SrcVT == DstVT)
3136 return In;
3137
3138 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3139 unsigned NumElems = SrcVT.getVectorNumElements();
3140 if (!isPowerOf2_32(NumElems))
3141 return SDValue();
3142 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3143 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3144
3145 LLVMContext &Ctx = *DAG.getContext();
3146 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3147
3148 // Narrow to the largest type possible:
3149 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3150 EVT InVT = MVT::i16, OutVT = MVT::i8;
3151 if (SrcVT.getScalarSizeInBits() > 16) {
3152 InVT = MVT::i32;
3153 OutVT = MVT::i16;
3154 }
3155 unsigned SubSizeInBits = SrcSizeInBits / 2;
3156 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3157 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3158
3159 // Split lower/upper subvectors.
3160 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3161 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3162
3163 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3164 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3165 Lo = DAG.getBitcast(InVT, Lo);
3166 Hi = DAG.getBitcast(InVT, Hi);
3167 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3168 return DAG.getBitcast(DstVT, Res);
3169 }
3170
3171 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3172 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3173 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3174 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3175
3176 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3177 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3178 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3179}
3180
3181static SDValue performTruncateCombine(SDNode *N,
3182 TargetLowering::DAGCombinerInfo &DCI) {
3183 auto &DAG = DCI.DAG;
3184
3185 SDValue In = N->getOperand(0);
3186 EVT InVT = In.getValueType();
3187 if (!InVT.isSimple())
3188 return SDValue();
3189
3190 EVT OutVT = N->getValueType(0);
3191 if (!OutVT.isVector())
3192 return SDValue();
3193
3194 EVT OutSVT = OutVT.getVectorElementType();
3195 EVT InSVT = InVT.getVectorElementType();
3196 // Currently only cover truncate to v16i8 or v8i16.
3197 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3198 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3199 return SDValue();
3200
3201 SDLoc DL(N);
3202 APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3203 OutVT.getScalarSizeInBits());
3204 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3205 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3206}
3207
3208static SDValue performBitcastCombine(SDNode *N,
3209 TargetLowering::DAGCombinerInfo &DCI) {
3210 using namespace llvm::SDPatternMatch;
3211 auto &DAG = DCI.DAG;
3212 SDLoc DL(N);
3213 SDValue Src = N->getOperand(0);
3214 EVT VT = N->getValueType(0);
3215 EVT SrcVT = Src.getValueType();
3216
3217 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3218 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3219 return SDValue();
3220
3221 unsigned NumElts = SrcVT.getVectorNumElements();
3222 EVT Width = MVT::getIntegerVT(128 / NumElts);
3223
3224 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3225 // ==> bitmask
3226 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3227 return DAG.getZExtOrTrunc(
3228 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3229 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3230 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3231 SrcVT.changeVectorElementType(
3232 *DAG.getContext(), Width))}),
3233 DL, VT);
3234 }
3235
3236 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3237 if (NumElts == 32 || NumElts == 64) {
3238 // Strategy: setcc each v16i8 chunk separately, yielding v16i1 masks.
3239 // Bitcast them to i16, extend them to either i32 or i64.
3240 // Add them together, shifting left by 16 for each chunk.
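// For example, a <32 x i1> result is handled as two v16i8 comparisons, each
// reduced to a 16-bit lane mask, and the two masks are then combined into a
// single i32 with shifts and adds.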
3241 SDValue Concat, SetCCVector;
3242 ISD::CondCode SetCond;
3243
3244 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3245 m_CondCode(SetCond)))))
3246 return SDValue();
3247 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3248 return SDValue();
3249
3250 uint64_t ElementWidth =
3251 SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3252
3253 SmallVector<SDValue> VectorsToShuffle;
3254 for (size_t I = 0; I < Concat->ops().size(); I++) {
3255 VectorsToShuffle.push_back(DAG.getBitcast(
3256 MVT::i16,
3257 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3258 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3259 DAG, DL, 128),
3260 SetCond)));
3261 }
3262
3263 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3264 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3265
3266 for (SDValue V : VectorsToShuffle) {
3267 ReturningInteger = DAG.getNode(
3268 ISD::SHL, DL, ReturnType,
3269 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3270
3271 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3272 ReturningInteger =
3273 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3274 }
3275
3276 return ReturningInteger;
3277 }
3278
3279 return SDValue();
3280}
3281
3282static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3283 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3284 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3285 // any_true (setcc <X>, 0, ne) => (any_true X)
3286 // all_true (setcc <X>, 0, ne) => (all_true X)
3287 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3288 using namespace llvm::SDPatternMatch;
3289
3290 SDValue LHS;
3291 if (N->getNumOperands() < 2 ||
3292 !sd_match(N->getOperand(1),
3293 m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
3294 return SDValue();
3295 EVT LT = LHS.getValueType();
3296 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3297 return SDValue();
3298
3299 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3300 ISD::CondCode SetType,
3301 Intrinsic::WASMIntrinsics InPost) {
3302 if (N->getConstantOperandVal(0) != InPre)
3303 return SDValue();
3304
3305 SDValue LHS;
3306 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3307 m_SpecificCondCode(SetType))))
3308 return SDValue();
3309
3310 SDLoc DL(N);
3311 SDValue Ret = DAG.getZExtOrTrunc(
3312 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3313 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3314 DL, MVT::i1);
3315 if (SetType == ISD::SETEQ)
3316 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3317 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3318 };
3319
3320 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3321 Intrinsic::wasm_alltrue))
3322 return AnyTrueEQ;
3323 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3324 Intrinsic::wasm_anytrue))
3325 return AllTrueEQ;
3326 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3327 Intrinsic::wasm_anytrue))
3328 return AnyTrueNE;
3329 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3330 Intrinsic::wasm_alltrue))
3331 return AllTrueNE;
3332
3333 return SDValue();
3334}
3335
3336template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3337 Intrinsic::ID Intrin>
3338static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3339 SDValue LHS = N->getOperand(0);
3340 SDValue RHS = N->getOperand(1);
3341 SDValue Cond = N->getOperand(2);
3342 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3343 return SDValue();
3344
3345 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3346 return SDValue();
3347
3348 SDLoc DL(N);
3349 SDValue Ret = DAG.getZExtOrTrunc(
3350 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3351 {DAG.getConstant(Intrin, DL, MVT::i32),
3352 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3353 DL, MVT::i1);
3354 if (RequiresNegate)
3355 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3356 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3357}
3358
3359/// Try to convert an i128 comparison to a v16i8 comparison before type
3360/// legalization splits it up into chunks.
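/// For example, an i128 SETEQ becomes an i8x16.eq followed by i8x16.all_true.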
3361static SDValue
3362 combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3363 const WebAssemblySubtarget *Subtarget) {
3364
3365 SDLoc DL(N);
3366 SDValue X = N->getOperand(0);
3367 SDValue Y = N->getOperand(1);
3368 EVT VT = N->getValueType(0);
3369 EVT OpVT = X.getValueType();
3370
3371 SelectionDAG &DAG = DCI.DAG;
3372 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3373 Attribute::NoImplicitFloat))
3374 return SDValue();
3375
3376 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3377 // We're looking for an oversized integer equality comparison with SIMD
3378 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3379 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3380 return SDValue();
3381
3382 // Don't perform this combine if constructing the vector will be expensive.
3383 auto IsVectorBitCastCheap = [](SDValue X) {
3384 X = peekThroughBitcasts(X);
3385 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3386 };
3387
3388 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3389 return SDValue();
3390
3391 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3392 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3393 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3394
3395 SDValue Intr =
3396 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3397 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3398 : Intrinsic::wasm_anytrue,
3399 DL, MVT::i32),
3400 Cmp});
3401
3402 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3403 ISD::SETNE);
3404}
3405
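// Lower a scalar setcc whose LHS is a bitcast of a vNi1 vector and whose RHS
// is the constant 0 or -1 into the corresponding any_true/all_true intrinsic,
// after first trying the vector-sized i128 equality combine above.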
3406 static SDValue performSETCCCombine(SDNode *N,
3407 TargetLowering::DAGCombinerInfo &DCI,
3408 const WebAssemblySubtarget *Subtarget) {
3409 if (!DCI.isBeforeLegalize())
3410 return SDValue();
3411
3412 EVT VT = N->getValueType(0);
3413 if (!VT.isScalarInteger())
3414 return SDValue();
3415
3416 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3417 return V;
3418
3419 SDValue LHS = N->getOperand(0);
3420 if (LHS->getOpcode() != ISD::BITCAST)
3421 return SDValue();
3422
3423 EVT FromVT = LHS->getOperand(0).getValueType();
3424 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3425 return SDValue();
3426
3427 unsigned NumElts = FromVT.getVectorNumElements();
3428 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3429 return SDValue();
3430
3431 if (!cast<ConstantSDNode>(N->getOperand(1)))
3432 return SDValue();
3433
3434 auto &DAG = DCI.DAG;
3435 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3436 MVT::getIntegerVT(128 / NumElts));
3437 // setcc (iN (bitcast (vNi1 X))), 0, ne
3438 // ==> any_true (vNi1 X)
3439 if (SDValue Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3440 N, VecVT, DAG)) {
3441 return Match;
3442 }
3443 // setcc (iN (bitcast (vNi1 X))), 0, eq
3444 // ==> xor (any_true (vNi1 X)), -1
3445 if (SDValue Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3446 N, VecVT, DAG)) {
3447 return Match;
3448 }
3449 // setcc (iN (bitcast (vNi1 X))), -1, eq
3450 // ==> all_true (vNi1 X)
3451 if (SDValue Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3452 N, VecVT, DAG)) {
3453 return Match;
3454 }
3455 // setcc (iN (bitcast (vNi1 X))), -1, ne
3456 // ==> xor (all_true (vNi1 X)), -1
3457 if (SDValue Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3458 N, VecVT, DAG)) {
3459 return Match;
3460 }
3461 return SDValue();
3462}
3463
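// Combine a v8i32/v16i32 multiply of sign- or zero-extended i8 vectors into
// v8i16 multiplies of extend_low/extend_high halves (see the table below),
// so the wide multiply is never materialized.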
3464 static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3465 EVT VT = N->getValueType(0);
3466 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3467 return SDValue();
3468
3469 // Mul with extending inputs.
3470 SDValue LHS = N->getOperand(0);
3471 SDValue RHS = N->getOperand(1);
3472 if (LHS.getOpcode() != RHS.getOpcode())
3473 return SDValue();
3474
3475 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3476 LHS.getOpcode() != ISD::ZERO_EXTEND)
3477 return SDValue();
3478
3479 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3480 return SDValue();
3481
3482 EVT FromVT = LHS->getOperand(0).getValueType();
3483 EVT EltTy = FromVT.getVectorElementType();
3484 if (EltTy != MVT::i8)
3485 return SDValue();
3486
3487 // For an input DAG that looks like this
3488 // %a = input_type
3489 // %b = input_type
3490 // %lhs = extend %a to output_type
3491 // %rhs = extend %b to output_type
3492 // %mul = mul %lhs, %rhs
3493
3494 // input_type | output_type | instructions
3495 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3496 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3497 // | | %low_low = i32x4.ext_low_i16x8_ %low
3498 // | | %low_high = i32x4.ext_high_i16x8_ %low
3499 // | | %high_low = i32x4.ext_low_i16x8_ %high
3500 // | | %high_high = i32x4.ext_high_i16x8_ %high
3501 // | | %res = concat_vector(...)
3502 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3503 // | | %low_low = i32x4.ext_low_i16x8_ %low
3504 // | | %low_high = i32x4.ext_high_i16x8_ %low
3505 // | | %res = concat_vector(%low_low, %low_high)
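// The extend_low/extend_high nodes and v8i16 multiplies built below are
// matched to the extmul and extend instructions listed above during
// instruction selection.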
3506
3507 SDLoc DL(N);
3508 unsigned NumElts = VT.getVectorNumElements();
3509 SDValue ExtendInLHS = LHS->getOperand(0);
3510 SDValue ExtendInRHS = RHS->getOperand(0);
3511 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3512 unsigned ExtendLowOpc =
3513 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3514 unsigned ExtendHighOpc =
3515 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3516
3517 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3518 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3519 };
3520 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3521 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3522 };
3523
3524 if (NumElts == 16) {
3525 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3526 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3527 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3528 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3529 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3530 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3531 SDValue SubVectors[] = {
3532 GetExtendLow(MVT::v4i32, MulLow),
3533 GetExtendHigh(MVT::v4i32, MulLow),
3534 GetExtendLow(MVT::v4i32, MulHigh),
3535 GetExtendHigh(MVT::v4i32, MulHigh),
3536 };
3537 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3538 } else {
3539 assert(NumElts == 8);
3540 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3541 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3542 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3543 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3544 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3545 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3546 }
3547 return SDValue();
3548}
3549
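// Combine vector multiplies: first try the widening extmul combine above,
// then lower v8i8/v16i8 multiplies (which have no SIMD instruction) through
// v8i16 multiplies, keeping the low byte of each lane.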
3550 static SDValue performMulCombine(SDNode *N,
3551 TargetLowering::DAGCombinerInfo &DCI) {
3552 assert(N->getOpcode() == ISD::MUL);
3553 EVT VT = N->getValueType(0);
3554 if (!VT.isVector())
3555 return SDValue();
3556
3557 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3558 return Res;
3559
3560 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3561 // extend them to v8i16. Only do this before legalization in case a narrow
3562 // vector is widened and may be simplified later.
3563 if (!DCI.isBeforeLegalize() || (VT != MVT::v8i8 && VT != MVT::v16i8))
3564 return SDValue();
3565
3566 SDLoc DL(N);
3567 SelectionDAG &DAG = DCI.DAG;
3568 SDValue LHS = N->getOperand(0);
3569 SDValue RHS = N->getOperand(1);
3570 EVT MulVT = MVT::v8i16;
3571
3572 if (VT == MVT::v8i8) {
3573 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3574 DAG.getUNDEF(MVT::v8i8));
3575 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3576 DAG.getUNDEF(MVT::v8i8));
3577 SDValue LowLHS =
3578 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3579 SDValue LowRHS =
3580 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3581 SDValue MulLow = DAG.getBitcast(
3582 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3583 // Take the low byte of each lane.
3584 SDValue Shuffle = DAG.getVectorShuffle(
3585 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3586 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3587 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3588 } else {
3589 assert(VT == MVT::v16i8 && "Expected v16i8");
3590 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3591 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3592 SDValue HighLHS =
3593 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3594 SDValue HighRHS =
3595 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3596
3597 SDValue MulLow =
3598 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3599 SDValue MulHigh =
3600 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3601
3602 // Take the low byte of each lane.
3603 return DAG.getVectorShuffle(
3604 VT, DL, MulLow, MulHigh,
3605 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3606 }
3607}
3608
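// Repeatedly concatenate In with poison until the result has
// RequiredNumElems elements.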
3609SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3610 SelectionDAG &DAG) {
3611 SDLoc DL(In);
3612 LLVMContext &Ctx = *DAG.getContext();
3613 EVT InVT = In.getValueType();
3614 unsigned NumElems = InVT.getVectorNumElements() * 2;
3615 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3616 SDValue Concat =
3617 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3618 if (NumElems < RequiredNumElems) {
3619 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3620 }
3621 return Concat;
3622}
3623
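// Lower a vector fp_to_sint/fp_to_uint from f32 lanes to i8/i16 lanes by
// first converting to i32 lanes, masking to the destination width, and then
// narrowing the result.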
3624 SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3625 EVT OutVT = N->getValueType(0);
3626 if (!OutVT.isVector())
3627 return SDValue();
3628
3629 EVT OutElTy = OutVT.getVectorElementType();
3630 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3631 return SDValue();
3632
3633 unsigned NumElems = OutVT.getVectorNumElements();
3634 if (!isPowerOf2_32(NumElems))
3635 return SDValue();
3636
3637 EVT FPVT = N->getOperand(0)->getValueType(0);
3638 if (FPVT.getVectorElementType() != MVT::f32)
3639 return SDValue();
3640
3641 SDLoc DL(N);
3642
3643 // First, convert to i32.
3644 LLVMContext &Ctx = *DAG.getContext();
3645 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3646 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3647 APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
3648 OutVT.getScalarSizeInBits());
3649 // Mask out the top MSBs.
3650 SDValue Masked =
3651 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3652
3653 if (OutVT.getSizeInBits() < 128) {
3654 // Create a wide enough vector that we can use narrow.
3655 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3656 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3657 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3658 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3659 return DAG.getBitcast(
3660 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3661 } else {
3662 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3663 }
3664 return SDValue();
3665}
3666
3667SDValue
3668WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3669 DAGCombinerInfo &DCI) const {
3670 switch (N->getOpcode()) {
3671 default:
3672 return SDValue();
3673 case ISD::BITCAST:
3674 return performBitcastCombine(N, DCI);
3675 case ISD::SETCC:
3676 return performSETCCCombine(N, DCI, Subtarget);
3677 case ISD::VECTOR_SHUFFLE:
3678 return performVECTOR_SHUFFLECombine(N, DCI);
3679 case ISD::SIGN_EXTEND:
3680 case ISD::ZERO_EXTEND:
3681 return performVectorExtendCombine(N, DCI);
3682 case ISD::UINT_TO_FP:
3683 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3684 return ExtCombine;
3685 return performVectorNonNegToFPCombine(N, DCI);
3686 case ISD::SINT_TO_FP:
3687 return performVectorExtendToFPCombine(N, DCI);
3688 case ISD::FP_TO_SINT_SAT:
3689 case ISD::FP_TO_UINT_SAT:
3690 case ISD::FP_ROUND:
3691 case ISD::CONCAT_VECTORS:
3692 return performVectorTruncZeroCombine(N, DCI);
3693 case ISD::FP_TO_SINT:
3694 case ISD::FP_TO_UINT:
3695 return performConvertFPCombine(N, DCI.DAG);
3696 case ISD::TRUNCATE:
3697 return performTruncateCombine(N, DCI);
3698 case ISD::INTRINSIC_WO_CHAIN:
3699 return performAnyAllCombine(N, DCI.DAG);
3700 case ISD::MUL:
3701 return performMulCombine(N, DCI);
3702 }
3703}