1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
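 // For example (illustrative sketch, assuming the usual addrspace(1) mapping):
 // a load of i32 through an addrspace(1) pointer to a symbol @g lowers to a
 // wasm `global.get $g` rather than an `i32.load` from linear memory.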
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
162 setTruncStoreAction(T, MVT::f16, Expand);
163 }
164
165 // Expand unavailable integer operations.
166 for (auto Op :
170 for (auto T : {MVT::i32, MVT::i64})
172 if (Subtarget->hasSIMD128())
173 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175 }
176
177 if (Subtarget->hasWideArithmetic()) {
183 }
184
185 if (Subtarget->hasNontrappingFPToInt())
187 for (auto T : {MVT::i32, MVT::i64})
189
190 if (Subtarget->hasRelaxedSIMD()) {
193 {MVT::v4f32, MVT::v2f64}, Custom);
194 }
195 // SIMD-specific configuration
196 if (Subtarget->hasSIMD128()) {
197
199
200 // Combine wide-vector muls with extended inputs into extmul_half.
202
203 // Combine vector mask reductions into alltrue/anytrue
205
206 // Convert vector-to-integer bitcasts to bitmask
208
209 // Hoist bitcasts out of shuffles
211
212 // Combine extends of extract_subvectors into widening ops
214
215 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
216 // conversion ops
219
220 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
221 // into conversion ops
225
227
228 // Support saturating add/sub for i8x16 and i16x8
230 for (auto T : {MVT::v16i8, MVT::v8i16})
232
233 // Support integer abs
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
236
237 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
238 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
239 MVT::v2f64})
241
242 if (Subtarget->hasFP16())
244
245 // We have custom shuffle lowering to expose the shuffle mask
246 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
247 MVT::v2f64})
249
250 if (Subtarget->hasFP16())
252
253 // Support splatting
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
258 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
259
260 // Custom lowering since wasm shifts must have a scalar shift amount
261 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
264
265 // Custom lower lane accesses to expand out variable indices
267 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
268 MVT::v2f64})
270
271 // There is no i8x16.mul instruction
272 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
273
274 // There is no vector conditional select instruction
275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
276 MVT::v2f64})
278
279 // Expand integer operations supported for scalars but not SIMD
280 for (auto Op :
282 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
284
285 // But we do have integer min and max operations
286 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
287 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
289
290 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
291 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
292 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
293 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
294
295 // Custom lower bit counting operations for other types to scalarize them.
296 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
297 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
299
300 // Expand float operations supported for scalars but not SIMD
303 for (auto T : {MVT::v4f32, MVT::v2f64})
305
306 // Unsigned comparison operations are unavailable for i64x2 vectors.
308 setCondCodeAction(CC, MVT::v2i64, Custom);
309
310 // 64x2 conversions are not in the spec
311 for (auto Op :
313 for (auto T : {MVT::v2i64, MVT::v2f64})
315
316 // But saturating fp_to_int conversions are
318 setOperationAction(Op, MVT::v4i32, Custom);
319 if (Subtarget->hasFP16()) {
320 setOperationAction(Op, MVT::v8i16, Custom);
321 }
322 }
323
324 // Support vector extending
329 }
330
331 if (Subtarget->hasFP16()) {
332 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
333 }
334
335 if (Subtarget->hasRelaxedSIMD()) {
338 }
339
340 // Partial MLA reductions.
342 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
343 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
344 }
345 }
346
347 // As a special case, these operators use the type to mean the type to
348 // sign-extend from.
350 if (!Subtarget->hasSignExt()) {
351 // Sign extends are legal only when extending a vector extract
352 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
353 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
355 }
358
359 // Dynamic stack allocation: use the default expansion.
363
367
368 // Expand these forms; we pattern-match the forms that we can handle in isel.
369 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
370 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
372
373 // We have custom switch handling.
375
376 // WebAssembly doesn't have:
377 // - Floating-point extending loads.
378 // - Floating-point truncating stores.
379 // - i1 extending loads.
380 // - truncating SIMD stores and most extending loads
381 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
382 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
383 for (auto T : MVT::integer_valuetypes())
384 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
385 setLoadExtAction(Ext, T, MVT::i1, Promote);
386 if (Subtarget->hasSIMD128()) {
387 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
388 MVT::v2f64}) {
389 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
390 if (MVT(T) != MemT) {
392 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
393 setLoadExtAction(Ext, T, MemT, Expand);
394 }
395 }
396 }
397 // But some vector extending loads are legal
398 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
399 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
400 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
401 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
402 }
403 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
404 }
405
406 // Don't do anything clever with build_pairs
408
409 // Trap lowers to wasm unreachable
410 setOperationAction(ISD::TRAP, MVT::Other, Legal);
412
413 // Exception handling intrinsics
417
419
420 // Always convert switches to br_tables unless there is only one case, which
421 // is equivalent to a simple branch. This reduces code size for wasm, and we
422 // defer possible jump table optimizations to the VM.
424}
425
434
443
445WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
446 const AtomicRMWInst *AI) const {
447 // We have wasm instructions for these
448 switch (AI->getOperation()) {
456 default:
457 break;
458 }
460}
461
462bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
463 // Implementation copied from X86TargetLowering.
464 unsigned Opc = VecOp.getOpcode();
465
466 // Assume target opcodes can't be scalarized.
467 // TODO - do we have any exceptions?
469 return false;
470
471 // If the vector op is not supported, try to convert to scalar.
472 EVT VecVT = VecOp.getValueType();
474 return true;
475
476 // If the vector op is supported, but the scalar op is not, the transform may
477 // not be worthwhile.
478 EVT ScalarVT = VecVT.getScalarType();
479 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
480}
481
482FastISel *WebAssemblyTargetLowering::createFastISel(
483 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
484 const LibcallLoweringInfo *LibcallLowering) const {
485 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
486}
487
488MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
489 EVT VT) const {
490 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
491 if (BitWidth > 1 && BitWidth < 8)
492 BitWidth = 8;
493
494 if (BitWidth > 64) {
495 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
496 // the count to be an i32.
497 BitWidth = 32;
499 "32-bit shift counts ought to be enough for anyone");
500 }
501
504 "Unable to represent scalar shift amount type");
505 return Result;
506}
507
508// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
509// undefined result on invalid/overflow, to the WebAssembly opcode, which
510// traps on invalid/overflow.
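// Roughly the following CFG diamond is emitted (illustrative sketch, not
// verbatim machine-instruction output):
//
//   BB:       Cmp = "x is in range"   (fabs-based compare; unsigned also checks x >= 0)
//             br_if TrueMBB, eqz(Cmp)
//   FalseMBB: Out = trunc(x)          (the trapping wasm conversion)
//             br   DoneMBB
//   TrueMBB:  Out = Substitute        (0 for unsigned, INT_MIN for signed)
//   DoneMBB:  Out = phi(FalseMBB, TrueMBB)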
513 const TargetInstrInfo &TII,
514 bool IsUnsigned, bool Int64,
515 bool Float64, unsigned LoweredOpcode) {
517
518 Register OutReg = MI.getOperand(0).getReg();
519 Register InReg = MI.getOperand(1).getReg();
520
521 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
522 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
523 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
524 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
525 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
526 unsigned Eqz = WebAssembly::EQZ_I32;
527 unsigned And = WebAssembly::AND_I32;
528 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
529 int64_t Substitute = IsUnsigned ? 0 : Limit;
530 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
531 auto &Context = BB->getParent()->getFunction().getContext();
532 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
533
534 const BasicBlock *LLVMBB = BB->getBasicBlock();
535 MachineFunction *F = BB->getParent();
536 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
537 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
538 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
539
541 F->insert(It, FalseMBB);
542 F->insert(It, TrueMBB);
543 F->insert(It, DoneMBB);
544
545 // Transfer the remainder of BB and its successor edges to DoneMBB.
546 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
548
549 BB->addSuccessor(TrueMBB);
550 BB->addSuccessor(FalseMBB);
551 TrueMBB->addSuccessor(DoneMBB);
552 FalseMBB->addSuccessor(DoneMBB);
553
554 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
555 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
556 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
557 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
558 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
559 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
560 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
561
562 MI.eraseFromParent();
563 // For signed numbers, we can do a single comparison to determine whether
564 // fabs(x) is within range.
565 if (IsUnsigned) {
566 Tmp0 = InReg;
567 } else {
568 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
569 }
570 BuildMI(BB, DL, TII.get(FConst), Tmp1)
571 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
572 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
573
574 // For unsigned numbers, we have to do a separate comparison with zero.
575 if (IsUnsigned) {
576 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
577 Register SecondCmpReg =
578 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
579 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
580 BuildMI(BB, DL, TII.get(FConst), Tmp1)
581 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
582 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
583 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
584 CmpReg = AndReg;
585 }
586
587 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
588
589 // Create the CFG diamond to select between doing the conversion or using
590 // the substitute value.
591 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
592 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
593 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
594 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
595 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
596 .addReg(FalseReg)
597 .addMBB(FalseMBB)
598 .addReg(TrueReg)
599 .addMBB(TrueMBB);
600
601 return DoneMBB;
602}
603
604// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
605// instruction to handle the zero-length case.
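// The emitted shape is roughly (illustrative sketch):
//
//   BB:       br_if DoneMBB, (Len == 0)   ; skip the copy when Len is zero
//   TrueMBB:  memory.copy Dst, Src, Len
//   DoneMBB:  ...rest of the original block...
//
// When Len is a known constant, the guard is omitted entirely (see the code
// below).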
608 const TargetInstrInfo &TII, bool Int64) {
610
611 MachineOperand DstMem = MI.getOperand(0);
612 MachineOperand SrcMem = MI.getOperand(1);
613 MachineOperand Dst = MI.getOperand(2);
614 MachineOperand Src = MI.getOperand(3);
615 MachineOperand Len = MI.getOperand(4);
616
617 // If the length is a constant, we don't actually need the check.
618 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
619 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
620 Def->getOpcode() == WebAssembly::CONST_I64) {
621 if (Def->getOperand(1).getImm() == 0) {
622 // A zero-length memcpy is a no-op.
623 MI.eraseFromParent();
624 return BB;
625 }
626 // A non-zero-length memcpy doesn't need a zero check.
627 unsigned MemoryCopy =
628 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
629 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
630 .add(DstMem)
631 .add(SrcMem)
632 .add(Dst)
633 .add(Src)
634 .add(Len);
635 MI.eraseFromParent();
636 return BB;
637 }
638 }
639
640 // We're going to add an extra use to `Len` to test if it's zero; that
641 // use shouldn't be a kill, even if the original use is.
642 MachineOperand NoKillLen = Len;
643 NoKillLen.setIsKill(false);
644
645 // Decide on which `MachineInstr` opcode we're going to use.
646 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
647 unsigned MemoryCopy =
648 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
649
650 // Create two new basic blocks; one for the new `memory.copy` that we can
651 // branch over, and one for the rest of the instructions after the original
652 // `memory.copy`.
653 const BasicBlock *LLVMBB = BB->getBasicBlock();
654 MachineFunction *F = BB->getParent();
655 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
656 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
657
659 F->insert(It, TrueMBB);
660 F->insert(It, DoneMBB);
661
662 // Transfer the remainder of BB and its successor edges to DoneMBB.
663 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
665
666 // Connect the CFG edges.
667 BB->addSuccessor(TrueMBB);
668 BB->addSuccessor(DoneMBB);
669 TrueMBB->addSuccessor(DoneMBB);
670
671 // Create a virtual register for the `Eqz` result.
672 unsigned EqzReg;
673 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
674
675 // Erase the original `memory.copy`.
676 MI.eraseFromParent();
677
678 // Test if `Len` is zero.
679 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
680
681 // Insert a new `memory.copy`.
682 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
683 .add(DstMem)
684 .add(SrcMem)
685 .add(Dst)
686 .add(Src)
687 .add(Len);
688
689 // Create the CFG triangle.
690 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
691 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
692
693 return DoneMBB;
694}
695
696// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
697// instruction to handle the zero-length case.
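// Same shape as the memcpy lowering above (illustrative sketch):
//
//   BB:       br_if DoneMBB, (Len == 0)
//   TrueMBB:  memory.fill Dst, Val, Len
//   DoneMBB:  ...rest of the original block...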
700 const TargetInstrInfo &TII, bool Int64) {
702
703 MachineOperand Mem = MI.getOperand(0);
704 MachineOperand Dst = MI.getOperand(1);
705 MachineOperand Val = MI.getOperand(2);
706 MachineOperand Len = MI.getOperand(3);
707
708 // If the length is a constant, we don't actually need the check.
709 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
710 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
711 Def->getOpcode() == WebAssembly::CONST_I64) {
712 if (Def->getOperand(1).getImm() == 0) {
713 // A zero-length memset is a no-op.
714 MI.eraseFromParent();
715 return BB;
716 }
717 // A non-zero-length memset doesn't need a zero check.
718 unsigned MemoryFill =
719 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
720 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
721 .add(Mem)
722 .add(Dst)
723 .add(Val)
724 .add(Len);
725 MI.eraseFromParent();
726 return BB;
727 }
728 }
729
730 // We're going to add an extra use to `Len` to test if it's zero; that
731 // use shouldn't be a kill, even if the original use is.
732 MachineOperand NoKillLen = Len;
733 NoKillLen.setIsKill(false);
734
735 // Decide on which `MachineInstr` opcode we're going to use.
736 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
737 unsigned MemoryFill =
738 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
739
740 // Create two new basic blocks; one for the new `memory.fill` that we can
741 // branch over, and one for the rest of the instructions after the original
742 // `memory.fill`.
743 const BasicBlock *LLVMBB = BB->getBasicBlock();
744 MachineFunction *F = BB->getParent();
745 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
746 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
747
749 F->insert(It, TrueMBB);
750 F->insert(It, DoneMBB);
751
752 // Transfer the remainder of BB and its successor edges to DoneMBB.
753 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
755
756 // Connect the CFG edges.
757 BB->addSuccessor(TrueMBB);
758 BB->addSuccessor(DoneMBB);
759 TrueMBB->addSuccessor(DoneMBB);
760
761 // Create a virtual register for the `Eqz` result.
762 unsigned EqzReg;
763 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
764
765 // Erase the original `memory.fill`.
766 MI.eraseFromParent();
767
768 // Test if `Len` is zero.
769 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
770
771 // Insert a new `memory.fill`.
772 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
773
774 // Create the CFG triangle.
775 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
776 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
777
778 return DoneMBB;
779}
780
781static MachineBasicBlock *
783 const WebAssemblySubtarget *Subtarget,
784 const TargetInstrInfo &TII) {
785 MachineInstr &CallParams = *CallResults.getPrevNode();
786 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
787 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
788 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
789
790 bool IsIndirect =
791 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
792 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
793
794 bool IsFuncrefCall = false;
795 if (IsIndirect && CallParams.getOperand(0).isReg()) {
796 Register Reg = CallParams.getOperand(0).getReg();
797 const MachineFunction *MF = BB->getParent();
798 const MachineRegisterInfo &MRI = MF->getRegInfo();
799 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
800 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
801 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
802 }
803
804 unsigned CallOp;
805 if (IsIndirect && IsRetCall) {
806 CallOp = WebAssembly::RET_CALL_INDIRECT;
807 } else if (IsIndirect) {
808 CallOp = WebAssembly::CALL_INDIRECT;
809 } else if (IsRetCall) {
810 CallOp = WebAssembly::RET_CALL;
811 } else {
812 CallOp = WebAssembly::CALL;
813 }
814
815 MachineFunction &MF = *BB->getParent();
816 const MCInstrDesc &MCID = TII.get(CallOp);
817 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
818
819 // Move the function pointer to the end of the arguments for indirect calls
820 if (IsIndirect) {
821 auto FnPtr = CallParams.getOperand(0);
822 CallParams.removeOperand(0);
823
824 // For funcrefs, call_indirect is done through __funcref_call_table and the
825 // funcref is always installed in slot 0 of the table. Therefore, instead of
826 // having the function pointer added at the end of the params list, a zero
827 // (the index into __funcref_call_table) is added.
829 if (IsFuncrefCall) {
830 Register RegZero =
831 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
832 MachineInstrBuilder MIBC0 =
833 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
834
835 BB->insert(CallResults.getIterator(), MIBC0);
836 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
837 } else
838 CallParams.addOperand(FnPtr);
839 }
840
841 for (auto Def : CallResults.defs())
842 MIB.add(Def);
843
844 if (IsIndirect) {
845 // Placeholder for the type index.
846 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
847 MIB.addImm(0);
848 // The table into which this call_indirect indexes.
849 MCSymbolWasm *Table = IsFuncrefCall
851 MF.getContext(), Subtarget)
853 MF.getContext(), Subtarget);
854 if (Subtarget->hasCallIndirectOverlong()) {
855 MIB.addSym(Table);
856 } else {
857 // For the MVP there is at most one table whose number is 0, but we can't
858 // write a table symbol or issue relocations. Instead we just ensure the
859 // table is live and write a zero.
860 Table->setNoStrip();
861 MIB.addImm(0);
862 }
863 }
864
865 for (auto Use : CallParams.uses())
866 MIB.add(Use);
867
868 BB->insert(CallResults.getIterator(), MIB);
869 CallParams.eraseFromParent();
870 CallResults.eraseFromParent();
871
872 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
873 // table slot with ref.null upon call_indirect return.
874 //
875 // This generates the following code, which comes right after a call_indirect
876 // of a funcref:
877 //
878 // i32.const 0
879 // ref.null func
880 // table.set __funcref_call_table
881 if (IsIndirect && IsFuncrefCall) {
883 MF.getContext(), Subtarget);
884 Register RegZero =
885 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
886 MachineInstr *Const0 =
887 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
888 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
889
890 Register RegFuncref =
891 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
892 MachineInstr *RefNull =
893 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
894 BB->insertAfter(Const0->getIterator(), RefNull);
895
896 MachineInstr *TableSet =
897 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
898 .addSym(Table)
899 .addReg(RegZero)
900 .addReg(RegFuncref);
901 BB->insertAfter(RefNull->getIterator(), TableSet);
902 }
903
904 return BB;
905}
906
907MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
908 MachineInstr &MI, MachineBasicBlock *BB) const {
909 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
910 DebugLoc DL = MI.getDebugLoc();
911
912 switch (MI.getOpcode()) {
913 default:
914 llvm_unreachable("Unexpected instr type to insert");
915 case WebAssembly::FP_TO_SINT_I32_F32:
916 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
917 WebAssembly::I32_TRUNC_S_F32);
918 case WebAssembly::FP_TO_UINT_I32_F32:
919 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
920 WebAssembly::I32_TRUNC_U_F32);
921 case WebAssembly::FP_TO_SINT_I64_F32:
922 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
923 WebAssembly::I64_TRUNC_S_F32);
924 case WebAssembly::FP_TO_UINT_I64_F32:
925 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
926 WebAssembly::I64_TRUNC_U_F32);
927 case WebAssembly::FP_TO_SINT_I32_F64:
928 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
929 WebAssembly::I32_TRUNC_S_F64);
930 case WebAssembly::FP_TO_UINT_I32_F64:
931 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
932 WebAssembly::I32_TRUNC_U_F64);
933 case WebAssembly::FP_TO_SINT_I64_F64:
934 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
935 WebAssembly::I64_TRUNC_S_F64);
936 case WebAssembly::FP_TO_UINT_I64_F64:
937 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
938 WebAssembly::I64_TRUNC_U_F64);
939 case WebAssembly::MEMCPY_A32:
940 return LowerMemcpy(MI, DL, BB, TII, false);
941 case WebAssembly::MEMCPY_A64:
942 return LowerMemcpy(MI, DL, BB, TII, true);
943 case WebAssembly::MEMSET_A32:
944 return LowerMemset(MI, DL, BB, TII, false);
945 case WebAssembly::MEMSET_A64:
946 return LowerMemset(MI, DL, BB, TII, true);
947 case WebAssembly::CALL_RESULTS:
948 case WebAssembly::RET_CALL_RESULTS:
949 return LowerCallResults(MI, DL, BB, Subtarget, TII);
950 }
951}
952
953std::pair<unsigned, const TargetRegisterClass *>
954WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
955 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
956 // First, see if this is a constraint that directly corresponds to a
957 // WebAssembly register class.
958 if (Constraint.size() == 1) {
959 switch (Constraint[0]) {
960 case 'r':
961 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
962 if (Subtarget->hasSIMD128() && VT.isVector()) {
963 if (VT.getSizeInBits() == 128)
964 return std::make_pair(0U, &WebAssembly::V128RegClass);
965 }
966 if (VT.isInteger() && !VT.isVector()) {
967 if (VT.getSizeInBits() <= 32)
968 return std::make_pair(0U, &WebAssembly::I32RegClass);
969 if (VT.getSizeInBits() <= 64)
970 return std::make_pair(0U, &WebAssembly::I64RegClass);
971 }
972 if (VT.isFloatingPoint() && !VT.isVector()) {
973 switch (VT.getSizeInBits()) {
974 case 32:
975 return std::make_pair(0U, &WebAssembly::F32RegClass);
976 case 64:
977 return std::make_pair(0U, &WebAssembly::F64RegClass);
978 default:
979 break;
980 }
981 }
982 break;
983 default:
984 break;
985 }
986 }
987
989}
990
991bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
992 // Assume ctz is a relatively cheap operation.
993 return true;
994}
995
996bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
997 // Assume clz is a relatively cheap operation.
998 return true;
999}
1000
1001bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1002 const AddrMode &AM,
1003 Type *Ty, unsigned AS,
1004 Instruction *I) const {
1005 // WebAssembly offsets are added as unsigned without wrapping. The
1006 // isLegalAddressingMode gives us no way to determine if wrapping could be
1007 // happening, so we approximate this by accepting only non-negative offsets.
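 // For example (illustrative): an address of the form `base + 16` can be
 // folded into a load/store offset, but `base - 8` or `base + 4*index` must be
 // kept as explicit address arithmetic.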
1008 if (AM.BaseOffs < 0)
1009 return false;
1010
1011 // WebAssembly has no scale register operands.
1012 if (AM.Scale != 0)
1013 return false;
1014
1015 // Everything else is legal.
1016 return true;
1017}
1018
1019bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1020 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1021 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1022 // WebAssembly supports unaligned accesses, though it should be declared
1023 // with the p2align attribute on loads and stores which do so, and there
1024 // may be a performance impact. We tell LLVM they're "fast" because
1025 // for the kinds of things that LLVM uses this for (merging adjacent stores
1026 // of constants, etc.), WebAssembly implementations will either want the
1027 // unaligned access or they'll split anyway.
1028 if (Fast)
1029 *Fast = 1;
1030 return true;
1031}
1032
1033bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1034 AttributeList Attr) const {
1035 // The current thinking is that wasm engines will perform this optimization,
1036 // so we can save on code size.
1037 return true;
1038}
1039
1040bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1041 EVT ExtT = ExtVal.getValueType();
1042 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1043 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1044 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1045 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1046}
1047
1048bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1049 const GlobalAddressSDNode *GA) const {
1050 // Wasm doesn't support function addresses with offsets
1051 const GlobalValue *GV = GA->getGlobal();
1053}
1054
1055EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1056 LLVMContext &C,
1057 EVT VT) const {
1058 if (VT.isVector())
1060
1061 // So far, all branch instructions in Wasm take an I32 condition.
1062 // The default TargetLowering::getSetCCResultType returns the pointer size,
1063 // which would be useful to reduce instruction counts when testing
1064 // against 64-bit pointers/values if at some point Wasm supports that.
1065 return EVT::getIntegerVT(C, 32);
1066}
1067
1068bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1069 const CallBase &I,
1070 MachineFunction &MF,
1071 unsigned Intrinsic) const {
1072 switch (Intrinsic) {
1073 case Intrinsic::wasm_memory_atomic_notify:
1075 Info.memVT = MVT::i32;
1076 Info.ptrVal = I.getArgOperand(0);
1077 Info.offset = 0;
1078 Info.align = Align(4);
1079 // atomic.notify instruction does not really load the memory specified with
1080 // this argument, but MachineMemOperand should either be load or store, so
1081 // we set this to a load.
1082 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1083 // instructions are treated as volatiles in the backend, so we should be
1084 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1086 return true;
1087 case Intrinsic::wasm_memory_atomic_wait32:
1089 Info.memVT = MVT::i32;
1090 Info.ptrVal = I.getArgOperand(0);
1091 Info.offset = 0;
1092 Info.align = Align(4);
1094 return true;
1095 case Intrinsic::wasm_memory_atomic_wait64:
1097 Info.memVT = MVT::i64;
1098 Info.ptrVal = I.getArgOperand(0);
1099 Info.offset = 0;
1100 Info.align = Align(8);
1102 return true;
1103 case Intrinsic::wasm_loadf16_f32:
1105 Info.memVT = MVT::f16;
1106 Info.ptrVal = I.getArgOperand(0);
1107 Info.offset = 0;
1108 Info.align = Align(2);
1110 return true;
1111 case Intrinsic::wasm_storef16_f32:
1113 Info.memVT = MVT::f16;
1114 Info.ptrVal = I.getArgOperand(1);
1115 Info.offset = 0;
1116 Info.align = Align(2);
1118 return true;
1119 default:
1120 return false;
1121 }
1122}
1123
1124void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1125 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1126 const SelectionDAG &DAG, unsigned Depth) const {
1127 switch (Op.getOpcode()) {
1128 default:
1129 break;
1131 unsigned IntNo = Op.getConstantOperandVal(0);
1132 switch (IntNo) {
1133 default:
1134 break;
1135 case Intrinsic::wasm_bitmask: {
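 // For example (illustrative): bitmask on a v16i8 operand can set at most
 // the low 16 bits of the i32 result, so bits [16, 32) are known zero.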
1136 unsigned BitWidth = Known.getBitWidth();
1137 EVT VT = Op.getOperand(1).getSimpleValueType();
1138 unsigned PossibleBits = VT.getVectorNumElements();
1139 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1140 Known.Zero |= ZeroMask;
1141 break;
1142 }
1143 }
1144 break;
1145 }
1146 case WebAssemblyISD::EXTEND_LOW_U:
1147 case WebAssemblyISD::EXTEND_HIGH_U: {
1148 // We know the high half of each destination vector element will be zero.
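 // For example (illustrative): extend_low_u from a v8i16 source to v4i32
 // lanes leaves bits [16, 32) of every result element known zero.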
1149 SDValue SrcOp = Op.getOperand(0);
1150 EVT VT = SrcOp.getSimpleValueType();
1151 unsigned BitWidth = Known.getBitWidth();
1152 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1153 assert(BitWidth >= 8 && "Unexpected width!");
1155 Known.Zero |= Mask;
1156 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1157 assert(BitWidth >= 16 && "Unexpected width!");
1159 Known.Zero |= Mask;
1160 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1161 assert(BitWidth >= 32 && "Unexpected width!");
1163 Known.Zero |= Mask;
1164 }
1165 break;
1166 }
1167 // For 128-bit addition, if the upper halves of both inputs are zero, then the
1168 // upper half of the result can only hold the carry, so all of its bits except
1169 // the lowest are known zero.
1170 case WebAssemblyISD::I64_ADD128:
1171 if (Op.getResNo() == 1) {
1172 SDValue LHS_HI = Op.getOperand(1);
1173 SDValue RHS_HI = Op.getOperand(3);
1174 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1175 Known.Zero.setBitsFrom(1);
1176 }
1177 break;
1178 }
1179}
1180
1182WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1183 if (VT.isFixedLengthVector()) {
1184 MVT EltVT = VT.getVectorElementType();
1185 // We have legal vector types with these lane types, so widening the
1186 // vector would let us use some of the lanes directly without having to
1187 // extend or truncate values.
1188 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1189 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1190 return TypeWidenVector;
1191 }
1192
1194}
1195
1196bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1197 const MachineFunction &MF, EVT VT) const {
1198 if (!Subtarget->hasFP16() || !VT.isVector())
1199 return false;
1200
1201 EVT ScalarVT = VT.getScalarType();
1202 if (!ScalarVT.isSimple())
1203 return false;
1204
1205 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1206}
1207
1208bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1209 SDValue Op, const TargetLoweringOpt &TLO) const {
1210 // The ISel process runs DAGCombiner after legalization; this step is called
1211 // the SelectionDAG optimization phase. This post-legalization combining
1212 // process runs DAGCombiner on each node, and if there was a change to be
1213 // made, re-runs legalization on it and its user nodes to make sure
1214 // everything is in a legalized state.
1215 //
1216 // The legalization calls lowering routines, and we do our custom lowering for
1217 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1218 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1219 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1220 // turns unused vector elements into undefs. But this routine does not work
1221 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1222 // combination can result in an infinite loop, in which undefs are converted to
1223 // zeros in legalization and back to undefs in combining.
1224 //
1225 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1226 // running for build_vectors.
1227 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1228 return false;
1229 return true;
1230}
1231
1232//===----------------------------------------------------------------------===//
1233// WebAssembly Lowering private implementation.
1234//===----------------------------------------------------------------------===//
1235
1236//===----------------------------------------------------------------------===//
1237// Lowering Code
1238//===----------------------------------------------------------------------===//
1239
1240static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1242 DAG.getContext()->diagnose(
1243 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1244}
1245
1246// Test whether the given calling convention is supported.
1248 // We currently support the language-independent target-independent
1249 // conventions. We don't yet have a way to annotate calls with properties like
1250 // "cold", and we don't have any call-clobbered registers, so these are mostly
1251 // all handled the same.
1252 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1253 CallConv == CallingConv::Cold ||
1254 CallConv == CallingConv::PreserveMost ||
1255 CallConv == CallingConv::PreserveAll ||
1256 CallConv == CallingConv::CXX_FAST_TLS ||
1258 CallConv == CallingConv::Swift;
1259}
1260
1261SDValue
1262WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1263 SmallVectorImpl<SDValue> &InVals) const {
1264 SelectionDAG &DAG = CLI.DAG;
1265 SDLoc DL = CLI.DL;
1266 SDValue Chain = CLI.Chain;
1267 SDValue Callee = CLI.Callee;
1268 MachineFunction &MF = DAG.getMachineFunction();
1269 auto Layout = MF.getDataLayout();
1270
1271 CallingConv::ID CallConv = CLI.CallConv;
1272 if (!callingConvSupported(CallConv))
1273 fail(DL, DAG,
1274 "WebAssembly doesn't support language-specific or target-specific "
1275 "calling conventions yet");
1276 if (CLI.IsPatchPoint)
1277 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1278
1279 if (CLI.IsTailCall) {
1280 auto NoTail = [&](const char *Msg) {
1281 if (CLI.CB && CLI.CB->isMustTailCall())
1282 fail(DL, DAG, Msg);
1283 CLI.IsTailCall = false;
1284 };
1285
1286 if (!Subtarget->hasTailCall())
1287 NoTail("WebAssembly 'tail-call' feature not enabled");
1288
1289 // Varargs calls cannot be tail calls because the buffer is on the stack
1290 if (CLI.IsVarArg)
1291 NoTail("WebAssembly does not support varargs tail calls");
1292
1293 // Do not tail call unless caller and callee return types match
1294 const Function &F = MF.getFunction();
1295 const TargetMachine &TM = getTargetMachine();
1296 Type *RetTy = F.getReturnType();
1297 SmallVector<MVT, 4> CallerRetTys;
1298 SmallVector<MVT, 4> CalleeRetTys;
1299 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1300 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1301 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1302 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1303 CalleeRetTys.begin());
1304 if (!TypesMatch)
1305 NoTail("WebAssembly tail call requires caller and callee return types to "
1306 "match");
1307
1308 // If pointers to local stack values are passed, we cannot tail call
1309 if (CLI.CB) {
1310 for (auto &Arg : CLI.CB->args()) {
1311 Value *Val = Arg.get();
1312 // Trace the value back through pointer operations
1313 while (true) {
1314 Value *Src = Val->stripPointerCastsAndAliases();
1315 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1316 Src = GEP->getPointerOperand();
1317 if (Val == Src)
1318 break;
1319 Val = Src;
1320 }
1321 if (isa<AllocaInst>(Val)) {
1322 NoTail(
1323 "WebAssembly does not support tail calling with stack arguments");
1324 break;
1325 }
1326 }
1327 }
1328 }
1329
1330 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1331 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1332 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1333
1334 // The generic code may have added an sret argument. If we're lowering an
1335 // invoke function, the ABI requires that the function pointer be the first
1336 // argument, so we may have to swap the arguments.
1337 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1338 Outs[0].Flags.isSRet()) {
1339 std::swap(Outs[0], Outs[1]);
1340 std::swap(OutVals[0], OutVals[1]);
1341 }
1342
1343 bool HasSwiftSelfArg = false;
1344 bool HasSwiftErrorArg = false;
1345 unsigned NumFixedArgs = 0;
1346 for (unsigned I = 0; I < Outs.size(); ++I) {
1347 const ISD::OutputArg &Out = Outs[I];
1348 SDValue &OutVal = OutVals[I];
1349 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1350 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1351 if (Out.Flags.isNest())
1352 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1353 if (Out.Flags.isInAlloca())
1354 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1355 if (Out.Flags.isInConsecutiveRegs())
1356 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1358 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1359 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1360 auto &MFI = MF.getFrameInfo();
1361 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1363 /*isSS=*/false);
1364 SDValue SizeNode =
1365 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1366 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1367 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1369 /*isVolatile*/ false, /*AlwaysInline=*/false,
1370 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1371 MachinePointerInfo());
1372 OutVal = FINode;
1373 }
1374 // Count the number of fixed args *after* legalization.
1375 NumFixedArgs += !Out.Flags.isVarArg();
1376 }
1377
1378 bool IsVarArg = CLI.IsVarArg;
1379 auto PtrVT = getPointerTy(Layout);
1380
1381 // For swiftcc, emit additional swiftself and swifterror arguments if they
1382 // are not already present. These additional arguments are also added to the
1383 // callee signature. They are necessary to match callee and caller signatures
1384 // for indirect calls.
1385 if (CallConv == CallingConv::Swift) {
1386 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1387 if (!HasSwiftSelfArg) {
1388 NumFixedArgs++;
1389 ISD::ArgFlagsTy Flags;
1390 Flags.setSwiftSelf();
1391 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1392 CLI.Outs.push_back(Arg);
1393 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1394 CLI.OutVals.push_back(ArgVal);
1395 }
1396 if (!HasSwiftErrorArg) {
1397 NumFixedArgs++;
1398 ISD::ArgFlagsTy Flags;
1399 Flags.setSwiftError();
1400 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1401 CLI.Outs.push_back(Arg);
1402 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1403 CLI.OutVals.push_back(ArgVal);
1404 }
1405 }
1406
1407 // Analyze operands of the call, assigning locations to each operand.
1409 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1410
1411 if (IsVarArg) {
1412 // Outgoing non-fixed arguments are placed in a buffer. First
1413 // compute their offsets and the total amount of buffer space needed.
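 // Illustrative example: for variadic arguments (i32 x, f64 y), x is placed
 // at offset 0 and y at offset 8 (its ABI alignment), so 16 bytes of buffer
 // space are needed.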
1414 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1415 const ISD::OutputArg &Out = Outs[I];
1416 SDValue &Arg = OutVals[I];
1417 EVT VT = Arg.getValueType();
1418 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1419 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1420 Align Alignment =
1421 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1422 unsigned Offset =
1423 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1424 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1425 Offset, VT.getSimpleVT(),
1427 }
1428 }
1429
1430 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1431
1432 SDValue FINode;
1433 if (IsVarArg && NumBytes) {
1434 // For non-fixed arguments, next emit stores to store the argument values
1435 // to the stack buffer at the offsets computed above.
1436 MaybeAlign StackAlign = Layout.getStackAlignment();
1437 assert(StackAlign && "data layout string is missing stack alignment");
1438 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1439 /*isSS=*/false);
1440 unsigned ValNo = 0;
1442 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1443 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1444 "ArgLocs should remain in order and only hold varargs args");
1445 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1446 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1447 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1448 DAG.getConstant(Offset, DL, PtrVT));
1449 Chains.push_back(
1450 DAG.getStore(Chain, DL, Arg, Add,
1452 }
1453 if (!Chains.empty())
1454 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1455 } else if (IsVarArg) {
1456 FINode = DAG.getIntPtrConstant(0, DL);
1457 }
1458
1459 if (Callee->getOpcode() == ISD::GlobalAddress) {
1460 // If the callee is a GlobalAddress node (quite common, every direct call
1461 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1462 // doesn't add MO_GOT, which is not needed for direct calls.
1463 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1466 GA->getOffset());
1467 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1468 getPointerTy(DAG.getDataLayout()), Callee);
1469 }
1470
1471 // Compute the operands for the CALLn node.
1473 Ops.push_back(Chain);
1474 Ops.push_back(Callee);
1475
1476 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1477 // isn't reliable.
1478 Ops.append(OutVals.begin(),
1479 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1480 // Add a pointer to the vararg buffer.
1481 if (IsVarArg)
1482 Ops.push_back(FINode);
1483
1484 SmallVector<EVT, 8> InTys;
1485 for (const auto &In : Ins) {
1486 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1487 assert(!In.Flags.isNest() && "nest is not valid for return values");
1488 if (In.Flags.isInAlloca())
1489 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1490 if (In.Flags.isInConsecutiveRegs())
1491 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1492 if (In.Flags.isInConsecutiveRegsLast())
1493 fail(DL, DAG,
1494 "WebAssembly hasn't implemented cons regs last return values");
1495 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1496 // registers.
1497 InTys.push_back(In.VT);
1498 }
1499
1500 // Lastly, if this is a call to a funcref we need to add an instruction
1501 // table.set to the chain and transform the call.
1503 CLI.CB->getCalledOperand()->getType())) {
1504 // In the absence of the function references proposal, where a funcref call
1505 // would be lowered to call_ref, we use reference types: we generate a
1506 // table.set that installs the funcref in a special table used solely for this
1507 // purpose, followed by a call_indirect. Here we just generate the table.set
1508 // and return its SDValue so that LowerCall can finalize the lowering by
1509 // generating the call_indirect.
1510 SDValue Chain = Ops[0];
1511
1513 MF.getContext(), Subtarget);
1514 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1515 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1516 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1517 SDValue TableSet = DAG.getMemIntrinsicNode(
1518 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1519 MVT::funcref,
1520 // Machine Mem Operand args
1521 MachinePointerInfo(
1523 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1525
1526 Ops[0] = TableSet; // The new chain is the TableSet itself
1527 }
1528
1529 if (CLI.IsTailCall) {
1530 // ret_calls do not return values to the current frame
1531 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1532 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1533 }
1534
1535 InTys.push_back(MVT::Other);
1536 SDVTList InTyList = DAG.getVTList(InTys);
1537 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1538
1539 for (size_t I = 0; I < Ins.size(); ++I)
1540 InVals.push_back(Res.getValue(I));
1541
1542 // Return the chain
1543 return Res.getValue(Ins.size());
1544}
1545
1546bool WebAssemblyTargetLowering::CanLowerReturn(
1547 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1548 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1549 const Type *RetTy) const {
1550 // WebAssembly can only handle returning tuples with multivalue enabled
1551 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1552}
1553
1554SDValue WebAssemblyTargetLowering::LowerReturn(
1555 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1557 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1558 SelectionDAG &DAG) const {
1559 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1560 "MVP WebAssembly can only return up to one value");
1561 if (!callingConvSupported(CallConv))
1562 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1563
1564 SmallVector<SDValue, 4> RetOps(1, Chain);
1565 RetOps.append(OutVals.begin(), OutVals.end());
1566 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1567
1568 // Record the number and types of the return values.
1569 for (const ISD::OutputArg &Out : Outs) {
1570 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1571 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1572 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1573 if (Out.Flags.isInAlloca())
1574 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1575 if (Out.Flags.isInConsecutiveRegs())
1576 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1578 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1579 }
1580
1581 return Chain;
1582}
1583
1584SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1585 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1586 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1587 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1588 if (!callingConvSupported(CallConv))
1589 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1590
1591 MachineFunction &MF = DAG.getMachineFunction();
1592 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1593
1594 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1595 // of the incoming values before they're represented by virtual registers.
1596 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1597
1598 bool HasSwiftErrorArg = false;
1599 bool HasSwiftSelfArg = false;
1600 for (const ISD::InputArg &In : Ins) {
1601 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1602 HasSwiftErrorArg |= In.Flags.isSwiftError();
1603 if (In.Flags.isInAlloca())
1604 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1605 if (In.Flags.isNest())
1606 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1607 if (In.Flags.isInConsecutiveRegs())
1608 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1609 if (In.Flags.isInConsecutiveRegsLast())
1610 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1611 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1612 // registers.
1613 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1614 DAG.getTargetConstant(InVals.size(),
1615 DL, MVT::i32))
1616 : DAG.getUNDEF(In.VT));
1617
1618 // Record the number and types of arguments.
1619 MFI->addParam(In.VT);
1620 }
1621
1622 // For swiftcc, emit additional swiftself and swifterror arguments if they
1623 // are not already present. These additional arguments are also added to the
1624 // callee signature. They are necessary to match callee and caller signatures
1625 // for indirect calls.
1626 auto PtrVT = getPointerTy(MF.getDataLayout());
1627 if (CallConv == CallingConv::Swift) {
1628 if (!HasSwiftSelfArg) {
1629 MFI->addParam(PtrVT);
1630 }
1631 if (!HasSwiftErrorArg) {
1632 MFI->addParam(PtrVT);
1633 }
1634 }
1635 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1636 // the buffer is passed as an argument.
1637 if (IsVarArg) {
1638 MVT PtrVT = getPointerTy(MF.getDataLayout());
1639 Register VarargVreg =
1641 MFI->setVarargBufferVreg(VarargVreg);
1642 Chain = DAG.getCopyToReg(
1643 Chain, DL, VarargVreg,
1644 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1645 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1646 MFI->addParam(PtrVT);
1647 }
1648
1649 // Record the number and types of arguments and results.
1650 SmallVector<MVT, 4> Params;
1653 MF.getFunction(), DAG.getTarget(), Params, Results);
1654 for (MVT VT : Results)
1655 MFI->addResult(VT);
1656 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1657 // the param logic here with ComputeSignatureVTs
1658 assert(MFI->getParams().size() == Params.size() &&
1659 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1660 Params.begin()));
1661
1662 return Chain;
1663}
1664
1665void WebAssemblyTargetLowering::ReplaceNodeResults(
1667 switch (N->getOpcode()) {
1669 // Do not add any results, signifying that N should not be custom lowered
1670 // after all. This happens because simd128 turns on custom lowering for
1671 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1672 // illegal type.
1673 break;
1677 // Do not add any results, signifying that N should not be custom lowered.
1678 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1679 break;
1680 case ISD::ADD:
1681 case ISD::SUB:
1682 Results.push_back(Replace128Op(N, DAG));
1683 break;
1684 default:
1686 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1687 }
1688}
1689
1690//===----------------------------------------------------------------------===//
1691// Custom lowering hooks.
1692//===----------------------------------------------------------------------===//
1693
1694SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1695 SelectionDAG &DAG) const {
1696 SDLoc DL(Op);
1697 switch (Op.getOpcode()) {
1698 default:
1699 llvm_unreachable("unimplemented operation lowering");
1700 return SDValue();
1701 case ISD::FrameIndex:
1702 return LowerFrameIndex(Op, DAG);
1703 case ISD::GlobalAddress:
1704 return LowerGlobalAddress(Op, DAG);
1706 return LowerGlobalTLSAddress(Op, DAG);
1708 return LowerExternalSymbol(Op, DAG);
1709 case ISD::JumpTable:
1710 return LowerJumpTable(Op, DAG);
1711 case ISD::BR_JT:
1712 return LowerBR_JT(Op, DAG);
1713 case ISD::VASTART:
1714 return LowerVASTART(Op, DAG);
1715 case ISD::BlockAddress:
1716 case ISD::BRIND:
1717 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1718 return SDValue();
1719 case ISD::RETURNADDR:
1720 return LowerRETURNADDR(Op, DAG);
1721 case ISD::FRAMEADDR:
1722 return LowerFRAMEADDR(Op, DAG);
1723 case ISD::CopyToReg:
1724 return LowerCopyToReg(Op, DAG);
1725  case ISD::EXTRACT_VECTOR_ELT:
1726  case ISD::INSERT_VECTOR_ELT:
1727    return LowerAccessVectorElement(Op, DAG);
1728  case ISD::INTRINSIC_WO_CHAIN:
1729  case ISD::INTRINSIC_W_CHAIN:
1730  case ISD::INTRINSIC_VOID:
1731    return LowerIntrinsic(Op, DAG);
1732  case ISD::SIGN_EXTEND_INREG:
1733    return LowerSIGN_EXTEND_INREG(Op, DAG);
1734  case ISD::ANY_EXTEND_VECTOR_INREG:
1735  case ISD::SIGN_EXTEND_VECTOR_INREG:
1736  case ISD::ZERO_EXTEND_VECTOR_INREG:
1737    return LowerEXTEND_VECTOR_INREG(Op, DAG);
1738 case ISD::BUILD_VECTOR:
1739 return LowerBUILD_VECTOR(Op, DAG);
1740  case ISD::VECTOR_SHUFFLE:
1741    return LowerVECTOR_SHUFFLE(Op, DAG);
1742 case ISD::SETCC:
1743 return LowerSETCC(Op, DAG);
1744 case ISD::SHL:
1745 case ISD::SRA:
1746 case ISD::SRL:
1747 return LowerShift(Op, DAG);
1748  case ISD::FP_TO_SINT_SAT:
1749  case ISD::FP_TO_UINT_SAT:
1750    return LowerFP_TO_INT_SAT(Op, DAG);
1751 case ISD::FMINNUM:
1752 case ISD::FMINIMUMNUM:
1753 return LowerFMIN(Op, DAG);
1754 case ISD::FMAXNUM:
1755 case ISD::FMAXIMUMNUM:
1756 return LowerFMAX(Op, DAG);
1757 case ISD::LOAD:
1758 return LowerLoad(Op, DAG);
1759 case ISD::STORE:
1760 return LowerStore(Op, DAG);
1761 case ISD::CTPOP:
1762 case ISD::CTLZ:
1763 case ISD::CTTZ:
1764 return DAG.UnrollVectorOp(Op.getNode());
1765 case ISD::CLEAR_CACHE:
1766 report_fatal_error("llvm.clear_cache is not supported on wasm");
1767 case ISD::SMUL_LOHI:
1768 case ISD::UMUL_LOHI:
1769 return LowerMUL_LOHI(Op, DAG);
1770 case ISD::UADDO:
1771 return LowerUADDO(Op, DAG);
1772 }
1773}
1774
1775static bool IsWebAssemblyGlobal(SDValue Op) {
1776  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
1777    return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
1778
1779 return false;
1780}
1781
1782static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1783 SelectionDAG &DAG) {
1784  auto *FI = dyn_cast<FrameIndexSDNode>(Op);
1785  if (!FI)
1786 return std::nullopt;
1787
1788 auto &MF = DAG.getMachineFunction();
1789  return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1790}
1791
1792SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1793 SelectionDAG &DAG) const {
1794 SDLoc DL(Op);
1795 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1796 const SDValue &Value = SN->getValue();
1797 const SDValue &Base = SN->getBasePtr();
1798 const SDValue &Offset = SN->getOffset();
1799
1800  if (IsWebAssemblyGlobal(Base)) {
1801    if (!Offset->isUndef())
1802 report_fatal_error("unexpected offset when storing to webassembly global",
1803 false);
1804
1805 SDVTList Tys = DAG.getVTList(MVT::Other);
1806 SDValue Ops[] = {SN->getChain(), Value, Base};
1807 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1808 SN->getMemoryVT(), SN->getMemOperand());
1809 }
1810
1811 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1812 if (!Offset->isUndef())
1813 report_fatal_error("unexpected offset when storing to webassembly local",
1814 false);
1815
1816 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1817 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1818 SDValue Ops[] = {SN->getChain(), Idx, Value};
1819 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1820 }
1821
1824 "Encountered an unlowerable store to the wasm_var address space",
1825 false);
1826
1827 return Op;
1828}
1829
1830SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1831 SelectionDAG &DAG) const {
1832 SDLoc DL(Op);
1833 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1834 const SDValue &Base = LN->getBasePtr();
1835 const SDValue &Offset = LN->getOffset();
1836
1837  if (IsWebAssemblyGlobal(Base)) {
1838    if (!Offset->isUndef())
1839      report_fatal_error(
1840          "unexpected offset when loading from webassembly global", false);
1841
1842 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1843 SDValue Ops[] = {LN->getChain(), Base};
1844 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1845 LN->getMemoryVT(), LN->getMemOperand());
1846 }
1847
1848 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1849 if (!Offset->isUndef())
1851 "unexpected offset when loading from webassembly local", false);
1852
1853 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1854 EVT LocalVT = LN->getValueType(0);
1855 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1856 {LN->getChain(), Idx});
1857 }
1858
1861 "Encountered an unlowerable load from the wasm_var address space",
1862 false);
1863
1864 return Op;
1865}
1866
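// Lowers {S,U}MUL_LOHI to one wide multiply node when wide-arithmetic is
// enabled. Illustrative sketch of the transform performed below:
//   (umul_lohi i64 %x, i64 %y)  ->  (lo, hi) = i64.mul_wide_u %x, %y
// with the two i64 results merged back into the node's two result values.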
1867SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1868 SelectionDAG &DAG) const {
1869 assert(Subtarget->hasWideArithmetic());
1870 assert(Op.getValueType() == MVT::i64);
1871 SDLoc DL(Op);
1872 unsigned Opcode;
1873 switch (Op.getOpcode()) {
1874 case ISD::UMUL_LOHI:
1875 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1876 break;
1877 case ISD::SMUL_LOHI:
1878 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1879 break;
1880 default:
1881 llvm_unreachable("unexpected opcode");
1882 }
1883 SDValue LHS = Op.getOperand(0);
1884 SDValue RHS = Op.getOperand(1);
1885 SDValue Lo =
1886 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1887 SDValue Hi(Lo.getNode(), 1);
1888 SDValue Ops[] = {Lo, Hi};
1889 return DAG.getMergeValues(Ops, DL);
1890}
1891
1892// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1893//
1894// This enables generating a single wasm instruction for this operation where
1895// the upper half of both operands are constant zeros. The upper half of the
1896// result is then whether the overflow happened.
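// Illustrative sketch of the transform performed below:
//   (uaddo i64 %x, i64 %y)
//     -> (lo, hi) = i64.add128 %x, 0, %y, 0
//        result   = lo, overflow = (i32) hi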
1897SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1898 SelectionDAG &DAG) const {
1899 assert(Subtarget->hasWideArithmetic());
1900 assert(Op.getValueType() == MVT::i64);
1901 assert(Op.getOpcode() == ISD::UADDO);
1902 SDLoc DL(Op);
1903 SDValue LHS = Op.getOperand(0);
1904 SDValue RHS = Op.getOperand(1);
1905 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1906 SDValue Result =
1907 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1908 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1909 SDValue CarryI64(Result.getNode(), 1);
1910 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1911 SDValue Ops[] = {Result, CarryI32};
1912 return DAG.getMergeValues(Ops, DL);
1913}
1914
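// Replaces an illegal i128 ADD/SUB with a single i64.add128/i64.sub128 node
// operating on the i64 halves. Illustrative sketch:
//   (add i128 %a, %b) -> (lo, hi) = i64.add128 %a.lo, %a.hi, %b.lo, %b.hi
//                        result   = build_pair lo, hi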
1915SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1916 SelectionDAG &DAG) const {
1917 assert(Subtarget->hasWideArithmetic());
1918 assert(N->getValueType(0) == MVT::i128);
1919 SDLoc DL(N);
1920 unsigned Opcode;
1921 switch (N->getOpcode()) {
1922 case ISD::ADD:
1923 Opcode = WebAssemblyISD::I64_ADD128;
1924 break;
1925 case ISD::SUB:
1926 Opcode = WebAssemblyISD::I64_SUB128;
1927 break;
1928 default:
1929 llvm_unreachable("unexpected opcode");
1930 }
1931 SDValue LHS = N->getOperand(0);
1932 SDValue RHS = N->getOperand(1);
1933
1934 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1935 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1936 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1937 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1938 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1939 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1940 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1941 LHS_0, LHS_1, RHS_0, RHS_1);
1942 SDValue Result_HI(Result_LO.getNode(), 1);
1943 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1944}
1945
1946SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1947 SelectionDAG &DAG) const {
1948 SDValue Src = Op.getOperand(2);
1949 if (isa<FrameIndexSDNode>(Src.getNode())) {
1950 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1951 // the FI to some LEA-like instruction, but since we don't have that, we
1952 // need to insert some kind of instruction that can take an FI operand and
1953 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1954 // local.copy between Op and its FI operand.
1955 SDValue Chain = Op.getOperand(0);
1956 SDLoc DL(Op);
1957 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1958 EVT VT = Src.getValueType();
1959 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1960 : WebAssembly::COPY_I64,
1961 DL, VT, Src),
1962 0);
1963 return Op.getNode()->getNumValues() == 1
1964 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1965 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1966 Op.getNumOperands() == 4 ? Op.getOperand(3)
1967 : SDValue());
1968 }
1969 return SDValue();
1970}
1971
1972SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1973 SelectionDAG &DAG) const {
1974 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1975 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1976}
1977
1978SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1979 SelectionDAG &DAG) const {
1980 SDLoc DL(Op);
1981
1982 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1983 fail(DL, DAG,
1984 "Non-Emscripten WebAssembly hasn't implemented "
1985 "__builtin_return_address");
1986 return SDValue();
1987 }
1988
1989 unsigned Depth = Op.getConstantOperandVal(0);
1990 MakeLibCallOptions CallOptions;
1991 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1992 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1993 .first;
1994}
1995
1996SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1997 SelectionDAG &DAG) const {
1998 // Non-zero depths are not supported by WebAssembly currently. Use the
1999 // legalizer's default expansion, which is to return 0 (what this function is
2000 // documented to do).
2001 if (Op.getConstantOperandVal(0) > 0)
2002 return SDValue();
2003
2004  DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
2005  EVT VT = Op.getValueType();
2006 Register FP =
2007 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2008 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2009}
2010
2011SDValue
2012WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2013 SelectionDAG &DAG) const {
2014 SDLoc DL(Op);
2015 const auto *GA = cast<GlobalAddressSDNode>(Op);
2016
2017 MachineFunction &MF = DAG.getMachineFunction();
2018 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2019 report_fatal_error("cannot use thread-local storage without bulk memory",
2020 false);
2021
2022 const GlobalValue *GV = GA->getGlobal();
2023
2024 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2025 // on other targets, if we have thread-local storage, only the local-exec
2026 // model is possible.
2027 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2028                   ? GV->getThreadLocalMode()
2029                   : GlobalValue::LocalExecTLSModel;
2030
2031  // Unsupported TLS modes
2032  assert(model != GlobalValue::NotThreadLocal);
2033  assert(model != GlobalValue::InitialExecTLSModel);
2034
2035  if (model == GlobalValue::LocalExecTLSModel ||
2036      model == GlobalValue::LocalDynamicTLSModel ||
2037      (model == GlobalValue::GeneralDynamicTLSModel &&
2038       getTargetMachine().shouldAssumeDSOLocal(GV))) {
2039 // For DSO-local TLS variables we use offset from __tls_base
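    // Illustrative sketch of the address computation built below:
    //   addr = (global.get __tls_base) + WrapperREL(GV)
    // i.e. the thread's TLS base plus the symbol's offset relative to it.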
2040
2041 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2042 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2043 : WebAssembly::GLOBAL_GET_I32;
2044 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2045
2046    SDValue BaseAddr(
2047        DAG.getMachineNode(GlobalGet, DL, PtrVT,
2048 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2049 0);
2050
2051 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2052 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2053 SDValue SymOffset =
2054 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2055
2056 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2057 }
2058
2060
2061 EVT VT = Op.getValueType();
2062 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2063 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2064 GA->getOffset(),
2065                                                WebAssemblyII::MO_GOT_TLS));
2066}
2067
2068SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2069 SelectionDAG &DAG) const {
2070 SDLoc DL(Op);
2071 const auto *GA = cast<GlobalAddressSDNode>(Op);
2072 EVT VT = Op.getValueType();
2073 assert(GA->getTargetFlags() == 0 &&
2074 "Unexpected target flags on generic GlobalAddressSDNode");
2076 fail(DL, DAG, "Invalid address space for WebAssembly target");
2077
2078 unsigned OperandFlags = 0;
2079 const GlobalValue *GV = GA->getGlobal();
2080  // Since WebAssembly tables cannot yet be shared across modules, we don't
2081 // need special treatment for tables in PIC mode.
2082  if (isPositionIndependent() &&
2083      !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
2084 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
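      // Illustrative sketch of the DSO-local PIC lowering built below:
      //   addr = (global.get __memory_base) + WrapperREL(GV)
      // with __table_base used instead of __memory_base for function symbols.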
2085 MachineFunction &MF = DAG.getMachineFunction();
2086 MVT PtrVT = getPointerTy(MF.getDataLayout());
2087 const char *BaseName;
2088 if (GV->getValueType()->isFunctionTy()) {
2089 BaseName = MF.createExternalSymbolName("__table_base");
2091 } else {
2092 BaseName = MF.createExternalSymbolName("__memory_base");
2094 }
2096 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2097 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2098
2099 SDValue SymAddr = DAG.getNode(
2100 WebAssemblyISD::WrapperREL, DL, VT,
2101 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2102 OperandFlags));
2103
2104 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2105 }
2106    OperandFlags = WebAssemblyII::MO_GOT;
2107  }
2108
2109 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2110 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2111 GA->getOffset(), OperandFlags));
2112}
2113
2114SDValue
2115WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2116 SelectionDAG &DAG) const {
2117 SDLoc DL(Op);
2118 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2119 EVT VT = Op.getValueType();
2120 assert(ES->getTargetFlags() == 0 &&
2121 "Unexpected target flags on generic ExternalSymbolSDNode");
2122 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2123 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2124}
2125
2126SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2127 SelectionDAG &DAG) const {
2128 // There's no need for a Wrapper node because we always incorporate a jump
2129 // table operand into a BR_TABLE instruction, rather than ever
2130 // materializing it in a register.
2131 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2132 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2133 JT->getTargetFlags());
2134}
2135
2136SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2137 SelectionDAG &DAG) const {
2138 SDLoc DL(Op);
2139 SDValue Chain = Op.getOperand(0);
2140 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2141 SDValue Index = Op.getOperand(2);
2142 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2143
2144  SmallVector<SDValue, 8> Ops;
2145  Ops.push_back(Chain);
2146 Ops.push_back(Index);
2147
2148 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2149 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2150
2151 // Add an operand for each case.
2152 for (auto *MBB : MBBs)
2153 Ops.push_back(DAG.getBasicBlock(MBB));
2154
2155 // Add the first MBB as a dummy default target for now. This will be replaced
2156 // with the proper default target (and the preceding range check eliminated)
2157 // if possible by WebAssemblyFixBrTableDefaults.
2158 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2159 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2160}
2161
2162SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2163 SelectionDAG &DAG) const {
2164 SDLoc DL(Op);
2165 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2166
2167 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2168 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2169
2170 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2171 MFI->getVarargBufferVreg(), PtrVT);
2172 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2173 MachinePointerInfo(SV));
2174}
2175
2176SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2177 SelectionDAG &DAG) const {
2178 MachineFunction &MF = DAG.getMachineFunction();
2179 unsigned IntNo;
2180 switch (Op.getOpcode()) {
2181  case ISD::INTRINSIC_VOID:
2182  case ISD::INTRINSIC_W_CHAIN:
2183    IntNo = Op.getConstantOperandVal(1);
2184 break;
2185  case ISD::INTRINSIC_WO_CHAIN:
2186    IntNo = Op.getConstantOperandVal(0);
2187 break;
2188 default:
2189 llvm_unreachable("Invalid intrinsic");
2190 }
2191 SDLoc DL(Op);
2192
2193 switch (IntNo) {
2194 default:
2195 return SDValue(); // Don't custom lower most intrinsics.
2196
2197 case Intrinsic::wasm_lsda: {
2198 auto PtrVT = getPointerTy(MF.getDataLayout());
2199 const char *SymName = MF.createExternalSymbolName(
2200 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2201 if (isPositionIndependent()) {
2202      SDValue Node = DAG.getTargetExternalSymbol(
2203          SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2204 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2205      SDValue BaseAddr =
2206          DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2207 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2208 SDValue SymAddr =
2209 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2210 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2211 }
2212 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2213 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2214 }
2215
2216 case Intrinsic::wasm_shuffle: {
2217 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2218 SDValue Ops[18];
2219 size_t OpIdx = 0;
2220 Ops[OpIdx++] = Op.getOperand(1);
2221 Ops[OpIdx++] = Op.getOperand(2);
2222 while (OpIdx < 18) {
2223 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2224 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2225 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2226 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2227 } else {
2228 Ops[OpIdx++] = MaskIdx;
2229 }
2230 }
2231 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2232 }
2233
2234 case Intrinsic::thread_pointer: {
2235 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2236 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2237 : WebAssembly::GLOBAL_GET_I32;
2238 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2239 return SDValue(
2240 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2241 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2242 0);
2243 }
2244 }
2245}
2246
2247SDValue
2248WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2249 SelectionDAG &DAG) const {
2250 SDLoc DL(Op);
2251 // If sign extension operations are disabled, allow sext_inreg only if operand
2252 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2253 // extension operations, but allowing sext_inreg in this context lets us have
2254 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2255 // everywhere would be simpler in this file, but would necessitate large and
2256 // brittle patterns to undo the expansion and select extract_lane_s
2257 // instructions.
2258 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2259 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2260 return SDValue();
2261
2262 const SDValue &Extract = Op.getOperand(0);
2263 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2264 if (VecT.getVectorElementType().getSizeInBits() > 32)
2265 return SDValue();
2266 MVT ExtractedLaneT =
2267 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2268 MVT ExtractedVecT =
2269 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2270 if (ExtractedVecT == VecT)
2271 return Op;
2272
2273 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2274 const SDNode *Index = Extract.getOperand(1).getNode();
2275 if (!isa<ConstantSDNode>(Index))
2276 return SDValue();
2277 unsigned IndexVal = Index->getAsZExtVal();
2278 unsigned Scale =
2279 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2280 assert(Scale > 1);
2281 SDValue NewIndex =
2282 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2283 SDValue NewExtract = DAG.getNode(
2284      ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
2285      DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2286 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2287 Op.getOperand(1));
2288}
2289
2290static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2291 SelectionDAG &DAG) {
2292 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2293 return SDValue();
2294
2295 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2296 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2297 "expected extend_low");
2298 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2299
2300 ArrayRef<int> Mask = Shuffle->getMask();
2301 // Look for a shuffle which moves from the high half to the low half.
2302 size_t FirstIdx = Mask.size() / 2;
2303 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2304 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2305 return SDValue();
2306 }
2307 }
2308
2309 SDLoc DL(Op);
2310 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2311 ? WebAssemblyISD::EXTEND_HIGH_S
2312 : WebAssemblyISD::EXTEND_HIGH_U;
2313 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2314}
2315
2316SDValue
2317WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2318 SelectionDAG &DAG) const {
2319 SDLoc DL(Op);
2320 EVT VT = Op.getValueType();
2321 SDValue Src = Op.getOperand(0);
2322 EVT SrcVT = Src.getValueType();
2323
2324 if (SrcVT.getVectorElementType() == MVT::i1 ||
2325 SrcVT.getVectorElementType() == MVT::i64)
2326 return SDValue();
2327
2328 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2329 "Unexpected extension factor.");
2330 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2331
2332 if (Scale != 2 && Scale != 4 && Scale != 8)
2333 return SDValue();
2334
2335 unsigned Ext;
2336 switch (Op.getOpcode()) {
2337 default:
2338 llvm_unreachable("unexpected opcode");
2339  case ISD::ANY_EXTEND_VECTOR_INREG:
2340  case ISD::ZERO_EXTEND_VECTOR_INREG:
2341    Ext = WebAssemblyISD::EXTEND_LOW_U;
2342 break;
2343  case ISD::SIGN_EXTEND_VECTOR_INREG:
2344    Ext = WebAssemblyISD::EXTEND_LOW_S;
2345 break;
2346 }
2347
2348 if (Scale == 2) {
2349 // See if we can use EXTEND_HIGH.
2350 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2351 return ExtendHigh;
2352 }
2353
2354 SDValue Ret = Src;
2355 while (Scale != 1) {
2356 Ret = DAG.getNode(Ext, DL,
2357                      Ret.getValueType()
2358                          .widenIntegerVectorElementType(*DAG.getContext())
2359                          .getHalfNumVectorElementsVT(*DAG.getContext()),
2360                      Ret);
2361 Scale /= 2;
2362 }
2363 assert(Ret.getValueType() == VT);
2364 return Ret;
2365}
2366
2367static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2368  SDLoc DL(Op);
2369 if (Op.getValueType() != MVT::v2f64)
2370 return SDValue();
2371
2372 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2373 unsigned &Index) -> bool {
2374 switch (Op.getOpcode()) {
2375 case ISD::SINT_TO_FP:
2376 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2377 break;
2378 case ISD::UINT_TO_FP:
2379 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2380 break;
2381 case ISD::FP_EXTEND:
2382 Opcode = WebAssemblyISD::PROMOTE_LOW;
2383 break;
2384 default:
2385 return false;
2386 }
2387
2388 auto ExtractVector = Op.getOperand(0);
2389 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2390 return false;
2391
2392 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2393 return false;
2394
2395 SrcVec = ExtractVector.getOperand(0);
2396 Index = ExtractVector.getConstantOperandVal(1);
2397 return true;
2398 };
2399
2400 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2401 SDValue LHSSrcVec, RHSSrcVec;
2402 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2403 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2404 return SDValue();
2405
2406 if (LHSOpcode != RHSOpcode)
2407 return SDValue();
2408
2409 MVT ExpectedSrcVT;
2410 switch (LHSOpcode) {
2411 case WebAssemblyISD::CONVERT_LOW_S:
2412 case WebAssemblyISD::CONVERT_LOW_U:
2413 ExpectedSrcVT = MVT::v4i32;
2414 break;
2415 case WebAssemblyISD::PROMOTE_LOW:
2416 ExpectedSrcVT = MVT::v4f32;
2417 break;
2418 }
2419 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2420 return SDValue();
2421
2422 auto Src = LHSSrcVec;
2423 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2424 // Shuffle the source vector so that the converted lanes are the low lanes.
2425 Src = DAG.getVectorShuffle(
2426 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2427 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2428 }
2429 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2430}
2431
2432SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2433 SelectionDAG &DAG) const {
2434 MVT VT = Op.getSimpleValueType();
2435 if (VT == MVT::v8f16) {
2436    // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2437    // FP16 type, so cast them to I16s.
2438 MVT IVT = VT.changeVectorElementType(MVT::i16);
2439    SmallVector<SDValue, 8> NewOps;
2440    for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2441 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2442 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2443 return DAG.getBitcast(VT, Res);
2444 }
2445
2446 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2447 return ConvertLow;
2448
2449 SDLoc DL(Op);
2450 const EVT VecT = Op.getValueType();
2451 const EVT LaneT = Op.getOperand(0).getValueType();
2452 const size_t Lanes = Op.getNumOperands();
2453 bool CanSwizzle = VecT == MVT::v16i8;
2454
2455 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2456 // possible number of lanes at once followed by a sequence of replace_lane
2457 // instructions to individually initialize any remaining lanes.
2458
2459 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2460 // swizzled lanes should be given greater weight.
2461
2462 // TODO: Investigate looping rather than always extracting/replacing specific
2463 // lanes to fill gaps.
2464
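  // Illustrative example of the overall strategy: for
  //   (v4i32 build_vector %x, %x, %x, C)
  // the splat of %x covers the most lanes, so the vector is first built as a
  // splat of %x and the constant lane is then patched with a single
  // replace_lane (INSERT_VECTOR_ELT).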
2465 auto IsConstant = [](const SDValue &V) {
2466 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2467 };
2468
2469 // Returns the source vector and index vector pair if they exist. Checks for:
2470 // (extract_vector_elt
2471 // $src,
2472 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2473 // )
2474 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2475 auto Bail = std::make_pair(SDValue(), SDValue());
2476 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2477 return Bail;
2478 const SDValue &SwizzleSrc = Lane->getOperand(0);
2479 const SDValue &IndexExt = Lane->getOperand(1);
2480 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2481 return Bail;
2482 const SDValue &Index = IndexExt->getOperand(0);
2483 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2484 return Bail;
2485 const SDValue &SwizzleIndices = Index->getOperand(0);
2486 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2487 SwizzleIndices.getValueType() != MVT::v16i8 ||
2488 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2489 Index->getConstantOperandVal(1) != I)
2490 return Bail;
2491 return std::make_pair(SwizzleSrc, SwizzleIndices);
2492 };
2493
2494 // If the lane is extracted from another vector at a constant index, return
2495 // that vector. The source vector must not have more lanes than the dest
2496 // because the shufflevector indices are in terms of the destination lanes and
2497 // would not be able to address the smaller individual source lanes.
2498 auto GetShuffleSrc = [&](const SDValue &Lane) {
2499 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2500 return SDValue();
2501 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2502 return SDValue();
2503 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2504 VecT.getVectorNumElements())
2505 return SDValue();
2506 return Lane->getOperand(0);
2507 };
2508
2509 using ValueEntry = std::pair<SDValue, size_t>;
2510 SmallVector<ValueEntry, 16> SplatValueCounts;
2511
2512 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2513 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2514
2515 using ShuffleEntry = std::pair<SDValue, size_t>;
2516 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2517
2518 auto AddCount = [](auto &Counts, const auto &Val) {
2519 auto CountIt =
2520 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2521 if (CountIt == Counts.end()) {
2522 Counts.emplace_back(Val, 1);
2523 } else {
2524 CountIt->second++;
2525 }
2526 };
2527
2528 auto GetMostCommon = [](auto &Counts) {
2529 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2530 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2531 return *CommonIt;
2532 };
2533
2534 size_t NumConstantLanes = 0;
2535
2536 // Count eligible lanes for each type of vector creation op
2537 for (size_t I = 0; I < Lanes; ++I) {
2538 const SDValue &Lane = Op->getOperand(I);
2539 if (Lane.isUndef())
2540 continue;
2541
2542 AddCount(SplatValueCounts, Lane);
2543
2544 if (IsConstant(Lane))
2545 NumConstantLanes++;
2546 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2547 AddCount(ShuffleCounts, ShuffleSrc);
2548 if (CanSwizzle) {
2549 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2550 if (SwizzleSrcs.first)
2551 AddCount(SwizzleCounts, SwizzleSrcs);
2552 }
2553 }
2554
2555 SDValue SplatValue;
2556 size_t NumSplatLanes;
2557 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2558
2559 SDValue SwizzleSrc;
2560 SDValue SwizzleIndices;
2561 size_t NumSwizzleLanes = 0;
2562 if (SwizzleCounts.size())
2563 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2564 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2565
2566 // Shuffles can draw from up to two vectors, so find the two most common
2567 // sources.
2568 SDValue ShuffleSrc1, ShuffleSrc2;
2569 size_t NumShuffleLanes = 0;
2570 if (ShuffleCounts.size()) {
2571 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2572 llvm::erase_if(ShuffleCounts,
2573 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2574 }
2575 if (ShuffleCounts.size()) {
2576 size_t AdditionalShuffleLanes;
2577 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2578 GetMostCommon(ShuffleCounts);
2579 NumShuffleLanes += AdditionalShuffleLanes;
2580 }
2581
2582 // Predicate returning true if the lane is properly initialized by the
2583 // original instruction
2584 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2585  SDValue Result;
2586  // Prefer swizzles over shuffles over vector consts over splats
2587 if (NumSwizzleLanes >= NumShuffleLanes &&
2588 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2589 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2590 SwizzleIndices);
2591 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2592 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2593 return Swizzled == GetSwizzleSrcs(I, Lane);
2594 };
2595 } else if (NumShuffleLanes >= NumConstantLanes &&
2596 NumShuffleLanes >= NumSplatLanes) {
2597 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2598 size_t DestLaneCount = VecT.getVectorNumElements();
2599 size_t Scale1 = 1;
2600 size_t Scale2 = 1;
2601 SDValue Src1 = ShuffleSrc1;
2602 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2603 if (Src1.getValueType() != VecT) {
2604 size_t LaneSize =
2605          Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2606      assert(LaneSize > DestLaneSize);
2607 Scale1 = LaneSize / DestLaneSize;
2608 Src1 = DAG.getBitcast(VecT, Src1);
2609 }
2610 if (Src2.getValueType() != VecT) {
2611 size_t LaneSize =
2612          Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2613      assert(LaneSize > DestLaneSize);
2614 Scale2 = LaneSize / DestLaneSize;
2615 Src2 = DAG.getBitcast(VecT, Src2);
2616 }
2617
2618 int Mask[16];
2619 assert(DestLaneCount <= 16);
2620 for (size_t I = 0; I < DestLaneCount; ++I) {
2621 const SDValue &Lane = Op->getOperand(I);
2622 SDValue Src = GetShuffleSrc(Lane);
2623 if (Src == ShuffleSrc1) {
2624 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2625 } else if (Src && Src == ShuffleSrc2) {
2626 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2627 } else {
2628 Mask[I] = -1;
2629 }
2630 }
2631 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2632 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2633 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2634 auto Src = GetShuffleSrc(Lane);
2635 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2636 };
2637 } else if (NumConstantLanes >= NumSplatLanes) {
2638 SmallVector<SDValue, 16> ConstLanes;
2639 for (const SDValue &Lane : Op->op_values()) {
2640 if (IsConstant(Lane)) {
2641 // Values may need to be fixed so that they will sign extend to be
2642 // within the expected range during ISel. Check whether the value is in
2643 // bounds based on the lane bit width and if it is out of bounds, lop
2644 // off the extra bits.
2645 uint64_t LaneBits = 128 / Lanes;
2646 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2647 ConstLanes.push_back(DAG.getConstant(
2648 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2649 SDLoc(Lane), LaneT));
2650 } else {
2651 ConstLanes.push_back(Lane);
2652 }
2653 } else if (LaneT.isFloatingPoint()) {
2654 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2655 } else {
2656 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2657 }
2658 }
2659 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2660 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2661 return IsConstant(Lane);
2662 };
2663 } else {
2664 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2665 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2666 (DestLaneSize == 32 || DestLaneSize == 64)) {
2667 // Could be selected to load_zero.
2668 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2669 } else {
2670 // Use a splat (which might be selected as a load splat)
2671 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2672 }
2673 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2674 return Lane == SplatValue;
2675 };
2676 }
2677
2678 assert(Result);
2679 assert(IsLaneConstructed);
2680
2681 // Add replace_lane instructions for any unhandled values
2682 for (size_t I = 0; I < Lanes; ++I) {
2683 const SDValue &Lane = Op->getOperand(I);
2684 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2685 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2686 DAG.getConstant(I, DL, MVT::i32));
2687 }
2688
2689 return Result;
2690}
2691
2692SDValue
2693WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2694 SelectionDAG &DAG) const {
2695 SDLoc DL(Op);
2696 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2697 MVT VecType = Op.getOperand(0).getSimpleValueType();
2698 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2699 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2700
2701 // Space for two vector args and sixteen mask indices
2702 SDValue Ops[18];
2703 size_t OpIdx = 0;
2704 Ops[OpIdx++] = Op.getOperand(0);
2705 Ops[OpIdx++] = Op.getOperand(1);
2706
2707 // Expand mask indices to byte indices and materialize them as operands
2708 for (int M : Mask) {
2709 for (size_t J = 0; J < LaneBytes; ++J) {
2710      // Lower undefs (represented by -1 in the mask) to {0..J}, which reuses a
2711      // whole lane of the vector input, to allow further reduction by the VM,
2712      // e.g. matching an 8x16 byte shuffle to an equivalent, cheaper 32x4 one.
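      // Illustrative sketch: for a v4i32 shuffle, an undef lane expands to
      // byte indices 0,1,2,3 (a whole lane of the first input) rather than
      // four arbitrary byte values.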
2713 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2714 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2715 }
2716 }
2717
2718 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2719}
2720
2721SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2722 SelectionDAG &DAG) const {
2723 SDLoc DL(Op);
2724 // The legalizer does not know how to expand the unsupported comparison modes
2725 // of i64x2 vectors, so we manually unroll them here.
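  // Illustrative sketch: (setcc v2i64 %a, %b, cc) is rebuilt below as a
  // build_vector of two i64 lanes, each computed as
  //   (select_cc %a[i], %b[i], -1, 0, cc).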
2726 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2727  SmallVector<SDValue, 2> LHS, RHS;
2728  DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2729 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2730 const SDValue &CC = Op->getOperand(2);
2731 auto MakeLane = [&](unsigned I) {
2732 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2733 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2734 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2735 };
2736 return DAG.getBuildVector(Op->getValueType(0), DL,
2737 {MakeLane(0), MakeLane(1)});
2738}
2739
2740SDValue
2741WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2742 SelectionDAG &DAG) const {
2743 // Allow constant lane indices, expand variable lane indices
2744 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2745 if (isa<ConstantSDNode>(IdxNode)) {
2746 // Ensure the index type is i32 to match the tablegen patterns
2747 uint64_t Idx = IdxNode->getAsZExtVal();
2748 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2749 Ops[Op.getNumOperands() - 1] =
2750 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2751 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2752 }
2753 // Perform default expansion
2754 return SDValue();
2755}
2756
2757static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2758  EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2759 // 32-bit and 64-bit unrolled shifts will have proper semantics
2760 if (LaneT.bitsGE(MVT::i32))
2761 return DAG.UnrollVectorOp(Op.getNode());
2762 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2763 SDLoc DL(Op);
2764 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2765 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2766 unsigned ShiftOpcode = Op.getOpcode();
2767 SmallVector<SDValue, 16> ShiftedElements;
2768 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2769 SmallVector<SDValue, 16> ShiftElements;
2770 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2771 SmallVector<SDValue, 16> UnrolledOps;
2772 for (size_t i = 0; i < NumLanes; ++i) {
2773 SDValue MaskedShiftValue =
2774 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2775 SDValue ShiftedValue = ShiftedElements[i];
2776 if (ShiftOpcode == ISD::SRA)
2777 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2778 ShiftedValue, DAG.getValueType(LaneT));
2779 UnrolledOps.push_back(
2780 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2781 }
2782 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2783}
2784
2785SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2786 SelectionDAG &DAG) const {
2787 SDLoc DL(Op);
2788
2789 // Only manually lower vector shifts
2790 assert(Op.getSimpleValueType().isVector());
2791
2792 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2793 auto ShiftVal = Op.getOperand(1);
2794
2795 // Try to skip bitmask operation since it is implied inside shift instruction
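  // Illustrative example: in (shl v4i32 %v, (and %amt, (splat 31))) the AND is
  // redundant because i32x4.shl already masks the shift amount modulo the lane
  // width (31 here), so only %amt needs to be kept.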
2796 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2797 if (MaskOp.getOpcode() != ISD::AND)
2798 return MaskOp;
2799 SDValue LHS = MaskOp.getOperand(0);
2800 SDValue RHS = MaskOp.getOperand(1);
2801 if (MaskOp.getValueType().isVector()) {
2802 APInt MaskVal;
2803 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2804 std::swap(LHS, RHS);
2805
2806 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2807 MaskVal == MaskBits)
2808 MaskOp = LHS;
2809 } else {
2810 if (!isa<ConstantSDNode>(RHS.getNode()))
2811 std::swap(LHS, RHS);
2812
2813 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2814 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2815 MaskOp = LHS;
2816 }
2817
2818 return MaskOp;
2819 };
2820
2821 // Skip vector and operation
2822 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2823 ShiftVal = DAG.getSplatValue(ShiftVal);
2824 if (!ShiftVal)
2825 return unrollVectorShift(Op, DAG);
2826
2827 // Skip scalar and operation
2828 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2829 // Use anyext because none of the high bits can affect the shift
2830 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2831
2832 unsigned Opcode;
2833 switch (Op.getOpcode()) {
2834 case ISD::SHL:
2835 Opcode = WebAssemblyISD::VEC_SHL;
2836 break;
2837 case ISD::SRA:
2838 Opcode = WebAssemblyISD::VEC_SHR_S;
2839 break;
2840 case ISD::SRL:
2841 Opcode = WebAssemblyISD::VEC_SHR_U;
2842 break;
2843 default:
2844 llvm_unreachable("unexpected opcode");
2845 }
2846
2847 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2848}
2849
2850SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2851 SelectionDAG &DAG) const {
2852 EVT ResT = Op.getValueType();
2853 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2854
2855 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2856 (SatVT == MVT::i32 || SatVT == MVT::i64))
2857 return Op;
2858
2859 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2860 return Op;
2861
2862 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2863 return Op;
2864
2865 return SDValue();
2866}
2867
2868static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG) {
2869  return (Op->getFlags().hasNoNaNs() ||
2870 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2871 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2872 (Op->getFlags().hasNoSignedZeros() ||
2873 DAG.isKnownNeverZeroFloat(Op->getOperand(0)) ||
2874 DAG.isKnownNeverZeroFloat(Op->getOperand(1)));
2875}
2876
2877SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2878 SelectionDAG &DAG) const {
2879 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2880 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2881 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2882 }
2883 return SDValue();
2884}
2885
2886SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2887 SelectionDAG &DAG) const {
2888 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2889 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2890 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2891 }
2892 return SDValue();
2893}
2894
2895//===----------------------------------------------------------------------===//
2896// Custom DAG combine hooks
2897//===----------------------------------------------------------------------===//
2898static SDValue
2899performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2900  auto &DAG = DCI.DAG;
2901 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2902
2903 // Hoist vector bitcasts that don't change the number of lanes out of unary
2904 // shuffles, where they are less likely to get in the way of other combines.
2905 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2906 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2907 SDValue Bitcast = N->getOperand(0);
2908 if (Bitcast.getOpcode() != ISD::BITCAST)
2909 return SDValue();
2910 if (!N->getOperand(1).isUndef())
2911 return SDValue();
2912 SDValue CastOp = Bitcast.getOperand(0);
2913 EVT SrcType = CastOp.getValueType();
2914 EVT DstType = Bitcast.getValueType();
2915 if (!SrcType.is128BitVector() ||
2916 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2917 return SDValue();
2918 SDValue NewShuffle = DAG.getVectorShuffle(
2919 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2920 return DAG.getBitcast(DstType, NewShuffle);
2921}
2922
2923/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2924/// split up into scalar instructions during legalization, and the vector
2925/// extending instructions are selected in performVectorExtendCombine below.
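/// Illustrative sketch: (v4f32 (uint_to_fp (v4i8 X))) becomes
/// (v4f32 (uint_to_fp (v4i32 (zero_extend (v4i8 X))))).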
2926static SDValue
2927performVectorExtendToFPCombine(SDNode *N,
2928                               TargetLowering::DAGCombinerInfo &DCI) {
2929  auto &DAG = DCI.DAG;
2930 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2931 N->getOpcode() == ISD::SINT_TO_FP);
2932
2933 EVT InVT = N->getOperand(0)->getValueType(0);
2934 EVT ResVT = N->getValueType(0);
2935 MVT ExtVT;
2936 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2937 ExtVT = MVT::v4i32;
2938 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2939 ExtVT = MVT::v2i32;
2940 else
2941 return SDValue();
2942
2943  unsigned Op =
2944      N->getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2945  SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2946 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2947}
2948
2949static SDValue
2950performVectorNonNegToFPCombine(SDNode *N,
2951                               TargetLowering::DAGCombinerInfo &DCI) {
2952  auto &DAG = DCI.DAG;
2953
2954 SDNodeFlags Flags = N->getFlags();
2955 SDValue Op0 = N->getOperand(0);
2956 EVT VT = N->getValueType(0);
2957
2958 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2959 // Depending on the target (runtime) backend, this might be performance
2960 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2961 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2962 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2963 }
2964
2965 return SDValue();
2966}
2967
2968static SDValue
2969performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2970  auto &DAG = DCI.DAG;
2971 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2972 N->getOpcode() == ISD::ZERO_EXTEND);
2973
2974 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2975 // possible before the extract_subvector can be expanded.
2976 auto Extract = N->getOperand(0);
2977 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2978 return SDValue();
2979 auto Source = Extract.getOperand(0);
2980 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2981 if (IndexNode == nullptr)
2982 return SDValue();
2983 auto Index = IndexNode->getZExtValue();
2984
2985 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2986 // extracted subvector is the low or high half of its source.
2987 EVT ResVT = N->getValueType(0);
2988 if (ResVT == MVT::v8i16) {
2989 if (Extract.getValueType() != MVT::v8i8 ||
2990 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2991 return SDValue();
2992 } else if (ResVT == MVT::v4i32) {
2993 if (Extract.getValueType() != MVT::v4i16 ||
2994 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2995 return SDValue();
2996 } else if (ResVT == MVT::v2i64) {
2997 if (Extract.getValueType() != MVT::v2i32 ||
2998 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2999 return SDValue();
3000 } else {
3001 return SDValue();
3002 }
3003
3004 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3005 bool IsLow = Index == 0;
3006
3007 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3008 : WebAssemblyISD::EXTEND_HIGH_S)
3009 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3010 : WebAssemblyISD::EXTEND_HIGH_U);
3011
3012 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3013}
3014
3015static SDValue
3016performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3017  auto &DAG = DCI.DAG;
3018
3019 auto GetWasmConversionOp = [](unsigned Op) {
3020 switch (Op) {
3021    case ISD::FP_TO_SINT_SAT:
3022      return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3023    case ISD::FP_TO_UINT_SAT:
3024      return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3025 case ISD::FP_ROUND:
3026 return WebAssemblyISD::DEMOTE_ZERO;
3027 }
3028 llvm_unreachable("unexpected op");
3029 };
3030
3031 auto IsZeroSplat = [](SDValue SplatVal) {
3032 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3033 APInt SplatValue, SplatUndef;
3034 unsigned SplatBitSize;
3035 bool HasAnyUndefs;
3036 // Endianness doesn't matter in this context because we are looking for
3037 // an all-zero value.
3038 return Splat &&
3039 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3040 HasAnyUndefs) &&
3041 SplatValue == 0;
3042 };
3043
3044 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3045 // Combine this:
3046 //
3047 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3048 //
3049 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3050 //
3051 // Or this:
3052 //
3053 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3054 //
3055 // into (f32x4.demote_zero_f64x2 $x).
3056 EVT ResVT;
3057 EVT ExpectedConversionType;
3058 auto Conversion = N->getOperand(0);
3059 auto ConversionOp = Conversion.getOpcode();
3060 switch (ConversionOp) {
3061    case ISD::FP_TO_SINT_SAT:
3062    case ISD::FP_TO_UINT_SAT:
3063      ResVT = MVT::v4i32;
3064 ExpectedConversionType = MVT::v2i32;
3065 break;
3066 case ISD::FP_ROUND:
3067 ResVT = MVT::v4f32;
3068 ExpectedConversionType = MVT::v2f32;
3069 break;
3070 default:
3071 return SDValue();
3072 }
3073
3074 if (N->getValueType(0) != ResVT)
3075 return SDValue();
3076
3077 if (Conversion.getValueType() != ExpectedConversionType)
3078 return SDValue();
3079
3080 auto Source = Conversion.getOperand(0);
3081 if (Source.getValueType() != MVT::v2f64)
3082 return SDValue();
3083
3084 if (!IsZeroSplat(N->getOperand(1)) ||
3085 N->getOperand(1).getValueType() != ExpectedConversionType)
3086 return SDValue();
3087
3088 unsigned Op = GetWasmConversionOp(ConversionOp);
3089 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3090 }
3091
3092 // Combine this:
3093 //
3094 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3095 //
3096 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3097 //
3098 // Or this:
3099 //
3100 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3101 //
3102 // into (f32x4.demote_zero_f64x2 $x).
3103 EVT ResVT;
3104 auto ConversionOp = N->getOpcode();
3105 switch (ConversionOp) {
3106  case ISD::FP_TO_SINT_SAT:
3107  case ISD::FP_TO_UINT_SAT:
3108    ResVT = MVT::v4i32;
3109 break;
3110 case ISD::FP_ROUND:
3111 ResVT = MVT::v4f32;
3112 break;
3113 default:
3114 llvm_unreachable("unexpected op");
3115 }
3116
3117 if (N->getValueType(0) != ResVT)
3118 return SDValue();
3119
3120 auto Concat = N->getOperand(0);
3121 if (Concat.getValueType() != MVT::v4f64)
3122 return SDValue();
3123
3124 auto Source = Concat.getOperand(0);
3125 if (Source.getValueType() != MVT::v2f64)
3126 return SDValue();
3127
3128 if (!IsZeroSplat(Concat.getOperand(1)) ||
3129 Concat.getOperand(1).getValueType() != MVT::v2f64)
3130 return SDValue();
3131
3132 unsigned Op = GetWasmConversionOp(ConversionOp);
3133 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3134}
3135
3136// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3137static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3138 const SDLoc &DL, unsigned VectorWidth) {
3139 EVT VT = Vec.getValueType();
3140 EVT ElVT = VT.getVectorElementType();
3141 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3142 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3143 VT.getVectorNumElements() / Factor);
3144
3145 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3146 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3147 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3148
3149 // This is the index of the first element of the VectorWidth-bit chunk
3150 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3151 IdxVal &= ~(ElemsPerChunk - 1);
3152
3153 // If the input is a buildvector just emit a smaller one.
3154 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3155 return DAG.getBuildVector(ResultVT, DL,
3156 Vec->ops().slice(IdxVal, ElemsPerChunk));
3157
3158 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3159 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3160}
3161
3162// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3163// is the expected destination value type after recursion. In is the initial
3164// input. Note that the input should have enough leading zero bits to prevent
3165// NARROW_U from saturating results.
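// Illustrative recursion: for a v8i32 input truncated to v8i16, the 256-bit
// source is split into two v4i32 halves which are combined with a single
// i16x8.narrow_i32x4_u into the final v8i16 result.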
3166static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3167                                        SelectionDAG &DAG) {
3168 EVT SrcVT = In.getValueType();
3169
3170 // No truncation required, we might get here due to recursive calls.
3171 if (SrcVT == DstVT)
3172 return In;
3173
3174 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3175 unsigned NumElems = SrcVT.getVectorNumElements();
3176 if (!isPowerOf2_32(NumElems))
3177 return SDValue();
3178 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3179 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3180
3181 LLVMContext &Ctx = *DAG.getContext();
3182 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3183
3184 // Narrow to the largest type possible:
3185 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3186 EVT InVT = MVT::i16, OutVT = MVT::i8;
3187 if (SrcVT.getScalarSizeInBits() > 16) {
3188 InVT = MVT::i32;
3189 OutVT = MVT::i16;
3190 }
3191 unsigned SubSizeInBits = SrcSizeInBits / 2;
3192 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3193 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3194
3195 // Split lower/upper subvectors.
3196 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3197 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3198
3199 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3200 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3201 Lo = DAG.getBitcast(InVT, Lo);
3202 Hi = DAG.getBitcast(InVT, Hi);
3203 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3204 return DAG.getBitcast(DstVT, Res);
3205 }
3206
3207 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3208 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3209 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3210 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3211
3212 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3213 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3214 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3215}
3216
3217static SDValue performTruncateCombine(SDNode *N,
3218                                      TargetLowering::DAGCombinerInfo &DCI) {
3219  auto &DAG = DCI.DAG;
3220
3221 SDValue In = N->getOperand(0);
3222 EVT InVT = In.getValueType();
3223 if (!InVT.isSimple())
3224 return SDValue();
3225
3226 EVT OutVT = N->getValueType(0);
3227 if (!OutVT.isVector())
3228 return SDValue();
3229
3230 EVT OutSVT = OutVT.getVectorElementType();
3231 EVT InSVT = InVT.getVectorElementType();
3232 // Currently only cover truncate to v16i8 or v8i16.
3233 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3234 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3235 return SDValue();
3236
3237 SDLoc DL(N);
3238  APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
3239                                    OutVT.getScalarSizeInBits());
3240 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3241 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3242}
3243
3244static SDValue performBitcastCombine(SDNode *N,
3245                                     TargetLowering::DAGCombinerInfo &DCI) {
3246  using namespace llvm::SDPatternMatch;
3247 auto &DAG = DCI.DAG;
3248 SDLoc DL(N);
3249 SDValue Src = N->getOperand(0);
3250 EVT VT = N->getValueType(0);
3251 EVT SrcVT = Src.getValueType();
3252
3253 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3254 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3255 return SDValue();
3256
3257 unsigned NumElts = SrcVT.getVectorNumElements();
3258 EVT Width = MVT::getIntegerVT(128 / NumElts);
3259
3260 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3261 // ==> bitmask
3262 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3263 return DAG.getZExtOrTrunc(
3264 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3265 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3266 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3267 SrcVT.changeVectorElementType(
3268 *DAG.getContext(), Width))}),
3269 DL, VT);
3270 }
3271
3272 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3273 if (NumElts == 32 || NumElts == 64) {
3274    // Strategy: setcc each 128-bit chunk separately as v16i8 -> v16i1,
3275    // bitcast each result to an i16 bitmask and zero-extend it to i32/i64,
3276    // then accumulate the chunks into one integer, shifting left 16 each time.
3277 SDValue Concat, SetCCVector;
3278 ISD::CondCode SetCond;
3279
3280 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3281 m_CondCode(SetCond)))))
3282 return SDValue();
3283 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3284 return SDValue();
3285
3286    uint64_t ElementWidth =
3287        SetCCVector.getValueType().getScalarSizeInBits();
3288
3289 SmallVector<SDValue> VectorsToShuffle;
3290 for (size_t I = 0; I < Concat->ops().size(); I++) {
3291 VectorsToShuffle.push_back(DAG.getBitcast(
3292 MVT::i16,
3293 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3294 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3295 DAG, DL, 128),
3296 SetCond)));
3297 }
3298
3299 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3300 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3301
3302 for (SDValue V : VectorsToShuffle) {
3303 ReturningInteger = DAG.getNode(
3304 ISD::SHL, DL, ReturnType,
3305 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3306
3307 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3308 ReturningInteger =
3309 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3310 }
3311
3312 return ReturningInteger;
3313 }
3314
3315 return SDValue();
3316}
3317
3318static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3319  // any_true (setcc <X>, 0, eq) => (not (all_true X))
3320 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3321 // any_true (setcc <X>, 0, ne) => (any_true X)
3322 // all_true (setcc <X>, 0, ne) => (all_true X)
3323 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3324 using namespace llvm::SDPatternMatch;
3325
3326 SDValue LHS;
3327 if (N->getNumOperands() < 2 ||
3328 !sd_match(N->getOperand(1),
3329                m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
3330    return SDValue();
3331 EVT LT = LHS.getValueType();
3332 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3333 return SDValue();
3334
3335 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3336 ISD::CondCode SetType,
3337 Intrinsic::WASMIntrinsics InPost) {
3338 if (N->getConstantOperandVal(0) != InPre)
3339 return SDValue();
3340
3341 SDValue LHS;
3342 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3343 m_SpecificCondCode(SetType))))
3344 return SDValue();
3345
3346 SDLoc DL(N);
3347 SDValue Ret = DAG.getZExtOrTrunc(
3348 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3349 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3350 DL, MVT::i1);
3351 if (SetType == ISD::SETEQ)
3352 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3353 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3354 };
3355
3356 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3357 Intrinsic::wasm_alltrue))
3358 return AnyTrueEQ;
3359 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3360 Intrinsic::wasm_anytrue))
3361 return AllTrueEQ;
3362 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3363 Intrinsic::wasm_anytrue))
3364 return AnyTrueNE;
3365 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3366 Intrinsic::wasm_alltrue))
3367 return AllTrueNE;
3368
3369 return SDValue();
3370}
3371
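// Matches a SETCC of the form (setcc (bitcast (vNi1 X)), MatchRHS, MatchCond)
// and lowers it to the given any_true/all_true intrinsic on X, negating the
// result when RequiresNegate is set.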
3372template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3373 Intrinsic::ID Intrin>
3374static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3375 SDValue LHS = N->getOperand(0);
3376 SDValue RHS = N->getOperand(1);
3377 SDValue Cond = N->getOperand(2);
3378 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3379 return SDValue();
3380
3381 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3382 return SDValue();
3383
3384 SDLoc DL(N);
3385 SDValue Ret = DAG.getZExtOrTrunc(
3386 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3387 {DAG.getConstant(Intrin, DL, MVT::i32),
3388 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3389 DL, MVT::i1);
3390 if (RequiresNegate)
3391 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3392 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3393}
3394
3395/// Try to convert an i128 comparison to a v16i8 comparison before type
3396/// legalization splits it up into chunks
3397static SDValue
3398combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3399 const WebAssemblySubtarget *Subtarget) {
3400
3401 SDLoc DL(N);
3402 SDValue X = N->getOperand(0);
3403 SDValue Y = N->getOperand(1);
3404 EVT VT = N->getValueType(0);
3405 EVT OpVT = X.getValueType();
3406
3407 SelectionDAG &DAG = DCI.DAG;
3408 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3409 Attribute::NoImplicitFloat))
3410 return SDValue();
3411
3412 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3413 // We're looking for an oversized integer equality comparison with SIMD
3414 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3415 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3416 return SDValue();
3417
3418 // Don't perform this combine if constructing the vector will be expensive.
3419 auto IsVectorBitCastCheap = [](SDValue X) {
3420 X = peekThroughBitcasts(X);
3421 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3422 };
3423
3424 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3425 return SDValue();
3426
3427 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3428 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3429 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3430
3431 SDValue Intr =
3432 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3433 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3434 : Intrinsic::wasm_anytrue,
3435 DL, MVT::i32),
3436 Cmp});
3437
3438 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3439 ISD::SETNE);
3440}
3441
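// Combine a scalar-integer SETCC whose operand is either an oversized i128
// equality or a bitcast of a vNi1 mask into any_true/all_true-based sequences.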
3442static SDValue performSETCCCombine(SDNode *N,
3443 TargetLowering::DAGCombinerInfo &DCI,
3444 const WebAssemblySubtarget *Subtarget) {
3445 if (!DCI.isBeforeLegalize())
3446 return SDValue();
3447
3448 EVT VT = N->getValueType(0);
3449 if (!VT.isScalarInteger())
3450 return SDValue();
3451
3452 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3453 return V;
3454
3455 SDValue LHS = N->getOperand(0);
3456 if (LHS->getOpcode() != ISD::BITCAST)
3457 return SDValue();
3458
3459 EVT FromVT = LHS->getOperand(0).getValueType();
3460 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3461 return SDValue();
3462
3463 unsigned NumElts = FromVT.getVectorNumElements();
3464 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3465 return SDValue();
3466
3467 if (!cast<ConstantSDNode>(N->getOperand(1)))
3468 return SDValue();
3469
3470 auto &DAG = DCI.DAG;
3471 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3472 MVT::getIntegerVT(128 / NumElts));
3473 // setcc (iN (bitcast (vNi1 X))), 0, ne
3474 // ==> any_true (vNi1 X)
3475 if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3476 N, VecVT, DAG)) {
3477 return Match;
3478 }
3479 // setcc (iN (bitcast (vNi1 X))), 0, eq
3480 // ==> xor (any_true (vNi1 X)), -1
3481 if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3482 N, VecVT, DAG)) {
3483 return Match;
3484 }
3485 // setcc (iN (bitcast (vNi1 X))), -1, eq
3486 // ==> all_true (vNi1 X)
3487 if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3488 N, VecVT, DAG)) {
3489 return Match;
3490 }
3491 // setcc (iN (bitcast (vNi1 X))), -1, ne
3492 // ==> xor (all_true (vNi1 X)), -1
3493 if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3494 N, VecVT, DAG)) {
3495 return Match;
3496 }
3497 return SDValue();
3498}
3499
3500static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3501 EVT VT = N->getValueType(0);
3502 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3503 return SDValue();
3504
3505 // Mul with extending inputs.
3506 SDValue LHS = N->getOperand(0);
3507 SDValue RHS = N->getOperand(1);
3508 if (LHS.getOpcode() != RHS.getOpcode())
3509 return SDValue();
3510
3511 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3512 LHS.getOpcode() != ISD::ZERO_EXTEND)
3513 return SDValue();
3514
3515 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3516 return SDValue();
3517
3518 EVT FromVT = LHS->getOperand(0).getValueType();
3519 EVT EltTy = FromVT.getVectorElementType();
3520 if (EltTy != MVT::i8)
3521 return SDValue();
3522
3523 // For an input DAG that looks like this
3524 // %a = input_type
3525 // %b = input_type
3526 // %lhs = extend %a to output_type
3527 // %rhs = extend %b to output_type
3528 // %mul = mul %lhs, %rhs
3529
3530 // input_type | output_type | instructions
3531 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3532 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3533 // | | %low_low = i32x4.ext_low_i16x8_ %low
3534 // | | %low_high = i32x4.ext_high_i16x8_ %low
3535 // | | %high_low = i32x4.ext_low_i16x8_ %high
3536 // | | %high_high = i32x4.ext_high_i16x8_ %high
3537 // | | %res = concat_vector(...)
3538 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3539 // | | %low_low = i32x4.ext_low_i16x8_ %low
3540 // | | %low_high = i32x4.ext_high_i16x8_ %low
3541 // | | %res = concat_vector(%low_low, %low_high)
3542
3543 SDLoc DL(N);
3544 unsigned NumElts = VT.getVectorNumElements();
3545 SDValue ExtendInLHS = LHS->getOperand(0);
3546 SDValue ExtendInRHS = RHS->getOperand(0);
3547 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3548 unsigned ExtendLowOpc =
3549 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3550 unsigned ExtendHighOpc =
3551 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3552
3553 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3554 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3555 };
3556 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3557 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3558 };
3559
3560 if (NumElts == 16) {
3561 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3562 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3563 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3564 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3565 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3566 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3567 SDValue SubVectors[] = {
3568 GetExtendLow(MVT::v4i32, MulLow),
3569 GetExtendHigh(MVT::v4i32, MulLow),
3570 GetExtendLow(MVT::v4i32, MulHigh),
3571 GetExtendHigh(MVT::v4i32, MulHigh),
3572 };
3573 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3574 } else {
3575 assert(NumElts == 8);
3576 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3577 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3578 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3579 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3580 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3581 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3582 }
3583 return SDValue();
3584}
3585
3586static SDValue performMulCombine(SDNode *N,
3587 TargetLowering::DAGCombinerInfo &DCI) {
3588 assert(N->getOpcode() == ISD::MUL);
3589 EVT VT = N->getValueType(0);
3590 if (!VT.isVector())
3591 return SDValue();
3592
3593 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3594 return Res;
3595
3596 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3597 // extend them to v8i16.
3598 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3599 return SDValue();
3600
3601 SDLoc DL(N);
3602 SelectionDAG &DAG = DCI.DAG;
3603 SDValue LHS = N->getOperand(0);
3604 SDValue RHS = N->getOperand(1);
3605 EVT MulVT = MVT::v8i16;
3606
3607 if (VT == MVT::v8i8) {
3608 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3609 DAG.getUNDEF(MVT::v8i8));
3610 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3611 DAG.getUNDEF(MVT::v8i8));
3612 SDValue LowLHS =
3613 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3614 SDValue LowRHS =
3615 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3616 SDValue MulLow = DAG.getBitcast(
3617 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3618 // Take the low byte of each lane.
3619 SDValue Shuffle = DAG.getVectorShuffle(
3620 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3621 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3622 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3623 } else {
3624 assert(VT == MVT::v16i8 && "Expected v16i8");
3625 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3626 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3627 SDValue HighLHS =
3628 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3629 SDValue HighRHS =
3630 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3631
3632 SDValue MulLow =
3633 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3634 SDValue MulHigh =
3635 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3636
3637 // Take the low byte of each lane.
3638 return DAG.getVectorShuffle(
3639 VT, DL, MulLow, MulHigh,
3640 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3641 }
3642}
3643
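// Repeatedly concatenates In with poison, doubling the element count each
// time, until the vector holds RequiredNumElems elements.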
3644SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3645 SelectionDAG &DAG) {
3646 SDLoc DL(In);
3647 LLVMContext &Ctx = *DAG.getContext();
3648 EVT InVT = In.getValueType();
3649 unsigned NumElems = InVT.getVectorNumElements() * 2;
3650 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3651 SDValue Concat =
3652 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3653 if (NumElems < RequiredNumElems) {
3654 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3655 }
3656 return Concat;
3657}
3658
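// Lowers a vector FP_TO_SINT/FP_TO_UINT producing i8/i16 lanes by converting
// to i32 lanes first, masking off the high bits, and then narrowing to the
// destination element type.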
3659SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3660 EVT OutVT = N->getValueType(0);
3661 if (!OutVT.isVector())
3662 return SDValue();
3663
3664 EVT OutElTy = OutVT.getVectorElementType();
3665 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3666 return SDValue();
3667
3668 unsigned NumElems = OutVT.getVectorNumElements();
3669 if (!isPowerOf2_32(NumElems))
3670 return SDValue();
3671
3672 EVT FPVT = N->getOperand(0)->getValueType(0);
3673 if (FPVT.getVectorElementType() != MVT::f32)
3674 return SDValue();
3675
3676 SDLoc DL(N);
3677
3678 // First, convert to i32.
3679 LLVMContext &Ctx = *DAG.getContext();
3680 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3681 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3682 APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
3683 OutVT.getScalarSizeInBits());
3684 // Mask out the top MSBs.
3685 SDValue Masked =
3686 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3687
3688 if (OutVT.getSizeInBits() < 128) {
3689 // Create a wide enough vector that we can use narrow.
3690 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3691 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3692 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3693 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3694 return DAG.getBitcast(
3695 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3696 } else {
3697 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3698 }
3699 return SDValue();
3700}
3701
3702SDValue
3703WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3704 DAGCombinerInfo &DCI) const {
3705 switch (N->getOpcode()) {
3706 default:
3707 return SDValue();
3708 case ISD::BITCAST:
3709 return performBitcastCombine(N, DCI);
3710 case ISD::SETCC:
3711 return performSETCCCombine(N, DCI, Subtarget);
3712 case ISD::VECTOR_SHUFFLE:
3713 return performVECTOR_SHUFFLECombine(N, DCI);
3714 case ISD::SIGN_EXTEND:
3715 case ISD::ZERO_EXTEND:
3716 return performVectorExtendCombine(N, DCI);
3717 case ISD::UINT_TO_FP:
3718 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3719 return ExtCombine;
3720 return performVectorNonNegToFPCombine(N, DCI);
3721 case ISD::SINT_TO_FP:
3722 return performVectorExtendToFPCombine(N, DCI);
3723 case ISD::FP_TO_SINT_SAT:
3724 case ISD::FP_TO_UINT_SAT:
3725 case ISD::FP_ROUND:
3726 case ISD::CONCAT_VECTORS:
3727 return performVectorTruncZeroCombine(N, DCI);
3728 case ISD::FP_TO_SINT:
3729 case ISD::FP_TO_UINT:
3730 return performConvertFPCombine(N, DCI.DAG);
3731 case ISD::TRUNCATE:
3732 return performTruncateCombine(N, DCI);
3733 case ISD::INTRINSIC_WO_CHAIN:
3734 return performAnyAllCombine(N, DCI.DAG);
3735 case ISD::MUL:
3736 return performMulCombine(N, DCI);
3737 }
3738}