LLVM 23.0.0git
WebAssemblyISelLowering.cpp
Go to the documentation of this file.
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
// Constructor: registers the wasm register classes for the features the
// subtarget enables (base, SIMD128, FP16, reference types, EH) and then
// configures per-operation legalization and DAG-combine behavior.
// NOTE(review): this listing elides many lines (the setOperationAction /
// setTargetDAGCombine calls referenced by several comments below); comments
// here only describe what is visible.
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
// Pointer-sized MVT depends on wasm32 vs wasm64.
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
// All 128-bit vector types share the single V128 register class.
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
// exnref additionally requires the exception-handling feature.
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for both externref, funcref and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
// Floating-point configuration; v8f16 is skipped unless FP16 is enabled.
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
// f32<->f16 conversion is only natively available with the FP16 feature.
161 if (Subtarget->hasFP16() && T == MVT::f32) {
163 setTruncStoreAction(T, MVT::f16, Legal);
164 } else {
166 setTruncStoreAction(T, MVT::f16, Expand);
167 }
168 }
169
170 // Expand unavailable integer operations.
171 for (auto Op :
175 for (auto T : {MVT::i32, MVT::i64})
177 if (Subtarget->hasSIMD128())
178 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
180 }
181
182 if (Subtarget->hasWideArithmetic()) {
188 }
189
190 if (Subtarget->hasNontrappingFPToInt())
192 for (auto T : {MVT::i32, MVT::i64})
194
195 if (Subtarget->hasRelaxedSIMD()) {
198 {MVT::v4f32, MVT::v2f64}, Custom);
199 }
200 // SIMD-specific configuration
201 if (Subtarget->hasSIMD128()) {
202
204
205 // Combine wide-vector muls, with extend inputs, to extmul_half.
208
209 // Combine vector mask reductions into alltrue/anytrue
211
212 // Convert vector to integer bitcasts to bitmask
214
215 // Hoist bitcasts out of shuffles
217
218 // Combine extends of extract_subvectors into widening ops
220
221 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
222 // conversions ops
225
226 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
227 // into conversion ops
231
233
234 // Support saturating add/sub for i8x16 and i16x8
236 for (auto T : {MVT::v16i8, MVT::v8i16})
238
239 // Support integer abs
240 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
242
243 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
244 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
245 MVT::v2f64})
247
248 if (Subtarget->hasFP16())
250
251 // We have custom shuffle lowering to expose the shuffle mask
252 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
253 MVT::v2f64})
255
256 if (Subtarget->hasFP16())
258
259 // Support splatting
260 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
261 MVT::v2f64})
263
// avgr_u is natively available for i8x16 and i16x8.
264 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
265
266 // Custom lowering since wasm shifts must have a scalar shift amount
267 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
268 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
270
271 // Custom lower lane accesses to expand out variable indices
273 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
274 MVT::v2f64})
276
277 // There is no i8x16.mul instruction
278 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
279
280 // Expand integer operations supported for scalars but not SIMD
281 for (auto Op :
283 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
285
286 // But we do have integer min and max operations
287 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
288 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
290
291 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
292 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
293 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
294 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
295
296 // Custom lower bit counting operations for other types to scalarize them.
297 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
298 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
300
301 // Expand float operations supported for scalars but not SIMD
304 for (auto T : {MVT::v4f32, MVT::v2f64})
306
307 // Unsigned comparison operations are unavailable for i64x2 vectors.
309 setCondCodeAction(CC, MVT::v2i64, Custom);
310
311 // 64x2 conversions are not in the spec
312 for (auto Op :
314 for (auto T : {MVT::v2i64, MVT::v2f64})
316
317 // But saturating fp_to_int converstions are
319 setOperationAction(Op, MVT::v4i32, Custom);
320 if (Subtarget->hasFP16()) {
321 setOperationAction(Op, MVT::v8i16, Custom);
322 }
323 }
324
325 // Support vector extending
330 }
331
332 if (Subtarget->hasFP16()) {
333 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
334 }
335
336 if (Subtarget->hasRelaxedSIMD()) {
339 }
340
341 // Partial MLA reductions.
// Dot-product style reductions accumulate i8/i16 lanes into v4i32.
343 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
344 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
345 }
346 }
347
348 // As a special case, these operators use the type to mean the type to
349 // sign-extend from.
351 if (!Subtarget->hasSignExt()) {
352 // Sign extends are legal only when extending a vector extract
353 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
354 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
356 }
359
360 // Dynamic stack allocation: use the default expansion.
364
368
369 // Expand these forms; we pattern-match the forms that we can handle in isel.
370 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
371 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
373
374 if (Subtarget->hasReferenceTypes())
375 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
376 for (auto T : {MVT::externref, MVT::funcref})
378
379 // There is no vector conditional select instruction
380 for (auto T :
381 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
383
384 // We have custom switch handling.
386
387 // WebAssembly doesn't have:
388 // - Floating-point extending loads.
389 // - Floating-point truncating stores.
390 // - i1 extending loads.
391 // - truncating SIMD stores and most extending loads
392 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
393 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
394 for (auto T : MVT::integer_valuetypes())
395 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
396 setLoadExtAction(Ext, T, MVT::i1, Promote);
397 if (Subtarget->hasSIMD128()) {
// Default every vector extending load to Expand, then re-legalize the
// few combinations wasm supports natively below.
398 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
399 MVT::v2f64}) {
400 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
401 if (MVT(T) != MemT) {
403 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
404 setLoadExtAction(Ext, T, MemT, Expand);
405 }
406 }
407 }
408 // But some vector extending loads are legal
409 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
410 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
411 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
412 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
413 }
414 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
415 }
416
417 // Don't do anything clever with build_pairs
419
420 // Trap lowers to wasm unreachable
421 setOperationAction(ISD::TRAP, MVT::Other, Legal);
423
424 // Exception handling intrinsics
428
430
431 // Always convert switches to br_tables unless there is only one case, which
432 // is equivalent to a simple branch. This reduces code size for wasm, and we
433 // defer possible jump table optimizations to the VM.
435}
436
445
454
// Decide how an atomicrmw instruction should be handled: operations wasm has
// native instructions for are left alone; others fall through to the default
// (case labels and the return on the elided lines are not visible here).
456WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
457 const AtomicRMWInst *AI) const {
458 // We have wasm instructions for these
459 switch (AI->getOperation()) {
467 default:
468 break;
469 }
471}
472
// Returns true when a vector binop feeding an extract is better performed on
// the extracted scalars instead. Mirrors the X86 heuristic: never scalarize
// target nodes, do scalarize unsupported vector ops, and otherwise require
// the scalar form of the op to be at least legal-or-custom-or-promote.
473bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
474 // Implementation copied from X86TargetLowering.
475 unsigned Opc = VecOp.getOpcode();
476
477 // Assume target opcodes can't be scalarized.
478 // TODO - do we have any exceptions?
480 return false;
481
482 // If the vector op is not supported, try to convert to scalar.
483 EVT VecVT = VecOp.getValueType();
485 return true;
486
487 // If the vector op is supported, but the scalar op is not, the transform may
488 // not be worthwhile.
489 EVT ScalarVT = VecVT.getScalarType();
490 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
491}
492
493FastISel *WebAssemblyTargetLowering::createFastISel(
494 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
495 const LibcallLoweringInfo *LibcallLowering) const {
496 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
497}
498
// Pick the integer type used for a scalar shift amount of a value of type VT.
// Rounds the value width up to a power of two (minimum 8 bits); widths over
// 64 become i32 because oversized shifts lower to compiler-rt libcalls whose
// count parameter is an i32. (The asserts / Result construction sit on lines
// elided from this listing.)
499MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
500 EVT VT) const {
501 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
502 if (BitWidth > 1 && BitWidth < 8)
503 BitWidth = 8;
504
505 if (BitWidth > 64) {
506 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
507 // the count to be an i32.
508 BitWidth = 32;
510 "32-bit shift counts ought to be enough for anyone");
511 }
512
515 "Unable to represent scalar shift amount type");
516 return Result;
517}
518
519// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
520// undefined result on invalid/overflow, to the WebAssembly opcode, which
521// traps on invalid/overflow.
// Strategy: emit an explicit range check on the input, then a CFG diamond:
// the false path performs the (trapping) native truncation, the true path
// materializes a substitute constant, and a PHI in DoneMBB merges them.
524 const TargetInstrInfo &TII,
525 bool IsUnsigned, bool Int64,
526 bool Float64, unsigned LoweredOpcode) {
528
529 Register OutReg = MI.getOperand(0).getReg();
530 Register InReg = MI.getOperand(1).getReg();
531
// Select the f32 vs f64 and i32 vs i64 flavors of the helper opcodes.
532 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
533 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
534 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
535 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
536 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
537 unsigned Eqz = WebAssembly::EQZ_I32;
538 unsigned And = WebAssembly::AND_I32;
// The out-of-range substitute is 0 for unsigned, INT_MIN for signed; the
// comparison bound is |INT_MIN| for signed and 2*|INT_MIN| for unsigned.
539 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
540 int64_t Substitute = IsUnsigned ? 0 : Limit;
541 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
542 auto &Context = BB->getParent()->getFunction().getContext();
543 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
544
545 const BasicBlock *LLVMBB = BB->getBasicBlock();
546 MachineFunction *F = BB->getParent();
547 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
548 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
549 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
550
552 F->insert(It, FalseMBB);
553 F->insert(It, TrueMBB);
554 F->insert(It, DoneMBB);
555
556 // Transfer the remainder of BB and its successor edges to DoneMBB.
557 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
559
560 BB->addSuccessor(TrueMBB);
561 BB->addSuccessor(FalseMBB);
562 TrueMBB->addSuccessor(DoneMBB);
563 FalseMBB->addSuccessor(DoneMBB);
564
565 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
566 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
567 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
568 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
569 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
570 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
571 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
572
573 MI.eraseFromParent();
574 // For signed numbers, we can do a single comparison to determine whether
575 // fabs(x) is within range.
576 if (IsUnsigned) {
577 Tmp0 = InReg;
578 } else {
579 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
580 }
581 BuildMI(BB, DL, TII.get(FConst), Tmp1)
582 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
583 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
584
585 // For unsigned numbers, we have to do a separate comparison with zero.
586 if (IsUnsigned) {
587 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
588 Register SecondCmpReg =
589 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
590 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
591 BuildMI(BB, DL, TII.get(FConst), Tmp1)
592 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
593 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
594 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
595 CmpReg = AndReg;
596 }
597
// CmpReg is 1 when the input is safely in range; Eqz inverts it so BR_IF
// jumps to the substitute path only when the conversion would trap.
598 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
599
600 // Create the CFG diamond to select between doing the conversion or using
601 // the substitute value.
602 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
603 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
604 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
605 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
606 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
607 .addReg(FalseReg)
608 .addMBB(FalseMBB)
609 .addReg(TrueReg)
610 .addMBB(TrueMBB);
611
612 return DoneMBB;
613}
614
615// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
616// instuction to handle the zero-length case.
// Rationale: wasm memory.copy traps on an out-of-bounds address even when the
// length is zero, while LLVM's memcpy must be a no-op for length 0, so a
// runtime zero-length guard is emitted unless the length is a known constant.
619 const TargetInstrInfo &TII, bool Int64) {
621
// Operand layout of the pseudo: dst memory index, src memory index,
// dst address, src address, length.
622 MachineOperand DstMem = MI.getOperand(0);
623 MachineOperand SrcMem = MI.getOperand(1);
624 MachineOperand Dst = MI.getOperand(2);
625 MachineOperand Src = MI.getOperand(3);
626 MachineOperand Len = MI.getOperand(4);
627
628 // If the length is a constant, we don't actually need the check.
629 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
630 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
631 Def->getOpcode() == WebAssembly::CONST_I64) {
632 if (Def->getOperand(1).getImm() == 0) {
633 // A zero-length memcpy is a no-op.
634 MI.eraseFromParent();
635 return BB;
636 }
637 // A non-zero-length memcpy doesn't need a zero check.
638 unsigned MemoryCopy =
639 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
640 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
641 .add(DstMem)
642 .add(SrcMem)
643 .add(Dst)
644 .add(Src)
645 .add(Len);
646 MI.eraseFromParent();
647 return BB;
648 }
649 }
650
651 // We're going to add an extra use to `Len` to test if it's zero; that
652 // use shouldn't be a kill, even if the original use is.
653 MachineOperand NoKillLen = Len;
654 NoKillLen.setIsKill(false);
655
656 // Decide on which `MachineInstr` opcode we're going to use.
657 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
658 unsigned MemoryCopy =
659 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
660
661 // Create two new basic blocks; one for the new `memory.fill` that we can
662 // branch over, and one for the rest of the instructions after the original
663 // `memory.fill`.
664 const BasicBlock *LLVMBB = BB->getBasicBlock();
665 MachineFunction *F = BB->getParent();
666 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
667 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
668
670 F->insert(It, TrueMBB);
671 F->insert(It, DoneMBB);
672
673 // Transfer the remainder of BB and its successor edges to DoneMBB.
674 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
676
677 // Connect the CFG edges.
678 BB->addSuccessor(TrueMBB);
679 BB->addSuccessor(DoneMBB);
680 TrueMBB->addSuccessor(DoneMBB);
681
682 // Create a virtual register for the `Eqz` result.
683 unsigned EqzReg;
684 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
685
686 // Erase the original `memory.copy`.
687 MI.eraseFromParent();
688
689 // Test if `Len` is zero.
690 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
691
692 // Insert a new `memory.copy`.
693 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
694 .add(DstMem)
695 .add(SrcMem)
696 .add(Dst)
697 .add(Src)
698 .add(Len);
699
700 // Create the CFG triangle.
// Branch over the copy when Len == 0 (EqzReg is true).
701 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
702 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
703
704 return DoneMBB;
705}
706
707// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
708// instuction to handle the zero-length case.
// Mirrors LowerMemcpy: a constant length avoids the runtime guard entirely;
// otherwise the fill is branched over when the length is zero.
711 const TargetInstrInfo &TII, bool Int64) {
713
// Operand layout of the pseudo: memory index, dst address, fill value, length.
714 MachineOperand Mem = MI.getOperand(0);
715 MachineOperand Dst = MI.getOperand(1);
716 MachineOperand Val = MI.getOperand(2);
717 MachineOperand Len = MI.getOperand(3);
718
719 // If the length is a constant, we don't actually need the check.
720 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
721 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
722 Def->getOpcode() == WebAssembly::CONST_I64) {
723 if (Def->getOperand(1).getImm() == 0) {
724 // A zero-length memset is a no-op.
725 MI.eraseFromParent();
726 return BB;
727 }
728 // A non-zero-length memset doesn't need a zero check.
729 unsigned MemoryFill =
730 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
731 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
732 .add(Mem)
733 .add(Dst)
734 .add(Val)
735 .add(Len);
736 MI.eraseFromParent();
737 return BB;
738 }
739 }
740
741 // We're going to add an extra use to `Len` to test if it's zero; that
742 // use shouldn't be a kill, even if the original use is.
743 MachineOperand NoKillLen = Len;
744 NoKillLen.setIsKill(false);
745
746 // Decide on which `MachineInstr` opcode we're going to use.
747 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
748 unsigned MemoryFill =
749 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
750
751 // Create two new basic blocks; one for the new `memory.fill` that we can
752 // branch over, and one for the rest of the instructions after the original
753 // `memory.fill`.
754 const BasicBlock *LLVMBB = BB->getBasicBlock();
755 MachineFunction *F = BB->getParent();
756 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
757 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
758
760 F->insert(It, TrueMBB);
761 F->insert(It, DoneMBB);
762
763 // Transfer the remainder of BB and its successor edges to DoneMBB.
764 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
766
767 // Connect the CFG edges.
768 BB->addSuccessor(TrueMBB);
769 BB->addSuccessor(DoneMBB);
770 TrueMBB->addSuccessor(DoneMBB);
771
772 // Create a virtual register for the `Eqz` result.
773 unsigned EqzReg;
774 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
775
776 // Erase the original `memory.fill`.
777 MI.eraseFromParent();
778
779 // Test if `Len` is zero.
780 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
781
782 // Insert a new `memory.copy`.
783 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
784
785 // Create the CFG triangle.
// Branch over the fill when Len == 0 (EqzReg is true).
786 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
787 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
788
789 return DoneMBB;
790}
791
// Fuse a CALL_PARAMS / CALL_RESULTS (or RET_CALL_RESULTS) pseudo pair into a
// single real CALL / CALL_INDIRECT / RET_CALL / RET_CALL_INDIRECT
// instruction: defs come from CallResults, uses from CallParams. Indirect
// calls additionally get a type-index placeholder and table operand, and
// funcref calls go through slot 0 of __funcref_call_table, which is cleared
// again after the call to avoid a hidden GC root.
792static MachineBasicBlock *
794 const WebAssemblySubtarget *Subtarget,
795 const TargetInstrInfo &TII) {
796 MachineInstr &CallParams = *CallResults.getPrevNode();
797 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
798 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
799 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
800
// The callee operand is a register or frame index for indirect calls and a
// global/symbol for direct calls.
801 bool IsIndirect =
802 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
803 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
804
805 bool IsFuncrefCall = false;
806 if (IsIndirect && CallParams.getOperand(0).isReg()) {
807 Register Reg = CallParams.getOperand(0).getReg();
808 const MachineFunction *MF = BB->getParent();
809 const MachineRegisterInfo &MRI = MF->getRegInfo();
810 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
811 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
812 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
813 }
814
// Select one of the four real call opcodes from the two flags.
815 unsigned CallOp;
816 if (IsIndirect && IsRetCall) {
817 CallOp = WebAssembly::RET_CALL_INDIRECT;
818 } else if (IsIndirect) {
819 CallOp = WebAssembly::CALL_INDIRECT;
820 } else if (IsRetCall) {
821 CallOp = WebAssembly::RET_CALL;
822 } else {
823 CallOp = WebAssembly::CALL;
824 }
825
826 MachineFunction &MF = *BB->getParent();
827 const MCInstrDesc &MCID = TII.get(CallOp);
828 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
829
830 // Move the function pointer to the end of the arguments for indirect calls
831 if (IsIndirect) {
832 auto FnPtr = CallParams.getOperand(0);
833 CallParams.removeOperand(0);
834
835 // For funcrefs, call_indirect is done through __funcref_call_table and the
836 // funcref is always installed in slot 0 of the table, therefore instead of
837 // having the function pointer added at the end of the params list, a zero
838 // (the index in
839 // __funcref_call_table is added).
840 if (IsFuncrefCall) {
841 Register RegZero =
842 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
843 MachineInstrBuilder MIBC0 =
844 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
845
846 BB->insert(CallResults.getIterator(), MIBC0);
847 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
848 } else
849 CallParams.addOperand(FnPtr);
850 }
851
852 for (auto Def : CallResults.defs())
853 MIB.add(Def);
854
855 if (IsIndirect) {
856 // Placeholder for the type index.
857 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
858 MIB.addImm(0);
859 // The table into which this call_indirect indexes.
860 MCSymbolWasm *Table = IsFuncrefCall
862 MF.getContext(), Subtarget)
864 MF.getContext(), Subtarget);
865 if (Subtarget->hasCallIndirectOverlong()) {
866 MIB.addSym(Table);
867 } else {
868 // For the MVP there is at most one table whose number is 0, but we can't
869 // write a table symbol or issue relocations. Instead we just ensure the
870 // table is live and write a zero.
871 Table->setNoStrip();
872 MIB.addImm(0);
873 }
874 }
875
876 for (auto Use : CallParams.uses())
877 MIB.add(Use);
878
// The fused call replaces both pseudos in place.
879 BB->insert(CallResults.getIterator(), MIB);
880 CallParams.eraseFromParent();
881 CallResults.eraseFromParent();
882
883 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
884 // table slot with ref.null upon call_indirect return.
885 //
886 // This generates the following code, which comes right after a call_indirect
887 // of a funcref:
888 //
889 // i32.const 0
890 // ref.null func
891 // table.set __funcref_call_table
892 if (IsIndirect && IsFuncrefCall) {
894 MF.getContext(), Subtarget);
895 Register RegZero =
896 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
897 MachineInstr *Const0 =
898 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
899 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
900
901 Register RegFuncref =
902 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
903 MachineInstr *RefNull =
904 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
905 BB->insertAfter(Const0->getIterator(), RefNull);
906
907 MachineInstr *TableSet =
908 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
909 .addSym(Table)
910 .addReg(RegZero)
911 .addReg(RegFuncref);
912 BB->insertAfter(RefNull->getIterator(), TableSet);
913 }
914
915 return BB;
916}
917
918MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
919 MachineInstr &MI, MachineBasicBlock *BB) const {
920 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
921 DebugLoc DL = MI.getDebugLoc();
922
923 switch (MI.getOpcode()) {
924 default:
925 llvm_unreachable("Unexpected instr type to insert");
926 case WebAssembly::FP_TO_SINT_I32_F32:
927 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
928 WebAssembly::I32_TRUNC_S_F32);
929 case WebAssembly::FP_TO_UINT_I32_F32:
930 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
931 WebAssembly::I32_TRUNC_U_F32);
932 case WebAssembly::FP_TO_SINT_I64_F32:
933 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
934 WebAssembly::I64_TRUNC_S_F32);
935 case WebAssembly::FP_TO_UINT_I64_F32:
936 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
937 WebAssembly::I64_TRUNC_U_F32);
938 case WebAssembly::FP_TO_SINT_I32_F64:
939 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
940 WebAssembly::I32_TRUNC_S_F64);
941 case WebAssembly::FP_TO_UINT_I32_F64:
942 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
943 WebAssembly::I32_TRUNC_U_F64);
944 case WebAssembly::FP_TO_SINT_I64_F64:
945 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
946 WebAssembly::I64_TRUNC_S_F64);
947 case WebAssembly::FP_TO_UINT_I64_F64:
948 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
949 WebAssembly::I64_TRUNC_U_F64);
950 case WebAssembly::MEMCPY_A32:
951 return LowerMemcpy(MI, DL, BB, TII, false);
952 case WebAssembly::MEMCPY_A64:
953 return LowerMemcpy(MI, DL, BB, TII, true);
954 case WebAssembly::MEMSET_A32:
955 return LowerMemset(MI, DL, BB, TII, false);
956 case WebAssembly::MEMSET_A64:
957 return LowerMemset(MI, DL, BB, TII, true);
958 case WebAssembly::CALL_RESULTS:
959 case WebAssembly::RET_CALL_RESULTS:
960 return LowerCallResults(MI, DL, BB, Subtarget, TII);
961 }
962}
963
// Map a single-letter inline-asm constraint to a wasm register class for the
// requested value type; 'r' covers v128 (with SIMD), i32/i64, f32 and f64.
// Unhandled cases fall through to the base-class lookup (the fallthrough
// return on line 999 is elided from this listing).
964std::pair<unsigned, const TargetRegisterClass *>
965WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
966 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
967 // First, see if this is a constraint that directly corresponds to a
968 // WebAssembly register class.
969 if (Constraint.size() == 1) {
970 switch (Constraint[0]) {
971 case 'r':
972 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
973 if (Subtarget->hasSIMD128() && VT.isVector()) {
974 if (VT.getSizeInBits() == 128)
975 return std::make_pair(0U, &WebAssembly::V128RegClass);
976 }
// Narrow integers use the i32 class; anything up to 64 bits uses i64.
977 if (VT.isInteger() && !VT.isVector()) {
978 if (VT.getSizeInBits() <= 32)
979 return std::make_pair(0U, &WebAssembly::I32RegClass);
980 if (VT.getSizeInBits() <= 64)
981 return std::make_pair(0U, &WebAssembly::I64RegClass);
982 }
983 if (VT.isFloatingPoint() && !VT.isVector()) {
984 switch (VT.getSizeInBits()) {
985 case 32:
986 return std::make_pair(0U, &WebAssembly::F32RegClass);
987 case 64:
988 return std::make_pair(0U, &WebAssembly::F64RegClass);
989 default:
990 break;
991 }
992 }
993 break;
994 default:
995 break;
996 }
997 }
998
1000}
1001
1002bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
1003 // Assume ctz is a relatively cheap operation.
1004 return true;
1005}
1006
1007bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1008 // Assume clz is a relatively cheap operation.
1009 return true;
1010}
1011
1012bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1013 const AddrMode &AM,
1014 Type *Ty, unsigned AS,
1015 Instruction *I) const {
1016 // WebAssembly offsets are added as unsigned without wrapping. The
1017 // isLegalAddressingMode gives us no way to determine if wrapping could be
1018 // happening, so we approximate this by accepting only non-negative offsets.
1019 if (AM.BaseOffs < 0)
1020 return false;
1021
1022 // WebAssembly has no scale register operands.
1023 if (AM.Scale != 0)
1024 return false;
1025
1026 // Everything else is legal.
1027 return true;
1028}
1029
1030bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1031 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1032 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1033 // WebAssembly supports unaligned accesses, though it should be declared
1034 // with the p2align attribute on loads and stores which do so, and there
1035 // may be a performance impact. We tell LLVM they're "fast" because
1036 // for the kinds of things that LLVM uses this for (merging adjacent stores
1037 // of constants, etc.), WebAssembly implementations will either want the
1038 // unaligned access or they'll split anyway.
1039 if (Fast)
1040 *Fast = 1;
1041 return true;
1042}
1043
1044bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1045 AttributeList Attr) const {
1046 // The current thinking is that wasm engines will perform this optimization,
1047 // so we can save on code size.
1048 return true;
1049}
1050
1051bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1052 EVT ExtT = ExtVal.getValueType();
1053 SDValue N0 = ExtVal->getOperand(0);
1054 if (N0.getOpcode() == ISD::FREEZE)
1055 N0 = N0.getOperand(0);
1056 auto *Load = dyn_cast<LoadSDNode>(N0);
1057 if (!Load)
1058 return false;
1059 EVT MemT = Load->getValueType(0);
1060 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1061 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1062 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1063}
1064
1065bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1066 const GlobalAddressSDNode *GA) const {
1067 // Wasm doesn't support function addresses with offsets
1068 const GlobalValue *GV = GA->getGlobal();
1070}
1071
1072EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1073 LLVMContext &C,
1074 EVT VT) const {
1075 if (VT.isVector()) {
1076 if (VT.getVectorElementType() == MVT::f16 && !Subtarget->hasFP16())
1077 return VT.changeElementType(C, MVT::i1);
1078
1080 }
1081
1082 // So far, all branch instructions in Wasm take an I32 condition.
1083 // The default TargetLowering::getSetCCResultType returns the pointer size,
1084 // which would be useful to reduce instruction counts when testing
1085 // against 64-bit pointers/values if at some point Wasm supports that.
1086 return EVT::getIntegerVT(C, 32);
1087}
1088
// Describe the memory touched by each target memory intrinsic (value type,
// pointer operand, offset, alignment) so the selector can attach correct
// MachineMemOperands to the resulting nodes.
// NOTE(review): this excerpt appears to be missing several lines — the
// signature's first parameters, the local IntrinsicInfo declaration, and the
// per-case Info.opc/Info.flags assignments. Confirm against the full source.
void WebAssemblyTargetLowering::getTgtMemIntrinsic(
    MachineFunction &MF, unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_memory_atomic_notify:
    // Wait-queue word at argument 0, accessed as a naturally-aligned i32.
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // atomic.notify instruction does not really load the memory specified with
    // this argument, but MachineMemOperand should either be load or store, so
    // we set this to a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_memory_atomic_wait32:
    // Futex word at argument 0, accessed as a naturally-aligned i32.
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_memory_atomic_wait64:
    // Futex word at argument 0, accessed as a naturally-aligned i64.
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_loadf16_f32:
    // Reads a 2-byte half-precision float from the pointer in argument 0.
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(2);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_storef16_f32:
    // Writes a 2-byte half-precision float to the pointer in argument 1.
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = Align(2);
    Infos.push_back(Info);
    return;
  default:
    // Not a memory intrinsic we describe; add no records.
    return;
  }
}
1149
// Report bits known to be zero in WebAssembly-specific nodes so generic DAG
// combining can exploit them.
// NOTE(review): this excerpt appears to be missing lines — the
// `case ISD::INTRINSIC_WO_CHAIN:` label before the inner switch, and the
// `APInt Mask = ...` declarations in the EXTEND_*_U arm. Confirm against the
// full source before editing.
void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
    unsigned IntNo = Op.getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::wasm_bitmask: {
      // bitmask yields one result bit per input lane, so every bit above the
      // lane count is known zero.
      unsigned BitWidth = Known.getBitWidth();
      EVT VT = Op.getOperand(1).getSimpleValueType();
      unsigned PossibleBits = VT.getVectorNumElements();
      APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
      Known.Zero |= ZeroMask;
      break;
    }
    }
    break;
  }
  case WebAssemblyISD::EXTEND_LOW_U:
  case WebAssemblyISD::EXTEND_HIGH_U: {
    // We know the high half, of each destination vector element, will be zero.
    SDValue SrcOp = Op.getOperand(0);
    EVT VT = SrcOp.getSimpleValueType();
    unsigned BitWidth = Known.getBitWidth();
    if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      assert(BitWidth >= 8 && "Unexpected width!");
      Known.Zero |= Mask;
    } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
      assert(BitWidth >= 16 && "Unexpected width!");
      Known.Zero |= Mask;
    } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
      assert(BitWidth >= 32 && "Unexpected width!");
      Known.Zero |= Mask;
    }
    break;
  }
  // For 128-bit addition if the upper bits are all zero then it's known that
  // the upper bits of the result will have all bits guaranteed zero except the
  // first.
  case WebAssemblyISD::I64_ADD128:
    if (Op.getResNo() == 1) {
      SDValue LHS_HI = Op.getOperand(1);
      SDValue RHS_HI = Op.getOperand(3);
      if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
}
1206
// Prefer widening fixed-length vectors whose element type matches one of our
// legal 128-bit lane types, so lanes can be used directly.
// NOTE(review): this excerpt appears to be missing the return-type line
// (TargetLoweringBase::LegalizeTypeAction) above and the trailing fallback
// `return TargetLoweringBase::getPreferredVectorAction(VT);` — confirm
// against the full source.
WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
  if (VT.isFixedLengthVector()) {
    MVT EltVT = VT.getVectorElementType();
    // We have legal vector types with these lane types, so widening the
    // vector would let us use some of the lanes directly without having to
    // extend or truncate values.
    if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
        EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
      return TypeWidenVector;
  }

}
1221
1222bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1223 const MachineFunction &MF, EVT VT) const {
1224 if (!Subtarget->hasFP16() || !VT.isVector())
1225 return false;
1226
1227 EVT ScalarVT = VT.getScalarType();
1228 if (!ScalarVT.isSimple())
1229 return false;
1230
1231 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1232}
1233
1234bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1235 SDValue Op, const TargetLoweringOpt &TLO) const {
1236 // ISel process runs DAGCombiner after legalization; this step is called
1237 // SelectionDAG optimization phase. This post-legalization combining process
1238 // runs DAGCombiner on each node, and if there was a change to be made,
1239 // re-runs legalization again on it and its user nodes to make sure
1240 // everythiing is in a legalized state.
1241 //
1242 // The legalization calls lowering routines, and we do our custom lowering for
1243 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1244 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1245 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1246 // turns unused vector elements into undefs. But this routine does not work
1247 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1248 // combination can result in a infinite loop, in which undefs are converted to
1249 // zeros in legalization and back to undefs in combining.
1250 //
1251 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1252 // running for build_vectors.
1253 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1254 return false;
1255 return true;
1256}
1257
1258//===----------------------------------------------------------------------===//
1259// WebAssembly Lowering private implementation.
1260//===----------------------------------------------------------------------===//
1261
1262//===----------------------------------------------------------------------===//
1263// Lowering Code
1264//===----------------------------------------------------------------------===//
1265
1266static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1268 DAG.getContext()->diagnose(
1269 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1270}
1271
1272// Test whether the given calling convention is supported.
1274 // We currently support the language-independent target-independent
1275 // conventions. We don't yet have a way to annotate calls with properties like
1276 // "cold", and we don't have any call-clobbered registers, so these are mostly
1277 // all handled the same.
1278 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1279 CallConv == CallingConv::Cold ||
1280 CallConv == CallingConv::PreserveMost ||
1281 CallConv == CallingConv::PreserveAll ||
1282 CallConv == CallingConv::CXX_FAST_TLS ||
1284 CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail;
1285}
1286
// Lower an outgoing call: validate the calling convention and argument flags,
// decide whether a tail call is possible, copy byval arguments to stack
// objects, pack varargs into a caller-allocated buffer, handle funcref
// callees via a table.set, and finally emit a CALL or RET_CALL node.
// NOTE(review): several lines appear to be missing from this excerpt
// (SmallVector declarations for ArgLocs/Chains/Ops, continuation lines of a
// few call expressions, and some `if` conditions). Confirm against the full
// source before editing this function.
SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    // Demote the tail call to a regular call; only report an error when the
    // source demanded `musttail`.
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types to "
             "match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  bool HasSwiftAsyncArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    HasSwiftAsyncArg |= Out.Flags.isSwiftAsync();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      // Copy the byval argument into a fresh stack object and pass a frame
      // index for it instead.
      auto &MFI = MF.getFrameInfo();
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
                            /*isVolatile*/ false, /*AlwaysInline=*/false,
                            /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
                            MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += !Out.Flags.isVarArg();
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
  // (for swifttailcc) swiftasync arguments if there aren't. These additional
  // arguments are also added for callee signature. They are necessary to match
  // callee and caller signature for indirect call.
  if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
    Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::ArgFlagsTy Flags;
      Flags.setSwiftSelf();
      ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::ArgFlagsTy Flags;
      Flags.setSwiftError();
      ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
      NumFixedArgs++;
      ISD::ArgFlagsTy Flags;
      Flags.setSwiftAsync();
      ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    MaybeAlign StackAlign = Layout.getStackAlignment();
    assert(StackAlign && "data layout string is missing stack alignment");
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't at MO_GOT which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
                                       GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  // Lastly, if this is a call to a funcref we need to add an instruction
  // table.set to the chain and transform the call.
      CLI.CB->getCalledOperand()->getType())) {
    // In the absence of function references proposal where a funcref call is
    // lowered to call_ref, using reference types we generate a table.set to set
    // the funcref to a special table used solely for this purpose, followed by
    // a call_indirect. Here we just generate the table set, and return the
    // SDValue of the table.set so that LowerCall can finalize the lowering by
    // generating the call_indirect.
    SDValue Chain = Ops[0];

        MF.getContext(), Subtarget);
    SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
    SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
    SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
    SDValue TableSet = DAG.getMemIntrinsicNode(
        WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
        MVT::funcref,
        // Machine Mem Operand args
        MachinePointerInfo(
        CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),

    Ops[0] = TableSet; // The new chain is the TableSet itself
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}
1582
1583bool WebAssemblyTargetLowering::CanLowerReturn(
1584 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1585 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1586 const Type *RetTy) const {
1587 // WebAssembly can only handle returning tuples with multivalue enabled
1588 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1589}
1590
// Lower a function return: validate the per-value flags and emit a single
// WebAssemblyISD::RETURN node carrying all return values.
// NOTE(review): this excerpt appears to be missing the `Outs` parameter line
// of the signature and the `isInConsecutiveRegsLast()` condition guarding the
// final fail() — confirm against the full source.
SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  // RETURN takes the chain followed by every return value.
  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}
1620
// Lower incoming formal arguments: materialize each used argument via an
// ARGUMENT node, record parameter/result types in the function info, add the
// implicit Swift arguments when needed, and wire up the varargs buffer
// pointer.
// NOTE(review): this excerpt appears to be missing a few lines — the
// createVirtualRegister() initializer for VarargVreg and the start of the
// computeSignatureVTs(...) call — confirm against the full source.
SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  bool HasSwiftAsyncArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    HasSwiftAsyncArg |= In.Flags.isSwiftAsync();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
  // (for swifttailcc) swiftasync arguments if there aren't. These additional
  // arguments are also added for callee signature. They are necessary to match
  // callee and caller signature for indirect call.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
    if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    Register VarargVreg =
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with ComputeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}
1706
// Replace the illegal-typed results of custom-lowered nodes. Adding nothing
// to Results signals that the node should not be custom lowered after all.
// NOTE(review): this excerpt appears to be missing lines — the signature's
// parameter list, the SIGN_EXTEND_INREG / *_EXTEND_VECTOR_INREG case labels,
// and the start of the default llvm_unreachable(...) call — confirm against
// the full source.
void WebAssemblyTargetLowering::ReplaceNodeResults(
  switch (N->getOpcode()) {
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
    // Do not add any results, signifying that N should not be custom lowered.
    // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
    break;
  case ISD::ADD:
  case ISD::SUB:
    // 128-bit add/sub are replaced with wide-arithmetic nodes.
    Results.push_back(Replace128Op(N, DAG));
    break;
  default:
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}
1731
1732//===----------------------------------------------------------------------===//
1733// Custom lowering hooks.
1734//===----------------------------------------------------------------------===//
1735
// Central dispatch for all custom-lowered operations: route each opcode to
// its dedicated Lower* routine.
// NOTE(review): several `case ISD::...:` labels appear to be missing from
// this excerpt (the bare `return Lower...` lines below each belong to a
// dropped label) — confirm against the full source.
SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
    return LowerGlobalTLSAddress(Op, DAG);
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
    return LowerAccessVectorElement(Op, DAG);
    return LowerIntrinsic(Op, DAG);
    return LowerSIGN_EXTEND_INREG(Op, DAG);
    return LowerEXTEND_VECTOR_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::FMINNUM:
  case ISD::FMINIMUMNUM:
    return LowerFMIN(Op, DAG);
  case ISD::FMAXNUM:
  case ISD::FMAXIMUMNUM:
    return LowerFMAX(Op, DAG);
  case ISD::LOAD:
    return LowerLoad(Op, DAG);
  case ISD::STORE:
    return LowerStore(Op, DAG);
  case ISD::CTPOP:
  case ISD::CTLZ:
  case ISD::CTTZ:
    // No SIMD bit-count instructions for these shapes; unroll per element.
    return DAG.UnrollVectorOp(Op.getNode());
  case ISD::CLEAR_CACHE:
    report_fatal_error("llvm.clear_cache is not supported on wasm");
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return LowerMUL_LOHI(Op, DAG);
  case ISD::UADDO:
    return LowerUADDO(Op, DAG);
  }
}
1816
1820
1821 return false;
1822}
1823
1824static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1825 SelectionDAG &DAG) {
1827 if (!FI)
1828 return std::nullopt;
1829
1830 auto &MF = DAG.getMachineFunction();
1832}
1833
// Lower a store: stores to wasm globals become GLOBAL_SET, stores to wasm
// locals become LOCAL_SET, other wasm_var address-space stores are rejected,
// and everything else is left for normal selection.
// NOTE(review): this excerpt appears to be missing lines — the
// `if (IsWebAssemblyGlobal(Base)) {` guard around the first branch and the
// report_fatal_error(...) call preceding the final message — confirm against
// the full source.
SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  const SDValue &Value = SN->getValue();
  const SDValue &Base = SN->getBasePtr();
  const SDValue &Offset = SN->getOffset();

    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly global",
                         false);

    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {SN->getChain(), Value, Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
                                   SN->getMemoryVT(), SN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly local",
                         false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
    SDValue Ops[] = {SN->getChain(), Idx, Value};
    return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
  }

      "Encountered an unlowerable store to the wasm_var address space",
      false);

  return Op;
}
1871
// Lower a load: loads from wasm globals become GLOBAL_GET, loads from wasm
// locals become LOCAL_GET, other wasm_var address-space loads are rejected,
// and everything else is left for normal selection.
// NOTE(review): this excerpt appears to be missing lines — the
// `if (IsWebAssemblyGlobal(Base)) {` guard and several report_fatal_error(...)
// call heads — confirm against the full source.
SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  const SDValue &Base = LN->getBasePtr();
  const SDValue &Offset = LN->getOffset();

    if (!Offset->isUndef())
        "unexpected offset when loading from webassembly global", false);

    SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
    SDValue Ops[] = {LN->getChain(), Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
                                   LN->getMemoryVT(), LN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
        "unexpected offset when loading from webassembly local", false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    EVT LocalVT = LN->getValueType(0);
    return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
                       {LN->getChain(), Idx});
  }

      "Encountered an unlowerable load from the wasm_var address space",
      false);

  return Op;
}
1908
1909SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1910 SelectionDAG &DAG) const {
1911 assert(Subtarget->hasWideArithmetic());
1912 assert(Op.getValueType() == MVT::i64);
1913 SDLoc DL(Op);
1914 unsigned Opcode;
1915 switch (Op.getOpcode()) {
1916 case ISD::UMUL_LOHI:
1917 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1918 break;
1919 case ISD::SMUL_LOHI:
1920 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1921 break;
1922 default:
1923 llvm_unreachable("unexpected opcode");
1924 }
1925 SDValue LHS = Op.getOperand(0);
1926 SDValue RHS = Op.getOperand(1);
1927 SDValue Lo =
1928 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1929 SDValue Hi(Lo.getNode(), 1);
1930 SDValue Ops[] = {Lo, Hi};
1931 return DAG.getMergeValues(Ops, DL);
1932}
1933
1934// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1935//
1936// This enables generating a single wasm instruction for this operation where
1937// the upper half of both operands are constant zeros. The upper half of the
1938// result is then whether the overflow happened.
1939SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1940 SelectionDAG &DAG) const {
1941 assert(Subtarget->hasWideArithmetic());
1942 assert(Op.getValueType() == MVT::i64);
1943 assert(Op.getOpcode() == ISD::UADDO);
1944 SDLoc DL(Op);
1945 SDValue LHS = Op.getOperand(0);
1946 SDValue RHS = Op.getOperand(1);
1947 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1948 SDValue Result =
1949 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1950 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1951 SDValue CarryI64(Result.getNode(), 1);
1952 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1953 SDValue Ops[] = {Result, CarryI32};
1954 return DAG.getMergeValues(Ops, DL);
1955}
1956
1957SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1958 SelectionDAG &DAG) const {
1959 assert(Subtarget->hasWideArithmetic());
1960 assert(N->getValueType(0) == MVT::i128);
1961 SDLoc DL(N);
1962 unsigned Opcode;
1963 switch (N->getOpcode()) {
1964 case ISD::ADD:
1965 Opcode = WebAssemblyISD::I64_ADD128;
1966 break;
1967 case ISD::SUB:
1968 Opcode = WebAssemblyISD::I64_SUB128;
1969 break;
1970 default:
1971 llvm_unreachable("unexpected opcode");
1972 }
1973 SDValue LHS = N->getOperand(0);
1974 SDValue RHS = N->getOperand(1);
1975
1976 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1977 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1978 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1979 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1980 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1981 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1982 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1983 LHS_0, LHS_1, RHS_0, RHS_1);
1984 SDValue Result_HI(Result_LO.getNode(), 1);
1985 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1986}
1987
1988SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1989 SelectionDAG &DAG) const {
1990 SDValue Src = Op.getOperand(2);
1991 if (isa<FrameIndexSDNode>(Src.getNode())) {
1992 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1993 // the FI to some LEA-like instruction, but since we don't have that, we
1994 // need to insert some kind of instruction that can take an FI operand and
1995 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1996 // local.copy between Op and its FI operand.
1997 SDValue Chain = Op.getOperand(0);
1998 SDLoc DL(Op);
1999 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
2000 EVT VT = Src.getValueType();
2001 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
2002 : WebAssembly::COPY_I64,
2003 DL, VT, Src),
2004 0);
2005 return Op.getNode()->getNumValues() == 1
2006 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
2007 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
2008 Op.getNumOperands() == 4 ? Op.getOperand(3)
2009 : SDValue());
2010 }
2011 return SDValue();
2012}
2013
2014SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
2015 SelectionDAG &DAG) const {
2016 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
2017 return DAG.getTargetFrameIndex(FI, Op.getValueType());
2018}
2019
2020SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
2021 SelectionDAG &DAG) const {
2022 SDLoc DL(Op);
2023
2024 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
2025 fail(DL, DAG,
2026 "Non-Emscripten WebAssembly hasn't implemented "
2027 "__builtin_return_address");
2028 return SDValue();
2029 }
2030
2031 unsigned Depth = Op.getConstantOperandVal(0);
2032 MakeLibCallOptions CallOptions;
2033 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
2034 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
2035 .first;
2036}
2037
2038SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2039 SelectionDAG &DAG) const {
2040 // Non-zero depths are not supported by WebAssembly currently. Use the
2041 // legalizer's default expansion, which is to return 0 (what this function is
2042 // documented to do).
2043 if (Op.getConstantOperandVal(0) > 0)
2044 return SDValue();
2045
2047 EVT VT = Op.getValueType();
2048 Register FP =
2049 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2050 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2051}
2052
2053SDValue
2054WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2055 SelectionDAG &DAG) const {
2056 SDLoc DL(Op);
2057 const auto *GA = cast<GlobalAddressSDNode>(Op);
2058
2059 MachineFunction &MF = DAG.getMachineFunction();
2060 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2061 report_fatal_error("cannot use thread-local storage without bulk memory",
2062 false);
2063
2064 const GlobalValue *GV = GA->getGlobal();
2065
2066 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2067 // on other targets, if we have thread-local storage, only the local-exec
2068 // model is possible.
2069 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2070 ? GV->getThreadLocalMode()
2072
2073 // Unsupported TLS modes
2076
2077 if (model == GlobalValue::LocalExecTLSModel ||
2080 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2081 // For DSO-local TLS variables we use offset from __tls_base
2082
2083 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2084 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2085 : WebAssembly::GLOBAL_GET_I32;
2086 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2087
2089 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2090 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2091 0);
2092
2093 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2094 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2095 SDValue SymOffset =
2096 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2097
2098 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2099 }
2100
2102
2103 EVT VT = Op.getValueType();
2104 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2105 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2106 GA->getOffset(),
2108}
2109
2110SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2111 SelectionDAG &DAG) const {
2112 SDLoc DL(Op);
2113 const auto *GA = cast<GlobalAddressSDNode>(Op);
2114 EVT VT = Op.getValueType();
2115 assert(GA->getTargetFlags() == 0 &&
2116 "Unexpected target flags on generic GlobalAddressSDNode");
2118 fail(DL, DAG, "Invalid address space for WebAssembly target");
2119
2120 unsigned OperandFlags = 0;
2121 const GlobalValue *GV = GA->getGlobal();
2122 // Since WebAssembly tables cannot yet be shared accross modules, we don't
2123 // need special treatment for tables in PIC mode.
2124 if (isPositionIndependent() &&
2126 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2127 MachineFunction &MF = DAG.getMachineFunction();
2128 MVT PtrVT = getPointerTy(MF.getDataLayout());
2129 const char *BaseName;
2130 if (GV->getValueType()->isFunctionTy()) {
2131 BaseName = MF.createExternalSymbolName("__table_base");
2133 } else {
2134 BaseName = MF.createExternalSymbolName("__memory_base");
2136 }
2138 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2139 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2140
2141 SDValue SymAddr = DAG.getNode(
2142 WebAssemblyISD::WrapperREL, DL, VT,
2143 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2144 OperandFlags));
2145
2146 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2147 }
2149 }
2150
2151 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2152 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2153 GA->getOffset(), OperandFlags));
2154}
2155
2156SDValue
2157WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2158 SelectionDAG &DAG) const {
2159 SDLoc DL(Op);
2160 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2161 EVT VT = Op.getValueType();
2162 assert(ES->getTargetFlags() == 0 &&
2163 "Unexpected target flags on generic ExternalSymbolSDNode");
2164 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2165 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2166}
2167
2168SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2169 SelectionDAG &DAG) const {
2170 // There's no need for a Wrapper node because we always incorporate a jump
2171 // table operand into a BR_TABLE instruction, rather than ever
2172 // materializing it in a register.
2173 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2174 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2175 JT->getTargetFlags());
2176}
2177
2178SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2179 SelectionDAG &DAG) const {
2180 SDLoc DL(Op);
2181 SDValue Chain = Op.getOperand(0);
2182 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2183 SDValue Index = Op.getOperand(2);
2184 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2185
2187 Ops.push_back(Chain);
2188 Ops.push_back(Index);
2189
2190 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2191 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2192
2193 // Add an operand for each case.
2194 for (auto *MBB : MBBs)
2195 Ops.push_back(DAG.getBasicBlock(MBB));
2196
2197 // Add the first MBB as a dummy default target for now. This will be replaced
2198 // with the proper default target (and the preceding range check eliminated)
2199 // if possible by WebAssemblyFixBrTableDefaults.
2200 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2201 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2202}
2203
2204SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2205 SelectionDAG &DAG) const {
2206 SDLoc DL(Op);
2207 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2208
2209 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2210 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2211
2212 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2213 MFI->getVarargBufferVreg(), PtrVT);
2214 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2215 MachinePointerInfo(SV));
2216}
2217
2218SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2219 SelectionDAG &DAG) const {
2220 MachineFunction &MF = DAG.getMachineFunction();
2221 unsigned IntNo;
2222 switch (Op.getOpcode()) {
2225 IntNo = Op.getConstantOperandVal(1);
2226 break;
2228 IntNo = Op.getConstantOperandVal(0);
2229 break;
2230 default:
2231 llvm_unreachable("Invalid intrinsic");
2232 }
2233 SDLoc DL(Op);
2234
2235 switch (IntNo) {
2236 default:
2237 return SDValue(); // Don't custom lower most intrinsics.
2238
2239 case Intrinsic::wasm_lsda: {
2240 auto PtrVT = getPointerTy(MF.getDataLayout());
2241 const char *SymName = MF.createExternalSymbolName(
2242 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2243 if (isPositionIndependent()) {
2245 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2246 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2248 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2249 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2250 SDValue SymAddr =
2251 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2252 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2253 }
2254 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2255 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2256 }
2257
2258 case Intrinsic::wasm_shuffle: {
2259 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2260 SDValue Ops[18];
2261 size_t OpIdx = 0;
2262 Ops[OpIdx++] = Op.getOperand(1);
2263 Ops[OpIdx++] = Op.getOperand(2);
2264 while (OpIdx < 18) {
2265 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2266 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2267 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2268 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2269 } else {
2270 Ops[OpIdx++] = MaskIdx;
2271 }
2272 }
2273 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2274 }
2275
2276 case Intrinsic::thread_pointer: {
2277 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2278 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2279 : WebAssembly::GLOBAL_GET_I32;
2280 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2281 return SDValue(
2282 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2283 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2284 0);
2285 }
2286 }
2287}
2288
2289SDValue
2290WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2291 SelectionDAG &DAG) const {
2292 SDLoc DL(Op);
2293 // If sign extension operations are disabled, allow sext_inreg only if operand
2294 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2295 // extension operations, but allowing sext_inreg in this context lets us have
2296 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2297 // everywhere would be simpler in this file, but would necessitate large and
2298 // brittle patterns to undo the expansion and select extract_lane_s
2299 // instructions.
2300 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2301 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2302 return SDValue();
2303
2304 const SDValue &Extract = Op.getOperand(0);
2305 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2306 if (VecT.getVectorElementType().getSizeInBits() > 32)
2307 return SDValue();
2308 MVT ExtractedLaneT =
2309 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2310 MVT ExtractedVecT =
2311 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2312 if (ExtractedVecT == VecT)
2313 return Op;
2314
2315 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2316 const SDNode *Index = Extract.getOperand(1).getNode();
2317 if (!isa<ConstantSDNode>(Index))
2318 return SDValue();
2319 unsigned IndexVal = Index->getAsZExtVal();
2320 unsigned Scale =
2321 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2322 assert(Scale > 1);
2323 SDValue NewIndex =
2324 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2325 SDValue NewExtract = DAG.getNode(
2327 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2328 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2329 Op.getOperand(1));
2330}
2331
2332static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2333 SelectionDAG &DAG) {
2334 SDValue Source = peekThroughBitcasts(Op);
2335 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2336 return SDValue();
2337
2338 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2339 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2340 "expected extend_low");
2341 auto *Shuffle = cast<ShuffleVectorSDNode>(Source.getNode());
2342
2343 ArrayRef<int> Mask = Shuffle->getMask();
2344 // Look for a shuffle which moves from the high half to the low half.
2345 size_t FirstIdx = Mask.size() / 2;
2346 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2347 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2348 return SDValue();
2349 }
2350 }
2351
2352 SDLoc DL(Op);
2353 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2354 ? WebAssemblyISD::EXTEND_HIGH_S
2355 : WebAssemblyISD::EXTEND_HIGH_U;
2356 SDValue ShuffleSrc = Shuffle->getOperand(0);
2357 if (Op.getOpcode() == ISD::BITCAST)
2358 ShuffleSrc = DAG.getBitcast(Op.getValueType(), ShuffleSrc);
2359
2360 return DAG.getNode(Opc, DL, VT, ShuffleSrc);
2361}
2362
2363SDValue
2364WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2365 SelectionDAG &DAG) const {
2366 SDLoc DL(Op);
2367 EVT VT = Op.getValueType();
2368 SDValue Src = Op.getOperand(0);
2369 EVT SrcVT = Src.getValueType();
2370
2371 if (SrcVT.getVectorElementType() == MVT::i1 ||
2372 SrcVT.getVectorElementType() == MVT::i64)
2373 return SDValue();
2374
2375 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2376 "Unexpected extension factor.");
2377 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2378
2379 if (Scale != 2 && Scale != 4 && Scale != 8)
2380 return SDValue();
2381
2382 unsigned Ext;
2383 switch (Op.getOpcode()) {
2384 default:
2385 llvm_unreachable("unexpected opcode");
2388 Ext = WebAssemblyISD::EXTEND_LOW_U;
2389 break;
2391 Ext = WebAssemblyISD::EXTEND_LOW_S;
2392 break;
2393 }
2394
2395 if (Scale == 2) {
2396 // See if we can use EXTEND_HIGH.
2397 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2398 return ExtendHigh;
2399 }
2400
2401 SDValue Ret = Src;
2402 while (Scale != 1) {
2403 Ret = DAG.getNode(Ext, DL,
2404 Ret.getValueType()
2407 Ret);
2408 Scale /= 2;
2409 }
2410 assert(Ret.getValueType() == VT);
2411 return Ret;
2412}
2413
2415 SDLoc DL(Op);
2416 if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
2417 return SDValue();
2418
2419 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2420 unsigned &Index) -> bool {
2421 switch (Op.getOpcode()) {
2422 case ISD::SINT_TO_FP:
2423 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2424 break;
2425 case ISD::UINT_TO_FP:
2426 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2427 break;
2428 case ISD::FP_EXTEND:
2429 case ISD::FP16_TO_FP:
2430 Opcode = WebAssemblyISD::PROMOTE_LOW;
2431 break;
2432 default:
2433 return false;
2434 }
2435
2436 auto ExtractVector = Op.getOperand(0);
2437 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2438 return false;
2439
2440 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2441 return false;
2442
2443 SrcVec = ExtractVector.getOperand(0);
2444 Index = ExtractVector.getConstantOperandVal(1);
2445 return true;
2446 };
2447
2448 unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
2449 unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
2450 unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
2451 SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
2452
2453 if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec,
2454 FirstIndex) ||
2455 !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec,
2456 SecondIndex))
2457 return SDValue();
2458
2459 // If we're converting to v4f32, check the third and fourth lanes, too.
2460 if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode,
2461 ThirdSrcVec, ThirdIndex) ||
2462 !GetConvertedLane(Op.getOperand(3), FourthOpcode,
2463 FourthSrcVec, FourthIndex)))
2464 return SDValue();
2465
2466 if (FirstOpcode != SecondOpcode)
2467 return SDValue();
2468
2469 // TODO Add an optimization similar to the v2f64 below for shuffling the
2470 // vectors when the lanes are in the wrong order or come from different src
2471 // vectors.
2472 if (NumLanes == 4 &&
2473 (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
2474 FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec ||
2475 FirstSrcVec != FourthSrcVec || FirstIndex != 0 || SecondIndex != 1 ||
2476 ThirdIndex != 2 || FourthIndex != 3))
2477 return SDValue();
2478
2479 MVT ExpectedSrcVT;
2480 switch (FirstOpcode) {
2481 case WebAssemblyISD::CONVERT_LOW_S:
2482 case WebAssemblyISD::CONVERT_LOW_U:
2483 ExpectedSrcVT = MVT::v4i32;
2484 break;
2485 case WebAssemblyISD::PROMOTE_LOW:
2486 ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
2487 break;
2488 }
2489 if (FirstSrcVec.getValueType() != ExpectedSrcVT)
2490 return SDValue();
2491
2492 auto Src = FirstSrcVec;
2493 if (NumLanes == 2 &&
2494 (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
2495 // Shuffle the source vector so that the converted lanes are the low lanes.
2496 Src = DAG.getVectorShuffle(ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
2497 {static_cast<int>(FirstIndex),
2498 static_cast<int>(SecondIndex) + 4, -1, -1});
2499 }
2500 return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32,
2501 Src);
2502}
2503
2504SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2505 SelectionDAG &DAG) const {
2506 MVT VT = Op.getSimpleValueType();
2507 if (VT == MVT::v8f16) {
2508 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scaler
2509 // FP16 type, so cast them to I16s.
2510 MVT IVT = VT.changeVectorElementType(MVT::i16);
2512 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2513 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2514 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2515 return DAG.getBitcast(VT, Res);
2516 }
2517
2518 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2519 return ConvertLow;
2520
2521 SDLoc DL(Op);
2522 const EVT VecT = Op.getValueType();
2523 const EVT LaneT = Op.getOperand(0).getValueType();
2524 const size_t Lanes = Op.getNumOperands();
2525 bool CanSwizzle = VecT == MVT::v16i8;
2526
2527 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2528 // possible number of lanes at once followed by a sequence of replace_lane
2529 // instructions to individually initialize any remaining lanes.
2530
2531 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2532 // swizzled lanes should be given greater weight.
2533
2534 // TODO: Investigate looping rather than always extracting/replacing specific
2535 // lanes to fill gaps.
2536
2537 auto IsConstant = [](const SDValue &V) {
2538 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2539 };
2540
2541 // Returns the source vector and index vector pair if they exist. Checks for:
2542 // (extract_vector_elt
2543 // $src,
2544 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2545 // )
2546 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2547 auto Bail = std::make_pair(SDValue(), SDValue());
2548 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2549 return Bail;
2550 const SDValue &SwizzleSrc = Lane->getOperand(0);
2551 const SDValue &IndexExt = Lane->getOperand(1);
2552 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2553 return Bail;
2554 const SDValue &Index = IndexExt->getOperand(0);
2555 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2556 return Bail;
2557 const SDValue &SwizzleIndices = Index->getOperand(0);
2558 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2559 SwizzleIndices.getValueType() != MVT::v16i8 ||
2560 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2561 Index->getConstantOperandVal(1) != I)
2562 return Bail;
2563 return std::make_pair(SwizzleSrc, SwizzleIndices);
2564 };
2565
2566 // If the lane is extracted from another vector at a constant index, return
2567 // that vector. The source vector must not have more lanes than the dest
2568 // because the shufflevector indices are in terms of the destination lanes and
2569 // would not be able to address the smaller individual source lanes.
2570 auto GetShuffleSrc = [&](const SDValue &Lane) {
2571 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2572 return SDValue();
2573 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2574 return SDValue();
2575 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2576 VecT.getVectorNumElements())
2577 return SDValue();
2578 return Lane->getOperand(0);
2579 };
2580
2581 using ValueEntry = std::pair<SDValue, size_t>;
2582 SmallVector<ValueEntry, 16> SplatValueCounts;
2583
2584 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2585 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2586
2587 using ShuffleEntry = std::pair<SDValue, size_t>;
2588 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2589
2590 auto AddCount = [](auto &Counts, const auto &Val) {
2591 auto CountIt =
2592 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2593 if (CountIt == Counts.end()) {
2594 Counts.emplace_back(Val, 1);
2595 } else {
2596 CountIt->second++;
2597 }
2598 };
2599
2600 auto GetMostCommon = [](auto &Counts) {
2601 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2602 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2603 return *CommonIt;
2604 };
2605
2606 size_t NumConstantLanes = 0;
2607
2608 // Count eligible lanes for each type of vector creation op
2609 for (size_t I = 0; I < Lanes; ++I) {
2610 const SDValue &Lane = Op->getOperand(I);
2611 if (Lane.isUndef())
2612 continue;
2613
2614 AddCount(SplatValueCounts, Lane);
2615
2616 if (IsConstant(Lane))
2617 NumConstantLanes++;
2618 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2619 AddCount(ShuffleCounts, ShuffleSrc);
2620 if (CanSwizzle) {
2621 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2622 if (SwizzleSrcs.first)
2623 AddCount(SwizzleCounts, SwizzleSrcs);
2624 }
2625 }
2626
2627 SDValue SplatValue;
2628 size_t NumSplatLanes;
2629 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2630
2631 SDValue SwizzleSrc;
2632 SDValue SwizzleIndices;
2633 size_t NumSwizzleLanes = 0;
2634 if (SwizzleCounts.size())
2635 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2636 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2637
2638 // Shuffles can draw from up to two vectors, so find the two most common
2639 // sources.
2640 SDValue ShuffleSrc1, ShuffleSrc2;
2641 size_t NumShuffleLanes = 0;
2642 if (ShuffleCounts.size()) {
2643 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2644 llvm::erase_if(ShuffleCounts,
2645 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2646 }
2647 if (ShuffleCounts.size()) {
2648 size_t AdditionalShuffleLanes;
2649 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2650 GetMostCommon(ShuffleCounts);
2651 NumShuffleLanes += AdditionalShuffleLanes;
2652 }
2653
2654 // Predicate returning true if the lane is properly initialized by the
2655 // original instruction
2656 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2658 // Prefer swizzles over shuffles over vector consts over splats
2659 if (NumSwizzleLanes >= NumShuffleLanes &&
2660 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2661 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2662 SwizzleIndices);
2663 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2664 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2665 return Swizzled == GetSwizzleSrcs(I, Lane);
2666 };
2667 } else if (NumShuffleLanes >= NumConstantLanes &&
2668 NumShuffleLanes >= NumSplatLanes) {
2669 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2670 size_t DestLaneCount = VecT.getVectorNumElements();
2671 size_t Scale1 = 1;
2672 size_t Scale2 = 1;
2673 SDValue Src1 = ShuffleSrc1;
2674 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2675 if (Src1.getValueType() != VecT) {
2676 size_t LaneSize =
2678 assert(LaneSize > DestLaneSize);
2679 Scale1 = LaneSize / DestLaneSize;
2680 Src1 = DAG.getBitcast(VecT, Src1);
2681 }
2682 if (Src2.getValueType() != VecT) {
2683 size_t LaneSize =
2685 assert(LaneSize > DestLaneSize);
2686 Scale2 = LaneSize / DestLaneSize;
2687 Src2 = DAG.getBitcast(VecT, Src2);
2688 }
2689
2690 int Mask[16];
2691 assert(DestLaneCount <= 16);
2692 for (size_t I = 0; I < DestLaneCount; ++I) {
2693 const SDValue &Lane = Op->getOperand(I);
2694 SDValue Src = GetShuffleSrc(Lane);
2695 if (Src == ShuffleSrc1) {
2696 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2697 } else if (Src && Src == ShuffleSrc2) {
2698 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2699 } else {
2700 Mask[I] = -1;
2701 }
2702 }
2703 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2704 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2705 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2706 auto Src = GetShuffleSrc(Lane);
2707 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2708 };
2709 } else if (NumConstantLanes >= NumSplatLanes) {
2710 SmallVector<SDValue, 16> ConstLanes;
2711 for (const SDValue &Lane : Op->op_values()) {
2712 if (IsConstant(Lane)) {
2713 // Values may need to be fixed so that they will sign extend to be
2714 // within the expected range during ISel. Check whether the value is in
2715 // bounds based on the lane bit width and if it is out of bounds, lop
2716 // off the extra bits.
2717 uint64_t LaneBits = 128 / Lanes;
2718 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2719 ConstLanes.push_back(DAG.getConstant(
2720 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2721 SDLoc(Lane), LaneT));
2722 } else {
2723 ConstLanes.push_back(Lane);
2724 }
2725 } else if (LaneT.isFloatingPoint()) {
2726 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2727 } else {
2728 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2729 }
2730 }
2731 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2732 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2733 return IsConstant(Lane);
2734 };
2735 } else {
2736 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2737 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2738 (DestLaneSize == 32 || DestLaneSize == 64)) {
2739 // Could be selected to load_zero.
2740 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2741 } else {
2742 // Use a splat (which might be selected as a load splat)
2743 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2744 }
2745 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2746 return Lane == SplatValue;
2747 };
2748 }
2749
2750 assert(Result);
2751 assert(IsLaneConstructed);
2752
2753 // Add replace_lane instructions for any unhandled values
2754 for (size_t I = 0; I < Lanes; ++I) {
2755 const SDValue &Lane = Op->getOperand(I);
2756 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2757 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2758 DAG.getConstant(I, DL, MVT::i32));
2759 }
2760
2761 return Result;
2762}
2763
2764SDValue
2765WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2766 SelectionDAG &DAG) const {
2767 SDLoc DL(Op);
2768 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2769 MVT VecType = Op.getOperand(0).getSimpleValueType();
2770 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2771 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2772
2773 // Space for two vector args and sixteen mask indices
2774 SDValue Ops[18];
2775 size_t OpIdx = 0;
2776 Ops[OpIdx++] = Op.getOperand(0);
2777 Ops[OpIdx++] = Op.getOperand(1);
2778
2779 // Expand mask indices to byte indices and materialize them as operands
2780 for (int M : Mask) {
2781 for (size_t J = 0; J < LaneBytes; ++J) {
2782 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2783 // whole lane of vector input, to allow further reduction at VM. E.g.
2784 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2785 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2786 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2787 }
2788 }
2789
2790 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2791}
2792
2793SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2794 SelectionDAG &DAG) const {
2795 SDLoc DL(Op);
2796 // The legalizer does not know how to expand the unsupported comparison modes
2797 // of i64x2 vectors, so we manually unroll them here.
2798 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2800 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2801 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2802 const SDValue &CC = Op->getOperand(2);
2803 auto MakeLane = [&](unsigned I) {
2804 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2805 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2806 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2807 };
2808 return DAG.getBuildVector(Op->getValueType(0), DL,
2809 {MakeLane(0), MakeLane(1)});
2810}
2811
2812SDValue
2813WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2814 SelectionDAG &DAG) const {
2815 // Allow constant lane indices, expand variable lane indices
2816 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2817 if (isa<ConstantSDNode>(IdxNode)) {
2818 // Ensure the index type is i32 to match the tablegen patterns
2819 uint64_t Idx = IdxNode->getAsZExtVal();
2820 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2821 Ops[Op.getNumOperands() - 1] =
2822 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2823 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2824 }
2825 // Perform default expansion
2826 return SDValue();
2827}
2828
2830 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2831 // 32-bit and 64-bit unrolled shifts will have proper semantics
2832 if (LaneT.bitsGE(MVT::i32))
2833 return DAG.UnrollVectorOp(Op.getNode());
2834 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2835 SDLoc DL(Op);
2836 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2837 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2838 unsigned ShiftOpcode = Op.getOpcode();
2839 SmallVector<SDValue, 16> ShiftedElements;
2840 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2841 SmallVector<SDValue, 16> ShiftElements;
2842 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2843 SmallVector<SDValue, 16> UnrolledOps;
2844 for (size_t i = 0; i < NumLanes; ++i) {
2845 SDValue MaskedShiftValue =
2846 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2847 SDValue ShiftedValue = ShiftedElements[i];
2848 if (ShiftOpcode == ISD::SRA)
2849 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2850 ShiftedValue, DAG.getValueType(LaneT));
2851 UnrolledOps.push_back(
2852 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2853 }
2854 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2855}
2856
2857SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2858 SelectionDAG &DAG) const {
2859 SDLoc DL(Op);
2860 // Only manually lower vector shifts
2861 assert(Op.getSimpleValueType().isVector());
2862
2863 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2864 auto ShiftVal = Op.getOperand(1);
2865
2866 // Try to skip bitmask operation since it is implied inside shift instruction
2867 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2868 if (MaskOp.getOpcode() != ISD::AND)
2869 return MaskOp;
2870 SDValue LHS = MaskOp.getOperand(0);
2871 SDValue RHS = MaskOp.getOperand(1);
2872 if (MaskOp.getValueType().isVector()) {
2873 APInt MaskVal;
2874 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2875 std::swap(LHS, RHS);
2876
2877 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2878 MaskVal == MaskBits)
2879 MaskOp = LHS;
2880 } else {
2881 if (!isa<ConstantSDNode>(RHS.getNode()))
2882 std::swap(LHS, RHS);
2883
2884 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2885 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2886 MaskOp = LHS;
2887 }
2888
2889 return MaskOp;
2890 };
2891
2892 // Skip vector and operation
2893 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2894 ShiftVal = DAG.getSplatValue(ShiftVal);
2895 if (!ShiftVal)
2896 return unrollVectorShift(Op, DAG);
2897
2898 // Skip scalar and operation
2899 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2900 // Use anyext because none of the high bits can affect the shift
2901 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2902
2903 unsigned Opcode;
2904 switch (Op.getOpcode()) {
2905 case ISD::SHL:
2906 Opcode = WebAssemblyISD::VEC_SHL;
2907 break;
2908 case ISD::SRA:
2909 Opcode = WebAssemblyISD::VEC_SHR_S;
2910 break;
2911 case ISD::SRL:
2912 Opcode = WebAssemblyISD::VEC_SHR_U;
2913 break;
2914 default:
2915 llvm_unreachable("unexpected opcode");
2916 }
2917
2918 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2919}
2920
2921SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2922 SelectionDAG &DAG) const {
2923 EVT ResT = Op.getValueType();
2924 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2925
2926 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2927 (SatVT == MVT::i32 || SatVT == MVT::i64))
2928 return Op;
2929
2930 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2931 return Op;
2932
2933 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2934 return Op;
2935
2936 return SDValue();
2937}
2938
2940 return (Op->getFlags().hasNoNaNs() ||
2941 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2942 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2943 (Op->getFlags().hasNoSignedZeros() ||
2944 DAG.isKnownNeverLogicalZero(Op->getOperand(0)) ||
2945 DAG.isKnownNeverLogicalZero(Op->getOperand(1)));
2946}
2947
2948SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2949 SelectionDAG &DAG) const {
2950 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2951 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2952 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2953 }
2954 return SDValue();
2955}
2956
2957SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2958 SelectionDAG &DAG) const {
2959 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2960 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2961 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2962 }
2963 return SDValue();
2964}
2965
2966//===----------------------------------------------------------------------===//
2967// Custom DAG combine hooks
2968//===----------------------------------------------------------------------===//
2969static SDValue
2971 auto &DAG = DCI.DAG;
2972 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2973
2974 // Hoist vector bitcasts that don't change the number of lanes out of unary
2975 // shuffles, where they are less likely to get in the way of other combines.
2976 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2977 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2978 SDValue Bitcast = N->getOperand(0);
2979 if (Bitcast.getOpcode() != ISD::BITCAST)
2980 return SDValue();
2981 if (!N->getOperand(1).isUndef())
2982 return SDValue();
2983 SDValue CastOp = Bitcast.getOperand(0);
2984 EVT SrcType = CastOp.getValueType();
2985 EVT DstType = Bitcast.getValueType();
2986 if (!SrcType.is128BitVector() ||
2987 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2988 return SDValue();
2989 SDValue NewShuffle = DAG.getVectorShuffle(
2990 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2991 return DAG.getBitcast(DstType, NewShuffle);
2992}
2993
2994/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2995/// split up into scalar instructions during legalization, and the vector
2996/// extending instructions are selected in performVectorExtendCombine below.
2997static SDValue
3000 auto &DAG = DCI.DAG;
3001 assert(N->getOpcode() == ISD::UINT_TO_FP ||
3002 N->getOpcode() == ISD::SINT_TO_FP);
3003
3004 EVT InVT = N->getOperand(0)->getValueType(0);
3005 EVT ResVT = N->getValueType(0);
3006 MVT ExtVT;
3007 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
3008 ExtVT = MVT::v4i32;
3009 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
3010 ExtVT = MVT::v2i32;
3011 else
3012 return SDValue();
3013
3014 unsigned Op =
3016 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
3017 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
3018}
3019
3020static SDValue
3023 auto &DAG = DCI.DAG;
3024
3025 SDNodeFlags Flags = N->getFlags();
3026 SDValue Op0 = N->getOperand(0);
3027 EVT VT = N->getValueType(0);
3028
3029 // Optimize uitofp to sitofp when the sign bit is known to be zero.
3030 // Depending on the target (runtime) backend, this might be performance
3031 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
3032 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
3033 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
3034 }
3035
3036 return SDValue();
3037}
3038
3039static SDValue
3041 auto &DAG = DCI.DAG;
3042 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
3043 N->getOpcode() == ISD::ZERO_EXTEND);
3044
3045 EVT ResVT = N->getValueType(0);
3046 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3047 SDLoc DL(N);
3048
3049 if (ResVT == MVT::v16i32 && N->getOperand(0)->getValueType(0) == MVT::v16i8) {
3050 // Use a tree of extend low/high to split and extend the input in two
3051 // layers to avoid doing several shuffles and even more extends.
3052 unsigned LowOp =
3053 IsSext ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3054 unsigned HighOp =
3055 IsSext ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3056 SDValue Input = N->getOperand(0);
3057 SDValue LowHalf = DAG.getNode(LowOp, DL, MVT::v8i16, Input);
3058 SDValue HighHalf = DAG.getNode(HighOp, DL, MVT::v8i16, Input);
3059 SDValue Subvectors[] = {
3060 DAG.getNode(LowOp, DL, MVT::v4i32, LowHalf),
3061 DAG.getNode(HighOp, DL, MVT::v4i32, LowHalf),
3062 DAG.getNode(LowOp, DL, MVT::v4i32, HighHalf),
3063 DAG.getNode(HighOp, DL, MVT::v4i32, HighHalf),
3064 };
3065 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Subvectors);
3066 }
3067
3068 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
3069 // possible before the extract_subvector can be expanded.
3070 auto Extract = N->getOperand(0);
3071 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3072 return SDValue();
3073 auto Source = Extract.getOperand(0);
3074 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
3075 if (IndexNode == nullptr)
3076 return SDValue();
3077 auto Index = IndexNode->getZExtValue();
3078
3079 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3080 // extracted subvector is the low or high half of its source.
3081 if (ResVT == MVT::v8i16) {
3082 if (Extract.getValueType() != MVT::v8i8 ||
3083 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3084 return SDValue();
3085 } else if (ResVT == MVT::v4i32) {
3086 if (Extract.getValueType() != MVT::v4i16 ||
3087 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3088 return SDValue();
3089 } else if (ResVT == MVT::v2i64) {
3090 if (Extract.getValueType() != MVT::v2i32 ||
3091 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3092 return SDValue();
3093 } else {
3094 return SDValue();
3095 }
3096
3097 bool IsLow = Index == 0;
3098
3099 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3100 : WebAssemblyISD::EXTEND_HIGH_S)
3101 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3102 : WebAssemblyISD::EXTEND_HIGH_U);
3103
3104 return DAG.getNode(Op, DL, ResVT, Source);
3105}
3106
3107static SDValue
3109 auto &DAG = DCI.DAG;
3110
3111 auto GetWasmConversionOp = [](unsigned Op) {
3112 switch (Op) {
3114 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3116 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3117 case ISD::FP_ROUND:
3118 return WebAssemblyISD::DEMOTE_ZERO;
3119 }
3120 llvm_unreachable("unexpected op");
3121 };
3122
3123 auto IsZeroSplat = [](SDValue SplatVal) {
3124 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3125 APInt SplatValue, SplatUndef;
3126 unsigned SplatBitSize;
3127 bool HasAnyUndefs;
3128 // Endianness doesn't matter in this context because we are looking for
3129 // an all-zero value.
3130 return Splat &&
3131 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3132 HasAnyUndefs) &&
3133 SplatValue == 0;
3134 };
3135
3136 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3137 // Combine this:
3138 //
3139 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3140 //
3141 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3142 //
3143 // Or this:
3144 //
3145 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3146 //
3147 // into (f32x4.demote_zero_f64x2 $x).
3148 EVT ResVT;
3149 EVT ExpectedConversionType;
3150 auto Conversion = N->getOperand(0);
3151 auto ConversionOp = Conversion.getOpcode();
3152 switch (ConversionOp) {
3155 ResVT = MVT::v4i32;
3156 ExpectedConversionType = MVT::v2i32;
3157 break;
3158 case ISD::FP_ROUND:
3159 ResVT = MVT::v4f32;
3160 ExpectedConversionType = MVT::v2f32;
3161 break;
3162 default:
3163 return SDValue();
3164 }
3165
3166 if (N->getValueType(0) != ResVT)
3167 return SDValue();
3168
3169 if (Conversion.getValueType() != ExpectedConversionType)
3170 return SDValue();
3171
3172 auto Source = Conversion.getOperand(0);
3173 if (Source.getValueType() != MVT::v2f64)
3174 return SDValue();
3175
3176 if (!IsZeroSplat(N->getOperand(1)) ||
3177 N->getOperand(1).getValueType() != ExpectedConversionType)
3178 return SDValue();
3179
3180 unsigned Op = GetWasmConversionOp(ConversionOp);
3181 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3182 }
3183
3184 // Combine this:
3185 //
3186 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3187 //
3188 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3189 //
3190 // Or this:
3191 //
3192 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3193 //
3194 // into (f32x4.demote_zero_f64x2 $x).
3195 EVT ResVT;
3196 auto ConversionOp = N->getOpcode();
3197 switch (ConversionOp) {
3200 ResVT = MVT::v4i32;
3201 break;
3202 case ISD::FP_ROUND:
3203 ResVT = MVT::v4f32;
3204 break;
3205 default:
3206 llvm_unreachable("unexpected op");
3207 }
3208
3209 if (N->getValueType(0) != ResVT)
3210 return SDValue();
3211
3212 auto Concat = N->getOperand(0);
3213 if (Concat.getValueType() != MVT::v4f64)
3214 return SDValue();
3215
3216 auto Source = Concat.getOperand(0);
3217 if (Source.getValueType() != MVT::v2f64)
3218 return SDValue();
3219
3220 if (!IsZeroSplat(Concat.getOperand(1)) ||
3221 Concat.getOperand(1).getValueType() != MVT::v2f64)
3222 return SDValue();
3223
3224 unsigned Op = GetWasmConversionOp(ConversionOp);
3225 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3226}
3227
3228// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3229static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3230 const SDLoc &DL, unsigned VectorWidth) {
3231 EVT VT = Vec.getValueType();
3232 EVT ElVT = VT.getVectorElementType();
3233 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3234 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3235 VT.getVectorNumElements() / Factor);
3236
3237 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3238 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3239 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3240
3241 // This is the index of the first element of the VectorWidth-bit chunk
3242 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3243 IdxVal &= ~(ElemsPerChunk - 1);
3244
3245 // If the input is a buildvector just emit a smaller one.
3246 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3247 return DAG.getBuildVector(ResultVT, DL,
3248 Vec->ops().slice(IdxVal, ElemsPerChunk));
3249
3250 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3251 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3252}
3253
3254// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3255// is the expected destination value type after recursion. In is the initial
3256// input. Note that the input should have enough leading zero bits to prevent
3257// NARROW_U from saturating results.
3259 SelectionDAG &DAG) {
3260 EVT SrcVT = In.getValueType();
3261
3262 // No truncation required, we might get here due to recursive calls.
3263 if (SrcVT == DstVT)
3264 return In;
3265
3266 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3267 unsigned NumElems = SrcVT.getVectorNumElements();
3268 if (!isPowerOf2_32(NumElems))
3269 return SDValue();
3270 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3271 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3272
3273 LLVMContext &Ctx = *DAG.getContext();
3274 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3275
3276 // Narrow to the largest type possible:
3277 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3278 EVT InVT = MVT::i16, OutVT = MVT::i8;
3279 if (SrcVT.getScalarSizeInBits() > 16) {
3280 InVT = MVT::i32;
3281 OutVT = MVT::i16;
3282 }
3283 unsigned SubSizeInBits = SrcSizeInBits / 2;
3284 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3285 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3286
3287 // Split lower/upper subvectors.
3288 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3289 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3290
3291 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3292 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3293 Lo = DAG.getBitcast(InVT, Lo);
3294 Hi = DAG.getBitcast(InVT, Hi);
3295 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3296 return DAG.getBitcast(DstVT, Res);
3297 }
3298
3299 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3300 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3301 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3302 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3303
3304 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3305 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3306 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3307}
3308
3311 auto &DAG = DCI.DAG;
3312
3313 SDValue In = N->getOperand(0);
3314 EVT InVT = In.getValueType();
3315 if (!InVT.isSimple())
3316 return SDValue();
3317
3318 EVT OutVT = N->getValueType(0);
3319 if (!OutVT.isVector())
3320 return SDValue();
3321
3322 EVT OutSVT = OutVT.getVectorElementType();
3323 EVT InSVT = InVT.getVectorElementType();
3324 // Currently only cover truncate to v16i8 or v8i16.
3325 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3326 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3327 return SDValue();
3328
3329 SDLoc DL(N);
3331 OutVT.getScalarSizeInBits());
3332 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3333 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3334}
3335
3338 using namespace llvm::SDPatternMatch;
3339 auto &DAG = DCI.DAG;
3340 SDLoc DL(N);
3341 SDValue Src = N->getOperand(0);
3342 EVT VT = N->getValueType(0);
3343 EVT SrcVT = Src.getValueType();
3344
3345 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3346 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3347 return SDValue();
3348
3349 unsigned NumElts = SrcVT.getVectorNumElements();
3350 EVT Width = MVT::getIntegerVT(128 / NumElts);
3351
3352 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3353 // ==> bitmask
3354 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3355 return DAG.getZExtOrTrunc(
3356 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3357 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3358 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3359 SrcVT.changeVectorElementType(
3360 *DAG.getContext(), Width))}),
3361 DL, VT);
3362 }
3363
3364 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3365 if (NumElts == 32 || NumElts == 64) {
3366 SDValue Concat, SetCCVector;
3367 ISD::CondCode SetCond;
3368
3369 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3370 m_CondCode(SetCond)))))
3371 return SDValue();
3372 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3373 return SDValue();
3374
3375 // Reconstruct the wide bitmask from each CONCAT_VECTORS operand.
3376 // Derive the per-chunk mask/integer types from the actual operand type
3377 // instead of hardcoding v16i1 / i16 for every chunk.
3378 EVT ConcatOperandVT = Concat.getOperand(0).getValueType();
3379 unsigned ConcatOperandNumElts = ConcatOperandVT.getVectorNumElements();
3380
3381 EVT ConcatOperandMaskVT =
3382 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
3383 ElementCount::getFixed(ConcatOperandNumElts));
3384 EVT ConcatOperandBitmaskVT =
3385 EVT::getIntegerVT(*DAG.getContext(), ConcatOperandNumElts);
3386 EVT ReturnVT = N->getValueType(0);
3387 SDValue ReconstructedBitmask = DAG.getConstant(0, DL, ReturnVT);
3388 // Example:
3389 // v32i16 = concat(v8i16, v8i16, v8i16, v8i16)
3390 // -> v8i1 + v8i1 + v8i1 + v8i1
3391 // -> i8 + i8 + i8 + i8
3392 // -> reconstructed i32 bitmask
3393 for (size_t I = 0; I < Concat->ops().size(); ++I) {
3394 SDValue ConcatOperand = Concat.getOperand(I);
3395 assert(ConcatOperand.getValueType() == ConcatOperandVT &&
3396 "concat_vectors operands must have the same type");
3397
3398 SDValue SetCCVectorOperand =
3399 extractSubVector(SetCCVector, I * ConcatOperandNumElts, DAG, DL, 128);
3400 if (!SetCCVectorOperand ||
3401 SetCCVectorOperand.getValueType() != ConcatOperandVT)
3402 return SDValue();
3403
3404 // Build the per-chunk mask using the correct chunk type:
3405 // v16i8 -> v16i1 -> i16
3406 // v8i16 -> v8i1 -> i8
3407 // v4i32 -> v4i1 -> i4
3408 // v2i64 -> v2i1 -> i2
3409 SDValue ConcatOperandMask = DAG.getSetCC(
3410 DL, ConcatOperandMaskVT, ConcatOperand, SetCCVectorOperand, SetCond);
3411 SDValue ConcatOperandBitmask =
3412 DAG.getBitcast(ConcatOperandBitmaskVT, ConcatOperandMask);
3413 SDValue ExtendedConcatOperandBitmask =
3414 DAG.getZExtOrTrunc(ConcatOperandBitmask, DL, ReturnVT);
3415
3416 // Shift the previously reconstructed bits to make room for this chunk.
3417 if (I != 0) {
3418 ReconstructedBitmask = DAG.getNode(
3419 ISD::SHL, DL, ReturnVT, ReconstructedBitmask,
3420 DAG.getShiftAmountConstant(ConcatOperandNumElts, ReturnVT, DL));
3421 }
3422
3423 // Merge disjoint partial bitmasks with OR.
3424 ReconstructedBitmask =
3425 DAG.getNode(ISD::OR, DL, ReturnVT, ReconstructedBitmask,
3426 ExtendedConcatOperandBitmask);
3427 }
3428
3429 return ReconstructedBitmask;
3430 }
3431
3432 return SDValue();
3433}
3434
3436 // bitmask (setcc <X>, 0, setlt) => bitmask X
3437 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3438 using namespace llvm::SDPatternMatch;
3439
3440 if (N->getConstantOperandVal(0) != Intrinsic::wasm_bitmask)
3441 return SDValue();
3442
3443 SDValue LHS;
3444 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3446 return SDValue();
3447
3448 SDLoc DL(N);
3449 return DAG.getNode(
3450 ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
3451 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32), LHS});
3452}
3453
3455 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3456 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3457 // any_true (setcc <X>, 0, ne) => (any_true X)
3458 // all_true (setcc <X>, 0, ne) => (all_true X)
3459 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3460 using namespace llvm::SDPatternMatch;
3461
3462 SDValue LHS;
3463 if (N->getNumOperands() < 2 ||
3464 !sd_match(N->getOperand(1),
3466 return SDValue();
3467 EVT LT = LHS.getValueType();
3468 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3469 return SDValue();
3470
3471 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3472 ISD::CondCode SetType,
3473 Intrinsic::WASMIntrinsics InPost) {
3474 if (N->getConstantOperandVal(0) != InPre)
3475 return SDValue();
3476
3477 SDValue LHS;
3478 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3479 m_SpecificCondCode(SetType))))
3480 return SDValue();
3481
3482 SDLoc DL(N);
3483 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3484 {DAG.getConstant(InPost, DL, MVT::i32), LHS});
3485 if (SetType == ISD::SETEQ)
3486 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3487 DAG.getConstant(1, DL, MVT::i32));
3488 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3489 };
3490
3491 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3492 Intrinsic::wasm_alltrue))
3493 return AnyTrueEQ;
3494 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3495 Intrinsic::wasm_anytrue))
3496 return AllTrueEQ;
3497 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3498 Intrinsic::wasm_anytrue))
3499 return AnyTrueNE;
3500 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3501 Intrinsic::wasm_alltrue))
3502 return AllTrueNE;
3503
3504 return SDValue();
3505}
3506
3507template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3508 Intrinsic::ID Intrin>
3510 SDValue LHS = N->getOperand(0);
3511 SDValue RHS = N->getOperand(1);
3512 SDValue Cond = N->getOperand(2);
3513 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3514 return SDValue();
3515
3516 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3517 return SDValue();
3518
3519 SDLoc DL(N);
3520 SDValue Ret =
3521 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3522 {DAG.getConstant(Intrin, DL, MVT::i32),
3523 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)});
3524 if (RequiresNegate)
3525 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3526 DAG.getConstant(1, DL, MVT::i32));
3527 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3528}
3529
3530/// Try to convert a i128 comparison to a v16i8 comparison before type
3531/// legalization splits it up into chunks
3532static SDValue
3534 const WebAssemblySubtarget *Subtarget) {
3535
3536 SDLoc DL(N);
3537 SDValue X = N->getOperand(0);
3538 SDValue Y = N->getOperand(1);
3539 EVT VT = N->getValueType(0);
3540 EVT OpVT = X.getValueType();
3541
3542 SelectionDAG &DAG = DCI.DAG;
3544 Attribute::NoImplicitFloat))
3545 return SDValue();
3546
3547 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3548 // We're looking for an oversized integer equality comparison with SIMD
3549 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3550 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3551 return SDValue();
3552
3553 // Don't perform this combine if constructing the vector will be expensive.
3554 auto IsVectorBitCastCheap = [](SDValue X) {
3556 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3557 };
3558
3559 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3560 return SDValue();
3561
3562 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3563 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3564 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3565
3566 SDValue Intr =
3567 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3568 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3569 : Intrinsic::wasm_anytrue,
3570 DL, MVT::i32),
3571 Cmp});
3572
3573 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3574 ISD::SETNE);
3575}
3576
3579 const WebAssemblySubtarget *Subtarget) {
3580 if (!DCI.isBeforeLegalize())
3581 return SDValue();
3582
3583 EVT VT = N->getValueType(0);
3584 if (!VT.isScalarInteger())
3585 return SDValue();
3586
3587 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3588 return V;
3589
3590 SDValue LHS = N->getOperand(0);
3591 if (LHS->getOpcode() != ISD::BITCAST)
3592 return SDValue();
3593
3594 EVT FromVT = LHS->getOperand(0).getValueType();
3595 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3596 return SDValue();
3597
3598 unsigned NumElts = FromVT.getVectorNumElements();
3599 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3600 return SDValue();
3601
3602 if (!cast<ConstantSDNode>(N->getOperand(1)))
3603 return SDValue();
3604
3605 auto &DAG = DCI.DAG;
3606 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3607 MVT::getIntegerVT(128 / NumElts));
3608 // setcc (iN (bitcast (vNi1 X))), 0, ne
3609 // ==> any_true (vNi1 X)
3611 N, VecVT, DAG)) {
3612 return Match;
3613 }
3614 // setcc (iN (bitcast (vNi1 X))), 0, eq
3615 // ==> xor (any_true (vNi1 X)), -1
3617 N, VecVT, DAG)) {
3618 return Match;
3619 }
3620 // setcc (iN (bitcast (vNi1 X))), -1, eq
3621 // ==> all_true (vNi1 X)
3623 N, VecVT, DAG)) {
3624 return Match;
3625 }
3626 // setcc (iN (bitcast (vNi1 X))), -1, ne
3627 // ==> xor (all_true (vNi1 X)), -1
3629 N, VecVT, DAG)) {
3630 return Match;
3631 }
3632 return SDValue();
3633}
3634
3636 EVT VT = N->getValueType(0);
3637 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3638 return SDValue();
3639
3640 // Mul with extending inputs.
3641 SDValue LHS = N->getOperand(0);
3642 SDValue RHS = N->getOperand(1);
3643 if (LHS.getOpcode() != RHS.getOpcode())
3644 return SDValue();
3645
3646 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3647 LHS.getOpcode() != ISD::ZERO_EXTEND)
3648 return SDValue();
3649
3650 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3651 return SDValue();
3652
3653 EVT FromVT = LHS->getOperand(0).getValueType();
3654 EVT EltTy = FromVT.getVectorElementType();
3655 if (EltTy != MVT::i8)
3656 return SDValue();
3657
3658 // For an input DAG that looks like this
3659 // %a = input_type
3660 // %b = input_type
3661 // %lhs = extend %a to output_type
3662 // %rhs = extend %b to output_type
3663 // %mul = mul %lhs, %rhs
3664
3665 // input_type | output_type | instructions
3666 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3667 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3668 // | | %low_low = i32x4.ext_low_i16x8_ %low
3669 // | | %low_high = i32x4.ext_high_i16x8_ %low
3670 // | | %high_low = i32x4.ext_low_i16x8_ %high
3671 // | | %high_high = i32x4.ext_high_i16x8_ %high
3672 // | | %res = concat_vector(...)
3673 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3674 // | | %low_low = i32x4.ext_low_i16x8_ %low
3675 // | | %low_high = i32x4.ext_high_i16x8_ %low
3676 // | | %res = concat_vector(%low_low, %low_high)
3677
3678 SDLoc DL(N);
3679 unsigned NumElts = VT.getVectorNumElements();
3680 SDValue ExtendInLHS = LHS->getOperand(0);
3681 SDValue ExtendInRHS = RHS->getOperand(0);
3682 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3683 unsigned ExtendLowOpc =
3684 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3685 unsigned ExtendHighOpc =
3686 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3687
3688 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3689 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3690 };
3691 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3692 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3693 };
3694
3695 if (NumElts == 16) {
3696 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3697 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3698 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3699 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3700 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3701 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3702 SDValue SubVectors[] = {
3703 GetExtendLow(MVT::v4i32, MulLow),
3704 GetExtendHigh(MVT::v4i32, MulLow),
3705 GetExtendLow(MVT::v4i32, MulHigh),
3706 GetExtendHigh(MVT::v4i32, MulHigh),
3707 };
3708 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3709 } else {
3710 assert(NumElts == 8);
3711 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3712 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3713 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3714 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3715 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3716 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3717 }
3718 return SDValue();
3719}
3720
// performMulCombine: DAG combine for ISD::MUL on vector types.
// NOTE(review): the function's signature line is not present in this
// extraction (the hyperlinked listing lines 3721-3722 were dropped); the
// code lines below are kept byte-identical to the original listing.
3723 assert(N->getOpcode() == ISD::MUL);
3724 EVT VT = N->getValueType(0);
3725 if (!VT.isVector())
3726 return SDValue();
3727
// First try to form widening extended-multiply patterns; if that helper
// produces a node, use it and stop.
3728 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3729 return Res;
3730
3731 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3732 // extend them to v8i16.
3733 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3734 return SDValue();
3735
3736 SDLoc DL(N);
3737 SelectionDAG &DAG = DCI.DAG;
3738 SDValue LHS = N->getOperand(0);
3739 SDValue RHS = N->getOperand(1);
3740 EVT MulVT = MVT::v8i16;
3741
3742 if (VT == MVT::v8i8) {
// v8i8 case: widen both operands to v16i8 (upper half undef), zero-extend
// the low half to v8i16 and multiply there. Zero-extension is sufficient
// for either signedness because the low 8 bits of an 8x8-bit product are
// the same modulo 256 regardless of how the inputs were extended.
3743 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3744 DAG.getUNDEF(MVT::v8i8));
3745 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3746 DAG.getUNDEF(MVT::v8i8));
3747 SDValue LowLHS =
3748 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3749 SDValue LowRHS =
3750 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3751 SDValue MulLow = DAG.getBitcast(
3752 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3753 // Take the low byte of each lane.
3754 SDValue Shuffle = DAG.getVectorShuffle(
3755 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3756 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
// Only the first 8 lanes of the shuffle are meaningful; extract the
// 64-bit subvector to get back a v8i8-sized result.
3757 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3758 } else {
3759 assert(VT == MVT::v16i8 && "Expected v16i8");
// v16i8 case: multiply the low and high halves separately in v8i16, then
// interleave the low byte of every 16-bit product lane back into a
// single v16i8 via a two-input shuffle.
3760 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3761 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3762 SDValue HighLHS =
3763 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3764 SDValue HighRHS =
3765 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3766
3767 SDValue MulLow =
3768 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3769 SDValue MulHigh =
3770 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3771
3772 // Take the low byte of each lane.
3773 return DAG.getVectorShuffle(
3774 VT, DL, MulLow, MulHigh,
3775 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3776 }
3777}
3778
// DoubleVectorWidth - Repeatedly double the lane count of \p In by
// concatenating it with a POISON vector of the same type, recursing until
// the result has at least \p RequiredNumElems elements. The element type is
// preserved; the added high lanes are poison. Callers are expected to pass
// a RequiredNumElems that is a power-of-two multiple of In's lane count so
// the doubling lands exactly on it.
3779SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3780 SelectionDAG &DAG) {
3781 SDLoc DL(In);
3782 LLVMContext &Ctx = *DAG.getContext();
3783 EVT InVT = In.getValueType();
3784 unsigned NumElems = InVT.getVectorNumElements() * 2;
3785 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3786 SDValue Concat =
3787 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3788 if (NumElems < RequiredNumElems) {
// Still too narrow: double again.
3789 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3790 }
3791 return Concat;
3792}
3793
// performConvertFPCombine: combine FP_TO_SINT/FP_TO_UINT from v*f32 into a
// narrow i8/i16 vector result by converting to i32 lanes first, masking to
// the output width, and truncating with the target NARROW idiom.
// NOTE(review): the function's signature line (listing line 3794) is not
// present in this extraction; the body below is kept byte-identical.
3795 EVT OutVT = N->getValueType(0);
3796 if (!OutVT.isVector())
3797 return SDValue();
3798
// Only narrow integer element results are handled here.
3799 EVT OutElTy = OutVT.getVectorElementType();
3800 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3801 return SDValue();
3802
3803 unsigned NumElems = OutVT.getVectorNumElements();
3804 if (!isPowerOf2_32(NumElems))
3805 return SDValue();
3806
// The source must be a vector of f32 lanes.
3807 EVT FPVT = N->getOperand(0)->getValueType(0);
3808 if (FPVT.getVectorElementType() != MVT::f32)
3809 return SDValue();
3810
3811 SDLoc DL(N);
3812
3813 // First, convert to i32.
3814 LLVMContext &Ctx = *DAG.getContext();
3815 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3816 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
// NOTE(review): the listing line 3817 that begins this statement was
// dropped by the extraction; given the use of `Mask` below it presumably
// read something like `APInt Mask = APInt::getLowBitsSet(32,` -- confirm
// against the upstream source before relying on this.
3818 OutVT.getScalarSizeInBits());
3819 // Mask out the top MSBs.
3820 SDValue Masked =
3821 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT))
;
3822
3823 if (OutVT.getSizeInBits() < 128) {
3824 // Create a wide enough vector that we can use narrow.
3825 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3826 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3827 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3828 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
// Only the low OutVT-sized portion of the 128-bit narrow result is valid.
3829 return DAG.getBitcast(
3830 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3831 } else {
3832 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3833 }
// NOTE(review): unreachable -- both branches above return.
3834 return SDValue();
3835}
3836
3837// Wide vector shift operations such as v8i32 with sign-extended
3838// operands cause Type Legalizer crashes because the target-specific
3839// extension nodes cannot be directly mapped to the 256-bit size.
3840//
3841// To resolve the crash and optimize performance, we intercept the
3842// illegal v8i32 shift in DAGCombine. We convert the shift amounts
3843// into multipliers and manually split the vector into two v4i32 halves.
3844//
3845// Before: t1: v8i32 = shl (sign_extend v8i16), const_vec
3846// After : t2: v4i32 = mul (ext_low_s v8i16), (ext_low_s narrow_vec)
3847// t3: v4i32 = mul (ext_high_s v8i16), (ext_high_s narrow_vec)
3848// t4: v8i32 = concat_vectors t2, t3
// NOTE(review): the function's signature lines (listing lines 3849-3850,
// performShiftCombine per the index) were dropped by this extraction; the
// body below is kept byte-identical.
3851 SelectionDAG &DAG = DCI.DAG;
3852 assert(N->getOpcode() == ISD::SHL);
3853 EVT VT = N->getValueType(0);
3854 if (VT != MVT::v8i32)
3855 return SDValue();
3856
// The shifted value must itself be a sign/zero extension, and the shift
// amounts must come from a BUILD_VECTOR of constants.
3857 SDValue LHS = N->getOperand(0);
3858 SDValue RHS = N->getOperand(1);
3859 unsigned ExtOpc = LHS.getOpcode();
3860 if (ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND)
3861 return SDValue();
3862
3863 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3864 return SDValue();
3865
3866 SDLoc DL(N);
3867 SDValue ExtendIn = LHS.getOperand(0);
3868 EVT FromVT = ExtendIn.getValueType();
3869 if (FromVT != MVT::v8i16)
3870 return SDValue();
3871
3872 unsigned NumElts = VT.getVectorNumElements();
3873 unsigned BitWidth = FromVT.getScalarSizeInBits();
3874 bool IsSigned = (ExtOpc == ISD::SIGN_EXTEND);
// Reject shift amounts too large for the (1 << amt) multiplier to be
// represented in the narrow element type; the signed bound is one less,
// presumably to keep the multiplier's sign bit clear -- confirm upstream.
3875 unsigned MaxValidShift = IsSigned ? (BitWidth - 1) : BitWidth;
3876 SmallVector<SDValue, 16> MulConsts;
3877 for (unsigned I = 0; I < NumElts; ++I) {
3878 auto *C = dyn_cast<ConstantSDNode>(RHS.getOperand(I));
3879 if (!C)
3880 return SDValue();
3881
3882 const APInt &ShiftAmt = C->getAPIntValue();
3883 if (ShiftAmt.uge(MaxValidShift))
3884 return SDValue();
3885
// Turn each shift amount into a power-of-two multiplier. The constants
// are built opaque so later combines don't fold the mul back into a shl.
3886 APInt MulAmt = APInt::getOneBitSet(BitWidth, ShiftAmt.getZExtValue());
3887 MulConsts.push_back(DAG.getConstant(MulAmt, DL, FromVT.getScalarType(),
3888 /*isTarget=*/false, /*isOpaque=*/true));
3889 }
3890
3891 SDValue NarrowConst = DAG.getBuildVector(FromVT, DL, MulConsts);
3892 unsigned ExtLowOpc =
3893 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3894 unsigned ExtHighOpc =
3895 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3896
// Split into two v4i32 halves: extend both the value and the multiplier
// vector low/high, multiply each half, then concatenate back to v8i32.
3897 EVT HalfVT = MVT::v4i32;
3898 SDValue LHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, ExtendIn);
3899 SDValue LHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, ExtendIn);
3900 SDValue RHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, NarrowConst);
3901 SDValue RHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, NarrowConst);
3902 SDValue MulLo = DAG.getNode(ISD::MUL, DL, HalfVT, LHSLo, RHSLo);
3903 SDValue MulHi = DAG.getNode(ISD::MUL, DL, HalfVT, LHSHi, RHSHi);
3904 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MulLo, MulHi);
3905}
3906
// PerformDAGCombine - Target hook dispatching node-specific DAG combines by
// opcode. Returns the combined node, or an empty SDValue to decline.
// NOTE(review): this extraction dropped several hyperlinked `case` label
// lines (listing lines 3917, 3928-3929, 3931, 3938), which is why some
// `return`s below appear without a preceding `case` and why the `}` after
// performAnyAllCombine looks stray (it closes a dropped `case ... {`).
// The code lines are kept byte-identical; consult the upstream source for
// the missing labels.
3907SDValue
3908WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3909 DAGCombinerInfo &DCI) const {
3910 switch (N->getOpcode()) {
3911 default:
3912 return SDValue();
3913 case ISD::BITCAST:
3914 return performBitcastCombine(N, DCI);
3915 case ISD::SETCC:
3916 return performSETCCCombine(N, DCI, Subtarget);
// (dropped case label here -- presumably ISD::VECTOR_SHUFFLE)
3918 return performVECTOR_SHUFFLECombine(N, DCI);
3919 case ISD::SIGN_EXTEND:
3920 case ISD::ZERO_EXTEND:
3921 return performVectorExtendCombine(N, DCI);
3922 case ISD::UINT_TO_FP:
// For unsigned conversions, prefer the extend-to-FP combine and fall back
// to the non-negative-input combine.
3923 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3924 return ExtCombine;
3925 return performVectorNonNegToFPCombine(N, DCI);
3926 case ISD::SINT_TO_FP:
3927 return performVectorExtendToFPCombine(N, DCI);
// (dropped case labels here before FP_ROUND)
3930 case ISD::FP_ROUND:
// (dropped case label here, grouped with FP_ROUND)
3932 return performVectorTruncZeroCombine(N, DCI);
3933 case ISD::FP_TO_SINT:
3934 case ISD::FP_TO_UINT:
3935 return performConvertFPCombine(N, DCI.DAG);
3936 case ISD::TRUNCATE:
3937 return performTruncateCombine(N, DCI);
// (dropped case label with opening brace here)
3939 if (SDValue V = performBitmaskCombine(N, DCI.DAG))
3940 return V;
3941 return performAnyAllCombine(N, DCI.DAG);
3942 }
3943 case ISD::MUL:
3944 return performMulCombine(N, DCI);
3945 case ISD::SHL:
3946 return performShiftCombine(N, DCI);
3947 }
3948}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Hexagon Common GEP
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool callingConvSupported(CallingConv::ID CallConv)
static MachineBasicBlock * LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode)
static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static std::optional< unsigned > IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorNonNegToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG)
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, const WebAssemblySubtarget *Subtarget, const TargetInstrInfo &TII)
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG)
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, SelectionDAG &DAG)
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool IsWebAssemblyGlobal(SDValue Op)
static MachineBasicBlock * LowerMemset(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG)
SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems, SelectionDAG &DAG)
static SDValue performVectorExtendToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get split up into scalar instr...
static SDValue performShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG)
This file defines the interfaces that WebAssembly uses to lower LLVM code into a selection DAG.
This file provides WebAssembly-specific target descriptions.
This file declares WebAssembly-specific per-machine-function information.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
This file declares the WebAssembly-specific subclass of TargetMachine.
This file contains the declaration of the WebAssembly-specific type parsing utility functions.
This file contains the declaration of the WebAssembly-specific utility functions.
X86 cmov Conversion
static constexpr int Concat[]
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
ThreadLocalMode getThreadLocalMode() const
Type * getValueType() const
unsigned getTargetFlags() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Describe properties that are true of each instruction in the target description file.
void setNoStrip() const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool isFixedLengthVector() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFPImm(const ConstantFP *Val) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getBasicBlock(MachineBasicBlock *MBB)
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:275
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:291
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:713
static std::optional< unsigned > getLocalForStackObject(MachineFunction &MF, int FrameIndex)
WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const override
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ WASM_EmscriptenInvoke
For emscripten __invoke_* functions.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum/maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Address, End Address Outputs: Output Chain
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
OperandFlags
These are flags set on operands, but should be considered private, all access should go through the M...
Definition MCInstrDesc.h:51
auto m_Value()
Match an arbitrary value and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __indirect_function_table, for use in call_indirect and in function bitcasts.
bool isWebAssemblyFuncrefType(const Type *Ty)
Return true if this is a WebAssembly Funcref Type.
bool isWebAssemblyTableType(const Type *Ty)
Return true if the table represents a WebAssembly table type.
MCSymbolWasm * getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __funcref_call_table, for use in funcref calls when lowered to table.set + call_indirect.
bool isValidAddressSpace(unsigned AS)
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget)
Returns true if the function's return value(s) can be lowered directly, i.e., not indirectly via a po...
bool isWasmVarAddressSpace(unsigned AS)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, const TargetMachine &TM, SmallVectorImpl< MVT > &Params, SmallVectorImpl< MVT > &Results)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2088
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl< MVT > &ValueVTs)
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:460
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
Align getNonZeroOrigAlign() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.