LLVM 23.0.0git
WebAssemblyISelLowering.cpp
Go to the documentation of this file.
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
// Scalars each get their own wasm value class; all 128-bit vectors share V128.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
// Reference types add opaque externref/funcref classes; exnref additionally
// requires the exception-handling feature.
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for both externref, funcref and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
// Floating-point configuration; v8f16 participates only when fp16 is enabled.
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
162 setTruncStoreAction(T, MVT::f16, Expand);
163 }
164
165 // Expand unavailable integer operations.
166 for (auto Op :
170 for (auto T : {MVT::i32, MVT::i64})
172 if (Subtarget->hasSIMD128())
173 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
175 }
176
177 if (Subtarget->hasWideArithmetic()) {
183 }
184
185 if (Subtarget->hasNontrappingFPToInt())
187 for (auto T : {MVT::i32, MVT::i64})
189
190 if (Subtarget->hasRelaxedSIMD()) {
193 {MVT::v4f32, MVT::v2f64}, Custom);
194 }
195 // SIMD-specific configuration
196 if (Subtarget->hasSIMD128()) {
197
199
200 // Combine wide-vector muls, with extend inputs, to extmul_half.
202
203 // Combine vector mask reductions into alltrue/anytrue
205
206 // Convert vector to integer bitcasts to bitmask
208
209 // Hoist bitcasts out of shuffles
211
212 // Combine extends of extract_subvectors into widening ops
214
215 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
216 // conversions ops
219
220 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
221 // into conversion ops
225
227
228 // Support saturating add/sub for i8x16 and i16x8
230 for (auto T : {MVT::v16i8, MVT::v8i16})
232
233 // Support integer abs
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
236
237 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
238 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
239 MVT::v2f64})
241
242 if (Subtarget->hasFP16())
244
245 // We have custom shuffle lowering to expose the shuffle mask
246 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
247 MVT::v2f64})
249
250 if (Subtarget->hasFP16())
252
253 // Support splatting
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
// Unsigned rounding-average is a native instruction for i8x16/i16x8.
258 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
259
260 // Custom lowering since wasm shifts must have a scalar shift amount
261 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
264
265 // Custom lower lane accesses to expand out variable indices
267 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
268 MVT::v2f64})
270
271 // There is no i8x16.mul instruction
272 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
273
274 // Expand integer operations supported for scalars but not SIMD
275 for (auto Op :
277 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
279
280 // But we do have integer min and max operations
281 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
282 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
284
285 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
286 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
287 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
288 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
289
290 // Custom lower bit counting operations for other types to scalarize them.
291 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
292 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
294
295 // Expand float operations supported for scalars but not SIMD
298 for (auto T : {MVT::v4f32, MVT::v2f64})
300
301 // Unsigned comparison operations are unavailable for i64x2 vectors.
303 setCondCodeAction(CC, MVT::v2i64, Custom);
304
305 // 64x2 conversions are not in the spec
306 for (auto Op :
308 for (auto T : {MVT::v2i64, MVT::v2f64})
310
311 // But saturating fp_to_int converstions are
313 setOperationAction(Op, MVT::v4i32, Custom);
314 if (Subtarget->hasFP16()) {
315 setOperationAction(Op, MVT::v8i16, Custom);
316 }
317 }
318
319 // Support vector extending
324 }
325
326 if (Subtarget->hasFP16()) {
327 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
328 }
329
330 if (Subtarget->hasRelaxedSIMD()) {
333 }
334
335 // Partial MLA reductions.
337 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
338 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
339 }
340 }
341
342 // As a special case, these operators use the type to mean the type to
343 // sign-extend from.
345 if (!Subtarget->hasSignExt()) {
346 // Sign extends are legal only when extending a vector extract
347 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
348 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
350 }
353
354 // Dynamic stack allocation: use the default expansion.
358
362
363 // Expand these forms; we pattern-match the forms that we can handle in isel.
364 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
365 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
367
368 if (Subtarget->hasReferenceTypes())
369 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
370 for (auto T : {MVT::externref, MVT::funcref})
372
373 // There is no vector conditional select instruction
374 for (auto T :
375 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
377
378 // We have custom switch handling.
380
381 // WebAssembly doesn't have:
382 // - Floating-point extending loads.
383 // - Floating-point truncating stores.
384 // - i1 extending loads.
385 // - truncating SIMD stores and most extending loads
386 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
387 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
388 for (auto T : MVT::integer_valuetypes())
389 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
390 setLoadExtAction(Ext, T, MVT::i1, Promote);
391 if (Subtarget->hasSIMD128()) {
392 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
393 MVT::v2f64}) {
394 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
395 if (MVT(T) != MemT) {
397 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
398 setLoadExtAction(Ext, T, MemT, Expand);
399 }
400 }
401 }
402 // But some vector extending loads are legal
403 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
404 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
405 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
406 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
407 }
408 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
409 }
410
411 // Don't do anything clever with build_pairs
413
414 // Trap lowers to wasm unreachable
415 setOperationAction(ISD::TRAP, MVT::Other, Legal);
417
418 // Exception handling intrinsics
422
424
425 // Always convert switches to br_tables unless there is only one case, which
426 // is equivalent to a simple branch. This reduces code size for wasm, and we
427 // defer possible jump table optimizations to the VM.
429}
430
439
448
// Decide how an atomicrmw instruction should be lowered. Per the comment
// below, wasm has native instructions for the operations matched by the
// switch; the exact case list and the fallback expansion kind are decided
// here (default falls through to the tail of the function).
450WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
451 const AtomicRMWInst *AI) const {
452 // We have wasm instructions for these
453 switch (AI->getOperation()) {
461 default:
462 break;
463 }
465}
466
// Returns true when a vector binop fed by this value would be better off
// scalarized. Mirrors the X86 heuristic: never scalarize target opcodes,
// prefer scalar when the vector op is unsupported, and otherwise require
// the scalar form to be at least legal/custom/promoted.
467bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
468 // Implementation copied from X86TargetLowering.
469 unsigned Opc = VecOp.getOpcode();
470
471 // Assume target opcodes can't be scalarized.
472 // TODO - do we have any exceptions?
474 return false;
475
476 // If the vector op is not supported, try to convert to scalar.
477 EVT VecVT = VecOp.getValueType();
479 return true;
480
481 // If the vector op is supported, but the scalar op is not, the transform may
482 // not be worthwhile.
483 EVT ScalarVT = VecVT.getScalarType();
484 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
485}
486
// Thin factory hook: delegate FastISel construction to the
// WebAssembly-specific implementation, forwarding all arguments unchanged.
487FastISel *WebAssemblyTargetLowering::createFastISel(
488 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
489 const LibcallLoweringInfo *LibcallLowering) const {
490 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
491}
492
// Choose the integer type used for scalar shift amounts of a value of type
// VT: round the width up to a power of two with a floor of 8 bits, and cap
// oversized widths at 32 because such shifts become libcalls.
493MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
494 EVT VT) const {
// NextPowerOf2(n - 1) rounds n up to the nearest power of two.
495 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
496 if (BitWidth > 1 && BitWidth < 8)
497 BitWidth = 8;
498
499 if (BitWidth > 64) {
500 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
501 // the count to be an i32.
502 BitWidth = 32;
504 "32-bit shift counts ought to be enough for anyone");
505 }
506
509 "Unable to represent scalar shift amount type");
510 return Result;
511}
512
513// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
514// undefined result on invalid/overflow, to the WebAssembly opcode, which
515// traps on invalid/overflow.
//
// A CFG diamond is built around the trapping conversion: the True block
// materializes a substitute constant for out-of-range inputs, the False block
// performs the real conversion, and Done rejoins the two with a PHI.
518 const TargetInstrInfo &TII,
519 bool IsUnsigned, bool Int64,
520 bool Float64, unsigned LoweredOpcode) {
522
523 Register OutReg = MI.getOperand(0).getReg();
524 Register InReg = MI.getOperand(1).getReg();
525
// Select the 32- vs 64-bit flavors of the opcodes used below.
526 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
527 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
528 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
529 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
530 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
531 unsigned Eqz = WebAssembly::EQZ_I32;
532 unsigned And = WebAssembly::AND_I32;
// Out-of-range inputs get a substitute result: 0 for unsigned conversions,
// INT{32,64}_MIN for signed ones. CmpVal is the exclusive upper bound on
// |input| (2^N for unsigned, 2^(N-1) for signed).
533 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
534 int64_t Substitute = IsUnsigned ? 0 : Limit;
535 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
536 auto &Context = BB->getParent()->getFunction().getContext();
537 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
538
539 const BasicBlock *LLVMBB = BB->getBasicBlock();
540 MachineFunction *F = BB->getParent();
541 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
542 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
543 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
544
546 F->insert(It, FalseMBB);
547 F->insert(It, TrueMBB);
548 F->insert(It, DoneMBB);
549
550 // Transfer the remainder of BB and its successor edges to DoneMBB.
551 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
553
554 BB->addSuccessor(TrueMBB);
555 BB->addSuccessor(FalseMBB);
556 TrueMBB->addSuccessor(DoneMBB);
557 FalseMBB->addSuccessor(DoneMBB);
558
559 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
560 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
561 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
562 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
563 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
564 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
565 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
566
567 MI.eraseFromParent();
568 // For signed numbers, we can do a single comparison to determine whether
569 // fabs(x) is within range.
570 if (IsUnsigned) {
571 Tmp0 = InReg;
572 } else {
573 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
574 }
575 BuildMI(BB, DL, TII.get(FConst), Tmp1)
576 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
577 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
578
579 // For unsigned numbers, we have to do a separate comparison with zero.
580 if (IsUnsigned) {
581 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
582 Register SecondCmpReg =
583 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
584 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
585 BuildMI(BB, DL, TII.get(FConst), Tmp1)
586 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
587 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
588 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
589 CmpReg = AndReg;
590 }
591
// EqzReg is 1 exactly when the input is NOT in range for the conversion.
592 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
593
594 // Create the CFG diamond to select between doing the conversion or using
595 // the substitute value.
596 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
597 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
598 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
599 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
600 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
601 .addReg(FalseReg)
602 .addMBB(FalseMBB)
603 .addReg(TrueReg)
604 .addMBB(TrueMBB);
605
606 return DoneMBB;
607}
608
609// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
610// instruction to handle the zero-length case.
613 const TargetInstrInfo &TII, bool Int64) {
615
// Operand layout of the MEMCPY pseudo: dst memory index, src memory index,
// dst address, src address, length.
616 MachineOperand DstMem = MI.getOperand(0);
617 MachineOperand SrcMem = MI.getOperand(1);
618 MachineOperand Dst = MI.getOperand(2);
619 MachineOperand Src = MI.getOperand(3);
620 MachineOperand Len = MI.getOperand(4);
621
622 // If the length is a constant, we don't actually need the check.
623 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
624 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
625 Def->getOpcode() == WebAssembly::CONST_I64) {
626 if (Def->getOperand(1).getImm() == 0) {
627 // A zero-length memcpy is a no-op.
628 MI.eraseFromParent();
629 return BB;
630 }
631 // A non-zero-length memcpy doesn't need a zero check.
632 unsigned MemoryCopy =
633 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
634 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
635 .add(DstMem)
636 .add(SrcMem)
637 .add(Dst)
638 .add(Src)
639 .add(Len);
640 MI.eraseFromParent();
641 return BB;
642 }
643 }
644
645 // We're going to add an extra use to `Len` to test if it's zero; that
646 // use shouldn't be a kill, even if the original use is.
647 MachineOperand NoKillLen = Len;
648 NoKillLen.setIsKill(false);
649
650 // Decide on which `MachineInstr` opcode we're going to use.
651 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
652 unsigned MemoryCopy =
653 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
654
655 // Create two new basic blocks; one for the new `memory.copy` that we can
656 // branch over, and one for the rest of the instructions after the original
657 // `memory.copy`.
658 const BasicBlock *LLVMBB = BB->getBasicBlock();
659 MachineFunction *F = BB->getParent();
660 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
661 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
662
664 F->insert(It, TrueMBB);
665 F->insert(It, DoneMBB);
666
667 // Transfer the remainder of BB and its successor edges to DoneMBB.
668 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
670
671 // Connect the CFG edges.
672 BB->addSuccessor(TrueMBB);
673 BB->addSuccessor(DoneMBB);
674 TrueMBB->addSuccessor(DoneMBB);
675
676 // Create a virtual register for the `Eqz` result.
677 unsigned EqzReg;
678 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
679
680 // Erase the original `memory.copy`.
681 MI.eraseFromParent();
682
683 // Test if `Len` is zero.
684 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
685
686 // Insert a new `memory.copy`.
687 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
688 .add(DstMem)
689 .add(SrcMem)
690 .add(Dst)
691 .add(Src)
692 .add(Len);
693
694 // Create the CFG triangle.
695 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
696 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
697
698 return DoneMBB;
699}
700
701// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
702// instruction to handle the zero-length case.
705 const TargetInstrInfo &TII, bool Int64) {
707
// Operand layout of the MEMSET pseudo: memory index, dst address, fill
// value, length.
708 MachineOperand Mem = MI.getOperand(0);
709 MachineOperand Dst = MI.getOperand(1);
710 MachineOperand Val = MI.getOperand(2);
711 MachineOperand Len = MI.getOperand(3);
712
713 // If the length is a constant, we don't actually need the check.
714 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
715 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
716 Def->getOpcode() == WebAssembly::CONST_I64) {
717 if (Def->getOperand(1).getImm() == 0) {
718 // A zero-length memset is a no-op.
719 MI.eraseFromParent();
720 return BB;
721 }
722 // A non-zero-length memset doesn't need a zero check.
723 unsigned MemoryFill =
724 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
725 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
726 .add(Mem)
727 .add(Dst)
728 .add(Val)
729 .add(Len);
730 MI.eraseFromParent();
731 return BB;
732 }
733 }
734
735 // We're going to add an extra use to `Len` to test if it's zero; that
736 // use shouldn't be a kill, even if the original use is.
737 MachineOperand NoKillLen = Len;
738 NoKillLen.setIsKill(false);
739
740 // Decide on which `MachineInstr` opcode we're going to use.
741 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
742 unsigned MemoryFill =
743 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
744
745 // Create two new basic blocks; one for the new `memory.fill` that we can
746 // branch over, and one for the rest of the instructions after the original
747 // `memory.fill`.
748 const BasicBlock *LLVMBB = BB->getBasicBlock();
749 MachineFunction *F = BB->getParent();
750 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
751 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
752
754 F->insert(It, TrueMBB);
755 F->insert(It, DoneMBB);
756
757 // Transfer the remainder of BB and its successor edges to DoneMBB.
758 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
760
761 // Connect the CFG edges.
762 BB->addSuccessor(TrueMBB);
763 BB->addSuccessor(DoneMBB);
764 TrueMBB->addSuccessor(DoneMBB);
765
766 // Create a virtual register for the `Eqz` result.
767 unsigned EqzReg;
768 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
769
770 // Erase the original `memory.fill`.
771 MI.eraseFromParent();
772
773 // Test if `Len` is zero.
774 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
775
776 // Insert a new `memory.fill`.
777 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
778
779 // Create the CFG triangle.
780 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
781 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
782
783 return DoneMBB;
784}
785
// Rewrite a CALL_PARAMS/CALL_RESULTS (or RET_CALL_RESULTS) pseudo pair into a
// single real call instruction: CALL, CALL_INDIRECT, RET_CALL, or
// RET_CALL_INDIRECT. Handles moving the callee operand into position, the
// type-index/table operands of call_indirect, and the funcref call table
// bookkeeping.
786static MachineBasicBlock *
788 const WebAssemblySubtarget *Subtarget,
789 const TargetInstrInfo &TII) {
790 MachineInstr &CallParams = *CallResults.getPrevNode();
791 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
792 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
793 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
794
// A register or frame-index callee means the call is indirect.
795 bool IsIndirect =
796 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
797 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
798
799 bool IsFuncrefCall = false;
800 if (IsIndirect && CallParams.getOperand(0).isReg()) {
801 Register Reg = CallParams.getOperand(0).getReg();
802 const MachineFunction *MF = BB->getParent();
803 const MachineRegisterInfo &MRI = MF->getRegInfo();
804 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
805 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
806 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
807 }
808
// Select the real call opcode from the (indirect, tail-call) combination.
809 unsigned CallOp;
810 if (IsIndirect && IsRetCall) {
811 CallOp = WebAssembly::RET_CALL_INDIRECT;
812 } else if (IsIndirect) {
813 CallOp = WebAssembly::CALL_INDIRECT;
814 } else if (IsRetCall) {
815 CallOp = WebAssembly::RET_CALL;
816 } else {
817 CallOp = WebAssembly::CALL;
818 }
819
820 MachineFunction &MF = *BB->getParent();
821 const MCInstrDesc &MCID = TII.get(CallOp);
822 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
823
824 // Move the function pointer to the end of the arguments for indirect calls
825 if (IsIndirect) {
826 auto FnPtr = CallParams.getOperand(0);
827 CallParams.removeOperand(0);
828
829 // For funcrefs, call_indirect is done through __funcref_call_table and the
830 // funcref is always installed in slot 0 of the table, therefore instead of
831 // having the function pointer added at the end of the params list, a zero
832 // (the index in
833 // __funcref_call_table is added).
834 if (IsFuncrefCall) {
835 Register RegZero =
836 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
837 MachineInstrBuilder MIBC0 =
838 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
839
840 BB->insert(CallResults.getIterator(), MIBC0);
841 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
842 } else
843 CallParams.addOperand(FnPtr);
844 }
845
// Results first, then (for indirect calls) type index and table, then the
// forwarded arguments — matching the operand order of the real call.
846 for (auto Def : CallResults.defs())
847 MIB.add(Def);
848
849 if (IsIndirect) {
850 // Placeholder for the type index.
851 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
852 MIB.addImm(0);
853 // The table into which this call_indirect indexes.
854 MCSymbolWasm *Table = IsFuncrefCall
856 MF.getContext(), Subtarget)
858 MF.getContext(), Subtarget);
859 if (Subtarget->hasCallIndirectOverlong()) {
860 MIB.addSym(Table);
861 } else {
862 // For the MVP there is at most one table whose number is 0, but we can't
863 // write a table symbol or issue relocations. Instead we just ensure the
864 // table is live and write a zero.
865 Table->setNoStrip();
866 MIB.addImm(0);
867 }
868 }
869
870 for (auto Use : CallParams.uses())
871 MIB.add(Use);
872
873 BB->insert(CallResults.getIterator(), MIB);
874 CallParams.eraseFromParent();
875 CallResults.eraseFromParent();
876
877 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
878 // table slot with ref.null upon call_indirect return.
879 //
880 // This generates the following code, which comes right after a call_indirect
881 // of a funcref:
882 //
883 // i32.const 0
884 // ref.null func
885 // table.set __funcref_call_table
886 if (IsIndirect && IsFuncrefCall) {
888 MF.getContext(), Subtarget);
889 Register RegZero =
890 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
891 MachineInstr *Const0 =
892 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
893 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
894
895 Register RegFuncref =
896 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
897 MachineInstr *RefNull =
898 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
899 BB->insertAfter(Const0->getIterator(), RefNull);
900
901 MachineInstr *TableSet =
902 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
903 .addSym(Table)
904 .addReg(RegZero)
905 .addReg(RegFuncref);
906 BB->insertAfter(RefNull->getIterator(), TableSet);
907 }
908
909 return BB;
910}
911
912MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
913 MachineInstr &MI, MachineBasicBlock *BB) const {
914 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
915 DebugLoc DL = MI.getDebugLoc();
916
917 switch (MI.getOpcode()) {
918 default:
919 llvm_unreachable("Unexpected instr type to insert");
920 case WebAssembly::FP_TO_SINT_I32_F32:
921 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
922 WebAssembly::I32_TRUNC_S_F32);
923 case WebAssembly::FP_TO_UINT_I32_F32:
924 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
925 WebAssembly::I32_TRUNC_U_F32);
926 case WebAssembly::FP_TO_SINT_I64_F32:
927 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
928 WebAssembly::I64_TRUNC_S_F32);
929 case WebAssembly::FP_TO_UINT_I64_F32:
930 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
931 WebAssembly::I64_TRUNC_U_F32);
932 case WebAssembly::FP_TO_SINT_I32_F64:
933 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
934 WebAssembly::I32_TRUNC_S_F64);
935 case WebAssembly::FP_TO_UINT_I32_F64:
936 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
937 WebAssembly::I32_TRUNC_U_F64);
938 case WebAssembly::FP_TO_SINT_I64_F64:
939 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
940 WebAssembly::I64_TRUNC_S_F64);
941 case WebAssembly::FP_TO_UINT_I64_F64:
942 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
943 WebAssembly::I64_TRUNC_U_F64);
944 case WebAssembly::MEMCPY_A32:
945 return LowerMemcpy(MI, DL, BB, TII, false);
946 case WebAssembly::MEMCPY_A64:
947 return LowerMemcpy(MI, DL, BB, TII, true);
948 case WebAssembly::MEMSET_A32:
949 return LowerMemset(MI, DL, BB, TII, false);
950 case WebAssembly::MEMSET_A64:
951 return LowerMemset(MI, DL, BB, TII, true);
952 case WebAssembly::CALL_RESULTS:
953 case WebAssembly::RET_CALL_RESULTS:
954 return LowerCallResults(MI, DL, BB, Subtarget, TII);
955 }
956}
957
// Map single-letter inline-asm constraints to WebAssembly register classes.
// Only 'r' is handled specially; it chooses a class based on the value type.
958std::pair<unsigned, const TargetRegisterClass *>
959WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
960 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
961 // First, see if this is a constraint that directly corresponds to a
962 // WebAssembly register class.
963 if (Constraint.size() == 1) {
964 switch (Constraint[0]) {
965 case 'r':
966 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
967 if (Subtarget->hasSIMD128() && VT.isVector()) {
968 if (VT.getSizeInBits() == 128)
969 return std::make_pair(0U, &WebAssembly::V128RegClass);
970 }
971 if (VT.isInteger() && !VT.isVector()) {
972 if (VT.getSizeInBits() <= 32)
973 return std::make_pair(0U, &WebAssembly::I32RegClass);
974 if (VT.getSizeInBits() <= 64)
975 return std::make_pair(0U, &WebAssembly::I64RegClass);
976 }
977 if (VT.isFloatingPoint() && !VT.isVector()) {
978 switch (VT.getSizeInBits()) {
979 case 32:
980 return std::make_pair(0U, &WebAssembly::F32RegClass);
981 case 64:
982 return std::make_pair(0U, &WebAssembly::F64RegClass);
983 default:
984 break;
985 }
986 }
987 break;
988 default:
989 break;
990 }
991 }
992
// NOTE(review): anything not matched above presumably defers to the generic
// TargetLowering handling — confirm against the base-class implementation.
994}
995
// Hook: allow cttz to be executed speculatively (e.g. hoisted above its
// zero guard) — it is treated as cheap on this target.
996bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
997 // Assume ctz is a relatively cheap operation.
998 return true;
999}
1000
// Hook: allow ctlz to be executed speculatively — it is treated as cheap on
// this target.
1001bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
1002 // Assume clz is a relatively cheap operation.
1003 return true;
1004}
1005
1006bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1007 const AddrMode &AM,
1008 Type *Ty, unsigned AS,
1009 Instruction *I) const {
1010 // WebAssembly offsets are added as unsigned without wrapping. The
1011 // isLegalAddressingMode gives us no way to determine if wrapping could be
1012 // happening, so we approximate this by accepting only non-negative offsets.
1013 if (AM.BaseOffs < 0)
1014 return false;
1015
1016 // WebAssembly has no scale register operands.
1017 if (AM.Scale != 0)
1018 return false;
1019
1020 // Everything else is legal.
1021 return true;
1022}
1023
1024bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1025 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1026 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1027 // WebAssembly supports unaligned accesses, though it should be declared
1028 // with the p2align attribute on loads and stores which do so, and there
1029 // may be a performance impact. We tell LLVM they're "fast" because
1030 // for the kinds of things that LLVM uses this for (merging adjacent stores
1031 // of constants, etc.), WebAssembly implementations will either want the
1032 // unaligned access or they'll split anyway.
1033 if (Fast)
1034 *Fast = 1;
1035 return true;
1036}
1037
// Hook: report integer division as cheap so LLVM keeps `div` instructions
// instead of expanding them.
1038bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1039 AttributeList Attr) const {
1040 // The current thinking is that wasm engines will perform this optimization,
1041 // so we can save on code size.
1042 return true;
1043}
1044
1045bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1046 EVT ExtT = ExtVal.getValueType();
1047 SDValue N0 = ExtVal->getOperand(0);
1048 if (N0.getOpcode() == ISD::FREEZE)
1049 N0 = N0.getOperand(0);
1050 auto *Load = dyn_cast<LoadSDNode>(N0);
1051 if (!Load)
1052 return false;
1053 EVT MemT = Load->getValueType(0);
1054 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1055 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1056 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1057}
1058
// Whether a constant offset may be folded into a global-address operand.
bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // Wasm doesn't support function addresses with offsets
  const GlobalValue *GV = GA->getGlobal();
  // NOTE(review): this excerpt is missing the return statement here
  // (presumably it returns whether GV is not a Function, possibly also
  // gating on offset sign) — confirm against upstream LLVM.
}
// Result type produced by SETCC: lane-wise integer masks for vectors, i32 for
// scalars.
EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
                                                  LLVMContext &C,
                                                  EVT VT) const {
  // NOTE(review): the body of this `if` is missing from this excerpt —
  // presumably it returns VT.changeVectorElementTypeToInteger(); confirm
  // against upstream LLVM.
  if (VT.isVector())

  // So far, all branch instructions in Wasm take an I32 condition.
  // The default TargetLowering::getSetCCResultType returns the pointer size,
  // which would be useful to reduce instruction counts when testing
  // against 64-bit pointers/values if at some point Wasm supports that.
  return EVT::getIntegerVT(C, 32);
}
1078
// Describe the memory behavior of wasm target intrinsics so the DAG can
// attach MachineMemOperands: memVT, pointer operand, offset, and alignment
// for each intrinsic that touches memory.
// NOTE(review): this excerpt is missing several lines — the remaining
// signature parameters (the Infos vector and the CallBase `I`), the local
// `IntrinsicInfo Info;` declaration, and the Info.opc / Info.flags
// assignments in each case — confirm against upstream LLVM.
void WebAssemblyTargetLowering::getTgtMemIntrinsic(
    MachineFunction &MF, unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::wasm_memory_atomic_notify:
    // notify operates on an i32 waiter count at a 4-byte-aligned address.
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    // atomic.notify instruction does not really load the memory specified with
    // this argument, but MachineMemOperand should either be load or store, so
    // we set this to a load.
    // FIXME Volatile isn't really correct, but currently all LLVM atomic
    // instructions are treated as volatiles in the backend, so we should be
    // consistent. The same applies for wasm_atomic_wait intrinsics too.
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_memory_atomic_wait32:
    // 32-bit wait: compares an i32 at a 4-byte-aligned address.
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_memory_atomic_wait64:
    // 64-bit wait: compares an i64 at an 8-byte-aligned address.
    Info.memVT = MVT::i64;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(8);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_loadf16_f32:
    // Reads a 2-byte f16 from memory (widened to f32 by the instruction).
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(2);
    Infos.push_back(Info);
    return;
  case Intrinsic::wasm_storef16_f32:
    // Writes a 2-byte f16; the pointer is the *second* argument here.
    Info.memVT = MVT::f16;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.align = Align(2);
    Infos.push_back(Info);
    return;
  default:
    // Non-memory intrinsics: report nothing.
    return;
  }
}
1139
// Compute known-zero bits for wasm-specific nodes: bitmask results only use
// their low lanes' worth of bits, unsigned extends zero the widened high
// half, and add128's carry result is 0 or 1 when both high inputs are zero.
// NOTE(review): this excerpt is missing the `case ISD::INTRINSIC_WO_CHAIN:`
// label opening the first nested switch and the `APInt Mask = ...`
// definitions inside the EXTEND_*_U branches — confirm against upstream LLVM.
void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    break;
    unsigned IntNo = Op.getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::wasm_bitmask: {
      // bitmask produces one bit per input lane; everything above the lane
      // count is known zero.
      unsigned BitWidth = Known.getBitWidth();
      EVT VT = Op.getOperand(1).getSimpleValueType();
      unsigned PossibleBits = VT.getVectorNumElements();
      APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
      Known.Zero |= ZeroMask;
      break;
    }
    }
    break;
  }
  case WebAssemblyISD::EXTEND_LOW_U:
  case WebAssemblyISD::EXTEND_HIGH_U: {
    // We know the high half, of each destination vector element, will be zero.
    SDValue SrcOp = Op.getOperand(0);
    EVT VT = SrcOp.getSimpleValueType();
    unsigned BitWidth = Known.getBitWidth();
    if (VT == MVT::v8i8 || VT == MVT::v16i8) {
      assert(BitWidth >= 8 && "Unexpected width!");
      Known.Zero |= Mask;
    } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
      assert(BitWidth >= 16 && "Unexpected width!");
      Known.Zero |= Mask;
    } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
      assert(BitWidth >= 32 && "Unexpected width!");
      Known.Zero |= Mask;
    }
    break;
  }
  // For 128-bit addition if the upper bits are all zero then it's known that
  // the upper bits of the result will have all bits guaranteed zero except the
  // first.
  case WebAssemblyISD::I64_ADD128:
    if (Op.getResNo() == 1) {
      SDValue LHS_HI = Op.getOperand(1);
      SDValue RHS_HI = Op.getOperand(3);
      if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
        Known.Zero.setBitsFrom(1);
    }
    break;
  }
}
1196
// Prefer widening illegal fixed vectors whose lane type matches a legal
// 128-bit SIMD lane, so lanes can be used directly without per-lane
// extend/truncate.
// NOTE(review): this excerpt is missing the return-type line of this
// definition (TargetLoweringBase::LegalizeTypeAction) and the trailing
// fallback return delegating to the base class — confirm against upstream.
WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
  if (VT.isFixedLengthVector()) {
    MVT EltVT = VT.getVectorElementType();
    // We have legal vector types with these lane types, so widening the
    // vector would let us use some of the lanes directly without having to
    // extend or truncate values.
    if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
        EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
      return TypeWidenVector;
  }

}
1211
1212bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1213 const MachineFunction &MF, EVT VT) const {
1214 if (!Subtarget->hasFP16() || !VT.isVector())
1215 return false;
1216
1217 EVT ScalarVT = VT.getScalarType();
1218 if (!ScalarVT.isSimple())
1219 return false;
1220
1221 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1222}
1223
1224bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1225 SDValue Op, const TargetLoweringOpt &TLO) const {
1226 // ISel process runs DAGCombiner after legalization; this step is called
1227 // SelectionDAG optimization phase. This post-legalization combining process
1228 // runs DAGCombiner on each node, and if there was a change to be made,
1229 // re-runs legalization again on it and its user nodes to make sure
1230 // everythiing is in a legalized state.
1231 //
1232 // The legalization calls lowering routines, and we do our custom lowering for
1233 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1234 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1235 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1236 // turns unused vector elements into undefs. But this routine does not work
1237 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1238 // combination can result in a infinite loop, in which undefs are converted to
1239 // zeros in legalization and back to undefs in combining.
1240 //
1241 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1242 // running for build_vectors.
1243 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1244 return false;
1245 return true;
1246}
1247
1248//===----------------------------------------------------------------------===//
1249// WebAssembly Lowering private implementation.
1250//===----------------------------------------------------------------------===//
1251
1252//===----------------------------------------------------------------------===//
1253// Lowering Code
1254//===----------------------------------------------------------------------===//
1255
// Emit a DiagnosticInfoUnsupported for the current function with message Msg,
// attributed to the source location carried by DL.
static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
  // NOTE(review): this excerpt is missing the line defining `MF` (presumably
  // `MachineFunction &MF = DAG.getMachineFunction();`) — confirm upstream.
  DAG.getContext()->diagnose(
      DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
}
1261
// Test whether the given calling convention is supported.
// NOTE(review): this excerpt is missing the function signature line
// (presumably `static bool callingConvSupported(CallingConv::ID CallConv) {`)
// and one disjunct in the return (WASM_EmscriptenInvoke) — confirm upstream.
  // We currently support the language-independent target-independent
  // conventions. We don't yet have a way to annotate calls with properties like
  // "cold", and we don't have any call-clobbered registers, so these are mostly
  // all handled the same.
  return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
         CallConv == CallingConv::Cold ||
         CallConv == CallingConv::PreserveMost ||
         CallConv == CallingConv::PreserveAll ||
         CallConv == CallingConv::CXX_FAST_TLS ||
         CallConv == CallingConv::Swift;
}
1276
// Lower an outgoing call: diagnose unsupported conventions/features via
// fail(), demote unsupportable tail calls to regular calls, copy non-empty
// byval arguments into fresh stack objects, spill variadic arguments into a
// caller-allocated buffer, redirect funcref callees through a table.set, and
// finally emit a WebAssemblyISD::CALL (or RET_CALL for tail calls),
// collecting the call's results into InVals.
// NOTE(review): this excerpt is missing a number of lines (several local
// SmallVector declarations, one `if` guard, and continuation lines of a few
// multi-line calls); they are flagged inline below — confirm against
// upstream LLVM before editing further.
SDValue
WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  MachineFunction &MF = DAG.getMachineFunction();
  auto Layout = MF.getDataLayout();

  CallingConv::ID CallConv = CLI.CallConv;
  if (!callingConvSupported(CallConv))
    fail(DL, DAG,
         "WebAssembly doesn't support language-specific or target-specific "
         "calling conventions yet");
  if (CLI.IsPatchPoint)
    fail(DL, DAG, "WebAssembly doesn't support patch point yet");

  if (CLI.IsTailCall) {
    // Demote to a regular call; only diagnose when the source *required* a
    // tail call via musttail.
    auto NoTail = [&](const char *Msg) {
      if (CLI.CB && CLI.CB->isMustTailCall())
        fail(DL, DAG, Msg);
      CLI.IsTailCall = false;
    };

    if (!Subtarget->hasTailCall())
      NoTail("WebAssembly 'tail-call' feature not enabled");

    // Varargs calls cannot be tail calls because the buffer is on the stack
    if (CLI.IsVarArg)
      NoTail("WebAssembly does not support varargs tail calls");

    // Do not tail call unless caller and callee return types match
    const Function &F = MF.getFunction();
    const TargetMachine &TM = getTargetMachine();
    Type *RetTy = F.getReturnType();
    SmallVector<MVT, 4> CallerRetTys;
    SmallVector<MVT, 4> CalleeRetTys;
    computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
    computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
    bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
                      std::equal(CallerRetTys.begin(), CallerRetTys.end(),
                                 CalleeRetTys.begin());
    if (!TypesMatch)
      NoTail("WebAssembly tail call requires caller and callee return types to "
             "match");

    // If pointers to local stack values are passed, we cannot tail call
    if (CLI.CB) {
      for (auto &Arg : CLI.CB->args()) {
        Value *Val = Arg.get();
        // Trace the value back through pointer operations
        while (true) {
          Value *Src = Val->stripPointerCastsAndAliases();
          if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
            Src = GEP->getPointerOperand();
          if (Val == Src)
            break;
          Val = Src;
        }
        if (isa<AllocaInst>(Val)) {
          NoTail(
              "WebAssembly does not support tail calling with stack arguments");
          break;
        }
      }
    }
  }

  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;

  // The generic code may have added an sret argument. If we're lowering an
  // invoke function, the ABI requires that the function pointer be the first
  // argument, so we may have to swap the arguments.
  if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
      Outs[0].Flags.isSRet()) {
    std::swap(Outs[0], Outs[1]);
    std::swap(OutVals[0], OutVals[1]);
  }

  bool HasSwiftSelfArg = false;
  bool HasSwiftErrorArg = false;
  unsigned NumFixedArgs = 0;
  for (unsigned I = 0; I < Outs.size(); ++I) {
    const ISD::OutputArg &Out = Outs[I];
    SDValue &OutVal = OutVals[I];
    HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
    HasSwiftErrorArg |= Out.Flags.isSwiftError();
    if (Out.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    // NOTE(review): the guarding `if (Out.Flags.isInConsecutiveRegsLast())`
    // is missing from this excerpt — confirm upstream.
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
      // Copy the byval aggregate into a fresh stack object and pass a frame
      // index to the copy instead of the original pointer.
      auto &MFI = MF.getFrameInfo();
      // NOTE(review): the alignment argument line of CreateStackObject is
      // missing from this excerpt — confirm upstream.
      int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
                                     /*isSS=*/false);
      SDValue SizeNode =
          DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
      SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      // NOTE(review): the alignment argument line of getMemcpy is missing
      // from this excerpt — confirm upstream.
      Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
                            /*isVolatile*/ false, /*AlwaysInline=*/false,
                            /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
                            MachinePointerInfo());
      OutVal = FINode;
    }
    // Count the number of fixed args *after* legalization.
    NumFixedArgs += !Out.Flags.isVarArg();
  }

  bool IsVarArg = CLI.IsVarArg;
  auto PtrVT = getPointerTy(Layout);

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't. These additional arguments are also added for callee
  // signature They are necessary to match callee and caller signature for
  // indirect call.
  if (CallConv == CallingConv::Swift) {
    Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
    if (!HasSwiftSelfArg) {
      NumFixedArgs++;
      ISD::ArgFlagsTy Flags;
      Flags.setSwiftSelf();
      ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
    if (!HasSwiftErrorArg) {
      NumFixedArgs++;
      ISD::ArgFlagsTy Flags;
      Flags.setSwiftError();
      ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
      CLI.Outs.push_back(Arg);
      SDValue ArgVal = DAG.getUNDEF(PtrVT);
      CLI.OutVals.push_back(ArgVal);
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  // NOTE(review): the `SmallVector<CCValAssign, 16> ArgLocs;` declaration is
  // missing from this excerpt — confirm upstream.
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  if (IsVarArg) {
    // Outgoing non-fixed arguments are placed in a buffer. First
    // compute their offsets and the total amount of buffer space needed.
    for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
      const ISD::OutputArg &Out = Outs[I];
      SDValue &Arg = OutVals[I];
      EVT VT = Arg.getValueType();
      assert(VT != MVT::iPTR && "Legalized args should be concrete");
      Type *Ty = VT.getTypeForEVT(*DAG.getContext());
      Align Alignment =
          std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
      unsigned Offset =
          CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
      // NOTE(review): the closing argument line of addLoc is missing from
      // this excerpt — confirm upstream.
      CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
                                        Offset, VT.getSimpleVT(),
    }
  }

  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();

  SDValue FINode;
  if (IsVarArg && NumBytes) {
    // For non-fixed arguments, next emit stores to store the argument values
    // to the stack buffer at the offsets computed above.
    MaybeAlign StackAlign = Layout.getStackAlignment();
    assert(StackAlign && "data layout string is missing stack alignment");
    int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
                                                 /*isSS=*/false);
    unsigned ValNo = 0;
    // NOTE(review): the `Chains` vector declaration is missing from this
    // excerpt — confirm upstream.
    for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
      assert(ArgLocs[ValNo].getValNo() == ValNo &&
             "ArgLocs should remain in order and only hold varargs args");
      unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
      FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
      SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
                                DAG.getConstant(Offset, DL, PtrVT));
      // NOTE(review): the MachinePointerInfo argument line of getStore is
      // missing from this excerpt — confirm upstream.
      Chains.push_back(
          DAG.getStore(Chain, DL, Arg, Add,
    }
    if (!Chains.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  } else if (IsVarArg) {
    FINode = DAG.getIntPtrConstant(0, DL);
  }

  if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT which is not needed for direct calls.
    GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
    // NOTE(review): the `Callee = DAG.getTargetGlobalAddress(...` lines are
    // missing from this excerpt — confirm upstream.
                                       GA->getOffset());
    Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
                         getPointerTy(DAG.getDataLayout()), Callee);
  }

  // Compute the operands for the CALLn node.
  // NOTE(review): the `Ops` vector declaration is missing from this excerpt
  // — confirm upstream.
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
  // isn't reliable.
  Ops.append(OutVals.begin(),
             IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
  // Add a pointer to the vararg buffer.
  if (IsVarArg)
    Ops.push_back(FINode);

  SmallVector<EVT, 8> InTys;
  for (const auto &In : Ins) {
    assert(!In.Flags.isByVal() && "byval is not valid for return values");
    assert(!In.Flags.isNest() && "nest is not valid for return values");
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG,
           "WebAssembly hasn't implemented cons regs last return values");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InTys.push_back(In.VT);
  }

  // Lastly, if this is a call to a funcref we need to add an instruction
  // table.set to the chain and transform the call.
  // NOTE(review): the `if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType(`
  // guard line is missing from this excerpt — confirm upstream.
      CLI.CB->getCalledOperand()->getType())) {
    // In the absence of function references proposal where a funcref call is
    // lowered to call_ref, using reference types we generate a table.set to set
    // the funcref to a special table used solely for this purpose, followed by
    // a call_indirect. Here we just generate the table set, and return the
    // SDValue of the table.set so that LowerCall can finalize the lowering by
    // generating the call_indirect.
    SDValue Chain = Ops[0];

    // NOTE(review): the `MCSymbolWasm *Table = ...` line is missing from
    // this excerpt — confirm upstream.
        MF.getContext(), Subtarget);
    SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
    SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
    SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
    SDValue TableSet = DAG.getMemIntrinsicNode(
        WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
        MVT::funcref,
        // Machine Mem Operand args
        MachinePointerInfo(
        CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),

    Ops[0] = TableSet; // The new chain is the TableSet itself
  }

  if (CLI.IsTailCall) {
    // ret_calls do not return values to the current frame
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
  }

  InTys.push_back(MVT::Other);
  SDVTList InTyList = DAG.getVTList(InTys);
  SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);

  // Collect each declared result value of the CALL node.
  for (size_t I = 0; I < Ins.size(); ++I)
    InVals.push_back(Res.getValue(I));

  // Return the chain
  return Res.getValue(Ins.size());
}
1561
1562bool WebAssemblyTargetLowering::CanLowerReturn(
1563 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1564 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1565 const Type *RetTy) const {
1566 // WebAssembly can only handle returning tuples with multivalue enabled
1567 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1568}
1569
// Lower a function return to a WebAssemblyISD::RETURN node carrying all
// return values, diagnosing unsupported conventions and argument flags.
// NOTE(review): this excerpt is missing the `Outs` parameter line of the
// signature and the `if (Out.Flags.isInConsecutiveRegsLast())` guard near
// the bottom — confirm against upstream LLVM.
SDValue WebAssemblyTargetLowering::LowerReturn(
    SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
    const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
    SelectionDAG &DAG) const {
  assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
         "MVP WebAssembly can only return up to one value");
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  // Operands are the chain followed by every return value.
  SmallVector<SDValue, 4> RetOps(1, Chain);
  RetOps.append(OutVals.begin(), OutVals.end());
  Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);

  // Record the number and types of the return values.
  for (const ISD::OutputArg &Out : Outs) {
    assert(!Out.Flags.isByVal() && "byval is not valid for return values");
    assert(!Out.Flags.isNest() && "nest is not valid for return values");
    assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
    if (Out.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
    if (Out.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
  }

  return Chain;
}
1599
// Lower incoming formal arguments: each used argument becomes a
// WebAssemblyISD::ARGUMENT node indexed by position; swiftcc gets implicit
// swiftself/swifterror params added to the recorded signature, and varargs
// functions receive a trailing pointer to the caller-allocated buffer.
// NOTE(review): this excerpt is missing the vreg-creation line in the
// varargs branch and the `Results` declaration / `computeSignatureVTs(`
// call head near the bottom — confirm against upstream LLVM.
SDValue WebAssemblyTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (!callingConvSupported(CallConv))
    fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");

  MachineFunction &MF = DAG.getMachineFunction();
  auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();

  // Set up the incoming ARGUMENTS value, which serves to represent the liveness
  // of the incoming values before they're represented by virtual registers.
  MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);

  bool HasSwiftErrorArg = false;
  bool HasSwiftSelfArg = false;
  for (const ISD::InputArg &In : Ins) {
    HasSwiftSelfArg |= In.Flags.isSwiftSelf();
    HasSwiftErrorArg |= In.Flags.isSwiftError();
    if (In.Flags.isInAlloca())
      fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
    if (In.Flags.isNest())
      fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
    if (In.Flags.isInConsecutiveRegs())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
    if (In.Flags.isInConsecutiveRegsLast())
      fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
    // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
    // registers.
    InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
                                           DAG.getTargetConstant(InVals.size(),
                                                                 DL, MVT::i32))
                             : DAG.getUNDEF(In.VT));

    // Record the number and types of arguments.
    MFI->addParam(In.VT);
  }

  // For swiftcc, emit additional swiftself and swifterror arguments
  // if there aren't. These additional arguments are also added for callee
  // signature They are necessary to match callee and caller signature for
  // indirect call.
  auto PtrVT = getPointerTy(MF.getDataLayout());
  if (CallConv == CallingConv::Swift) {
    if (!HasSwiftSelfArg) {
      MFI->addParam(PtrVT);
    }
    if (!HasSwiftErrorArg) {
      MFI->addParam(PtrVT);
    }
  }
  // Varargs are copied into a buffer allocated by the caller, and a pointer to
  // the buffer is passed as an argument.
  if (IsVarArg) {
    MVT PtrVT = getPointerTy(MF.getDataLayout());
    // NOTE(review): the createVirtualRegister(...) initializer line is
    // missing from this excerpt — confirm upstream.
    Register VarargVreg =
    MFI->setVarargBufferVreg(VarargVreg);
    Chain = DAG.getCopyToReg(
        Chain, DL, VarargVreg,
        DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
                    DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
    MFI->addParam(PtrVT);
  }

  // Record the number and types of arguments and results.
  SmallVector<MVT, 4> Params;
      MF.getFunction(), DAG.getTarget(), Params, Results);
  for (MVT VT : Results)
    MFI->addResult(VT);
  // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
  // the param logic here with ComputeSignatureVTs
  assert(MFI->getParams().size() == Params.size() &&
         std::equal(MFI->getParams().begin(), MFI->getParams().end(),
                    Params.begin()));

  return Chain;
}
1680
// Custom type legalization for nodes with illegal result types: 128-bit
// ADD/SUB are replaced via Replace128Op; sign/zero-extend cases deliberately
// add no results to decline custom lowering for unsupported shapes.
// NOTE(review): this excerpt is missing the remainder of the signature
// (N, Results, DAG parameters), the SIGN_EXTEND_INREG and
// *_EXTEND_VECTOR_INREG case labels, and the `llvm_unreachable(` head of the
// default case — confirm against upstream LLVM.
void WebAssemblyTargetLowering::ReplaceNodeResults(
  switch (N->getOpcode()) {
    // Do not add any results, signifying that N should not be custom lowered
    // after all. This happens because simd128 turns on custom lowering for
    // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
    // illegal type.
    break;
    // Do not add any results, signifying that N should not be custom lowered.
    // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
    break;
  case ISD::ADD:
  case ISD::SUB:
    // 128-bit add/sub are split into the wide-arithmetic i64 pair ops.
    Results.push_back(Replace128Op(N, DAG));
    break;
  default:
        "ReplaceNodeResults not implemented for this op for WebAssembly!");
  }
}
1705
1706//===----------------------------------------------------------------------===//
1707// Custom lowering hooks.
1708//===----------------------------------------------------------------------===//
1709
// Central dispatch for operations marked Custom: routes each opcode to its
// dedicated Lower* helper, unrolls vector bit-count ops, and reports
// genuinely unsupported operations.
// NOTE(review): this excerpt is missing many `case ISD::...:` labels (the
// lines pairing with the orphaned `return Lower...` statements below, e.g.
// GlobalTLSAddress, ExternalSymbol, the vector-element/intrinsic/extend
// cases, VECTOR_SHUFFLE, and FP_TO_*INT_SAT) — confirm against upstream.
SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default:
    llvm_unreachable("unimplemented operation lowering");
    return SDValue();
  case ISD::FrameIndex:
    return LowerFrameIndex(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
    return LowerGlobalTLSAddress(Op, DAG);
    return LowerExternalSymbol(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::BR_JT:
    return LowerBR_JT(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG);
  case ISD::BlockAddress:
  case ISD::BRIND:
    fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
    return SDValue();
  case ISD::RETURNADDR:
    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:
    return LowerFRAMEADDR(Op, DAG);
  case ISD::CopyToReg:
    return LowerCopyToReg(Op, DAG);
    return LowerAccessVectorElement(Op, DAG);
    return LowerIntrinsic(Op, DAG);
    return LowerSIGN_EXTEND_INREG(Op, DAG);
    return LowerEXTEND_VECTOR_INREG(Op, DAG);
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    return LowerShift(Op, DAG);
    return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::FMINNUM:
  case ISD::FMINIMUMNUM:
    return LowerFMIN(Op, DAG);
  case ISD::FMAXNUM:
  case ISD::FMAXIMUMNUM:
    return LowerFMAX(Op, DAG);
  case ISD::LOAD:
    return LowerLoad(Op, DAG);
  case ISD::STORE:
    return LowerStore(Op, DAG);
  case ISD::CTPOP:
  case ISD::CTLZ:
  case ISD::CTTZ:
    // No vector bit-count instructions; expand lane by lane.
    return DAG.UnrollVectorOp(Op.getNode());
  case ISD::CLEAR_CACHE:
    report_fatal_error("llvm.clear_cache is not supported on wasm");
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    return LowerMUL_LOHI(Op, DAG);
  case ISD::UADDO:
    return LowerUADDO(Op, DAG);
  }
}
1790
1794
1795 return false;
1796}
1797
// Presumably: if Op is a frame index that maps onto a wasm local rather than
// real stack memory, return that local's index; otherwise std::nullopt —
// TODO confirm, as this excerpt is incomplete.
static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
                                                  SelectionDAG &DAG) {
  // NOTE(review): the line defining `FI` (likely a dyn_cast to
  // FrameIndexSDNode) and the final return are missing from this excerpt —
  // confirm against upstream LLVM.
  if (!FI)
    return std::nullopt;

  auto &MF = DAG.getMachineFunction();
}
1807
// Lower STORE: stores addressing a wasm global become GLOBAL_SET, stores
// addressing a wasm local become LOCAL_SET, and all other stores are
// returned unchanged for default handling.
// NOTE(review): this excerpt is missing the opening guard of the first
// branch (likely `if (IsWebAssemblyGlobal(Base)) {`) and the head of the
// trailing report_fatal_error / address-space check — confirm upstream.
SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  const SDValue &Value = SN->getValue();
  const SDValue &Base = SN->getBasePtr();
  const SDValue &Offset = SN->getOffset();

    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly global",
                         false);

    SDVTList Tys = DAG.getVTList(MVT::Other);
    SDValue Ops[] = {SN->getChain(), Value, Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
                                   SN->getMemoryVT(), SN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
      report_fatal_error("unexpected offset when storing to webassembly local",
                         false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
    SDValue Ops[] = {SN->getChain(), Idx, Value};
    return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
  }

      "Encountered an unlowerable store to the wasm_var address space",
      false);

  return Op;
}
1845
// Lower LOAD: loads addressing a wasm global become GLOBAL_GET, loads
// addressing a wasm local become LOCAL_GET, and all other loads are returned
// unchanged for default handling.
// NOTE(review): this excerpt is missing the opening guard of the first
// branch and the `report_fatal_error(` heads of both offset diagnostics and
// the trailing address-space check — confirm against upstream LLVM.
SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  const SDValue &Base = LN->getBasePtr();
  const SDValue &Offset = LN->getOffset();

    if (!Offset->isUndef())
          "unexpected offset when loading from webassembly global", false);

    SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
    SDValue Ops[] = {LN->getChain(), Base};
    return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
                                   LN->getMemoryVT(), LN->getMemOperand());
  }

  if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
    if (!Offset->isUndef())
          "unexpected offset when loading from webassembly local", false);

    SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
    EVT LocalVT = LN->getValueType(0);
    return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
                       {LN->getChain(), Idx});
  }

      "Encountered an unlowerable load from the wasm_var address space",
      false);

  return Op;
}
1882
1883SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1884 SelectionDAG &DAG) const {
1885 assert(Subtarget->hasWideArithmetic());
1886 assert(Op.getValueType() == MVT::i64);
1887 SDLoc DL(Op);
1888 unsigned Opcode;
1889 switch (Op.getOpcode()) {
1890 case ISD::UMUL_LOHI:
1891 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1892 break;
1893 case ISD::SMUL_LOHI:
1894 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1895 break;
1896 default:
1897 llvm_unreachable("unexpected opcode");
1898 }
1899 SDValue LHS = Op.getOperand(0);
1900 SDValue RHS = Op.getOperand(1);
1901 SDValue Lo =
1902 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1903 SDValue Hi(Lo.getNode(), 1);
1904 SDValue Ops[] = {Lo, Hi};
1905 return DAG.getMergeValues(Ops, DL);
1906}
1907
1908// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1909//
1910// This enables generating a single wasm instruction for this operation where
1911// the upper half of both operands are constant zeros. The upper half of the
1912// result is then whether the overflow happened.
1913SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1914 SelectionDAG &DAG) const {
1915 assert(Subtarget->hasWideArithmetic());
1916 assert(Op.getValueType() == MVT::i64);
1917 assert(Op.getOpcode() == ISD::UADDO);
1918 SDLoc DL(Op);
1919 SDValue LHS = Op.getOperand(0);
1920 SDValue RHS = Op.getOperand(1);
1921 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1922 SDValue Result =
1923 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1924 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1925 SDValue CarryI64(Result.getNode(), 1);
1926 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1927 SDValue Ops[] = {Result, CarryI32};
1928 return DAG.getMergeValues(Ops, DL);
1929}
1930
1931SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1932 SelectionDAG &DAG) const {
1933 assert(Subtarget->hasWideArithmetic());
1934 assert(N->getValueType(0) == MVT::i128);
1935 SDLoc DL(N);
1936 unsigned Opcode;
1937 switch (N->getOpcode()) {
1938 case ISD::ADD:
1939 Opcode = WebAssemblyISD::I64_ADD128;
1940 break;
1941 case ISD::SUB:
1942 Opcode = WebAssemblyISD::I64_SUB128;
1943 break;
1944 default:
1945 llvm_unreachable("unexpected opcode");
1946 }
1947 SDValue LHS = N->getOperand(0);
1948 SDValue RHS = N->getOperand(1);
1949
1950 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1951 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1952 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1953 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1954 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1955 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1956 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1957 LHS_0, LHS_1, RHS_0, RHS_1);
1958 SDValue Result_HI(Result_LO.getNode(), 1);
1959 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1960}
1961
1962SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1963 SelectionDAG &DAG) const {
1964 SDValue Src = Op.getOperand(2);
1965 if (isa<FrameIndexSDNode>(Src.getNode())) {
1966 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1967 // the FI to some LEA-like instruction, but since we don't have that, we
1968 // need to insert some kind of instruction that can take an FI operand and
1969 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1970 // local.copy between Op and its FI operand.
1971 SDValue Chain = Op.getOperand(0);
1972 SDLoc DL(Op);
1973 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1974 EVT VT = Src.getValueType();
1975 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1976 : WebAssembly::COPY_I64,
1977 DL, VT, Src),
1978 0);
1979 return Op.getNode()->getNumValues() == 1
1980 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1981 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1982 Op.getNumOperands() == 4 ? Op.getOperand(3)
1983 : SDValue());
1984 }
1985 return SDValue();
1986}
1987
1988SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1989 SelectionDAG &DAG) const {
1990 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1991 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1992}
1993
1994SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1995 SelectionDAG &DAG) const {
1996 SDLoc DL(Op);
1997
1998 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1999 fail(DL, DAG,
2000 "Non-Emscripten WebAssembly hasn't implemented "
2001 "__builtin_return_address");
2002 return SDValue();
2003 }
2004
2005 unsigned Depth = Op.getConstantOperandVal(0);
2006 MakeLibCallOptions CallOptions;
2007 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
2008 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
2009 .first;
2010}
2011
2012SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2013 SelectionDAG &DAG) const {
2014 // Non-zero depths are not supported by WebAssembly currently. Use the
2015 // legalizer's default expansion, which is to return 0 (what this function is
2016 // documented to do).
2017 if (Op.getConstantOperandVal(0) > 0)
2018 return SDValue();
2019
2021 EVT VT = Op.getValueType();
2022 Register FP =
2023 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2024 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2025}
2026
2027SDValue
2028WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2029 SelectionDAG &DAG) const {
2030 SDLoc DL(Op);
2031 const auto *GA = cast<GlobalAddressSDNode>(Op);
2032
2033 MachineFunction &MF = DAG.getMachineFunction();
2034 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2035 report_fatal_error("cannot use thread-local storage without bulk memory",
2036 false);
2037
2038 const GlobalValue *GV = GA->getGlobal();
2039
2040 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2041 // on other targets, if we have thread-local storage, only the local-exec
2042 // model is possible.
2043 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2044 ? GV->getThreadLocalMode()
2046
2047 // Unsupported TLS modes
2050
2051 if (model == GlobalValue::LocalExecTLSModel ||
2054 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2055 // For DSO-local TLS variables we use offset from __tls_base
2056
2057 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2058 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2059 : WebAssembly::GLOBAL_GET_I32;
2060 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2061
2063 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2064 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2065 0);
2066
2067 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2068 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2069 SDValue SymOffset =
2070 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2071
2072 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2073 }
2074
2076
2077 EVT VT = Op.getValueType();
2078 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2079 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2080 GA->getOffset(),
2082}
2083
2084SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2085 SelectionDAG &DAG) const {
2086 SDLoc DL(Op);
2087 const auto *GA = cast<GlobalAddressSDNode>(Op);
2088 EVT VT = Op.getValueType();
2089 assert(GA->getTargetFlags() == 0 &&
2090 "Unexpected target flags on generic GlobalAddressSDNode");
2092 fail(DL, DAG, "Invalid address space for WebAssembly target");
2093
2094 unsigned OperandFlags = 0;
2095 const GlobalValue *GV = GA->getGlobal();
2096 // Since WebAssembly tables cannot yet be shared accross modules, we don't
2097 // need special treatment for tables in PIC mode.
2098 if (isPositionIndependent() &&
2100 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2101 MachineFunction &MF = DAG.getMachineFunction();
2102 MVT PtrVT = getPointerTy(MF.getDataLayout());
2103 const char *BaseName;
2104 if (GV->getValueType()->isFunctionTy()) {
2105 BaseName = MF.createExternalSymbolName("__table_base");
2107 } else {
2108 BaseName = MF.createExternalSymbolName("__memory_base");
2110 }
2112 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2113 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2114
2115 SDValue SymAddr = DAG.getNode(
2116 WebAssemblyISD::WrapperREL, DL, VT,
2117 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2118 OperandFlags));
2119
2120 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2121 }
2123 }
2124
2125 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2126 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2127 GA->getOffset(), OperandFlags));
2128}
2129
2130SDValue
2131WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2132 SelectionDAG &DAG) const {
2133 SDLoc DL(Op);
2134 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2135 EVT VT = Op.getValueType();
2136 assert(ES->getTargetFlags() == 0 &&
2137 "Unexpected target flags on generic ExternalSymbolSDNode");
2138 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2139 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2140}
2141
2142SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2143 SelectionDAG &DAG) const {
2144 // There's no need for a Wrapper node because we always incorporate a jump
2145 // table operand into a BR_TABLE instruction, rather than ever
2146 // materializing it in a register.
2147 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2148 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2149 JT->getTargetFlags());
2150}
2151
2152SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2153 SelectionDAG &DAG) const {
2154 SDLoc DL(Op);
2155 SDValue Chain = Op.getOperand(0);
2156 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2157 SDValue Index = Op.getOperand(2);
2158 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2159
2161 Ops.push_back(Chain);
2162 Ops.push_back(Index);
2163
2164 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2165 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2166
2167 // Add an operand for each case.
2168 for (auto *MBB : MBBs)
2169 Ops.push_back(DAG.getBasicBlock(MBB));
2170
2171 // Add the first MBB as a dummy default target for now. This will be replaced
2172 // with the proper default target (and the preceding range check eliminated)
2173 // if possible by WebAssemblyFixBrTableDefaults.
2174 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2175 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2176}
2177
2178SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2179 SelectionDAG &DAG) const {
2180 SDLoc DL(Op);
2181 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2182
2183 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2184 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2185
2186 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2187 MFI->getVarargBufferVreg(), PtrVT);
2188 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2189 MachinePointerInfo(SV));
2190}
2191
2192SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2193 SelectionDAG &DAG) const {
2194 MachineFunction &MF = DAG.getMachineFunction();
2195 unsigned IntNo;
2196 switch (Op.getOpcode()) {
2199 IntNo = Op.getConstantOperandVal(1);
2200 break;
2202 IntNo = Op.getConstantOperandVal(0);
2203 break;
2204 default:
2205 llvm_unreachable("Invalid intrinsic");
2206 }
2207 SDLoc DL(Op);
2208
2209 switch (IntNo) {
2210 default:
2211 return SDValue(); // Don't custom lower most intrinsics.
2212
2213 case Intrinsic::wasm_lsda: {
2214 auto PtrVT = getPointerTy(MF.getDataLayout());
2215 const char *SymName = MF.createExternalSymbolName(
2216 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2217 if (isPositionIndependent()) {
2219 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2220 const char *BaseName = MF.createExternalSymbolName("__memory_base");
2222 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2223 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2224 SDValue SymAddr =
2225 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2226 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2227 }
2228 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2229 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2230 }
2231
2232 case Intrinsic::wasm_shuffle: {
2233 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2234 SDValue Ops[18];
2235 size_t OpIdx = 0;
2236 Ops[OpIdx++] = Op.getOperand(1);
2237 Ops[OpIdx++] = Op.getOperand(2);
2238 while (OpIdx < 18) {
2239 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2240 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2241 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2242 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2243 } else {
2244 Ops[OpIdx++] = MaskIdx;
2245 }
2246 }
2247 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2248 }
2249
2250 case Intrinsic::thread_pointer: {
2251 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2252 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2253 : WebAssembly::GLOBAL_GET_I32;
2254 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2255 return SDValue(
2256 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2257 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2258 0);
2259 }
2260 }
2261}
2262
2263SDValue
2264WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2265 SelectionDAG &DAG) const {
2266 SDLoc DL(Op);
2267 // If sign extension operations are disabled, allow sext_inreg only if operand
2268 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2269 // extension operations, but allowing sext_inreg in this context lets us have
2270 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2271 // everywhere would be simpler in this file, but would necessitate large and
2272 // brittle patterns to undo the expansion and select extract_lane_s
2273 // instructions.
2274 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2275 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2276 return SDValue();
2277
2278 const SDValue &Extract = Op.getOperand(0);
2279 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2280 if (VecT.getVectorElementType().getSizeInBits() > 32)
2281 return SDValue();
2282 MVT ExtractedLaneT =
2283 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2284 MVT ExtractedVecT =
2285 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2286 if (ExtractedVecT == VecT)
2287 return Op;
2288
2289 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2290 const SDNode *Index = Extract.getOperand(1).getNode();
2291 if (!isa<ConstantSDNode>(Index))
2292 return SDValue();
2293 unsigned IndexVal = Index->getAsZExtVal();
2294 unsigned Scale =
2295 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2296 assert(Scale > 1);
2297 SDValue NewIndex =
2298 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2299 SDValue NewExtract = DAG.getNode(
2301 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2302 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2303 Op.getOperand(1));
2304}
2305
2306static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2307 SelectionDAG &DAG) {
2308 SDValue Source = peekThroughBitcasts(Op);
2309 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2310 return SDValue();
2311
2312 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2313 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2314 "expected extend_low");
2315 auto *Shuffle = cast<ShuffleVectorSDNode>(Source.getNode());
2316
2317 ArrayRef<int> Mask = Shuffle->getMask();
2318 // Look for a shuffle which moves from the high half to the low half.
2319 size_t FirstIdx = Mask.size() / 2;
2320 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2321 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2322 return SDValue();
2323 }
2324 }
2325
2326 SDLoc DL(Op);
2327 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2328 ? WebAssemblyISD::EXTEND_HIGH_S
2329 : WebAssemblyISD::EXTEND_HIGH_U;
2330 SDValue ShuffleSrc = Shuffle->getOperand(0);
2331 if (Op.getOpcode() == ISD::BITCAST)
2332 ShuffleSrc = DAG.getBitcast(Op.getValueType(), ShuffleSrc);
2333
2334 return DAG.getNode(Opc, DL, VT, ShuffleSrc);
2335}
2336
2337SDValue
2338WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2339 SelectionDAG &DAG) const {
2340 SDLoc DL(Op);
2341 EVT VT = Op.getValueType();
2342 SDValue Src = Op.getOperand(0);
2343 EVT SrcVT = Src.getValueType();
2344
2345 if (SrcVT.getVectorElementType() == MVT::i1 ||
2346 SrcVT.getVectorElementType() == MVT::i64)
2347 return SDValue();
2348
2349 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2350 "Unexpected extension factor.");
2351 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2352
2353 if (Scale != 2 && Scale != 4 && Scale != 8)
2354 return SDValue();
2355
2356 unsigned Ext;
2357 switch (Op.getOpcode()) {
2358 default:
2359 llvm_unreachable("unexpected opcode");
2362 Ext = WebAssemblyISD::EXTEND_LOW_U;
2363 break;
2365 Ext = WebAssemblyISD::EXTEND_LOW_S;
2366 break;
2367 }
2368
2369 if (Scale == 2) {
2370 // See if we can use EXTEND_HIGH.
2371 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2372 return ExtendHigh;
2373 }
2374
2375 SDValue Ret = Src;
2376 while (Scale != 1) {
2377 Ret = DAG.getNode(Ext, DL,
2378 Ret.getValueType()
2381 Ret);
2382 Scale /= 2;
2383 }
2384 assert(Ret.getValueType() == VT);
2385 return Ret;
2386}
2387
2389 SDLoc DL(Op);
2390 if (Op.getValueType() != MVT::v2f64)
2391 return SDValue();
2392
2393 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2394 unsigned &Index) -> bool {
2395 switch (Op.getOpcode()) {
2396 case ISD::SINT_TO_FP:
2397 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2398 break;
2399 case ISD::UINT_TO_FP:
2400 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2401 break;
2402 case ISD::FP_EXTEND:
2403 Opcode = WebAssemblyISD::PROMOTE_LOW;
2404 break;
2405 default:
2406 return false;
2407 }
2408
2409 auto ExtractVector = Op.getOperand(0);
2410 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2411 return false;
2412
2413 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2414 return false;
2415
2416 SrcVec = ExtractVector.getOperand(0);
2417 Index = ExtractVector.getConstantOperandVal(1);
2418 return true;
2419 };
2420
2421 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2422 SDValue LHSSrcVec, RHSSrcVec;
2423 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2424 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2425 return SDValue();
2426
2427 if (LHSOpcode != RHSOpcode)
2428 return SDValue();
2429
2430 MVT ExpectedSrcVT;
2431 switch (LHSOpcode) {
2432 case WebAssemblyISD::CONVERT_LOW_S:
2433 case WebAssemblyISD::CONVERT_LOW_U:
2434 ExpectedSrcVT = MVT::v4i32;
2435 break;
2436 case WebAssemblyISD::PROMOTE_LOW:
2437 ExpectedSrcVT = MVT::v4f32;
2438 break;
2439 }
2440 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2441 return SDValue();
2442
2443 auto Src = LHSSrcVec;
2444 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2445 // Shuffle the source vector so that the converted lanes are the low lanes.
2446 Src = DAG.getVectorShuffle(
2447 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2448 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2449 }
2450 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2451}
2452
2453SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2454 SelectionDAG &DAG) const {
2455 MVT VT = Op.getSimpleValueType();
2456 if (VT == MVT::v8f16) {
2457 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scaler
2458 // FP16 type, so cast them to I16s.
2459 MVT IVT = VT.changeVectorElementType(MVT::i16);
2461 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2462 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2463 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2464 return DAG.getBitcast(VT, Res);
2465 }
2466
2467 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2468 return ConvertLow;
2469
2470 SDLoc DL(Op);
2471 const EVT VecT = Op.getValueType();
2472 const EVT LaneT = Op.getOperand(0).getValueType();
2473 const size_t Lanes = Op.getNumOperands();
2474 bool CanSwizzle = VecT == MVT::v16i8;
2475
2476 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2477 // possible number of lanes at once followed by a sequence of replace_lane
2478 // instructions to individually initialize any remaining lanes.
2479
2480 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2481 // swizzled lanes should be given greater weight.
2482
2483 // TODO: Investigate looping rather than always extracting/replacing specific
2484 // lanes to fill gaps.
2485
2486 auto IsConstant = [](const SDValue &V) {
2487 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2488 };
2489
2490 // Returns the source vector and index vector pair if they exist. Checks for:
2491 // (extract_vector_elt
2492 // $src,
2493 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2494 // )
2495 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2496 auto Bail = std::make_pair(SDValue(), SDValue());
2497 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2498 return Bail;
2499 const SDValue &SwizzleSrc = Lane->getOperand(0);
2500 const SDValue &IndexExt = Lane->getOperand(1);
2501 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2502 return Bail;
2503 const SDValue &Index = IndexExt->getOperand(0);
2504 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2505 return Bail;
2506 const SDValue &SwizzleIndices = Index->getOperand(0);
2507 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2508 SwizzleIndices.getValueType() != MVT::v16i8 ||
2509 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2510 Index->getConstantOperandVal(1) != I)
2511 return Bail;
2512 return std::make_pair(SwizzleSrc, SwizzleIndices);
2513 };
2514
2515 // If the lane is extracted from another vector at a constant index, return
2516 // that vector. The source vector must not have more lanes than the dest
2517 // because the shufflevector indices are in terms of the destination lanes and
2518 // would not be able to address the smaller individual source lanes.
2519 auto GetShuffleSrc = [&](const SDValue &Lane) {
2520 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2521 return SDValue();
2522 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2523 return SDValue();
2524 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2525 VecT.getVectorNumElements())
2526 return SDValue();
2527 return Lane->getOperand(0);
2528 };
2529
2530 using ValueEntry = std::pair<SDValue, size_t>;
2531 SmallVector<ValueEntry, 16> SplatValueCounts;
2532
2533 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2534 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2535
2536 using ShuffleEntry = std::pair<SDValue, size_t>;
2537 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2538
2539 auto AddCount = [](auto &Counts, const auto &Val) {
2540 auto CountIt =
2541 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2542 if (CountIt == Counts.end()) {
2543 Counts.emplace_back(Val, 1);
2544 } else {
2545 CountIt->second++;
2546 }
2547 };
2548
2549 auto GetMostCommon = [](auto &Counts) {
2550 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2551 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2552 return *CommonIt;
2553 };
2554
2555 size_t NumConstantLanes = 0;
2556
2557 // Count eligible lanes for each type of vector creation op
2558 for (size_t I = 0; I < Lanes; ++I) {
2559 const SDValue &Lane = Op->getOperand(I);
2560 if (Lane.isUndef())
2561 continue;
2562
2563 AddCount(SplatValueCounts, Lane);
2564
2565 if (IsConstant(Lane))
2566 NumConstantLanes++;
2567 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2568 AddCount(ShuffleCounts, ShuffleSrc);
2569 if (CanSwizzle) {
2570 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2571 if (SwizzleSrcs.first)
2572 AddCount(SwizzleCounts, SwizzleSrcs);
2573 }
2574 }
2575
2576 SDValue SplatValue;
2577 size_t NumSplatLanes;
2578 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2579
2580 SDValue SwizzleSrc;
2581 SDValue SwizzleIndices;
2582 size_t NumSwizzleLanes = 0;
2583 if (SwizzleCounts.size())
2584 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2585 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2586
2587 // Shuffles can draw from up to two vectors, so find the two most common
2588 // sources.
2589 SDValue ShuffleSrc1, ShuffleSrc2;
2590 size_t NumShuffleLanes = 0;
2591 if (ShuffleCounts.size()) {
2592 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2593 llvm::erase_if(ShuffleCounts,
2594 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2595 }
2596 if (ShuffleCounts.size()) {
2597 size_t AdditionalShuffleLanes;
2598 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2599 GetMostCommon(ShuffleCounts);
2600 NumShuffleLanes += AdditionalShuffleLanes;
2601 }
2602
2603 // Predicate returning true if the lane is properly initialized by the
2604 // original instruction
2605 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2607 // Prefer swizzles over shuffles over vector consts over splats
2608 if (NumSwizzleLanes >= NumShuffleLanes &&
2609 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2610 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2611 SwizzleIndices);
2612 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2613 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2614 return Swizzled == GetSwizzleSrcs(I, Lane);
2615 };
2616 } else if (NumShuffleLanes >= NumConstantLanes &&
2617 NumShuffleLanes >= NumSplatLanes) {
2618 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2619 size_t DestLaneCount = VecT.getVectorNumElements();
2620 size_t Scale1 = 1;
2621 size_t Scale2 = 1;
2622 SDValue Src1 = ShuffleSrc1;
2623 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2624 if (Src1.getValueType() != VecT) {
2625 size_t LaneSize =
2627 assert(LaneSize > DestLaneSize);
2628 Scale1 = LaneSize / DestLaneSize;
2629 Src1 = DAG.getBitcast(VecT, Src1);
2630 }
2631 if (Src2.getValueType() != VecT) {
2632 size_t LaneSize =
2634 assert(LaneSize > DestLaneSize);
2635 Scale2 = LaneSize / DestLaneSize;
2636 Src2 = DAG.getBitcast(VecT, Src2);
2637 }
2638
2639 int Mask[16];
2640 assert(DestLaneCount <= 16);
2641 for (size_t I = 0; I < DestLaneCount; ++I) {
2642 const SDValue &Lane = Op->getOperand(I);
2643 SDValue Src = GetShuffleSrc(Lane);
2644 if (Src == ShuffleSrc1) {
2645 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2646 } else if (Src && Src == ShuffleSrc2) {
2647 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2648 } else {
2649 Mask[I] = -1;
2650 }
2651 }
2652 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2653 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2654 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2655 auto Src = GetShuffleSrc(Lane);
2656 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2657 };
2658 } else if (NumConstantLanes >= NumSplatLanes) {
2659 SmallVector<SDValue, 16> ConstLanes;
2660 for (const SDValue &Lane : Op->op_values()) {
2661 if (IsConstant(Lane)) {
2662 // Values may need to be fixed so that they will sign extend to be
2663 // within the expected range during ISel. Check whether the value is in
2664 // bounds based on the lane bit width and if it is out of bounds, lop
2665 // off the extra bits.
2666 uint64_t LaneBits = 128 / Lanes;
2667 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2668 ConstLanes.push_back(DAG.getConstant(
2669 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2670 SDLoc(Lane), LaneT));
2671 } else {
2672 ConstLanes.push_back(Lane);
2673 }
2674 } else if (LaneT.isFloatingPoint()) {
2675 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2676 } else {
2677 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2678 }
2679 }
2680 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2681 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2682 return IsConstant(Lane);
2683 };
2684 } else {
2685 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2686 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2687 (DestLaneSize == 32 || DestLaneSize == 64)) {
2688 // Could be selected to load_zero.
2689 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2690 } else {
2691 // Use a splat (which might be selected as a load splat)
2692 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2693 }
2694 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2695 return Lane == SplatValue;
2696 };
2697 }
2698
2699 assert(Result);
2700 assert(IsLaneConstructed);
2701
2702 // Add replace_lane instructions for any unhandled values
2703 for (size_t I = 0; I < Lanes; ++I) {
2704 const SDValue &Lane = Op->getOperand(I);
2705 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2706 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2707 DAG.getConstant(I, DL, MVT::i32));
2708 }
2709
2710 return Result;
2711}
2712
2713SDValue
2714WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2715 SelectionDAG &DAG) const {
2716 SDLoc DL(Op);
2717 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2718 MVT VecType = Op.getOperand(0).getSimpleValueType();
2719 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2720 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2721
2722 // Space for two vector args and sixteen mask indices
2723 SDValue Ops[18];
2724 size_t OpIdx = 0;
2725 Ops[OpIdx++] = Op.getOperand(0);
2726 Ops[OpIdx++] = Op.getOperand(1);
2727
2728 // Expand mask indices to byte indices and materialize them as operands
2729 for (int M : Mask) {
2730 for (size_t J = 0; J < LaneBytes; ++J) {
2731 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2732 // whole lane of vector input, to allow further reduction at VM. E.g.
2733 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2734 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2735 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2736 }
2737 }
2738
2739 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2740}
2741
2742SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2743 SelectionDAG &DAG) const {
2744 SDLoc DL(Op);
2745 // The legalizer does not know how to expand the unsupported comparison modes
2746 // of i64x2 vectors, so we manually unroll them here.
2747 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2749 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2750 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2751 const SDValue &CC = Op->getOperand(2);
2752 auto MakeLane = [&](unsigned I) {
2753 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2754 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2755 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2756 };
2757 return DAG.getBuildVector(Op->getValueType(0), DL,
2758 {MakeLane(0), MakeLane(1)});
2759}
2760
2761SDValue
2762WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2763 SelectionDAG &DAG) const {
2764 // Allow constant lane indices, expand variable lane indices
2765 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2766 if (isa<ConstantSDNode>(IdxNode)) {
2767 // Ensure the index type is i32 to match the tablegen patterns
2768 uint64_t Idx = IdxNode->getAsZExtVal();
2769 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2770 Ops[Op.getNumOperands() - 1] =
2771 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2772 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2773 }
2774 // Perform default expansion
2775 return SDValue();
2776}
2777
2779 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2780 // 32-bit and 64-bit unrolled shifts will have proper semantics
2781 if (LaneT.bitsGE(MVT::i32))
2782 return DAG.UnrollVectorOp(Op.getNode());
2783 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2784 SDLoc DL(Op);
2785 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2786 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2787 unsigned ShiftOpcode = Op.getOpcode();
2788 SmallVector<SDValue, 16> ShiftedElements;
2789 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2790 SmallVector<SDValue, 16> ShiftElements;
2791 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2792 SmallVector<SDValue, 16> UnrolledOps;
2793 for (size_t i = 0; i < NumLanes; ++i) {
2794 SDValue MaskedShiftValue =
2795 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2796 SDValue ShiftedValue = ShiftedElements[i];
2797 if (ShiftOpcode == ISD::SRA)
2798 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2799 ShiftedValue, DAG.getValueType(LaneT));
2800 UnrolledOps.push_back(
2801 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2802 }
2803 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2804}
2805
2806SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2807 SelectionDAG &DAG) const {
2808 SDLoc DL(Op);
2809
2810 // Only manually lower vector shifts
2811 assert(Op.getSimpleValueType().isVector());
2812
2813 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2814 auto ShiftVal = Op.getOperand(1);
2815
2816 // Try to skip bitmask operation since it is implied inside shift instruction
2817 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2818 if (MaskOp.getOpcode() != ISD::AND)
2819 return MaskOp;
2820 SDValue LHS = MaskOp.getOperand(0);
2821 SDValue RHS = MaskOp.getOperand(1);
2822 if (MaskOp.getValueType().isVector()) {
2823 APInt MaskVal;
2824 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2825 std::swap(LHS, RHS);
2826
2827 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2828 MaskVal == MaskBits)
2829 MaskOp = LHS;
2830 } else {
2831 if (!isa<ConstantSDNode>(RHS.getNode()))
2832 std::swap(LHS, RHS);
2833
2834 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2835 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2836 MaskOp = LHS;
2837 }
2838
2839 return MaskOp;
2840 };
2841
2842 // Skip vector and operation
2843 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2844 ShiftVal = DAG.getSplatValue(ShiftVal);
2845 if (!ShiftVal)
2846 return unrollVectorShift(Op, DAG);
2847
2848 // Skip scalar and operation
2849 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2850 // Use anyext because none of the high bits can affect the shift
2851 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2852
2853 unsigned Opcode;
2854 switch (Op.getOpcode()) {
2855 case ISD::SHL:
2856 Opcode = WebAssemblyISD::VEC_SHL;
2857 break;
2858 case ISD::SRA:
2859 Opcode = WebAssemblyISD::VEC_SHR_S;
2860 break;
2861 case ISD::SRL:
2862 Opcode = WebAssemblyISD::VEC_SHR_U;
2863 break;
2864 default:
2865 llvm_unreachable("unexpected opcode");
2866 }
2867
2868 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2869}
2870
2871SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2872 SelectionDAG &DAG) const {
2873 EVT ResT = Op.getValueType();
2874 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2875
2876 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2877 (SatVT == MVT::i32 || SatVT == MVT::i64))
2878 return Op;
2879
2880 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2881 return Op;
2882
2883 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2884 return Op;
2885
2886 return SDValue();
2887}
2888
2890 return (Op->getFlags().hasNoNaNs() ||
2891 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2892 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2893 (Op->getFlags().hasNoSignedZeros() ||
2894 DAG.isKnownNeverZeroFloat(Op->getOperand(0)) ||
2895 DAG.isKnownNeverZeroFloat(Op->getOperand(1)));
2896}
2897
2898SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2899 SelectionDAG &DAG) const {
2900 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2901 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2902 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2903 }
2904 return SDValue();
2905}
2906
2907SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2908 SelectionDAG &DAG) const {
2909 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2910 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2911 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2912 }
2913 return SDValue();
2914}
2915
2916//===----------------------------------------------------------------------===//
2917// Custom DAG combine hooks
2918//===----------------------------------------------------------------------===//
2919static SDValue
2921 auto &DAG = DCI.DAG;
2922 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2923
2924 // Hoist vector bitcasts that don't change the number of lanes out of unary
2925 // shuffles, where they are less likely to get in the way of other combines.
2926 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2927 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2928 SDValue Bitcast = N->getOperand(0);
2929 if (Bitcast.getOpcode() != ISD::BITCAST)
2930 return SDValue();
2931 if (!N->getOperand(1).isUndef())
2932 return SDValue();
2933 SDValue CastOp = Bitcast.getOperand(0);
2934 EVT SrcType = CastOp.getValueType();
2935 EVT DstType = Bitcast.getValueType();
2936 if (!SrcType.is128BitVector() ||
2937 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2938 return SDValue();
2939 SDValue NewShuffle = DAG.getVectorShuffle(
2940 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2941 return DAG.getBitcast(DstType, NewShuffle);
2942}
2943
2944/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2945/// split up into scalar instructions during legalization, and the vector
2946/// extending instructions are selected in performVectorExtendCombine below.
2947static SDValue
2950 auto &DAG = DCI.DAG;
2951 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2952 N->getOpcode() == ISD::SINT_TO_FP);
2953
2954 EVT InVT = N->getOperand(0)->getValueType(0);
2955 EVT ResVT = N->getValueType(0);
2956 MVT ExtVT;
2957 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2958 ExtVT = MVT::v4i32;
2959 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2960 ExtVT = MVT::v2i32;
2961 else
2962 return SDValue();
2963
2964 unsigned Op =
2966 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2967 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2968}
2969
2970static SDValue
2973 auto &DAG = DCI.DAG;
2974
2975 SDNodeFlags Flags = N->getFlags();
2976 SDValue Op0 = N->getOperand(0);
2977 EVT VT = N->getValueType(0);
2978
2979 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2980 // Depending on the target (runtime) backend, this might be performance
2981 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2982 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2983 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2984 }
2985
2986 return SDValue();
2987}
2988
2989static SDValue
2991 auto &DAG = DCI.DAG;
2992 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2993 N->getOpcode() == ISD::ZERO_EXTEND);
2994
2995 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2996 // possible before the extract_subvector can be expanded.
2997 auto Extract = N->getOperand(0);
2998 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2999 return SDValue();
3000 auto Source = Extract.getOperand(0);
3001 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
3002 if (IndexNode == nullptr)
3003 return SDValue();
3004 auto Index = IndexNode->getZExtValue();
3005
3006 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3007 // extracted subvector is the low or high half of its source.
3008 EVT ResVT = N->getValueType(0);
3009 if (ResVT == MVT::v8i16) {
3010 if (Extract.getValueType() != MVT::v8i8 ||
3011 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3012 return SDValue();
3013 } else if (ResVT == MVT::v4i32) {
3014 if (Extract.getValueType() != MVT::v4i16 ||
3015 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3016 return SDValue();
3017 } else if (ResVT == MVT::v2i64) {
3018 if (Extract.getValueType() != MVT::v2i32 ||
3019 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3020 return SDValue();
3021 } else {
3022 return SDValue();
3023 }
3024
3025 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3026 bool IsLow = Index == 0;
3027
3028 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3029 : WebAssemblyISD::EXTEND_HIGH_S)
3030 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3031 : WebAssemblyISD::EXTEND_HIGH_U);
3032
3033 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3034}
3035
3036static SDValue
3038 auto &DAG = DCI.DAG;
3039
3040 auto GetWasmConversionOp = [](unsigned Op) {
3041 switch (Op) {
3043 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3045 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3046 case ISD::FP_ROUND:
3047 return WebAssemblyISD::DEMOTE_ZERO;
3048 }
3049 llvm_unreachable("unexpected op");
3050 };
3051
3052 auto IsZeroSplat = [](SDValue SplatVal) {
3053 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3054 APInt SplatValue, SplatUndef;
3055 unsigned SplatBitSize;
3056 bool HasAnyUndefs;
3057 // Endianness doesn't matter in this context because we are looking for
3058 // an all-zero value.
3059 return Splat &&
3060 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3061 HasAnyUndefs) &&
3062 SplatValue == 0;
3063 };
3064
3065 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3066 // Combine this:
3067 //
3068 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3069 //
3070 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3071 //
3072 // Or this:
3073 //
3074 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3075 //
3076 // into (f32x4.demote_zero_f64x2 $x).
3077 EVT ResVT;
3078 EVT ExpectedConversionType;
3079 auto Conversion = N->getOperand(0);
3080 auto ConversionOp = Conversion.getOpcode();
3081 switch (ConversionOp) {
3084 ResVT = MVT::v4i32;
3085 ExpectedConversionType = MVT::v2i32;
3086 break;
3087 case ISD::FP_ROUND:
3088 ResVT = MVT::v4f32;
3089 ExpectedConversionType = MVT::v2f32;
3090 break;
3091 default:
3092 return SDValue();
3093 }
3094
3095 if (N->getValueType(0) != ResVT)
3096 return SDValue();
3097
3098 if (Conversion.getValueType() != ExpectedConversionType)
3099 return SDValue();
3100
3101 auto Source = Conversion.getOperand(0);
3102 if (Source.getValueType() != MVT::v2f64)
3103 return SDValue();
3104
3105 if (!IsZeroSplat(N->getOperand(1)) ||
3106 N->getOperand(1).getValueType() != ExpectedConversionType)
3107 return SDValue();
3108
3109 unsigned Op = GetWasmConversionOp(ConversionOp);
3110 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3111 }
3112
3113 // Combine this:
3114 //
3115 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3116 //
3117 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3118 //
3119 // Or this:
3120 //
3121 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3122 //
3123 // into (f32x4.demote_zero_f64x2 $x).
3124 EVT ResVT;
3125 auto ConversionOp = N->getOpcode();
3126 switch (ConversionOp) {
3129 ResVT = MVT::v4i32;
3130 break;
3131 case ISD::FP_ROUND:
3132 ResVT = MVT::v4f32;
3133 break;
3134 default:
3135 llvm_unreachable("unexpected op");
3136 }
3137
3138 if (N->getValueType(0) != ResVT)
3139 return SDValue();
3140
3141 auto Concat = N->getOperand(0);
3142 if (Concat.getValueType() != MVT::v4f64)
3143 return SDValue();
3144
3145 auto Source = Concat.getOperand(0);
3146 if (Source.getValueType() != MVT::v2f64)
3147 return SDValue();
3148
3149 if (!IsZeroSplat(Concat.getOperand(1)) ||
3150 Concat.getOperand(1).getValueType() != MVT::v2f64)
3151 return SDValue();
3152
3153 unsigned Op = GetWasmConversionOp(ConversionOp);
3154 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3155}
3156
3157// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3158static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3159 const SDLoc &DL, unsigned VectorWidth) {
3160 EVT VT = Vec.getValueType();
3161 EVT ElVT = VT.getVectorElementType();
3162 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3163 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3164 VT.getVectorNumElements() / Factor);
3165
3166 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3167 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3168 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3169
3170 // This is the index of the first element of the VectorWidth-bit chunk
3171 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3172 IdxVal &= ~(ElemsPerChunk - 1);
3173
3174 // If the input is a buildvector just emit a smaller one.
3175 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3176 return DAG.getBuildVector(ResultVT, DL,
3177 Vec->ops().slice(IdxVal, ElemsPerChunk));
3178
3179 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3180 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3181}
3182
3183// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3184// is the expected destination value type after recursion. In is the initial
3185// input. Note that the input should have enough leading zero bits to prevent
3186// NARROW_U from saturating results.
3188 SelectionDAG &DAG) {
3189 EVT SrcVT = In.getValueType();
3190
3191 // No truncation required, we might get here due to recursive calls.
3192 if (SrcVT == DstVT)
3193 return In;
3194
3195 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3196 unsigned NumElems = SrcVT.getVectorNumElements();
3197 if (!isPowerOf2_32(NumElems))
3198 return SDValue();
3199 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3200 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3201
3202 LLVMContext &Ctx = *DAG.getContext();
3203 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3204
3205 // Narrow to the largest type possible:
3206 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3207 EVT InVT = MVT::i16, OutVT = MVT::i8;
3208 if (SrcVT.getScalarSizeInBits() > 16) {
3209 InVT = MVT::i32;
3210 OutVT = MVT::i16;
3211 }
3212 unsigned SubSizeInBits = SrcSizeInBits / 2;
3213 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3214 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3215
3216 // Split lower/upper subvectors.
3217 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3218 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3219
3220 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3221 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3222 Lo = DAG.getBitcast(InVT, Lo);
3223 Hi = DAG.getBitcast(InVT, Hi);
3224 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3225 return DAG.getBitcast(DstVT, Res);
3226 }
3227
3228 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3229 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3230 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3231 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3232
3233 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3234 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3235 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3236}
3237
3240 auto &DAG = DCI.DAG;
3241
3242 SDValue In = N->getOperand(0);
3243 EVT InVT = In.getValueType();
3244 if (!InVT.isSimple())
3245 return SDValue();
3246
3247 EVT OutVT = N->getValueType(0);
3248 if (!OutVT.isVector())
3249 return SDValue();
3250
3251 EVT OutSVT = OutVT.getVectorElementType();
3252 EVT InSVT = InVT.getVectorElementType();
3253 // Currently only cover truncate to v16i8 or v8i16.
3254 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3255 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3256 return SDValue();
3257
3258 SDLoc DL(N);
3260 OutVT.getScalarSizeInBits());
3261 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3262 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3263}
3264
3267 using namespace llvm::SDPatternMatch;
3268 auto &DAG = DCI.DAG;
3269 SDLoc DL(N);
3270 SDValue Src = N->getOperand(0);
3271 EVT VT = N->getValueType(0);
3272 EVT SrcVT = Src.getValueType();
3273
3274 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3275 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3276 return SDValue();
3277
3278 unsigned NumElts = SrcVT.getVectorNumElements();
3279 EVT Width = MVT::getIntegerVT(128 / NumElts);
3280
3281 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3282 // ==> bitmask
3283 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3284 return DAG.getZExtOrTrunc(
3285 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3286 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3287 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3288 SrcVT.changeVectorElementType(
3289 *DAG.getContext(), Width))}),
3290 DL, VT);
3291 }
3292
3293 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3294 if (NumElts == 32 || NumElts == 64) {
3295 // Strategy: We will setcc them separately in v16i8 -> v16i1
3296 // Bitcast them to i16, extend them to either i32 or i64.
3297 // Add them together, shifting left 1 by 1.
3298 SDValue Concat, SetCCVector;
3299 ISD::CondCode SetCond;
3300
3301 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3302 m_CondCode(SetCond)))))
3303 return SDValue();
3304 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3305 return SDValue();
3306
3307 uint64_t ElementWidth =
3309
3310 SmallVector<SDValue> VectorsToShuffle;
3311 for (size_t I = 0; I < Concat->ops().size(); I++) {
3312 VectorsToShuffle.push_back(DAG.getBitcast(
3313 MVT::i16,
3314 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3315 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3316 DAG, DL, 128),
3317 SetCond)));
3318 }
3319
3320 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3321 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3322
3323 for (SDValue V : VectorsToShuffle) {
3324 ReturningInteger = DAG.getNode(
3325 ISD::SHL, DL, ReturnType,
3326 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3327
3328 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3329 ReturningInteger =
3330 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3331 }
3332
3333 return ReturningInteger;
3334 }
3335
3336 return SDValue();
3337}
3338
3340 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3341 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3342 // any_true (setcc <X>, 0, ne) => (any_true X)
3343 // all_true (setcc <X>, 0, ne) => (all_true X)
3344 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3345 using namespace llvm::SDPatternMatch;
3346
3347 SDValue LHS;
3348 if (N->getNumOperands() < 2 ||
3349 !sd_match(N->getOperand(1),
3351 return SDValue();
3352 EVT LT = LHS.getValueType();
3353 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3354 return SDValue();
3355
3356 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3357 ISD::CondCode SetType,
3358 Intrinsic::WASMIntrinsics InPost) {
3359 if (N->getConstantOperandVal(0) != InPre)
3360 return SDValue();
3361
3362 SDValue LHS;
3363 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3364 m_SpecificCondCode(SetType))))
3365 return SDValue();
3366
3367 SDLoc DL(N);
3368 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3369 {DAG.getConstant(InPost, DL, MVT::i32), LHS});
3370 if (SetType == ISD::SETEQ)
3371 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3372 DAG.getConstant(1, DL, MVT::i32));
3373 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3374 };
3375
3376 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3377 Intrinsic::wasm_alltrue))
3378 return AnyTrueEQ;
3379 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3380 Intrinsic::wasm_anytrue))
3381 return AllTrueEQ;
3382 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3383 Intrinsic::wasm_anytrue))
3384 return AnyTrueNE;
3385 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3386 Intrinsic::wasm_alltrue))
3387 return AllTrueNE;
3388
3389 return SDValue();
3390}
3391
3392template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3393 Intrinsic::ID Intrin>
3395 SDValue LHS = N->getOperand(0);
3396 SDValue RHS = N->getOperand(1);
3397 SDValue Cond = N->getOperand(2);
3398 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3399 return SDValue();
3400
3401 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3402 return SDValue();
3403
3404 SDLoc DL(N);
3405 SDValue Ret =
3406 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3407 {DAG.getConstant(Intrin, DL, MVT::i32),
3408 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)});
3409 if (RequiresNegate)
3410 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3411 DAG.getConstant(1, DL, MVT::i32));
3412 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3413}
3414
3415/// Try to convert a i128 comparison to a v16i8 comparison before type
3416/// legalization splits it up into chunks
3417static SDValue
3419 const WebAssemblySubtarget *Subtarget) {
3420
3421 SDLoc DL(N);
3422 SDValue X = N->getOperand(0);
3423 SDValue Y = N->getOperand(1);
3424 EVT VT = N->getValueType(0);
3425 EVT OpVT = X.getValueType();
3426
3427 SelectionDAG &DAG = DCI.DAG;
3429 Attribute::NoImplicitFloat))
3430 return SDValue();
3431
3432 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3433 // We're looking for an oversized integer equality comparison with SIMD
3434 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3435 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3436 return SDValue();
3437
3438 // Don't perform this combine if constructing the vector will be expensive.
3439 auto IsVectorBitCastCheap = [](SDValue X) {
3441 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3442 };
3443
3444 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3445 return SDValue();
3446
3447 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3448 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3449 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3450
3451 SDValue Intr =
3452 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3453 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3454 : Intrinsic::wasm_anytrue,
3455 DL, MVT::i32),
3456 Cmp});
3457
3458 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3459 ISD::SETNE);
3460}
3461
3464 const WebAssemblySubtarget *Subtarget) {
3465 if (!DCI.isBeforeLegalize())
3466 return SDValue();
3467
3468 EVT VT = N->getValueType(0);
3469 if (!VT.isScalarInteger())
3470 return SDValue();
3471
3472 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3473 return V;
3474
3475 SDValue LHS = N->getOperand(0);
3476 if (LHS->getOpcode() != ISD::BITCAST)
3477 return SDValue();
3478
3479 EVT FromVT = LHS->getOperand(0).getValueType();
3480 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3481 return SDValue();
3482
3483 unsigned NumElts = FromVT.getVectorNumElements();
3484 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3485 return SDValue();
3486
3487 if (!cast<ConstantSDNode>(N->getOperand(1)))
3488 return SDValue();
3489
3490 auto &DAG = DCI.DAG;
3491 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3492 MVT::getIntegerVT(128 / NumElts));
3493 // setcc (iN (bitcast (vNi1 X))), 0, ne
3494 // ==> any_true (vNi1 X)
3496 N, VecVT, DAG)) {
3497 return Match;
3498 }
3499 // setcc (iN (bitcast (vNi1 X))), 0, eq
3500 // ==> xor (any_true (vNi1 X)), -1
3502 N, VecVT, DAG)) {
3503 return Match;
3504 }
3505 // setcc (iN (bitcast (vNi1 X))), -1, eq
3506 // ==> all_true (vNi1 X)
3508 N, VecVT, DAG)) {
3509 return Match;
3510 }
3511 // setcc (iN (bitcast (vNi1 X))), -1, ne
3512 // ==> xor (all_true (vNi1 X)), -1
3514 N, VecVT, DAG)) {
3515 return Match;
3516 }
3517 return SDValue();
3518}
3519
3521 EVT VT = N->getValueType(0);
3522 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3523 return SDValue();
3524
3525 // Mul with extending inputs.
3526 SDValue LHS = N->getOperand(0);
3527 SDValue RHS = N->getOperand(1);
3528 if (LHS.getOpcode() != RHS.getOpcode())
3529 return SDValue();
3530
3531 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3532 LHS.getOpcode() != ISD::ZERO_EXTEND)
3533 return SDValue();
3534
3535 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3536 return SDValue();
3537
3538 EVT FromVT = LHS->getOperand(0).getValueType();
3539 EVT EltTy = FromVT.getVectorElementType();
3540 if (EltTy != MVT::i8)
3541 return SDValue();
3542
3543 // For an input DAG that looks like this
3544 // %a = input_type
3545 // %b = input_type
3546 // %lhs = extend %a to output_type
3547 // %rhs = extend %b to output_type
3548 // %mul = mul %lhs, %rhs
3549
3550 // input_type | output_type | instructions
3551 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3552 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3553 // | | %low_low = i32x4.ext_low_i16x8_ %low
3554 // | | %low_high = i32x4.ext_high_i16x8_ %low
3555 // | | %high_low = i32x4.ext_low_i16x8_ %high
3556 // | | %high_high = i32x4.ext_high_i16x8_ %high
3557 // | | %res = concat_vector(...)
3558 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3559 // | | %low_low = i32x4.ext_low_i16x8_ %low
3560 // | | %low_high = i32x4.ext_high_i16x8_ %low
3561 // | | %res = concat_vector(%low_low, %low_high)
3562
3563 SDLoc DL(N);
3564 unsigned NumElts = VT.getVectorNumElements();
3565 SDValue ExtendInLHS = LHS->getOperand(0);
3566 SDValue ExtendInRHS = RHS->getOperand(0);
3567 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3568 unsigned ExtendLowOpc =
3569 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3570 unsigned ExtendHighOpc =
3571 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3572
3573 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3574 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3575 };
3576 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3577 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3578 };
3579
3580 if (NumElts == 16) {
3581 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3582 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3583 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3584 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3585 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3586 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3587 SDValue SubVectors[] = {
3588 GetExtendLow(MVT::v4i32, MulLow),
3589 GetExtendHigh(MVT::v4i32, MulLow),
3590 GetExtendLow(MVT::v4i32, MulHigh),
3591 GetExtendHigh(MVT::v4i32, MulHigh),
3592 };
3593 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3594 } else {
3595 assert(NumElts == 8);
3596 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3597 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3598 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3599 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3600 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3601 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3602 }
3603 return SDValue();
3604}
3605
3608 assert(N->getOpcode() == ISD::MUL);
3609 EVT VT = N->getValueType(0);
3610 if (!VT.isVector())
3611 return SDValue();
3612
3613 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3614 return Res;
3615
3616 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3617 // extend them to v8i16.
3618 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3619 return SDValue();
3620
3621 SDLoc DL(N);
3622 SelectionDAG &DAG = DCI.DAG;
3623 SDValue LHS = N->getOperand(0);
3624 SDValue RHS = N->getOperand(1);
3625 EVT MulVT = MVT::v8i16;
3626
3627 if (VT == MVT::v8i8) {
3628 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3629 DAG.getUNDEF(MVT::v8i8));
3630 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3631 DAG.getUNDEF(MVT::v8i8));
3632 SDValue LowLHS =
3633 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3634 SDValue LowRHS =
3635 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3636 SDValue MulLow = DAG.getBitcast(
3637 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3638 // Take the low byte of each lane.
3639 SDValue Shuffle = DAG.getVectorShuffle(
3640 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3641 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3642 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3643 } else {
3644 assert(VT == MVT::v16i8 && "Expected v16i8");
3645 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3646 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3647 SDValue HighLHS =
3648 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3649 SDValue HighRHS =
3650 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3651
3652 SDValue MulLow =
3653 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3654 SDValue MulHigh =
3655 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3656
3657 // Take the low byte of each lane.
3658 return DAG.getVectorShuffle(
3659 VT, DL, MulLow, MulHigh,
3660 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3661 }
3662}
3663
3664SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3665 SelectionDAG &DAG) {
3666 SDLoc DL(In);
3667 LLVMContext &Ctx = *DAG.getContext();
3668 EVT InVT = In.getValueType();
3669 unsigned NumElems = InVT.getVectorNumElements() * 2;
3670 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3671 SDValue Concat =
3672 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3673 if (NumElems < RequiredNumElems) {
3674 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3675 }
3676 return Concat;
3677}
3678
3680 EVT OutVT = N->getValueType(0);
3681 if (!OutVT.isVector())
3682 return SDValue();
3683
3684 EVT OutElTy = OutVT.getVectorElementType();
3685 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3686 return SDValue();
3687
3688 unsigned NumElems = OutVT.getVectorNumElements();
3689 if (!isPowerOf2_32(NumElems))
3690 return SDValue();
3691
3692 EVT FPVT = N->getOperand(0)->getValueType(0);
3693 if (FPVT.getVectorElementType() != MVT::f32)
3694 return SDValue();
3695
3696 SDLoc DL(N);
3697
3698 // First, convert to i32.
3699 LLVMContext &Ctx = *DAG.getContext();
3700 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3701 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3703 OutVT.getScalarSizeInBits());
3704 // Mask out the top MSBs.
3705 SDValue Masked =
3706 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3707
3708 if (OutVT.getSizeInBits() < 128) {
3709 // Create a wide enough vector that we can use narrow.
3710 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3711 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3712 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3713 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3714 return DAG.getBitcast(
3715 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3716 } else {
3717 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3718 }
3719 return SDValue();
3720}
3721
3722SDValue
3723WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3724 DAGCombinerInfo &DCI) const {
3725 switch (N->getOpcode()) {
3726 default:
3727 return SDValue();
3728 case ISD::BITCAST:
3729 return performBitcastCombine(N, DCI);
3730 case ISD::SETCC:
3731 return performSETCCCombine(N, DCI, Subtarget);
3733 return performVECTOR_SHUFFLECombine(N, DCI);
3734 case ISD::SIGN_EXTEND:
3735 case ISD::ZERO_EXTEND:
3736 return performVectorExtendCombine(N, DCI);
3737 case ISD::UINT_TO_FP:
3738 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3739 return ExtCombine;
3740 return performVectorNonNegToFPCombine(N, DCI);
3741 case ISD::SINT_TO_FP:
3742 return performVectorExtendToFPCombine(N, DCI);
3745 case ISD::FP_ROUND:
3747 return performVectorTruncZeroCombine(N, DCI);
3748 case ISD::FP_TO_SINT:
3749 case ISD::FP_TO_UINT:
3750 return performConvertFPCombine(N, DCI.DAG);
3751 case ISD::TRUNCATE:
3752 return performTruncateCombine(N, DCI);
3754 return performAnyAllCombine(N, DCI.DAG);
3755 case ISD::MUL:
3756 return performMulCombine(N, DCI);
3757 }
3758}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Hexagon Common GEP
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
const SmallVectorImpl< MachineOperand > & Cond
Contains matchers for matching SelectionDAG nodes and values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static MachineBasicBlock * LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode)
static bool callingConvSupported(CallingConv::ID CallConv)
static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static std::optional< unsigned > IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorNonNegToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG)
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, const WebAssemblySubtarget *Subtarget, const TargetInstrInfo &TII)
static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG)
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, SelectionDAG &DAG)
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool IsWebAssemblyGlobal(SDValue Op)
static MachineBasicBlock * LowerMemset(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG)
SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems, SelectionDAG &DAG)
static SDValue performVectorExtendToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get split up into scalar instr...
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG)
This file defines the interfaces that WebAssembly uses to lower LLVM code into a selection DAG.
This file provides WebAssembly-specific target descriptions.
This file declares WebAssembly-specific per-machine-function information.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
This file declares the WebAssembly-specific subclass of TargetMachine.
This file contains the declaration of the WebAssembly-specific type parsing utility functions.
This file contains the declaration of the WebAssembly-specific utility functions.
X86 cmov Conversion
static constexpr int Concat[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1400
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:358
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
ThreadLocalMode getThreadLocalMode() const
Type * getValueType() const
unsigned getTargetFlags() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Describe properties that are true of each instruction in the target description file.
void setNoStrip() const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool isFixedLengthVector() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFPImm(const ConstantFP *Val) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
const std::vector< MachineJumpTableEntry > & getJumpTables() const
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getBasicBlock(MachineBasicBlock *MBB)
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI bool isKnownNeverZeroFloat(SDValue Op) const
Test whether the given floating point SDValue is known to never be positive or negative zero.
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:717
static std::optional< unsigned > getLocalForStackObject(MachineFunction &MF, int FrameIndex)
WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI)
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const override
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const override
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ WASM_EmscriptenInvoke
For emscripten __invoke_* functions.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ CLEAR_CACHE
llvm.clear_cache intrinsic Operands: Input Chain, Start Address, End Address Outputs: Output Chain
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
OperandFlags
These are flags set on operands, but should be considered private, all access should go through the M...
Definition MCInstrDesc.h:51
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __indirect_function_table, for use in call_indirect and in function bitcasts.
bool isWebAssemblyFuncrefType(const Type *Ty)
Return true if this is a WebAssembly Funcref Type.
bool isWebAssemblyTableType(const Type *Ty)
Return true if the table represents a WebAssembly table type.
MCSymbolWasm * getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __funcref_call_table, for use in funcref calls when lowered to table.set + call_indirect.
bool isValidAddressSpace(unsigned AS)
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget)
Returns true if the function's return value(s) can be lowered directly, i.e., not indirectly via a po...
bool isWasmVarAddressSpace(unsigned AS)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, const TargetMachine &TM, SmallVectorImpl< MVT > &Params, SmallVectorImpl< MVT > &Results)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2088
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl< MVT > &ValueVTs)
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:452
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:461
Align getNonZeroOrigAlign() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.