LLVM 23.0.0git
WebAssemblyISelLowering.cpp
Go to the documentation of this file.
1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
111 }
112 }
113
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
145 else
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
152 // Support minimum and maximum, which otherwise default to expand.
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
160 }
161 if (Subtarget->hasFP16() && T == MVT::f32) {
163 setTruncStoreAction(T, MVT::f16, Legal);
164 } else {
166 setTruncStoreAction(T, MVT::f16, Expand);
167 }
168 }
169
170 // Expand unavailable integer operations.
171 for (auto Op :
175 for (auto T : {MVT::i32, MVT::i64})
177 if (Subtarget->hasSIMD128())
178 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
180 }
181
182 if (Subtarget->hasWideArithmetic()) {
188 }
189
190 if (Subtarget->hasNontrappingFPToInt())
192 for (auto T : {MVT::i32, MVT::i64})
194
195 if (Subtarget->hasRelaxedSIMD()) {
198 {MVT::v4f32, MVT::v2f64}, Custom);
199 }
200 // SIMD-specific configuration
201 if (Subtarget->hasSIMD128()) {
202
204
205 // Combine wide-vector muls, with extend inputs, to extmul_half.
208
209 // Combine vector mask reductions into alltrue/anytrue
211
212 // Convert vector to integer bitcasts to bitmask
214
215 // Hoist bitcasts out of shuffles
217
218 // Combine extends of extract_subvectors into widening ops
220
221 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
222 // conversions ops
225
226 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
227 // into conversion ops
231
233
234 // Support saturating add/sub for i8x16 and i16x8
236 for (auto T : {MVT::v16i8, MVT::v8i16})
238
239 // Support integer abs
240 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
242
243 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
244 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
245 MVT::v2f64})
247
248 if (Subtarget->hasFP16()) {
251 }
252
253 // We have custom shuffle lowering to expose the shuffle mask
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
257
258 if (Subtarget->hasFP16())
260
261 // Support splatting
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
263 MVT::v2f64})
265
266 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
267
268 // Custom lowering since wasm shifts must have a scalar shift amount
269 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
270 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
272
273 // Custom lower lane accesses to expand out variable indices
275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
276 MVT::v2f64})
278
279 // There is no i8x16.mul instruction
280 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
281
282 // Expand integer operations supported for scalars but not SIMD
283 for (auto Op :
285 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
287
288 // But we do have integer min and max operations
289 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
290 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
292
293 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
294 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
295 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
296 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
297
298 // Custom lower bit counting operations for other types to scalarize them.
299 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
300 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
302
303 // Expand float operations supported for scalars but not SIMD
306 for (auto T : {MVT::v4f32, MVT::v2f64})
308
309 // Unsigned comparison operations are unavailable for i64x2 vectors.
311 setCondCodeAction(CC, MVT::v2i64, Custom);
312
313 // 64x2 conversions are not in the spec
314 for (auto Op :
316 for (auto T : {MVT::v2i64, MVT::v2f64})
318
319 // But saturating fp_to_int conversions are
321 setOperationAction(Op, MVT::v4i32, Custom);
322 if (Subtarget->hasFP16()) {
323 setOperationAction(Op, MVT::v8i16, Custom);
324 }
325 }
326
327 // Support vector extending
332 }
333
334 if (Subtarget->hasFP16()) {
335 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
336 }
337
338 if (Subtarget->hasRelaxedSIMD()) {
341 }
342
343 // Partial MLA reductions.
345 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
346 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
347 }
348 }
349
350 // As a special case, these operators use the type to mean the type to
351 // sign-extend from.
353 if (!Subtarget->hasSignExt()) {
354 // Sign extends are legal only when extending a vector extract
355 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
356 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
358 }
361
362 // Dynamic stack allocation: use the default expansion.
366
370
371 // Expand these forms; we pattern-match the forms that we can handle in isel.
372 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
373 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
375
376 if (Subtarget->hasReferenceTypes())
377 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
378 for (auto T : {MVT::externref, MVT::funcref})
380
381 // There is no vector conditional select instruction
382 for (auto T :
383 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
385
386 // We have custom switch handling.
388
389 // WebAssembly doesn't have:
390 // - Floating-point extending loads.
391 // - Floating-point truncating stores.
392 // - i1 extending loads.
393 // - truncating SIMD stores and most extending loads
394 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
395 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
396 for (auto T : MVT::integer_valuetypes())
397 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
398 setLoadExtAction(Ext, T, MVT::i1, Promote);
399 if (Subtarget->hasSIMD128()) {
400 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
401 MVT::v2f64}) {
402 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
403 if (MVT(T) != MemT) {
405 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
406 setLoadExtAction(Ext, T, MemT, Expand);
407 }
408 }
409 }
410 // But some vector extending loads are legal
411 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
412 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
413 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
414 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
415 }
416 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
417 }
418
419 // Don't do anything clever with build_pairs
421
422 // Trap lowers to wasm unreachable
423 setOperationAction(ISD::TRAP, MVT::Other, Legal);
425
426 // Exception handling intrinsics
430
432
433 // Always convert switches to br_tables unless there is only one case, which
434 // is equivalent to a simple branch. This reduces code size for wasm, and we
435 // defer possible jump table optimizations to the VM.
437}
438
// Hook that inspects the operation of an atomic read-modify-write instruction
// (AI->getOperation()) to decide how it should be handled.
// NOTE(review): the embedded listing numbers jump 438->440, 443->451 and
// 453->455, so the return-type line, the switch's case labels and the
// statement after the switch appear to be missing from this extract. Consult
// the upstream file before relying on this block.
440WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
441 const AtomicRMWInst *AI) const {
442 // We have wasm instructions for these
443 switch (AI->getOperation()) {
451 default:
  // Operations not matched by a case above fall out of the switch.
452 break;
453 }
455}
456
// Returns whether the binary vector operation VecOp should be converted to a
// scalar operation. The final answer is whether the same opcode is legal,
// custom, or promoted on the vector's scalar element type.
// NOTE(review): the embedded listing numbers skip 463 and 468, so the `if`
// conditions guarding the two early returns below appear to be missing from
// this extract.
457bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
458 // Implementation copied from X86TargetLowering.
459 unsigned Opc = VecOp.getOpcode();
460
461 // Assume target opcodes can't be scalarized.
462 // TODO - do we have any exceptions?
464 return false;
465
466 // If the vector op is not supported, try to convert to scalar.
467 EVT VecVT = VecOp.getValueType();
469 return true;
470
471 // If the vector op is supported, but the scalar op is not, the transform may
472 // not be worthwhile.
473 EVT ScalarVT = VecVT.getScalarType();
474 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
475}
476
477FastISel *WebAssemblyTargetLowering::createFastISel(
478 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
479 const LibcallLoweringInfo *LibcallLowering) const {
480 return WebAssembly::createFastISel(FuncInfo, LibInfo, LibcallLowering);
481}
482
// Chooses the type used for scalar shift amounts when shifting a value of
// type VT. The width is derived from VT's bit size via NextPowerOf2, widths
// strictly between 1 and 8 are widened to 8, and widths above 64 fall back to
// 32 bits. The DataLayout argument is unused.
// NOTE(review): the embedded listing numbers skip 493 and 497-498, so the
// opening of both assert statements and the declaration of `Result` appear to
// be missing from this extract.
483MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
484 EVT VT) const {
485 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
  // Never use a shift-amount width between 2 and 7 bits; use 8 instead.
486 if (BitWidth > 1 && BitWidth < 8)
487 BitWidth = 8;
488
489 if (BitWidth > 64) {
490 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
491 // the count to be an i32.
492 BitWidth = 32;
494 "32-bit shift counts ought to be enough for anyone");
495 }
496
499 "Unable to represent scalar shift amount type");
500 return Result;
501}
502
503// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
504// undefined result on invalid/overflow, to the WebAssembly opcode, which
505// traps on invalid/overflow.
508 const TargetInstrInfo &TII,
509 bool IsUnsigned, bool Int64,
510 bool Float64, unsigned LoweredOpcode) {
512
513 Register OutReg = MI.getOperand(0).getReg();
514 Register InReg = MI.getOperand(1).getReg();
515
516 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
517 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
518 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
519 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
520 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
521 unsigned Eqz = WebAssembly::EQZ_I32;
522 unsigned And = WebAssembly::AND_I32;
523 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
524 int64_t Substitute = IsUnsigned ? 0 : Limit;
525 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
526 auto &Context = BB->getParent()->getFunction().getContext();
527 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
528
529 const BasicBlock *LLVMBB = BB->getBasicBlock();
530 MachineFunction *F = BB->getParent();
531 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
532 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
533 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
534
536 F->insert(It, FalseMBB);
537 F->insert(It, TrueMBB);
538 F->insert(It, DoneMBB);
539
540 // Transfer the remainder of BB and its successor edges to DoneMBB.
541 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
543
544 BB->addSuccessor(TrueMBB);
545 BB->addSuccessor(FalseMBB);
546 TrueMBB->addSuccessor(DoneMBB);
547 FalseMBB->addSuccessor(DoneMBB);
548
549 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
550 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
551 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
552 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
553 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
554 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
555 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
556
557 MI.eraseFromParent();
558 // For signed numbers, we can do a single comparison to determine whether
559 // fabs(x) is within range.
560 if (IsUnsigned) {
561 Tmp0 = InReg;
562 } else {
563 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
564 }
565 BuildMI(BB, DL, TII.get(FConst), Tmp1)
566 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
567 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
568
569 // For unsigned numbers, we have to do a separate comparison with zero.
570 if (IsUnsigned) {
571 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
572 Register SecondCmpReg =
573 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
574 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
575 BuildMI(BB, DL, TII.get(FConst), Tmp1)
576 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
577 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
578 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
579 CmpReg = AndReg;
580 }
581
582 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
583
584 // Create the CFG diamond to select between doing the conversion or using
585 // the substitute value.
586 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
587 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
588 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
589 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
590 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
591 .addReg(FalseReg)
592 .addMBB(FalseMBB)
593 .addReg(TrueReg)
594 .addMBB(TrueMBB);
595
596 return DoneMBB;
597}
598
599// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
600// instruction to handle the zero-length case.
603 const TargetInstrInfo &TII, bool Int64) {
605
606 MachineOperand DstMem = MI.getOperand(0);
607 MachineOperand SrcMem = MI.getOperand(1);
608 MachineOperand Dst = MI.getOperand(2);
609 MachineOperand Src = MI.getOperand(3);
610 MachineOperand Len = MI.getOperand(4);
611
612 // If the length is a constant, we don't actually need the check.
613 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
614 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
615 Def->getOpcode() == WebAssembly::CONST_I64) {
616 if (Def->getOperand(1).getImm() == 0) {
617 // A zero-length memcpy is a no-op.
618 MI.eraseFromParent();
619 return BB;
620 }
621 // A non-zero-length memcpy doesn't need a zero check.
622 unsigned MemoryCopy =
623 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
624 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
625 .add(DstMem)
626 .add(SrcMem)
627 .add(Dst)
628 .add(Src)
629 .add(Len);
630 MI.eraseFromParent();
631 return BB;
632 }
633 }
634
635 // We're going to add an extra use to `Len` to test if it's zero; that
636 // use shouldn't be a kill, even if the original use is.
637 MachineOperand NoKillLen = Len;
638 NoKillLen.setIsKill(false);
639
640 // Decide on which `MachineInstr` opcode we're going to use.
641 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
642 unsigned MemoryCopy =
643 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
644
645 // Create two new basic blocks; one for the new `memory.copy` that we can
646 // branch over, and one for the rest of the instructions after the original
647 // `memory.copy`.
648 const BasicBlock *LLVMBB = BB->getBasicBlock();
649 MachineFunction *F = BB->getParent();
650 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
651 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
652
654 F->insert(It, TrueMBB);
655 F->insert(It, DoneMBB);
656
657 // Transfer the remainder of BB and its successor edges to DoneMBB.
658 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
660
661 // Connect the CFG edges.
662 BB->addSuccessor(TrueMBB);
663 BB->addSuccessor(DoneMBB);
664 TrueMBB->addSuccessor(DoneMBB);
665
666 // Create a virtual register for the `Eqz` result.
667 unsigned EqzReg;
668 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
669
670 // Erase the original `memory.copy`.
671 MI.eraseFromParent();
672
673 // Test if `Len` is zero.
674 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
675
676 // Insert a new `memory.copy`.
677 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
678 .add(DstMem)
679 .add(SrcMem)
680 .add(Dst)
681 .add(Src)
682 .add(Len);
683
684 // Create the CFG triangle.
685 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
686 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
687
688 return DoneMBB;
689}
690
691// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
692// instruction to handle the zero-length case.
695 const TargetInstrInfo &TII, bool Int64) {
697
698 MachineOperand Mem = MI.getOperand(0);
699 MachineOperand Dst = MI.getOperand(1);
700 MachineOperand Val = MI.getOperand(2);
701 MachineOperand Len = MI.getOperand(3);
702
703 // If the length is a constant, we don't actually need the check.
704 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
705 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
706 Def->getOpcode() == WebAssembly::CONST_I64) {
707 if (Def->getOperand(1).getImm() == 0) {
708 // A zero-length memset is a no-op.
709 MI.eraseFromParent();
710 return BB;
711 }
712 // A non-zero-length memset doesn't need a zero check.
713 unsigned MemoryFill =
714 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
715 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
716 .add(Mem)
717 .add(Dst)
718 .add(Val)
719 .add(Len);
720 MI.eraseFromParent();
721 return BB;
722 }
723 }
724
725 // We're going to add an extra use to `Len` to test if it's zero; that
726 // use shouldn't be a kill, even if the original use is.
727 MachineOperand NoKillLen = Len;
728 NoKillLen.setIsKill(false);
729
730 // Decide on which `MachineInstr` opcode we're going to use.
731 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
732 unsigned MemoryFill =
733 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
734
735 // Create two new basic blocks; one for the new `memory.fill` that we can
736 // branch over, and one for the rest of the instructions after the original
737 // `memory.fill`.
738 const BasicBlock *LLVMBB = BB->getBasicBlock();
739 MachineFunction *F = BB->getParent();
740 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
741 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
742
744 F->insert(It, TrueMBB);
745 F->insert(It, DoneMBB);
746
747 // Transfer the remainder of BB and its successor edges to DoneMBB.
748 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
750
751 // Connect the CFG edges.
752 BB->addSuccessor(TrueMBB);
753 BB->addSuccessor(DoneMBB);
754 TrueMBB->addSuccessor(DoneMBB);
755
756 // Create a virtual register for the `Eqz` result.
757 unsigned EqzReg;
758 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
759
760 // Erase the original `memory.fill`.
761 MI.eraseFromParent();
762
763 // Test if `Len` is zero.
764 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
765
766 // Insert a new `memory.fill`.
767 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
768
769 // Create the CFG triangle.
770 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
771 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
772
773 return DoneMBB;
774}
775
// Replaces a CALL_PARAMS / CALL_RESULTS (or RET_CALL_RESULTS) instruction pair
// with a single call instruction inserted before CallResults, then erases both
// pseudo instructions and returns BB. The call opcode is one of CALL,
// CALL_INDIRECT, RET_CALL or RET_CALL_INDIRECT. For calls through a funcref
// register, extra instructions are emitted after the call to reset table
// slot 0.
// NOTE(review): the embedded listing numbers skip 777, 845, 847 and 877, so
// the line naming the function and its leading parameters, and the calls that
// produce the table symbols, appear to be missing from this extract.
776static MachineBasicBlock *
778 const WebAssemblySubtarget *Subtarget,
779 const TargetInstrInfo &TII) {
  // The parameters pseudo is expected to sit immediately before CallResults.
780 MachineInstr &CallParams = *CallResults.getPrevNode();
781 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
782 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
783 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
784
  // A callee operand that is a register or a frame index marks an indirect
  // call.
785 bool IsIndirect =
786 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
787 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
788
  // An indirect call is a funcref call when the callee register belongs to
  // the FUNCREF register class.
789 bool IsFuncrefCall = false;
790 if (IsIndirect && CallParams.getOperand(0).isReg()) {
791 Register Reg = CallParams.getOperand(0).getReg();
792 const MachineFunction *MF = BB->getParent();
793 const MachineRegisterInfo &MRI = MF->getRegInfo();
794 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
795 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
796 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
797 }
798
  // Pick the call opcode from the (indirect, return-call) combination.
799 unsigned CallOp;
800 if (IsIndirect && IsRetCall) {
801 CallOp = WebAssembly::RET_CALL_INDIRECT;
802 } else if (IsIndirect) {
803 CallOp = WebAssembly::CALL_INDIRECT;
804 } else if (IsRetCall) {
805 CallOp = WebAssembly::RET_CALL;
806 } else {
807 CallOp = WebAssembly::CALL;
808 }
809
  // Build the call instruction detached from any block; it is inserted into
  // BB further down, once all of its operands have been added.
810 MachineFunction &MF = *BB->getParent();
811 const MCInstrDesc &MCID = TII.get(CallOp);
812 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
813
814 // Move the function pointer to the end of the arguments for indirect calls
815 if (IsIndirect) {
816 auto FnPtr = CallParams.getOperand(0);
817 CallParams.removeOperand(0);
818
819 // For funcrefs, call_indirect is done through __funcref_call_table and the
820 // funcref is always installed in slot 0 of the table. Therefore, instead of
821 // appending the function pointer to the end of the params list, we append
822 // a zero, which is the index of that slot in
823 // __funcref_call_table.
824 if (IsFuncrefCall) {
825 Register RegZero =
826 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
827 MachineInstrBuilder MIBC0 =
828 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
829
830 BB->insert(CallResults.getIterator(), MIBC0);
831 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
832 } else
833 CallParams.addOperand(FnPtr);
834 }
835
  // Operand order of the new call: results (defs) first, then the indirect
  // call's type-index and table operands, then the parameters (uses).
836 for (auto Def : CallResults.defs())
837 MIB.add(Def);
838
839 if (IsIndirect) {
840 // Placeholder for the type index.
841 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
842 MIB.addImm(0);
843 // The table into which this call_indirect indexes.
  // NOTE(review): both arms of this conditional expression are truncated in
  // this extract (listing lines 845 and 847 are missing).
844 MCSymbolWasm *Table = IsFuncrefCall
846 MF.getContext(), Subtarget)
848 MF.getContext(), Subtarget);
849 if (Subtarget->hasCallIndirectOverlong()) {
850 MIB.addSym(Table);
851 } else {
852 // For the MVP there is at most one table whose number is 0, but we can't
853 // write a table symbol or issue relocations. Instead we just ensure the
854 // table is live and write a zero.
855 Table->setNoStrip();
856 MIB.addImm(0);
857 }
858 }
859
860 for (auto Use : CallParams.uses())
861 MIB.add(Use);
862
  // Insert the finished call where CallResults was, then drop both pseudos.
863 BB->insert(CallResults.getIterator(), MIB);
864 CallParams.eraseFromParent();
865 CallResults.eraseFromParent();
866
867 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
868 // table slot with ref.null upon call_indirect return.
869 //
870 // This generates the following code, which comes right after a call_indirect
871 // of a funcref:
872 //
873 // i32.const 0
874 // ref.null func
875 // table.set __funcref_call_table
876 if (IsIndirect && IsFuncrefCall) {
  // NOTE(review): the declaration of `Table` used by the table.set below is
  // truncated in this extract (listing line 877 is missing).
878 MF.getContext(), Subtarget);
879 Register RegZero =
880 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
881 MachineInstr *Const0 =
882 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
883 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
884
885 Register RegFuncref =
886 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
887 MachineInstr *RefNull =
888 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
889 BB->insertAfter(Const0->getIterator(), RefNull);
890
891 MachineInstr *TableSet =
892 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
893 .addSym(Table)
894 .addReg(RegZero)
895 .addReg(RegFuncref);
896 BB->insertAfter(RefNull->getIterator(), TableSet);
897 }
898
899 return BB;
900}
901
902MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
903 MachineInstr &MI, MachineBasicBlock *BB) const {
904 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
905 DebugLoc DL = MI.getDebugLoc();
906
907 switch (MI.getOpcode()) {
908 default:
909 llvm_unreachable("Unexpected instr type to insert");
910 case WebAssembly::FP_TO_SINT_I32_F32:
911 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
912 WebAssembly::I32_TRUNC_S_F32);
913 case WebAssembly::FP_TO_UINT_I32_F32:
914 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
915 WebAssembly::I32_TRUNC_U_F32);
916 case WebAssembly::FP_TO_SINT_I64_F32:
917 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
918 WebAssembly::I64_TRUNC_S_F32);
919 case WebAssembly::FP_TO_UINT_I64_F32:
920 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
921 WebAssembly::I64_TRUNC_U_F32);
922 case WebAssembly::FP_TO_SINT_I32_F64:
923 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
924 WebAssembly::I32_TRUNC_S_F64);
925 case WebAssembly::FP_TO_UINT_I32_F64:
926 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
927 WebAssembly::I32_TRUNC_U_F64);
928 case WebAssembly::FP_TO_SINT_I64_F64:
929 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
930 WebAssembly::I64_TRUNC_S_F64);
931 case WebAssembly::FP_TO_UINT_I64_F64:
932 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
933 WebAssembly::I64_TRUNC_U_F64);
934 case WebAssembly::MEMCPY_A32:
935 return LowerMemcpy(MI, DL, BB, TII, false);
936 case WebAssembly::MEMCPY_A64:
937 return LowerMemcpy(MI, DL, BB, TII, true);
938 case WebAssembly::MEMSET_A32:
939 return LowerMemset(MI, DL, BB, TII, false);
940 case WebAssembly::MEMSET_A64:
941 return LowerMemset(MI, DL, BB, TII, true);
942 case WebAssembly::CALL_RESULTS:
943 case WebAssembly::RET_CALL_RESULTS:
944 return LowerCallResults(MI, DL, BB, Subtarget, TII);
945 }
946}
947
// Maps an inline-asm register constraint to a WebAssembly register class.
//
// Only the single-letter constraint 'r' is handled here, selected by VT:
//  - vector types of exactly 128 bits (when the subtarget has SIMD128) use
//    V128,
//  - scalar integers up to 32 bits use I32, and up to 64 bits use I64,
//  - scalar floating-point types of 32 or 64 bits use F32 or F64.
// In each case the returned pair is (0U, register class).
// NOTE(review): the embedded listing numbers skip 983, so the statement that
// handles every other constraint/type combination appears to be missing from
// this extract.
948std::pair<unsigned, const TargetRegisterClass *>
949WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
950 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
951 // First, see if this is a constraint that directly corresponds to a
952 // WebAssembly register class.
953 if (Constraint.size() == 1) {
954 switch (Constraint[0]) {
955 case 'r':
956 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
  // Vectors: only 128-bit vectors get a register class, and only with SIMD128.
957 if (Subtarget->hasSIMD128() && VT.isVector()) {
958 if (VT.getSizeInBits() == 128)
959 return std::make_pair(0U, &WebAssembly::V128RegClass);
960 }
  // Scalar integers: use the narrowest of I32 / I64 that holds the type.
961 if (VT.isInteger() && !VT.isVector()) {
962 if (VT.getSizeInBits() <= 32)
963 return std::make_pair(0U, &WebAssembly::I32RegClass);
964 if (VT.getSizeInBits() <= 64)
965 return std::make_pair(0U, &WebAssembly::I64RegClass);
966 }
  // Scalar floats: only the exact 32- and 64-bit sizes are matched.
967 if (VT.isFloatingPoint() && !VT.isVector()) {
968 switch (VT.getSizeInBits()) {
969 case 32:
970 return std::make_pair(0U, &WebAssembly::F32RegClass);
971 case 64:
972 return std::make_pair(0U, &WebAssembly::F64RegClass);
973 default:
974 break;
975 }
976 }
977 break;
978 default:
979 break;
980 }
981 }
982
984}
985
986bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
987 // Assume ctz is a relatively cheap operation.
988 return true;
989}
990
991bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
992 // Assume clz is a relatively cheap operation.
993 return true;
994}
995
996bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
997 const AddrMode &AM,
998 Type *Ty, unsigned AS,
999 Instruction *I) const {
1000 // WebAssembly offsets are added as unsigned without wrapping. The
1001 // isLegalAddressingMode gives us no way to determine if wrapping could be
1002 // happening, so we approximate this by accepting only non-negative offsets.
1003 if (AM.BaseOffs < 0)
1004 return false;
1005
1006 // WebAssembly has no scale register operands.
1007 if (AM.Scale != 0)
1008 return false;
1009
1010 // Everything else is legal.
1011 return true;
1012}
1013
1014bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1015 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1016 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1017 // WebAssembly supports unaligned accesses, though it should be declared
1018 // with the p2align attribute on loads and stores which do so, and there
1019 // may be a performance impact. We tell LLVM they're "fast" because
1020 // for the kinds of things that LLVM uses this for (merging adjacent stores
1021 // of constants, etc.), WebAssembly implementations will either want the
1022 // unaligned access or they'll split anyway.
1023 if (Fast)
1024 *Fast = 1;
1025 return true;
1026}
1027
1028bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1029 AttributeList Attr) const {
1030 // The current thinking is that wasm engines will perform this optimization,
1031 // so we can save on code size.
1032 return true;
1033}
1034
1035bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1036 EVT ExtT = ExtVal.getValueType();
1037 SDValue N0 = peekThroughFreeze(ExtVal->getOperand(0));
1038 auto *Load = dyn_cast<LoadSDNode>(N0);
1039 if (!Load)
1040 return false;
1041 EVT MemT = Load->getValueType(0);
1042 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1043 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1044 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1045}
1046
1047bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1048 const GlobalAddressSDNode *GA) const {
1049 // Wasm doesn't support function addresses with offsets
1050 const GlobalValue *GV = GA->getGlobal();
1052}
1053
// Chooses the value type used for the result of a comparison whose operands
// have type VT. Scalars always yield a 32-bit integer type.
// NOTE(review): this listing skips source line 1061, so the value returned for
// vectors that do not take the f16 branch below is not visible here.
1054EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1055 LLVMContext &C,
1056 EVT VT) const {
1057 if (VT.isVector()) {
// f16 vectors without the FP16 feature get VT with its element type replaced
// by i1.
1058 if (VT.getVectorElementType() == MVT::f16 && !Subtarget->hasFP16())
1059 return VT.changeElementType(C, MVT::i1);
1060
1062 }
1063
1064 // So far, all branch instructions in Wasm take an I32 condition.
1065 // The default TargetLowering::getSetCCResultType returns the pointer size,
1066 // which would be useful to reduce instruction counts when testing
1067 // against 64-bit pointers/values if at some point Wasm supports that.
1068 return EVT::getIntegerVT(C, 32);
1069}
1070
// Describes the memory access made by selected WebAssembly intrinsics. For
// each handled intrinsic, Info is filled in (memory type, pointer operand,
// offset, alignment) and appended to Infos; any other intrinsic adds nothing.
// NOTE(review): this listing skips several source lines (part of the
// signature, the declaration of Info, and lines inside each case), so the
// remaining fields that may be set on Info are not visible here.
1071void WebAssemblyTargetLowering::getTgtMemIntrinsic(
1073 MachineFunction &MF, unsigned Intrinsic) const {
1075 switch (Intrinsic) {
1076 case Intrinsic::wasm_memory_atomic_notify:
1078 Info.memVT = MVT::i32;
1079 Info.ptrVal = I.getArgOperand(0);
1080 Info.offset = 0;
1081 Info.align = Align(4);
1082 // atomic.notify instruction does not really load the memory specified with
1083 // this argument, but MachineMemOperand should either be load or store, so
1084 // we set this to a load.
1085 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1086 // instructions are treated as volatiles in the backend, so we should be
1087 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1089 Infos.push_back(Info);
1090 return;
// 32-bit wait: an i32 access through argument 0, 4-byte aligned.
1091 case Intrinsic::wasm_memory_atomic_wait32:
1093 Info.memVT = MVT::i32;
1094 Info.ptrVal = I.getArgOperand(0);
1095 Info.offset = 0;
1096 Info.align = Align(4);
1098 Infos.push_back(Info);
1099 return;
// 64-bit wait: an i64 access through argument 0, 8-byte aligned.
1100 case Intrinsic::wasm_memory_atomic_wait64:
1102 Info.memVT = MVT::i64;
1103 Info.ptrVal = I.getArgOperand(0);
1104 Info.offset = 0;
1105 Info.align = Align(8);
1107 Infos.push_back(Info);
1108 return;
// f16 load: the pointer is argument 0 and the in-memory type is f16.
1109 case Intrinsic::wasm_loadf16_f32:
1111 Info.memVT = MVT::f16;
1112 Info.ptrVal = I.getArgOperand(0);
1113 Info.offset = 0;
1114 Info.align = Align(2);
1116 Infos.push_back(Info);
1117 return;
// f16 store: the pointer is argument 1 (not argument 0 as in the other cases).
1118 case Intrinsic::wasm_storef16_f32:
1120 Info.memVT = MVT::f16;
1121 Info.ptrVal = I.getArgOperand(1);
1122 Info.offset = 0;
1123 Info.align = Align(2);
1125 Infos.push_back(Info);
1126 return;
1127 default:
1128 return;
1129 }
1130}
1131
// Adds known-zero bits to Known for a few target-specific nodes: the
// wasm_bitmask intrinsic, the unsigned EXTEND_LOW/EXTEND_HIGH nodes, and the
// second result of I64_ADD128. Other opcodes leave Known untouched.
// NOTE(review): this listing skips source lines 1138, 1162, 1166 and 1170, so
// the case label that opens the intrinsic handling and the definitions of
// Mask in the extend cases are not visible here.
1132void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1133 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1134 const SelectionDAG &DAG, unsigned Depth) const {
1135 switch (Op.getOpcode()) {
1136 default:
1137 break;
1139 unsigned IntNo = Op.getConstantOperandVal(0);
1140 switch (IntNo) {
1141 default:
1142 break;
1143 case Intrinsic::wasm_bitmask: {
// Every bit above the number of vector elements of operand 1 is marked as
// known zero.
1144 unsigned BitWidth = Known.getBitWidth();
1145 EVT VT = Op.getOperand(1).getSimpleValueType();
1146 unsigned PossibleBits = VT.getVectorNumElements();
1147 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1148 Known.Zero |= ZeroMask;
1149 break;
1150 }
1151 }
1152 break;
1153 }
1154 case WebAssemblyISD::EXTEND_LOW_U:
1155 case WebAssemblyISD::EXTEND_HIGH_U: {
1156 // We know the high half, of each destination vector element, will be zero.
1157 SDValue SrcOp = Op.getOperand(0);
1158 EVT VT = SrcOp.getSimpleValueType();
1159 unsigned BitWidth = Known.getBitWidth();
1160 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1161 assert(BitWidth >= 8 && "Unexpected width!");
1163 Known.Zero |= Mask;
1164 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1165 assert(BitWidth >= 16 && "Unexpected width!");
1167 Known.Zero |= Mask;
1168 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1169 assert(BitWidth >= 32 && "Unexpected width!");
1171 Known.Zero |= Mask;
1172 }
1173 break;
1174 }
1175 // For 128-bit addition if the upper bits are all zero then it's known that
1176 // the upper bits of the result will have all bits guaranteed zero except the
1177 // first.
1178 case WebAssemblyISD::I64_ADD128:
// Only result 1 is handled, and only when operands 1 and 3 are both the
// constant zero.
1179 if (Op.getResNo() == 1) {
1180 SDValue LHS_HI = Op.getOperand(1);
1181 SDValue RHS_HI = Op.getOperand(3);
1182 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1183 Known.Zero.setBitsFrom(1);
1184 }
1185 break;
1186 }
1187}
1188
1190WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1191 if (VT.isFixedLengthVector()) {
1192 MVT EltVT = VT.getVectorElementType();
1193 // We have legal vector types with these lane types, so widening the
1194 // vector would let us use some of the lanes directly without having to
1195 // extend or truncate values.
1196 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1197 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1198 return TypeWidenVector;
1199 }
1200
1202}
1203
1204bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1205 const MachineFunction &MF, EVT VT) const {
1206 if (!Subtarget->hasFP16() || !VT.isVector())
1207 return false;
1208
1209 EVT ScalarVT = VT.getScalarType();
1210 if (!ScalarVT.isSimple())
1211 return false;
1212
1213 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1214}
1215
1216bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1217 SDValue Op, const TargetLoweringOpt &TLO) const {
1218 // ISel process runs DAGCombiner after legalization; this step is called
1219 // SelectionDAG optimization phase. This post-legalization combining process
1220 // runs DAGCombiner on each node, and if there was a change to be made,
1221 // re-runs legalization again on it and its user nodes to make sure
1222 // everythiing is in a legalized state.
1223 //
1224 // The legalization calls lowering routines, and we do our custom lowering for
1225 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1226 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1227 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1228 // turns unused vector elements into undefs. But this routine does not work
1229 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1230 // combination can result in a infinite loop, in which undefs are converted to
1231 // zeros in legalization and back to undefs in combining.
1232 //
1233 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1234 // running for build_vectors.
1235 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1236 return false;
1237 return true;
1238}
1239
1240//===----------------------------------------------------------------------===//
1241// WebAssembly Lowering private implementation.
1242//===----------------------------------------------------------------------===//
1243
1244//===----------------------------------------------------------------------===//
1245// Lowering Code
1246//===----------------------------------------------------------------------===//
1247
1248static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1250 DAG.getContext()->diagnose(
1251 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1252}
1253
1254// Test whether the given calling convention is supported.
// NOTE(review): this listing skips source lines 1255 and 1265, so the
// function signature and one line of the condition below are not visible.
// The visible conventions accepted are C, Fast, Cold, PreserveMost,
// PreserveAll, CXX_FAST_TLS, Swift and SwiftTail.
1256 // We currently support the language-independent target-independent
1257 // conventions. We don't yet have a way to annotate calls with properties like
1258 // "cold", and we don't have any call-clobbered registers, so these are mostly
1259 // all handled the same.
1260 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1261 CallConv == CallingConv::Cold ||
1262 CallConv == CallingConv::PreserveMost ||
1263 CallConv == CallingConv::PreserveAll ||
1264 CallConv == CallingConv::CXX_FAST_TLS ||
1266 CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail;
1267}
1268
// Lowers the outgoing call described by CLI. The result is either a
// WebAssemblyISD::RET_CALL node (when the call is still a tail call after the
// checks below) or a WebAssemblyISD::CALL node whose value results are
// appended to InVals and whose chain result is returned.
//
// Rough order of work: detect funcref calls, validate the calling convention
// and tail-call eligibility, diagnose unsupported argument flags, copy byval
// arguments to stack objects, add implicit swift arguments, spill variadic
// arguments into a stack buffer, build the operand list, and finally create
// the call node.
//
// NOTE(review): this listing skips several source lines inside this function
// (for example the declarations of ArgLocs, Chains, Ops and Table, and parts
// of some multi-line calls), so a few statements appear truncated here.
1269SDValue
1270WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1271 SmallVectorImpl<SDValue> &InVals) const {
1272 SelectionDAG &DAG = CLI.DAG;
1273 SDLoc DL = CLI.DL;
1274 SDValue Chain = CLI.Chain;
1275 SDValue Callee = CLI.Callee;
1276 MachineFunction &MF = DAG.getMachineFunction();
1277 auto Layout = MF.getDataLayout();
1278
1279 // A call through a funcref is expressed in IR as a call through the pointer
1280 // produced by the llvm.wasm.funcref.to_ptr intrinsic. Detect this here and
1281 // recover the underlying funcref value so the call can be lowered to a
1282 // table.set + call_indirect through the dedicated __funcref_call_table.
1283 bool IsFuncrefCall = false;
1284 if (Callee.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1285 Callee.getConstantOperandVal(0) == Intrinsic::wasm_funcref_to_ptr) {
1286 Callee = Callee.getOperand(1);
1287 IsFuncrefCall = true;
1288 }
1289
1290 CallingConv::ID CallConv = CLI.CallConv;
1291 if (!callingConvSupported(CallConv))
1292 fail(DL, DAG,
1293 "WebAssembly doesn't support language-specific or target-specific "
1294 "calling conventions yet");
1295 if (CLI.IsPatchPoint)
1296 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1297
1298 if (CLI.IsTailCall) {
// NoTail demotes the call to a regular call; a diagnostic is emitted only
// when the IR call is marked musttail.
1299 auto NoTail = [&](const char *Msg) {
1300 if (CLI.CB && CLI.CB->isMustTailCall())
1301 fail(DL, DAG, Msg);
1302 CLI.IsTailCall = false;
1303 };
1304
1305 if (!Subtarget->hasTailCall())
1306 NoTail("WebAssembly 'tail-call' feature not enabled");
1307
1308 // Varargs calls cannot be tail calls because the buffer is on the stack
1309 if (CLI.IsVarArg)
1310 NoTail("WebAssembly does not support varargs tail calls");
1311
1312 // Do not tail call unless caller and callee return types match
1313 const Function &F = MF.getFunction();
1314 const TargetMachine &TM = getTargetMachine();
1315 Type *RetTy = F.getReturnType();
1316 SmallVector<MVT, 4> CallerRetTys;
1317 SmallVector<MVT, 4> CalleeRetTys;
1318 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1319 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1320 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1321 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1322 CalleeRetTys.begin());
1323 if (!TypesMatch)
1324 NoTail("WebAssembly tail call requires caller and callee return types to "
1325 "match");
1326
1327 // If pointers to local stack values are passed, we cannot tail call
1328 if (CLI.CB) {
1329 for (auto &Arg : CLI.CB->args()) {
1330 Value *Val = Arg.get();
1331 // Trace the value back through pointer operations
// The loop stops once stripping casts/aliases and stepping through one GEP
// no longer changes the value.
1332 while (true) {
1333 Value *Src = Val->stripPointerCastsAndAliases();
1334 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1335 Src = GEP->getPointerOperand();
1336 if (Val == Src)
1337 break;
1338 Val = Src;
1339 }
1340 if (isa<AllocaInst>(Val)) {
1341 NoTail(
1342 "WebAssembly does not support tail calling with stack arguments");
1343 break;
1344 }
1345 }
1346 }
1347 }
1348
1349 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1350 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1351 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1352
1353 // The generic code may have added an sret argument. If we're lowering an
1354 // invoke function, the ABI requires that the function pointer be the first
1355 // argument, so we may have to swap the arguments.
1356 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1357 Outs[0].Flags.isSRet()) {
1358 std::swap(Outs[0], Outs[1]);
1359 std::swap(OutVals[0], OutVals[1]);
1360 }
1361
1362 bool HasSwiftSelfArg = false;
1363 bool HasSwiftErrorArg = false;
1364 bool HasSwiftAsyncArg = false;
1365 unsigned NumFixedArgs = 0;
// Single pass over the outgoing arguments: record swift flags, diagnose
// unsupported flags, materialize byval copies and count the fixed arguments.
1366 for (unsigned I = 0; I < Outs.size(); ++I) {
1367 const ISD::OutputArg &Out = Outs[I];
1368 SDValue &OutVal = OutVals[I];
1369 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1370 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1371 HasSwiftAsyncArg |= Out.Flags.isSwiftAsync();
1372 if (Out.Flags.isNest())
1373 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1374 if (Out.Flags.isInAlloca())
1375 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1376 if (Out.Flags.isInConsecutiveRegs())
1377 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1379 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
// A non-empty byval argument is copied into a fresh stack object with memcpy,
// and the frame index replaces the original value as the argument.
1380 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1381 auto &MFI = MF.getFrameInfo();
1382 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1384 /*isSS=*/false);
1385 SDValue SizeNode =
1386 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1387 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1388 Align Alignment = Out.Flags.getNonZeroByValAlign();
1389 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode, Alignment,
1390 Alignment,
1391 /*isVolatile*/ false, /*AlwaysInline=*/false,
1392 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1393 MachinePointerInfo());
1394 OutVal = FINode;
1395 }
1396 // Count the number of fixed args *after* legalization.
1397 NumFixedArgs += !Out.Flags.isVarArg();
1398 }
1399
1400 bool IsVarArg = CLI.IsVarArg;
1401 auto PtrVT = getPointerTy(Layout);
1402
1403 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1404 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1405 // arguments are also added for callee signature. They are necessary to match
1406 // callee and caller signature for indirect call.
1407 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1408 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1409 if (!HasSwiftSelfArg) {
1410 NumFixedArgs++;
1411 ISD::ArgFlagsTy Flags;
1412 Flags.setSwiftSelf();
1413 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1414 CLI.Outs.push_back(Arg);
1415 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1416 CLI.OutVals.push_back(ArgVal);
1417 }
1418 if (!HasSwiftErrorArg) {
1419 NumFixedArgs++;
1420 ISD::ArgFlagsTy Flags;
1421 Flags.setSwiftError();
1422 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1423 CLI.Outs.push_back(Arg);
1424 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1425 CLI.OutVals.push_back(ArgVal);
1426 }
1427 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1428 NumFixedArgs++;
1429 ISD::ArgFlagsTy Flags;
1430 Flags.setSwiftAsync();
1431 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1432 CLI.Outs.push_back(Arg);
1433 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1434 CLI.OutVals.push_back(ArgVal);
1435 }
1436 }
1437
1438 // Analyze operands of the call, assigning locations to each operand.
1440 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1441
1442 if (IsVarArg) {
1443 // Outgoing non-fixed arguments are placed in a buffer. First
1444 // compute their offsets and the total amount of buffer space needed.
1445 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1446 const ISD::OutputArg &Out = Outs[I];
1447 SDValue &Arg = OutVals[I];
1448 EVT VT = Arg.getValueType();
1449 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1450 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1451 Align Alignment =
1452 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1453 unsigned Offset =
1454 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1455 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1456 Offset, VT.getSimpleVT(),
1458 }
1459 }
1460
1461 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1462
// FINode ends up as the pointer passed for the vararg buffer: a frame index
// when there are bytes to store, or the constant 0 for an empty buffer.
1463 SDValue FINode;
1464 if (IsVarArg && NumBytes) {
1465 // For non-fixed arguments, next emit stores to store the argument values
1466 // to the stack buffer at the offsets computed above.
1467 MaybeAlign StackAlign = Layout.getStackAlignment();
1468 assert(StackAlign && "data layout string is missing stack alignment");
1469 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1470 /*isSS=*/false);
1471 unsigned ValNo = 0;
1473 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1474 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1475 "ArgLocs should remain in order and only hold varargs args");
1476 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1477 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1478 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1479 DAG.getConstant(Offset, DL, PtrVT));
1480 Chains.push_back(
1481 DAG.getStore(Chain, DL, Arg, Add,
1483 }
1484 if (!Chains.empty())
1485 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1486 } else if (IsVarArg) {
1487 FINode = DAG.getIntPtrConstant(0, DL);
1488 }
1489
1490 if (Callee->getOpcode() == ISD::GlobalAddress) {
1491 // If the callee is a GlobalAddress node (quite common, every direct call
1492 // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1493 // doesn't add MO_GOT which is not needed for direct calls.
1494 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1497 GA->getOffset());
1498 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1499 getPointerTy(DAG.getDataLayout()), Callee);
1500 }
1501
1502 // Compute the operands for the CALLn node.
1504 Ops.push_back(Chain);
1505 Ops.push_back(Callee);
1506
1507 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1508 // isn't reliable.
1509 Ops.append(OutVals.begin(),
1510 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1511 // Add a pointer to the vararg buffer.
1512 if (IsVarArg)
1513 Ops.push_back(FINode);
1514
// Collect the result types of the call while diagnosing unsupported flags on
// return values.
1515 SmallVector<EVT, 8> InTys;
1516 for (const auto &In : Ins) {
1517 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1518 assert(!In.Flags.isNest() && "nest is not valid for return values");
1519 if (In.Flags.isInAlloca())
1520 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1521 if (In.Flags.isInConsecutiveRegs())
1522 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1523 if (In.Flags.isInConsecutiveRegsLast())
1524 fail(DL, DAG,
1525 "WebAssembly hasn't implemented cons regs last return values");
1526 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1527 // registers.
1528 InTys.push_back(In.VT);
1529 }
1530
1531 // Lastly, if this is a call to a funcref we need to add an instruction
1532 // table.set to the chain and transform the call.
1533 if (IsFuncrefCall) {
1534 // In the absence of function references proposal where a funcref call is
1535 // lowered to call_ref, using reference types we generate a table.set to set
1536 // the funcref to a special table used solely for this purpose, followed by
1537 // a call_indirect. Here we just generate the table set, and return the
1538 // SDValue of the table.set so that LowerCall can finalize the lowering by
1539 // generating the call_indirect.
1540 SDValue Chain = Ops[0];
1541
1543 MF.getContext(), Subtarget);
1544 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1545 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1546 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1547 SDValue TableSet = DAG.getMemIntrinsicNode(
1548 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1549 MVT::funcref, MachinePointerInfo(), Align(1),
1551
1552 Ops[0] = TableSet; // The new chain is the TableSet itself
1553 }
1554
1555 if (CLI.IsTailCall) {
1556 // ret_calls do not return values to the current frame
1557 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1558 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1559 }
1560
// The chain is the last result of the CALL node, after one result per entry
// of Ins.
1561 InTys.push_back(MVT::Other);
1562 SDVTList InTyList = DAG.getVTList(InTys);
1563 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1564
1565 for (size_t I = 0; I < Ins.size(); ++I)
1566 InVals.push_back(Res.getValue(I));
1567
1568 // Return the chain
1569 return Res.getValue(Ins.size());
1570}
1571
1572bool WebAssemblyTargetLowering::CanLowerReturn(
1573 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1574 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1575 const Type *RetTy) const {
1576 // WebAssembly can only handle returning tuples with multivalue enabled
1577 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1578}
1579
// Lowers a function return: builds a WebAssemblyISD::RETURN node from the
// incoming chain followed by all values in OutVals, validates the flags on
// each returned value, and returns the new chain.
// NOTE(review): this listing skips source lines 1582 and 1603, so the
// declaration of the Outs parameter and the condition guarding the last
// diagnostic in the loop are not visible here.
1580SDValue WebAssemblyTargetLowering::LowerReturn(
1581 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1583 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1584 SelectionDAG &DAG) const {
1585 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1586 "MVP WebAssembly can only return up to one value");
1587 if (!callingConvSupported(CallConv))
1588 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1589
// Operand 0 of the RETURN node is the chain; the returned values follow.
1590 SmallVector<SDValue, 4> RetOps(1, Chain);
1591 RetOps.append(OutVals.begin(), OutVals.end());
1592 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1593
1594 // Record the number and types of the return values.
1595 for (const ISD::OutputArg &Out : Outs) {
1596 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1597 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1598 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1599 if (Out.Flags.isInAlloca())
1600 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1601 if (Out.Flags.isInConsecutiveRegs())
1602 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1604 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1605 }
1606
1607 return Chain;
1608}
1609
// Lowers the incoming formal arguments of the current function. Each entry of
// Ins becomes either a WebAssemblyISD::ARGUMENT node (indexed by its position)
// or undef when unused, and is appended to InVals. Parameter and result types
// are recorded on the WebAssemblyFunctionInfo, and the (possibly updated)
// chain is returned.
// NOTE(review): this listing skips source lines 1671, 1682 and 1683, so the
// expression that creates VarargVreg, the declaration of Results and the
// start of the call that fills Params/Results are not visible here.
1610SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1611 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1612 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1613 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1614 if (!callingConvSupported(CallConv))
1615 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1616
1617 MachineFunction &MF = DAG.getMachineFunction();
1618 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1619
1620 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1621 // of the incoming values before they're represented by virtual registers.
1622 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1623
1624 bool HasSwiftErrorArg = false;
1625 bool HasSwiftSelfArg = false;
1626 bool HasSwiftAsyncArg = false;
1627 for (const ISD::InputArg &In : Ins) {
1628 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1629 HasSwiftErrorArg |= In.Flags.isSwiftError();
1630 HasSwiftAsyncArg |= In.Flags.isSwiftAsync();
1631 if (In.Flags.isInAlloca())
1632 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1633 if (In.Flags.isNest())
1634 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1635 if (In.Flags.isInConsecutiveRegs())
1636 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1637 if (In.Flags.isInConsecutiveRegsLast())
1638 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1639 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1640 // registers.
// InVals.size() before the push is the index of the argument being lowered.
1641 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1642 DAG.getTargetConstant(InVals.size(),
1643 DL, MVT::i32))
1644 : DAG.getUNDEF(In.VT));
1645
1646 // Record the number and types of arguments.
1647 MFI->addParam(In.VT);
1648 }
1649
1650 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1651 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1652 // arguments are also added for callee signature. They are necessary to match
1653 // callee and caller signature for indirect call.
1654 auto PtrVT = getPointerTy(MF.getDataLayout());
1655 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1656 if (!HasSwiftSelfArg) {
1657 MFI->addParam(PtrVT);
1658 }
1659 if (!HasSwiftErrorArg) {
1660 MFI->addParam(PtrVT);
1661 }
1662 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1663 MFI->addParam(PtrVT);
1664 }
1665 }
1666 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1667 // the buffer is passed as an argument.
1668 if (IsVarArg) {
1669 MVT PtrVT = getPointerTy(MF.getDataLayout());
1670 Register VarargVreg =
1672 MFI->setVarargBufferVreg(VarargVreg);
// The buffer pointer is read as the argument at index Ins.size(), i.e. right
// after the entries of Ins.
1673 Chain = DAG.getCopyToReg(
1674 Chain, DL, VarargVreg,
1675 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1676 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1677 MFI->addParam(PtrVT);
1678 }
1679
1680 // Record the number and types of arguments and results.
1681 SmallVector<MVT, 4> Params;
1684 MF.getFunction(), DAG.getTarget(), Params, Results);
1685 for (MVT VT : Results)
1686 MFI->addResult(VT);
1687 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1688 // the param logic here with ComputeSignatureVTs
1689 assert(MFI->getParams().size() == Params.size() &&
1690 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1691 Params.begin()));
1692
1693 return Chain;
1694}
1695
// Produces replacement values for node N into Results. Leaving Results empty
// for a case signals that N should not be custom lowered.
// NOTE(review): this listing skips source lines 1697, 1699, 1705-1707 and
// 1725, so the rest of the signature, the case labels in front of the two
// "do not add any results" branches and the start of the default branch are
// not visible here.
1696void WebAssemblyTargetLowering::ReplaceNodeResults(
1698 switch (N->getOpcode()) {
1700 // Do not add any results, signifying that N should not be custom lowered
1701 // after all. This happens because simd128 turns on custom lowering for
1702 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1703 // illegal type.
1704 break;
1708 // Do not add any results, signifying that N should not be custom lowered.
1709 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1710 break;
1711 case ISD::FP_ROUND: {
// Only v4f32 -> v4f16 is handled; it is replaced by a DEMOTE_ZERO node of
// type v8f16. Any other FP_ROUND adds no result.
1712 EVT VT = N->getValueType(0);
1713 SDValue Src = N->getOperand(0);
1714 if (VT == MVT::v4f16 && Src.getValueType() == MVT::v4f32) {
1715 Results.push_back(
1716 DAG.getNode(WebAssemblyISD::DEMOTE_ZERO, SDLoc(N), MVT::v8f16, Src));
1717 }
1718 break;
1719 }
// ADD and SUB are handed to Replace128Op.
1720 case ISD::ADD:
1721 case ISD::SUB:
1722 Results.push_back(Replace128Op(N, DAG));
1723 break;
1724 default:
1726 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1727 }
1728}
1729
1730//===----------------------------------------------------------------------===//
1731// Custom lowering hooks.
1732//===----------------------------------------------------------------------===//
1733
// Entry point for custom lowering: dispatches on the opcode of Op to the
// matching Lower* helper and returns its result.
// NOTE(review): this listing skips a number of source lines holding case
// labels (1745, 1747, 1765-1766, 1768-1770, 1772, 1774-1776, 1780,
// 1788-1789), so several `return Lower...` statements below appear without
// the labels that select them.
1734SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1735 SelectionDAG &DAG) const {
1736 SDLoc DL(Op);
1737 switch (Op.getOpcode()) {
1738 default:
1739 llvm_unreachable("unimplemented operation lowering");
1740 return SDValue();
1741 case ISD::FrameIndex:
1742 return LowerFrameIndex(Op, DAG);
1743 case ISD::GlobalAddress:
1744 return LowerGlobalAddress(Op, DAG);
1746 return LowerGlobalTLSAddress(Op, DAG);
1748 return LowerExternalSymbol(Op, DAG);
1749 case ISD::JumpTable:
1750 return LowerJumpTable(Op, DAG);
1751 case ISD::BR_JT:
1752 return LowerBR_JT(Op, DAG);
1753 case ISD::VASTART:
1754 return LowerVASTART(Op, DAG);
// Computed gotos are diagnosed as unsupported and yield an empty SDValue.
1755 case ISD::BlockAddress:
1756 case ISD::BRIND:
1757 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1758 return SDValue();
1759 case ISD::RETURNADDR:
1760 return LowerRETURNADDR(Op, DAG);
1761 case ISD::FRAMEADDR:
1762 return LowerFRAMEADDR(Op, DAG);
1763 case ISD::CopyToReg:
1764 return LowerCopyToReg(Op, DAG);
1767 return LowerAccessVectorElement(Op, DAG);
1771 return LowerIntrinsic(Op, DAG);
1773 return LowerSIGN_EXTEND_INREG(Op, DAG);
1777 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1778 case ISD::BUILD_VECTOR:
1779 return LowerBUILD_VECTOR(Op, DAG);
1781 return LowerVECTOR_SHUFFLE(Op, DAG);
1782 case ISD::SETCC:
1783 return LowerSETCC(Op, DAG);
1784 case ISD::SHL:
1785 case ISD::SRA:
1786 case ISD::SRL:
1787 return LowerShift(Op, DAG);
1790 return LowerFP_TO_INT_SAT(Op, DAG);
1791 case ISD::FMINNUM:
1792 case ISD::FMINIMUMNUM:
1793 return LowerFMIN(Op, DAG);
1794 case ISD::FMAXNUM:
1795 case ISD::FMAXIMUMNUM:
1796 return LowerFMAX(Op, DAG);
1797 case ISD::LOAD:
1798 return LowerLoad(Op, DAG);
1799 case ISD::STORE:
1800 return LowerStore(Op, DAG);
// Bit-counting operations reaching this point are unrolled per element.
1801 case ISD::CTPOP:
1802 case ISD::CTLZ:
1803 case ISD::CTTZ:
1804 return DAG.UnrollVectorOp(Op.getNode());
1805 case ISD::CLEAR_CACHE:
1806 report_fatal_error("llvm.clear_cache is not supported on wasm");
1807 case ISD::SMUL_LOHI:
1808 case ISD::UMUL_LOHI:
1809 return LowerMUL_LOHI(Op, DAG);
1810 case ISD::UADDO:
1811 return LowerUADDO(Op, DAG);
1812 }
1813}
1814
1818
1819 return false;
1820}
1821
// Returns an optional unsigned for Op, or std::nullopt when FI is null. The
// callers in this file use a present value as the index of a WebAssembly
// local.
// NOTE(review): this listing skips source lines 1824 and 1829, so how FI is
// derived from Op and the expression returned on success are not visible
// here.
1822static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1823 SelectionDAG &DAG) {
1825 if (!FI)
1826 return std::nullopt;
1827
1828 auto &MF = DAG.getMachineFunction();
1830}
1831
// Custom-lowers a store. Stores whose base is recognized as a WebAssembly
// global become GLOBAL_SET nodes, stores to a WebAssembly local become
// LOCAL_SET nodes, and any other store is returned unchanged.
// NOTE(review): this listing skips source lines 1840 and 1862-1863, so the
// condition that opens the global branch and the condition/call in front of
// the "unlowerable store" message are not visible here.
1832SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1833 SelectionDAG &DAG) const {
1834 SDLoc DL(Op);
1835 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1836 const SDValue &Value = SN->getValue();
1837 const SDValue &Base = SN->getBasePtr();
1838 const SDValue &Offset = SN->getOffset();
1839
1841 if (!Offset->isUndef())
1842 report_fatal_error("unexpected offset when storing to webassembly global",
1843 false);
1844
// GLOBAL_SET keeps the memory type and memory operand of the original store.
1845 SDVTList Tys = DAG.getVTList(MVT::Other);
1846 SDValue Ops[] = {SN->getChain(), Value, Base};
1847 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1848 SN->getMemoryVT(), SN->getMemOperand());
1849 }
1850
1851 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1852 if (!Offset->isUndef())
1853 report_fatal_error("unexpected offset when storing to webassembly local",
1854 false);
1855
// Note the operand order: LOCAL_SET takes (chain, index, value), unlike
// GLOBAL_SET above, which takes (chain, value, base).
1856 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1857 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1858 SDValue Ops[] = {SN->getChain(), Idx, Value};
1859 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1860 }
1861
1864 "Encountered an unlowerable store to the wasm_var address space",
1865 false);
1866
1867 return Op;
1868}
1869
// Custom-lowers a load. Loads whose base is recognized as a WebAssembly
// global become GLOBAL_GET nodes, loads from a WebAssembly local become
// LOCAL_GET nodes, and any other load is returned unchanged.
// NOTE(review): this listing skips source lines 1877, 1879, 1890 and
// 1899-1900, so the condition that opens the global branch and the starts of
// the error-reporting calls are not visible here.
1870SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1871 SelectionDAG &DAG) const {
1872 SDLoc DL(Op);
1873 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1874 const SDValue &Base = LN->getBasePtr();
1875 const SDValue &Offset = LN->getOffset();
1876
1878 if (!Offset->isUndef())
1880 "unexpected offset when loading from webassembly global", false);
1881
// GLOBAL_GET yields the loaded value plus a chain, and keeps the memory type
// and memory operand of the original load.
1882 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1883 SDValue Ops[] = {LN->getChain(), Base};
1884 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1885 LN->getMemoryVT(), LN->getMemOperand());
1886 }
1887
1888 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1889 if (!Offset->isUndef())
1891 "unexpected offset when loading from webassembly local", false);
1892
1893 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1894 EVT LocalVT = LN->getValueType(0);
1895 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1896 {LN->getChain(), Idx});
1897 }
1898
1901 "Encountered an unlowerable load from the wasm_var address space",
1902 false);
1903
1904 return Op;
1905}
1906
1907SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1908 SelectionDAG &DAG) const {
1909 assert(Subtarget->hasWideArithmetic());
1910 assert(Op.getValueType() == MVT::i64);
1911 SDLoc DL(Op);
1912 unsigned Opcode;
1913 switch (Op.getOpcode()) {
1914 case ISD::UMUL_LOHI:
1915 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1916 break;
1917 case ISD::SMUL_LOHI:
1918 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1919 break;
1920 default:
1921 llvm_unreachable("unexpected opcode");
1922 }
1923 SDValue LHS = Op.getOperand(0);
1924 SDValue RHS = Op.getOperand(1);
1925 SDValue Lo =
1926 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1927 SDValue Hi(Lo.getNode(), 1);
1928 SDValue Ops[] = {Lo, Hi};
1929 return DAG.getMergeValues(Ops, DL);
1930}
1931
1932// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1933//
1934// This enables generating a single wasm instruction for this operation where
1935// the upper half of both operands are constant zeros. The upper half of the
1936// result is then whether the overflow happened.
1937SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1938 SelectionDAG &DAG) const {
1939 assert(Subtarget->hasWideArithmetic());
1940 assert(Op.getValueType() == MVT::i64);
1941 assert(Op.getOpcode() == ISD::UADDO);
1942 SDLoc DL(Op);
1943 SDValue LHS = Op.getOperand(0);
1944 SDValue RHS = Op.getOperand(1);
1945 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1946 SDValue Result =
1947 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1948 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1949 SDValue CarryI64(Result.getNode(), 1);
1950 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1951 SDValue Ops[] = {Result, CarryI32};
1952 return DAG.getMergeValues(Ops, DL);
1953}
1954
1955SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1956 SelectionDAG &DAG) const {
1957 assert(Subtarget->hasWideArithmetic());
1958 assert(N->getValueType(0) == MVT::i128);
1959 SDLoc DL(N);
1960 unsigned Opcode;
1961 switch (N->getOpcode()) {
1962 case ISD::ADD:
1963 Opcode = WebAssemblyISD::I64_ADD128;
1964 break;
1965 case ISD::SUB:
1966 Opcode = WebAssemblyISD::I64_SUB128;
1967 break;
1968 default:
1969 llvm_unreachable("unexpected opcode");
1970 }
1971 SDValue LHS = N->getOperand(0);
1972 SDValue RHS = N->getOperand(1);
1973
1974 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1975 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1976 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1977 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1978 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1979 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1980 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1981 LHS_0, LHS_1, RHS_0, RHS_1);
1982 SDValue Result_HI(Result_LO.getNode(), 1);
1983 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1984}
1985
1986SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1987 SelectionDAG &DAG) const {
1988 SDValue Src = Op.getOperand(2);
1989 if (isa<FrameIndexSDNode>(Src.getNode())) {
1990 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1991 // the FI to some LEA-like instruction, but since we don't have that, we
1992 // need to insert some kind of instruction that can take an FI operand and
1993 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1994 // local.copy between Op and its FI operand.
1995 SDValue Chain = Op.getOperand(0);
1996 SDLoc DL(Op);
1997 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1998 EVT VT = Src.getValueType();
1999 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
2000 : WebAssembly::COPY_I64,
2001 DL, VT, Src),
2002 0);
2003 return Op.getNode()->getNumValues() == 1
2004 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
2005 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
2006 Op.getNumOperands() == 4 ? Op.getOperand(3)
2007 : SDValue());
2008 }
2009 return SDValue();
2010}
2011
2012SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
2013 SelectionDAG &DAG) const {
2014 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
2015 return DAG.getTargetFrameIndex(FI, Op.getValueType());
2016}
2017
2018SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
2019 SelectionDAG &DAG) const {
2020 SDLoc DL(Op);
2021
2022 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
2023 fail(DL, DAG,
2024 "Non-Emscripten WebAssembly hasn't implemented "
2025 "__builtin_return_address");
2026 return SDValue();
2027 }
2028
2029 unsigned Depth = Op.getConstantOperandVal(0);
2030 MakeLibCallOptions CallOptions;
2031 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
2032 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
2033 .first;
2034}
2035
// Custom-lowers FRAMEADDR. For depth 0 the result is a CopyFromReg (chained to
// the entry node) of the frame register reported by the subtarget's register
// info; for any non-zero depth an empty SDValue is returned so the legalizer's
// default expansion is used.
2036SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2037 SelectionDAG &DAG) const {
2038 // Non-zero depths are not supported by WebAssembly currently. Use the
2039 // legalizer's default expansion, which is to return 0 (what this function is
2040 // documented to do).
2041 if (Op.getConstantOperandVal(0) > 0)
2042 return SDValue();
2043
// NOTE(review): listing line 2044 is absent from this extract; a statement may
// have been dropped here -- confirm against the upstream file.
2045 EVT VT = Op.getValueType();
2046 Register FP =
2047 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
2048 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
2049}
2050
// Custom-lowers the address of a thread-local global.
//
// Reports a fatal error when the subtarget lacks bulk memory. On the
// DSO-local path the address is the TLS base (WebAssembly::getTLSBase) plus a
// WrapperREL of the symbol's MO_TLS_BASE_REL target address. Otherwise the
// symbol's target global address is wrapped in a WebAssemblyISD::Wrapper node.
//
// NOTE(review): listing lines 2069, 2072-2073, 2076-2077, 2093 and 2099 are
// absent from this extract, so several expressions below are visibly
// truncated -- confirm against the upstream file before relying on details.
2051SDValue
2052WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2053 SelectionDAG &DAG) const {
2054 SDLoc DL(Op);
2055 const auto *GA = cast<GlobalAddressSDNode>(Op);
2056
2057 MachineFunction &MF = DAG.getMachineFunction();
2058 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2059 report_fatal_error("cannot use thread-local storage without bulk memory",
2060 false);
2061
2062 const GlobalValue *GV = GA->getGlobal();
2063
2064 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2065 // on other targets, if we have thread-local storage, only the local-exec
2066 // model is possible.
2067 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2068 ? GV->getThreadLocalMode()
// NOTE(review): the else-arm of this conditional (listing line 2069) is
// missing; per the comment above it is presumably the local-exec model.
2070
2071 // Unsupported TLS modes
// NOTE(review): the check announced by the comment above (listing lines
// 2072-2073) is missing from this extract.
2074
2075 if (model == GlobalValue::LocalExecTLSModel ||
// NOTE(review): the middle of this condition (listing lines 2076-2077) is
// missing from this extract.
2078 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2079 // For DSO-local TLS variables we use offset from __tls_base, or
2080 // __wasm_get_tls_base() if using libcall thread context.
2081
2082 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2083 SDValue BaseAddr(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
2084
2085 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2086 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2087 SDValue SymOffset =
2088 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2089
2090 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2091 }
2092
// NOTE(review): listing line 2093 is missing from this extract.
2094
2095 EVT VT = Op.getValueType();
2096 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2097 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2098 GA->getOffset(),
// NOTE(review): the final argument and closing of this call (listing line
// 2099) are missing from this extract.
2100}
2101
// Custom-lowers a generic GlobalAddress node.
//
// In position-independent mode a DSO-local symbol is materialized as
// (base + WrapperREL(symbol)), where the base is the external symbol
// "__table_base" for function-typed globals and "__memory_base" otherwise.
// In all remaining cases the target global address is wrapped in a
// WebAssemblyISD::Wrapper node carrying OperandFlags.
//
// NOTE(review): listing lines 2109, 2117, 2124, 2127, 2129 and 2140 are absent
// from this extract (among them the declaration of BaseAddr and, presumably,
// the assignments to OperandFlags) -- confirm against the upstream file.
2102SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2103 SelectionDAG &DAG) const {
2104 SDLoc DL(Op);
2105 const auto *GA = cast<GlobalAddressSDNode>(Op);
2106 EVT VT = Op.getValueType();
2107 assert(GA->getTargetFlags() == 0 &&
2108 "Unexpected target flags on generic GlobalAddressSDNode");
// NOTE(review): the condition guarding this diagnostic (listing line 2109) is
// missing from this extract.
2110 fail(DL, DAG, "Invalid address space for WebAssembly target");
2111
2112 unsigned OperandFlags = 0;
2113 const GlobalValue *GV = GA->getGlobal();
2114 // Since WebAssembly tables cannot yet be shared across modules, we don't
2115 // need special treatment for tables in PIC mode.
2116 if (isPositionIndependent() &&
// NOTE(review): the second half of this condition (listing line 2117) is
// missing from this extract.
2118 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2119 MachineFunction &MF = DAG.getMachineFunction();
2120 MVT PtrVT = getPointerTy(MF.getDataLayout());
2121 const char *BaseName;
2122 if (GV->getValueType()->isFunctionTy()) {
2123 BaseName = MF.createExternalSymbolName("__table_base");
2125 } else {
2126 BaseName = MF.createExternalSymbolName("__memory_base");
2128 }
// NOTE(review): the left-hand side of this initialization (listing line 2129,
// presumably the declaration of BaseAddr) is missing from this extract.
2130 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2131 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2132
2133 SDValue SymAddr = DAG.getNode(
2134 WebAssemblyISD::WrapperREL, DL, VT,
2135 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2136 OperandFlags));
2137
2138 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2139 }
2141 }
2142
2143 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2144 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2145 GA->getOffset(), OperandFlags));
2146}
2147
2148SDValue
2149WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2150 SelectionDAG &DAG) const {
2151 SDLoc DL(Op);
2152 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2153 EVT VT = Op.getValueType();
2154 assert(ES->getTargetFlags() == 0 &&
2155 "Unexpected target flags on generic ExternalSymbolSDNode");
2156 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2157 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2158}
2159
2160SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2161 SelectionDAG &DAG) const {
2162 // There's no need for a Wrapper node because we always incorporate a jump
2163 // table operand into a BR_TABLE instruction, rather than ever
2164 // materializing it in a register.
2165 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2166 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2167 JT->getTargetFlags());
2168}
2169
// Custom-lowers BR_JT to a WebAssemblyISD::BR_TABLE node. The operand list is
// built as: chain, index, one basic-block operand per jump-table entry, and
// finally a placeholder default target.
2170SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2171 SelectionDAG &DAG) const {
2172 SDLoc DL(Op);
2173 SDValue Chain = Op.getOperand(0);
2174 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2175 SDValue Index = Op.getOperand(2);
2176 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2177
// NOTE(review): listing line 2178 is absent from this extract; it presumably
// declares the `Ops` container used below -- confirm against upstream.
2179 Ops.push_back(Chain);
2180 Ops.push_back(Index);
2181
// Look up the machine basic blocks of the jump table this node refers to.
2182 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2183 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2184
2185 // Add an operand for each case.
2186 for (auto *MBB : MBBs)
2187 Ops.push_back(DAG.getBasicBlock(MBB));
2188
2189 // Add the first MBB as a dummy default target for now. This will be replaced
2190 // with the proper default target (and the preceding range check eliminated)
2191 // if possible by WebAssemblyFixBrTableDefaults.
2192 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2193 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2194}
2195
2196SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2197 SelectionDAG &DAG) const {
2198 SDLoc DL(Op);
2199 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2200
2201 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2202 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2203
2204 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2205 MFI->getVarargBufferVreg(), PtrVT);
2206 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2207 MachinePointerInfo(SV));
2208}
2209
// Custom-lowers a handful of intrinsic nodes: wasm_lsda, wasm_shuffle,
// wasm_funcref_to_ptr and thread_pointer. Every other intrinsic returns an
// empty SDValue, i.e. is not custom lowered.
//
// NOTE(review): listing lines 2215, 2216, 2219, 2236 and 2239 are absent from
// this extract (the case labels of the first switch and two declaration
// heads) -- confirm against the upstream file.
2210SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2211 SelectionDAG &DAG) const {
2212 MachineFunction &MF = DAG.getMachineFunction();
2213 unsigned IntNo;
// The intrinsic ID is constant operand 1 or constant operand 0 depending on
// the node opcode (the case labels selecting between them are missing here).
2214 switch (Op.getOpcode()) {
2217 IntNo = Op.getConstantOperandVal(1);
2218 break;
2220 IntNo = Op.getConstantOperandVal(0);
2221 break;
2222 default:
2223 llvm_unreachable("Invalid intrinsic");
2224 }
2225 SDLoc DL(Op);
2226
2227 switch (IntNo) {
2228 default:
2229 return SDValue(); // Don't custom lower most intrinsics.
2230
// wasm_lsda: address of the per-function symbol "GCC_except_table<N>", where N
// is the machine function number. In PIC mode it is formed as
// __memory_base + WrapperREL(symbol); otherwise as Wrapper(symbol).
2231 case Intrinsic::wasm_lsda: {
2232 auto PtrVT = getPointerTy(MF.getDataLayout());
2233 const char *SymName = MF.createExternalSymbolName(
2234 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2235 if (isPositionIndependent()) {
// NOTE(review): the head of this statement (listing line 2236, presumably the
// declaration of `Node`) is missing from this extract.
2237 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2238 const char *BaseName = MF.createExternalSymbolName("__memory_base");
// NOTE(review): the head of this statement (listing line 2239, presumably the
// declaration of `BaseAddr`) is missing from this extract.
2240 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2241 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2242 SDValue SymAddr =
2243 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2244 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2245 }
2246 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2247 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2248 }
2249
// wasm_shuffle: builds an 18-operand SHUFFLE node from intrinsic operands
// 1..18 (two vectors followed by sixteen mask indices).
2250 case Intrinsic::wasm_shuffle: {
2251 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2252 SDValue Ops[18];
2253 size_t OpIdx = 0;
2254 Ops[OpIdx++] = Op.getOperand(1);
2255 Ops[OpIdx++] = Op.getOperand(2);
2256 while (OpIdx < 18) {
2257 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
// Undef indices and indices >= 32 are replaced by a constant 0, keeping the
// target-constant-ness of the operand they replace.
2258 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2259 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2260 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2261 } else {
2262 Ops[OpIdx++] = MaskIdx;
2263 }
2264 }
2265 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2266 }
2267
2268 case Intrinsic::wasm_funcref_to_ptr: {
2269 // llvm.wasm.funcref.to_ptr only has a defined lowering when its result
2270 // feeds directly into an indirect call. Reaching here means the pointer
2271 // escapes a direct call. We haven't implemented conversion of a funcref
2272 // into a real function pointer so we crash if we get here.
2273 fail(DL, DAG,
2274 "a funcref can only be converted to a pointer to be directly called; "
2275 "the resulting pointer cannot otherwise be used");
2276 return DAG.getPOISON(Op.getValueType());
2277 }
2278
// thread_pointer: result 0 of the node returned by WebAssembly::getTLSBase.
2279 case Intrinsic::thread_pointer: {
2280 return SDValue(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
2281 }
2282 }
2283}
2284
// Custom-lowers SIGN_EXTEND_INREG when sign-extension ops are unavailable but
// SIMD is (asserted below).
//
// Returns Op unchanged when its operand is an EXTRACT_VECTOR_ELT from a vector
// whose type already matches the lane type being extended from. Otherwise, for
// a constant lane index, the extract is rebuilt on a bitcast of the source
// vector with the index scaled accordingly. Returns an empty SDValue for every
// other case.
2285SDValue
2286WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2287 SelectionDAG &DAG) const {
2288 SDLoc DL(Op);
2289 // If sign extension operations are disabled, allow sext_inreg only if operand
2290 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2291 // extension operations, but allowing sext_inreg in this context lets us have
2292 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2293 // everywhere would be simpler in this file, but would necessitate large and
2294 // brittle patterns to undo the expansion and select extract_lane_s
2295 // instructions.
2296 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2297 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2298 return SDValue();
2299
2300 const SDValue &Extract = Op.getOperand(0);
2301 MVT VecT = Extract.getOperand(0).getSimpleValueType();
// Source vectors with lanes wider than 32 bits are not handled.
2302 if (VecT.getVectorElementType().getSizeInBits() > 32)
2303 return SDValue();
// Operand 1 is a VTSDNode naming the lane type being sign-extended from;
// ExtractedVecT is the 128-bit vector type made of such lanes.
2304 MVT ExtractedLaneT =
2305 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2306 MVT ExtractedVecT =
2307 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2308 if (ExtractedVecT == VecT)
2309 return Op;
2310
2311 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2312 const SDNode *Index = Extract.getOperand(1).getNode();
2313 if (!isa<ConstantSDNode>(Index))
2314 return SDValue();
2315 unsigned IndexVal = Index->getAsZExtVal();
// Scale is the ratio of lane counts between the narrower-lane vector type and
// the original vector type; the lane index is multiplied by it.
2316 unsigned Scale =
2317 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2318 assert(Scale > 1);
2319 SDValue NewIndex =
2320 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2321 SDValue NewExtract = DAG.getNode(
// NOTE(review): the leading arguments of this getNode call (listing line 2322)
// are missing from this extract -- confirm against the upstream file.
2323 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2324 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2325 Op.getOperand(1));
2326}
2327
2328static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2329 SelectionDAG &DAG) {
2330 SDValue Source = peekThroughBitcasts(Op);
2331 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2332 return SDValue();
2333
2334 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2335 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2336 "expected extend_low");
2337 auto *Shuffle = cast<ShuffleVectorSDNode>(Source.getNode());
2338
2339 ArrayRef<int> Mask = Shuffle->getMask();
2340 // Look for a shuffle which moves from the high half to the low half.
2341 size_t FirstIdx = Mask.size() / 2;
2342 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2343 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2344 return SDValue();
2345 }
2346 }
2347
2348 SDLoc DL(Op);
2349 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2350 ? WebAssemblyISD::EXTEND_HIGH_S
2351 : WebAssemblyISD::EXTEND_HIGH_U;
2352 SDValue ShuffleSrc = Shuffle->getOperand(0);
2353 if (Op.getOpcode() == ISD::BITCAST)
2354 ShuffleSrc = DAG.getBitcast(Op.getValueType(), ShuffleSrc);
2355
2356 return DAG.getNode(Opc, DL, VT, ShuffleSrc);
2357}
2358
// Custom-lowers an extend-vector-inreg node to EXTEND_LOW_U / EXTEND_LOW_S
// nodes, emitting one node per halving of the scale factor between source and
// result scalar widths. For a scale of exactly 2, a single EXTEND_HIGH node is
// used instead when GetExtendHigh matches the source. Returns an empty SDValue
// for i1 or i64 source elements and for scale factors other than 2, 4 or 8.
//
// NOTE(review): listing lines 2382, 2383, 2386, 2401 and 2402 are absent from
// this extract (the case labels and the result-type expression inside the
// loop) -- confirm against the upstream file.
2359SDValue
2360WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2361 SelectionDAG &DAG) const {
2362 SDLoc DL(Op);
2363 EVT VT = Op.getValueType();
2364 SDValue Src = Op.getOperand(0);
2365 EVT SrcVT = Src.getValueType();
2366
2367 if (SrcVT.getVectorElementType() == MVT::i1 ||
2368 SrcVT.getVectorElementType() == MVT::i64)
2369 return SDValue();
2370
2371 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2372 "Unexpected extension factor.");
2373 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2374
2375 if (Scale != 2 && Scale != 4 && Scale != 8)
2376 return SDValue();
2377
// Choose the unsigned or signed extend_low opcode from the node opcode (the
// case labels themselves are missing from this extract).
2378 unsigned Ext;
2379 switch (Op.getOpcode()) {
2380 default:
2381 llvm_unreachable("unexpected opcode");
2384 Ext = WebAssemblyISD::EXTEND_LOW_U;
2385 break;
2387 Ext = WebAssemblyISD::EXTEND_LOW_S;
2388 break;
2389 }
2390
2391 if (Scale == 2) {
2392 // See if we can use EXTEND_HIGH.
2393 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2394 return ExtendHigh;
2395 }
2396
// Apply the extend repeatedly until the scale is used up; the assert after the
// loop checks that the final type equals VT.
2397 SDValue Ret = Src;
2398 while (Scale != 1) {
2399 Ret = DAG.getNode(Ext, DL,
2400 Ret.getValueType()
// NOTE(review): the rest of this type expression (listing lines 2401-2402) is
// missing from this extract.
2403 Ret);
2404 Scale /= 2;
2405 }
2406 assert(Ret.getValueType() == VT);
2407 return Ret;
2408}
2409
// NOTE(review): the signature of this function (listing line 2410) is missing
// from this extract. It is presumably the `LowerConvertLow(Op, DAG)` helper
// that LowerBUILD_VECTOR calls -- confirm against the upstream file.
//
// Tries to replace a v2f64 or v4f32 build-vector whose lanes are each a
// conversion (SINT_TO_FP, UINT_TO_FP, FP_EXTEND or FP16_TO_FP) of a
// constant-index EXTRACT_VECTOR_ELT with a single CONVERT_LOW_S,
// CONVERT_LOW_U or PROMOTE_LOW node. Returns an empty SDValue when the
// pattern does not match.
2411 SDLoc DL(Op);
2412 if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
2413 return SDValue();
2414
// Classifies one lane: on success sets Opcode to the wasm conversion opcode,
// SrcVec to the vector the lane was extracted from and Index to the constant
// lane index, and returns true.
2415 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2416 unsigned &Index) -> bool {
2417 switch (Op.getOpcode()) {
2418 case ISD::SINT_TO_FP:
2419 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2420 break;
2421 case ISD::UINT_TO_FP:
2422 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2423 break;
2424 case ISD::FP_EXTEND:
2425 case ISD::FP16_TO_FP:
2426 Opcode = WebAssemblyISD::PROMOTE_LOW;
2427 break;
2428 default:
2429 return false;
2430 }
2431
2432 auto ExtractVector = Op.getOperand(0);
2433 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2434 return false;
2435
2436 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2437 return false;
2438
2439 SrcVec = ExtractVector.getOperand(0);
2440 Index = ExtractVector.getConstantOperandVal(1);
2441 return true;
2442 };
2443
2444 unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
2445 unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
2446 unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
2447 SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
2448
2449 if (!GetConvertedLane(Op.getOperand(0), FirstOpcode, FirstSrcVec,
2450 FirstIndex) ||
2451 !GetConvertedLane(Op.getOperand(1), SecondOpcode, SecondSrcVec,
2452 SecondIndex))
2453 return SDValue();
2454
2455 // If we're converting to v4f32, check the third and fourth lanes, too.
2456 if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(2), ThirdOpcode,
2457 ThirdSrcVec, ThirdIndex) ||
2458 !GetConvertedLane(Op.getOperand(3), FourthOpcode,
2459 FourthSrcVec, FourthIndex)))
2460 return SDValue();
2461
// All lanes must use the same conversion.
2462 if (FirstOpcode != SecondOpcode)
2463 return SDValue();
2464
2465 // TODO Add an optimization similar to the v2f64 below for shuffling the
2466 // vectors when the lanes are in the wrong order or come from different src
2467 // vectors.
// For four lanes, require a single source vector with lanes 0..3 in order.
2468 if (NumLanes == 4 &&
2469 (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
2470 FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec ||
2471 FirstSrcVec != FourthSrcVec || FirstIndex != 0 || SecondIndex != 1 ||
2472 ThirdIndex != 2 || FourthIndex != 3))
2473 return SDValue();
2474
// The source vector must have the type the chosen opcode expects.
2475 MVT ExpectedSrcVT;
2476 switch (FirstOpcode) {
2477 case WebAssemblyISD::CONVERT_LOW_S:
2478 case WebAssemblyISD::CONVERT_LOW_U:
2479 ExpectedSrcVT = MVT::v4i32;
2480 break;
2481 case WebAssemblyISD::PROMOTE_LOW:
2482 ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
2483 break;
2484 }
2485 if (FirstSrcVec.getValueType() != ExpectedSrcVT)
2486 return SDValue();
2487
2488 auto Src = FirstSrcVec;
2489 if (NumLanes == 2 &&
2490 (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
2491 // Shuffle the source vector so that the converted lanes are the low lanes.
// The "+ 4" makes the second mask entry index into the second shuffle operand.
2492 Src = DAG.getVectorShuffle(ExpectedSrcVT, DL, FirstSrcVec, SecondSrcVec,
2493 {static_cast<int>(FirstIndex),
2494 static_cast<int>(SecondIndex) + 4, -1, -1});
2495 }
2496 return DAG.getNode(FirstOpcode, DL, NumLanes == 2 ? MVT::v2f64 : MVT::v4f32,
2497 Src);
2498}
2499
// Custom-lowers BUILD_VECTOR.
//
// v8f16 vectors are rebuilt as an i16 BUILD_VECTOR and bitcast back. Otherwise
// LowerConvertLow is tried first. Failing that, the lanes are counted by how
// they could be produced (swizzle, shuffle, constant vector, splat), the
// strategy covering the most lanes initializes the vector, and every remaining
// non-undef lane is patched in with INSERT_VECTOR_ELT.
//
// NOTE(review): listing lines 2507, 2653, 2673 and 2680 are absent from this
// extract (presumably the declarations of `NewOps` and `Result` and the
// initializers of the two `LaneSize` locals) -- confirm against upstream.
2500SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2501 SelectionDAG &DAG) const {
2502 MVT VT = Op.getSimpleValueType();
2503 if (VT == MVT::v8f16) {
2504 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2505 // FP16 type, so cast them to I16s.
2506 MVT IVT = VT.changeVectorElementType(MVT::i16);
// NOTE(review): the declaration of `NewOps` (listing line 2507) is missing.
2508 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2509 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
// NOTE(review): this node is created with a default-constructed SDLoc() rather
// than SDLoc(Op) -- confirm whether that is intentional.
2510 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2511 return DAG.getBitcast(VT, Res);
2512 }
2513
2514 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2515 return ConvertLow;
2516
2517 SDLoc DL(Op);
2518 const EVT VecT = Op.getValueType();
2519 const EVT LaneT = Op.getOperand(0).getValueType();
2520 const size_t Lanes = Op.getNumOperands();
// Swizzles are only considered for v16i8 results.
2521 bool CanSwizzle = VecT == MVT::v16i8;
2522
2523 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2524 // possible number of lanes at once followed by a sequence of replace_lane
2525 // instructions to individually initialize any remaining lanes.
2526
2527 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2528 // swizzled lanes should be given greater weight.
2529
2530 // TODO: Investigate looping rather than always extracting/replacing specific
2531 // lanes to fill gaps.
2532
// True for integer and floating-point constant nodes.
2533 auto IsConstant = [](const SDValue &V) {
2534 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2535 };
2536
2537 // Returns the source vector and index vector pair if they exist. Checks for:
2538 // (extract_vector_elt
2539 // $src,
2540 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2541 // )
2542 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2543 auto Bail = std::make_pair(SDValue(), SDValue());
2544 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2545 return Bail;
2546 const SDValue &SwizzleSrc = Lane->getOperand(0);
2547 const SDValue &IndexExt = Lane->getOperand(1);
2548 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2549 return Bail;
2550 const SDValue &Index = IndexExt->getOperand(0);
2551 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2552 return Bail;
2553 const SDValue &SwizzleIndices = Index->getOperand(0);
// Both vectors must be v16i8 and the inner extract must read lane I.
2554 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2555 SwizzleIndices.getValueType() != MVT::v16i8 ||
2556 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2557 Index->getConstantOperandVal(1) != I)
2558 return Bail;
2559 return std::make_pair(SwizzleSrc, SwizzleIndices);
2560 };
2561
2562 // If the lane is extracted from another vector at a constant index, return
2563 // that vector. The source vector must not have more lanes than the dest
2564 // because the shufflevector indices are in terms of the destination lanes and
2565 // would not be able to address the smaller individual source lanes.
2566 auto GetShuffleSrc = [&](const SDValue &Lane) {
2567 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2568 return SDValue();
2569 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2570 return SDValue();
2571 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2572 VecT.getVectorNumElements())
2573 return SDValue();
2574 return Lane->getOperand(0);
2575 };
2576
// Each list pairs a candidate (splat value, swizzle source pair, shuffle
// source) with the number of lanes it would cover.
2577 using ValueEntry = std::pair<SDValue, size_t>;
2578 SmallVector<ValueEntry, 16> SplatValueCounts;
2579
2580 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2581 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2582
2583 using ShuffleEntry = std::pair<SDValue, size_t>;
2584 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2585
// Increments the count for Val, appending a new entry with count 1 if Val has
// not been seen yet (linear search).
2586 auto AddCount = [](auto &Counts, const auto &Val) {
2587 auto CountIt =
2588 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2589 if (CountIt == Counts.end()) {
2590 Counts.emplace_back(Val, 1);
2591 } else {
2592 CountIt->second++;
2593 }
2594 };
2595
// Returns the entry with the largest count; the list must not be empty.
2596 auto GetMostCommon = [](auto &Counts) {
2597 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2598 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2599 return *CommonIt;
2600 };
2601
2602 size_t NumConstantLanes = 0;
2603
2604 // Count eligible lanes for each type of vector creation op
2605 for (size_t I = 0; I < Lanes; ++I) {
2606 const SDValue &Lane = Op->getOperand(I);
2607 if (Lane.isUndef())
2608 continue;
2609
2610 AddCount(SplatValueCounts, Lane);
2611
2612 if (IsConstant(Lane))
2613 NumConstantLanes++;
2614 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2615 AddCount(ShuffleCounts, ShuffleSrc);
2616 if (CanSwizzle) {
2617 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2618 if (SwizzleSrcs.first)
2619 AddCount(SwizzleCounts, SwizzleSrcs);
2620 }
2621 }
2622
2623 SDValue SplatValue;
2624 size_t NumSplatLanes;
2625 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2626
2627 SDValue SwizzleSrc;
2628 SDValue SwizzleIndices;
2629 size_t NumSwizzleLanes = 0;
2630 if (SwizzleCounts.size())
2631 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2632 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2633
2634 // Shuffles can draw from up to two vectors, so find the two most common
2635 // sources.
2636 SDValue ShuffleSrc1, ShuffleSrc2;
2637 size_t NumShuffleLanes = 0;
2638 if (ShuffleCounts.size()) {
2639 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
// Remove the winner so the next query yields the second most common source.
2640 llvm::erase_if(ShuffleCounts,
2641 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2642 }
2643 if (ShuffleCounts.size()) {
2644 size_t AdditionalShuffleLanes;
2645 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2646 GetMostCommon(ShuffleCounts);
2647 NumShuffleLanes += AdditionalShuffleLanes;
2648 }
2649
2650 // Predicate returning true if the lane is properly initialized by the
2651 // original instruction
2652 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
// NOTE(review): the declaration of `Result` (listing line 2653) is missing.
2654 // Prefer swizzles over shuffles over vector consts over splats
2655 if (NumSwizzleLanes >= NumShuffleLanes &&
2656 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2657 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2658 SwizzleIndices);
2659 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2660 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2661 return Swizzled == GetSwizzleSrcs(I, Lane);
2662 };
2663 } else if (NumShuffleLanes >= NumConstantLanes &&
2664 NumShuffleLanes >= NumSplatLanes) {
// Lane sizes in this branch are in bytes.
2665 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2666 size_t DestLaneCount = VecT.getVectorNumElements();
2667 size_t Scale1 = 1;
2668 size_t Scale2 = 1;
2669 SDValue Src1 = ShuffleSrc1;
2670 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
// A source of a different type is bitcast to VecT; its lane indices are then
// scaled by the ratio of source lane size to destination lane size.
2671 if (Src1.getValueType() != VecT) {
2672 size_t LaneSize =
// NOTE(review): the initializer of LaneSize (listing line 2673) is missing.
2674 assert(LaneSize > DestLaneSize);
2675 Scale1 = LaneSize / DestLaneSize;
2676 Src1 = DAG.getBitcast(VecT, Src1);
2677 }
2678 if (Src2.getValueType() != VecT) {
2679 size_t LaneSize =
// NOTE(review): the initializer of LaneSize (listing line 2680) is missing.
2681 assert(LaneSize > DestLaneSize);
2682 Scale2 = LaneSize / DestLaneSize;
2683 Src2 = DAG.getBitcast(VecT, Src2);
2684 }
2685
// Build the shuffle mask: lanes from the first source use their scaled index,
// lanes from the second source are offset by DestLaneCount, all others are -1.
2686 int Mask[16];
2687 assert(DestLaneCount <= 16);
2688 for (size_t I = 0; I < DestLaneCount; ++I) {
2689 const SDValue &Lane = Op->getOperand(I);
2690 SDValue Src = GetShuffleSrc(Lane);
2691 if (Src == ShuffleSrc1) {
2692 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2693 } else if (Src && Src == ShuffleSrc2) {
2694 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2695 } else {
2696 Mask[I] = -1;
2697 }
2698 }
2699 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2700 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2701 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2702 auto Src = GetShuffleSrc(Lane);
2703 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2704 };
2705 } else if (NumConstantLanes >= NumSplatLanes) {
// Constant-vector strategy: non-constant lanes are filled with zero here and
// patched in afterwards by the replace_lane loop at the end.
2706 SmallVector<SDValue, 16> ConstLanes;
2707 for (const SDValue &Lane : Op->op_values()) {
2708 if (IsConstant(Lane)) {
2709 // Values may need to be fixed so that they will sign extend to be
2710 // within the expected range during ISel. Check whether the value is in
2711 // bounds based on the lane bit width and if it is out of bounds, lop
2712 // off the extra bits.
2713 uint64_t LaneBits = 128 / Lanes;
2714 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2715 ConstLanes.push_back(DAG.getConstant(
2716 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2717 SDLoc(Lane), LaneT));
2718 } else {
2719 ConstLanes.push_back(Lane);
2720 }
2721 } else if (LaneT.isFloatingPoint()) {
2722 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2723 } else {
2724 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2725 }
2726 }
2727 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2728 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2729 return IsConstant(Lane);
2730 };
2731 } else {
// Splat strategy. Unlike the shuffle branch above, DestLaneSize is in bits.
2732 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2733 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2734 (DestLaneSize == 32 || DestLaneSize == 64)) {
2735 // Could be selected to load_zero.
2736 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2737 } else {
2738 // Use a splat (which might be selected as a load splat)
2739 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2740 }
2741 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2742 return Lane == SplatValue;
2743 };
2744 }
2745
2746 assert(Result);
2747 assert(IsLaneConstructed);
2748
2749 // Add replace_lane instructions for any unhandled values
2750 for (size_t I = 0; I < Lanes; ++I) {
2751 const SDValue &Lane = Op->getOperand(I);
2752 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2753 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2754 DAG.getConstant(I, DL, MVT::i32));
2755 }
2756
2757 return Result;
2758}
2759
2760SDValue
2761WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2762 SelectionDAG &DAG) const {
2763 SDLoc DL(Op);
2764 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2765 MVT VecType = Op.getOperand(0).getSimpleValueType();
2766 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2767 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2768
2769 // Space for two vector args and sixteen mask indices
2770 SDValue Ops[18];
2771 size_t OpIdx = 0;
2772 Ops[OpIdx++] = Op.getOperand(0);
2773 Ops[OpIdx++] = Op.getOperand(1);
2774
2775 // Expand mask indices to byte indices and materialize them as operands
2776 for (int M : Mask) {
2777 for (size_t J = 0; J < LaneBytes; ++J) {
2778 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2779 // whole lane of vector input, to allow further reduction at VM. E.g.
2780 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2781 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2782 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2783 }
2784 }
2785
2786 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2787}
2788
// Custom-lowers a SETCC on v2i64 operands (asserted below) by unrolling it:
// each of the two lanes becomes a scalar SELECT_CC yielding all-ones or zero,
// and the results are recombined with a build vector of the original type.
2789SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2790 SelectionDAG &DAG) const {
2791 SDLoc DL(Op);
2792 // The legalizer does not know how to expand the unsupported comparison modes
2793 // of i64x2 vectors, so we manually unroll them here.
2794 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
// NOTE(review): listing line 2795 is absent from this extract; it presumably
// declares the `LHS` and `RHS` containers filled below -- confirm upstream.
2796 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2797 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2798 const SDValue &CC = Op->getOperand(2);
// Builds the scalar result for lane I: -1 if the comparison holds, else 0.
2799 auto MakeLane = [&](unsigned I) {
2800 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2801 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2802 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2803 };
2804 return DAG.getBuildVector(Op->getValueType(0), DL,
2805 {MakeLane(0), MakeLane(1)});
2806}
2807
2808SDValue
2809WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2810 SelectionDAG &DAG) const {
2811 // Allow constant lane indices, expand variable lane indices
2812 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2813 if (isa<ConstantSDNode>(IdxNode)) {
2814 // Ensure the index type is i32 to match the tablegen patterns
2815 uint64_t Idx = IdxNode->getAsZExtVal();
2816 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2817 Ops[Op.getNumOperands() - 1] =
2818 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2819 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2820 }
2821 // Perform default expansion
2822 return SDValue();
2823}
2824
// Fallback used by LowerShift when the shift amount is not a splat: the
// vector shift is unrolled into one scalar shift per lane.
// NOTE(review): the declaration line of this function is not visible in this
// listing; the name is taken from the call site in LowerShift.
2826 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2827 // 32-bit and 64-bit unrolled shifts will have proper semantics
2828 if (LaneT.bitsGE(MVT::i32))
2829 return DAG.UnrollVectorOp(Op.getNode());
2830 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2831 SDLoc DL(Op);
2832 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
// The mask keeps only the low bits of the amount that are meaningful for a
// lane of LaneT bits (lane width minus one).
2833 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2834 unsigned ShiftOpcode = Op.getOpcode();
// Both the shifted values and the shift amounts are extracted as i32 scalars.
2835 SmallVector<SDValue, 16> ShiftedElements;
2836 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2837 SmallVector<SDValue, 16> ShiftElements;
2838 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2839 SmallVector<SDValue, 16> UnrolledOps;
2840 for (size_t i = 0; i < NumLanes; ++i) {
2841 SDValue MaskedShiftValue =
2842 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2843 SDValue ShiftedValue = ShiftedElements[i];
// For arithmetic right shifts the narrow lane value is sign-extended within
// the i32 first, so the 32-bit shift sees the lane's sign bit.
2844 if (ShiftOpcode == ISD::SRA)
2845 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2846 ShiftedValue, DAG.getValueType(LaneT));
2847 UnrolledOps.push_back(
2848 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2849 }
// Reassemble the per-lane i32 results into a vector of the original type.
2850 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2851}
2852
2853SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2854 SelectionDAG &DAG) const {
2855 SDLoc DL(Op);
2856 // Only manually lower vector shifts
2857 assert(Op.getSimpleValueType().isVector());
2858
2859 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2860 auto ShiftVal = Op.getOperand(1);
2861
2862 // Try to skip bitmask operation since it is implied inside shift instruction
2863 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2864 if (MaskOp.getOpcode() != ISD::AND)
2865 return MaskOp;
2866 SDValue LHS = MaskOp.getOperand(0);
2867 SDValue RHS = MaskOp.getOperand(1);
2868 if (MaskOp.getValueType().isVector()) {
2869 APInt MaskVal;
2870 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2871 std::swap(LHS, RHS);
2872
2873 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2874 MaskVal == MaskBits)
2875 MaskOp = LHS;
2876 } else {
2877 if (!isa<ConstantSDNode>(RHS.getNode()))
2878 std::swap(LHS, RHS);
2879
2880 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2881 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2882 MaskOp = LHS;
2883 }
2884
2885 return MaskOp;
2886 };
2887
2888 // Skip vector and operation
2889 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2890 ShiftVal = DAG.getSplatValue(ShiftVal);
2891 if (!ShiftVal)
2892 return unrollVectorShift(Op, DAG);
2893
2894 // Skip scalar and operation
2895 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2896 // Use anyext because none of the high bits can affect the shift
2897 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2898
2899 unsigned Opcode;
2900 switch (Op.getOpcode()) {
2901 case ISD::SHL:
2902 Opcode = WebAssemblyISD::VEC_SHL;
2903 break;
2904 case ISD::SRA:
2905 Opcode = WebAssemblyISD::VEC_SHR_S;
2906 break;
2907 case ISD::SRL:
2908 Opcode = WebAssemblyISD::VEC_SHR_U;
2909 break;
2910 default:
2911 llvm_unreachable("unexpected opcode");
2912 }
2913
2914 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2915}
2916
2917SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2918 SelectionDAG &DAG) const {
2919 EVT ResT = Op.getValueType();
2920 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2921
2922 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2923 (SatVT == MVT::i32 || SatVT == MVT::i64))
2924 return Op;
2925
2926 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2927 return Op;
2928
2929 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2930 return Op;
2931
2932 return SDValue();
2933}
2934
// Predicate used by LowerFMIN / LowerFMAX to decide whether the relaxed SIMD
// min/max nodes may be used. It holds only when BOTH of these are true:
//  * NaNs are excluded: the node has the no-NaNs flag, or both operands are
//    known never to be NaN; and
//  * signed zeros are not a concern: the node has the no-signed-zeros flag,
//    or at least one operand is known never to be a (logical) zero.
// NOTE(review): the declaration line of this function is not visible in this
// listing; the name is taken from the call sites in LowerFMIN / LowerFMAX.
2936 return (Op->getFlags().hasNoNaNs() ||
2937 (DAG.isKnownNeverNaN(Op->getOperand(0)) &&
2938 DAG.isKnownNeverNaN(Op->getOperand(1)))) &&
2939 (Op->getFlags().hasNoSignedZeros() ||
2940 DAG.isKnownNeverLogicalZero(Op->getOperand(0)) ||
2941 DAG.isKnownNeverLogicalZero(Op->getOperand(1)));
2942}
2943
2944SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2945 SelectionDAG &DAG) const {
2946 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2947 return DAG.getNode(WebAssemblyISD::RELAXED_FMIN, SDLoc(Op),
2948 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2949 }
2950 return SDValue();
2951}
2952
2953SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2954 SelectionDAG &DAG) const {
2955 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2956 return DAG.getNode(WebAssemblyISD::RELAXED_FMAX, SDLoc(Op),
2957 Op.getValueType(), Op.getOperand(0), Op.getOperand(1));
2958 }
2959 return SDValue();
2960}
2961
2962//===----------------------------------------------------------------------===//
2963// Custom DAG combine hooks
2964//===----------------------------------------------------------------------===//
// DAG combine for shuffle nodes: moves a lane-count-preserving bitcast from
// the input of a unary shuffle to its output. Returns an empty SDValue when
// the pattern does not apply.
// NOTE(review): the line carrying this function's name and parameters is not
// visible in this listing; N and DCI are used below.
2965static SDValue
2967 auto &DAG = DCI.DAG;
2968 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2969
2970 // Hoist vector bitcasts that don't change the number of lanes out of unary
2971 // shuffles, where they are less likely to get in the way of other combines.
2972 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2973 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2974 SDValue Bitcast = N->getOperand(0);
2975 if (Bitcast.getOpcode() != ISD::BITCAST)
2976 return SDValue();
// Only unary shuffles qualify: the second shuffle input must be undef.
2977 if (!N->getOperand(1).isUndef())
2978 return SDValue();
2979 SDValue CastOp = Bitcast.getOperand(0);
2980 EVT SrcType = CastOp.getValueType();
2981 EVT DstType = Bitcast.getValueType();
// The pre-bitcast value must be a 128-bit vector with the same lane count, so
// the original mask can be reused unchanged on the source type.
2982 if (!SrcType.is128BitVector() ||
2983 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2984 return SDValue();
2985 SDValue NewShuffle = DAG.getVectorShuffle(
2986 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2987 return DAG.getBitcast(DstType, NewShuffle);
2988}
2989
2990/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2991/// split up into scalar instructions during legalization, and the vector
2992/// extending instructions are selected in performVectorExtendCombine below.
// NOTE(review): the lines carrying this function's name and parameters are
// not visible in this listing; N and DCI are used below.
2993static SDValue
2996 auto &DAG = DCI.DAG;
2997 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2998 N->getOpcode() == ISD::SINT_TO_FP);
2999
3000 EVT InVT = N->getOperand(0)->getValueType(0);
3001 EVT ResVT = N->getValueType(0);
// Pick the intermediate integer vector type: narrow (i8/i16) inputs are
// widened to i32 lanes with the same lane count as the result. Any other
// type combination is left alone.
3002 MVT ExtVT;
3003 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
3004 ExtVT = MVT::v4i32;
3005 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
3006 ExtVT = MVT::v2i32;
3007 else
3008 return SDValue();
3009
// NOTE(review): the initializer of Op is not visible in this listing;
// presumably it selects the sign- or zero-extend opcode matching the
// conversion's signedness, as the doc comment above describes. Confirm.
3010 unsigned Op =
// Extend first, then redo the original conversion on the widened vector.
3012 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
3013 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
3014}
3015
// DAG combine that rewrites a vector unsigned int-to-fp conversion as the
// signed conversion (SINT_TO_FP) when the input is known non-negative, either
// via the nonneg node flag or because the sign bit is known to be zero.
// Returns an empty SDValue when the rewrite does not apply.
// NOTE(review): the lines carrying this function's name and parameters are
// not visible in this listing; N and DCI are used below.
3016static SDValue
3019 auto &DAG = DCI.DAG;
3020
3021 SDNodeFlags Flags = N->getFlags();
3022 SDValue Op0 = N->getOperand(0);
3023 EVT VT = N->getValueType(0);
3024
3025 // Optimize uitofp to sitofp when the sign bit is known to be zero.
3026 // Depending on the target (runtime) backend, this might be performance
3027 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
3028 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
3029 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
3030 }
3031
3032 return SDValue();
3033}
3034
// DAG combine for vector SIGN_EXTEND / ZERO_EXTEND. Two patterns are handled:
//  1. v16i8 -> v16i32 is built from a two-level tree of extend low/high nodes
//     whose four v4i32 results are concatenated.
//  2. An extend of an EXTRACT_SUBVECTOR that takes the low or high half of a
//     128-bit source becomes a single extend low/high node on that source.
// Returns an empty SDValue when neither pattern applies.
// NOTE(review): the line carrying this function's name and parameters is not
// visible in this listing; N and DCI are used below.
3035static SDValue
3037 auto &DAG = DCI.DAG;
3038 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
3039 N->getOpcode() == ISD::ZERO_EXTEND);
3040
3041 EVT ResVT = N->getValueType(0);
3042 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3043 SDLoc DL(N);
3044
3045 if (ResVT == MVT::v16i32 && N->getOperand(0)->getValueType(0) == MVT::v16i8) {
3046 // Use a tree of extend low/high to split and extend the input in two
3047 // layers to avoid doing several shuffles and even more extends.
3048 unsigned LowOp =
3049 IsSext ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3050 unsigned HighOp =
3051 IsSext ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3052 SDValue Input = N->getOperand(0);
// First layer: v16i8 -> two v8i16 halves.
3053 SDValue LowHalf = DAG.getNode(LowOp, DL, MVT::v8i16, Input);
3054 SDValue HighHalf = DAG.getNode(HighOp, DL, MVT::v8i16, Input);
// Second layer: each v8i16 half -> two v4i32 quarters, kept in lane order.
3055 SDValue Subvectors[] = {
3056 DAG.getNode(LowOp, DL, MVT::v4i32, LowHalf),
3057 DAG.getNode(HighOp, DL, MVT::v4i32, LowHalf),
3058 DAG.getNode(LowOp, DL, MVT::v4i32, HighHalf),
3059 DAG.getNode(HighOp, DL, MVT::v4i32, HighHalf),
3060 };
3061 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Subvectors);
3062 }
3063
3064 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
3065 // possible before the extract_subvector can be expanded.
3066 auto Extract = N->getOperand(0);
3067 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3068 return SDValue();
3069 auto Source = Extract.getOperand(0);
// The subvector index must be a compile-time constant.
3070 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
3071 if (IndexNode == nullptr)
3072 return SDValue();
3073 auto Index = IndexNode->getZExtValue();
3074
3075 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3076 // extracted subvector is the low or high half of its source.
3077 if (ResVT == MVT::v8i16) {
3078 if (Extract.getValueType() != MVT::v8i8 ||
3079 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3080 return SDValue();
3081 } else if (ResVT == MVT::v4i32) {
3082 if (Extract.getValueType() != MVT::v4i16 ||
3083 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3084 return SDValue();
3085 } else if (ResVT == MVT::v2i64) {
3086 if (Extract.getValueType() != MVT::v2i32 ||
3087 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3088 return SDValue();
3089 } else {
3090 return SDValue();
3091 }
3092
// Index 0 selects the low half; the only other index accepted above selects
// the high half.
3093 bool IsLow = Index == 0;
3094
3095 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3096 : WebAssemblyISD::EXTEND_HIGH_S)
3097 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3098 : WebAssemblyISD::EXTEND_HIGH_U);
3099
3100 return DAG.getNode(Op, DL, ResVT, Source);
3101}
3102
// DAG combine that recognises a narrowing conversion (saturating fp-to-int or
// FP_ROUND) paired with an all-zero half, in either order of conversion and
// concat_vectors, and replaces it with one of the WebAssembly "zero" nodes
// (TRUNC_SAT_ZERO_S / TRUNC_SAT_ZERO_U / DEMOTE_ZERO). Returns an empty
// SDValue when no pattern applies.
// NOTE(review): the line carrying this function's name and parameters is not
// visible in this listing; N and DCI are used below.
3103static SDValue
3105 auto &DAG = DCI.DAG;
3106
// Maps the generic conversion opcode to the WebAssembly node to emit.
// NOTE(review): the case labels of the first two returns are not visible in
// this listing; confirm which opcodes they cover.
3107 auto GetWasmConversionOp = [](unsigned Op) {
3108 switch (Op) {
3110 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3112 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3113 case ISD::FP_ROUND:
3114 return WebAssemblyISD::DEMOTE_ZERO;
3115 }
3116 llvm_unreachable("unexpected op");
3117 };
3118
// True when SplatVal is a build-vector node forming a constant splat of zero.
3119 auto IsZeroSplat = [](SDValue SplatVal) {
3120 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
3121 APInt SplatValue, SplatUndef;
3122 unsigned SplatBitSize;
3123 bool HasAnyUndefs;
3124 // Endianness doesn't matter in this context because we are looking for
3125 // an all-zero value.
3126 return Splat &&
3127 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3128 HasAnyUndefs) &&
3129 SplatValue == 0;
3130 };
3131
3132 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3133 // Combine this:
3134 //
3135 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3136 //
3137 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3138 //
3139 // Or this:
3140 //
3141 // (concat_vectors ({v2f32, v4f16} (fp_round ({v2f64, v4f32} $x))),
3142 // ({v2f32, v4f16} (splat 0)))
3143 //
3144 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3145 EVT ResVT;
3146 EVT ExpectedConversionType;
3147 auto Conversion = N->getOperand(0);
3148 auto ConversionOp = Conversion.getOpcode();
// NOTE(review): the case labels for the integer-conversion arm of this switch
// are not visible in this listing.
3149 switch (ConversionOp) {
3152 ResVT = MVT::v4i32;
3153 ExpectedConversionType = MVT::v2i32;
3154 break;
3155 case ISD::FP_ROUND:
3156 if (Conversion.getValueType() == MVT::v2f32) {
3157 ResVT = MVT::v4f32;
3158 ExpectedConversionType = MVT::v2f32;
3159 } else if (Conversion.getValueType() == MVT::v4f16) {
3160 ResVT = MVT::v8f16;
3161 ExpectedConversionType = MVT::v4f16;
3162 } else {
3163 return SDValue();
3164 }
3165 break;
3166 default:
3167 return SDValue();
3168 }
3169
// The concat must produce exactly the full-width type expected for this
// conversion, and the conversion must produce the expected half-width type.
3170 if (N->getValueType(0) != ResVT)
3171 return SDValue();
3172
3173 if (Conversion.getValueType() != ExpectedConversionType)
3174 return SDValue();
3175
// Only these source/result pairings are accepted.
3176 auto Source = Conversion.getOperand(0);
3177 if (!((Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4f32) ||
3178 (Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4i32) ||
3179 (Source.getValueType() == MVT::v4f32 && ResVT == MVT::v8f16)))
3180 return SDValue();
3181
// The second concat operand must be an all-zero splat of the half-width type.
3182 if (!IsZeroSplat(N->getOperand(1)) ||
3183 N->getOperand(1).getValueType() != ExpectedConversionType)
3184 return SDValue();
3185
3186 unsigned Op = GetWasmConversionOp(ConversionOp);
3187 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3188 }
3189
3190 // Combine this:
3191 //
3192 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3193 //
3194 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3195 //
3196 // Or this:
3197 //
3198 // ({v4f32, v8f16} (fp_round (concat_vectors $x,
3199 // ({v2f64, v4f32} (splat 0)))))
3200 //
3201 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3202 EVT ResVT;
3203 auto ConversionOp = N->getOpcode();
// NOTE(review): the case labels for the integer-conversion arm of this switch
// are not visible in this listing.
3204 switch (ConversionOp) {
3207 ResVT = MVT::v4i32;
3208 break;
3209 case ISD::FP_ROUND:
3210 ResVT = N->getValueType(0);
3211 break;
3212 default:
3213 llvm_unreachable("unexpected op");
3214 }
3215
3216 if (N->getValueType(0) != ResVT)
3217 return SDValue();
3218
// Here the conversion is the outer node, so its input must be the concat.
3219 auto Concat = N->getOperand(0);
3220 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3221 return SDValue();
3222 EVT ConcatVT = Concat.getValueType();
3223 EVT SourceVT = Concat.getOperand(0).getValueType();
3224
3225 if (!IsZeroSplat(Concat.getOperand(1)))
3226 return SDValue();
3227
// Validate the exact type triple (concat type, source type, result type) for
// the conversion kind.
3228 if (ConversionOp == ISD::FP_ROUND) {
3229 bool IsF64ToF32 =
3230 ConcatVT == MVT::v4f64 && SourceVT == MVT::v2f64 && ResVT == MVT::v4f32;
3231 bool IsF32ToF16 =
3232 ConcatVT == MVT::v8f32 && SourceVT == MVT::v4f32 && ResVT == MVT::v8f16;
3233 if (!(IsF64ToF32 || IsF32ToF16))
3234 return SDValue();
3235 } else {
3236 if (ConcatVT != MVT::v4f64 || SourceVT != MVT::v2f64 || ResVT != MVT::v4i32)
3237 return SDValue();
3238 }
3239
3240 unsigned Op = GetWasmConversionOp(ConversionOp);
3241 return DAG.getNode(Op, SDLoc(N), ResVT, Concat.getOperand(0));
3242}
3243
3244// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3245static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3246 const SDLoc &DL, unsigned VectorWidth) {
3247 EVT VT = Vec.getValueType();
3248 EVT ElVT = VT.getVectorElementType();
3249 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3250 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3251 VT.getVectorNumElements() / Factor);
3252
3253 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3254 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3255 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3256
3257 // This is the index of the first element of the VectorWidth-bit chunk
3258 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3259 IdxVal &= ~(ElemsPerChunk - 1);
3260
3261 // If the input is a buildvector just emit a smaller one.
3262 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3263 return DAG.getBuildVector(ResultVT, DL,
3264 Vec->ops().slice(IdxVal, ElemsPerChunk));
3265
3266 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3267 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3268}
3269
3270// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3271// is the expected destination value type after recursion. In is the initial
3272// input. Note that the input should have enough leading zero bits to prevent
3273// NARROW_U from saturating results.
// Returns In unchanged when it already has type DstVT, and an empty SDValue
// when the element count is not a power of two.
// NOTE(review): the first line of the declaration (return type, name and
// leading parameters) is not visible in this listing; DstVT, In and DL are
// used below.
3275 SelectionDAG &DAG) {
3276 EVT SrcVT = In.getValueType();
3277
3278 // No truncation required, we might get here due to recursive calls.
3279 if (SrcVT == DstVT)
3280 return In;
3281
3282 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3283 unsigned NumElems = SrcVT.getVectorNumElements();
3284 if (!isPowerOf2_32(NumElems))
3285 return SDValue();
3286 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3287 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3288
3289 LLVMContext &Ctx = *DAG.getContext();
// Element type after one halving step.
3290 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3291
3292 // Narrow to the largest type possible:
3293 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3294 EVT InVT = MVT::i16, OutVT = MVT::i8;
3295 if (SrcVT.getScalarSizeInBits() > 16) {
3296 InVT = MVT::i32;
3297 OutVT = MVT::i16;
3298 }
// InVT is sized to cover one half of the source; OutVT is computed from the
// same bit count but with the narrower lanes, so it has twice as many lanes.
3299 unsigned SubSizeInBits = SrcSizeInBits / 2;
3300 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3301 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3302
3303 // Split lower/upper subvectors.
3304 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3305 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3306
3307 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3308 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3309 Lo = DAG.getBitcast(InVT, Lo);
3310 Hi = DAG.getBitcast(InVT, Hi);
3311 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3312 return DAG.getBitcast(DstVT, Res);
3313 }
3314
3315 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3316 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3317 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3318 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3319
// Rejoin the two halved-width halves, then keep narrowing until DstVT is
// reached.
3320 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3321 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3322 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3323}
3324
// DAG combine for vector truncation to a 128-bit vector with i8 or i16 lanes:
// the input is ANDed with a constant mask and then narrowed through
// truncateVectorWithNARROW. Returns an empty SDValue for unsupported types.
// NOTE(review): the lines carrying this function's name and parameters are
// not visible in this listing; N and DCI are used below.
3327 auto &DAG = DCI.DAG;
3328
3329 SDValue In = N->getOperand(0);
3330 EVT InVT = In.getValueType();
3331 if (!InVT.isSimple())
3332 return SDValue();
3333
3334 EVT OutVT = N->getValueType(0);
3335 if (!OutVT.isVector())
3336 return SDValue();
3337
3338 EVT OutSVT = OutVT.getVectorElementType();
3339 EVT InSVT = InVT.getVectorElementType();
3340 // Currently only cover truncate to v16i8 or v8i16.
3341 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3342 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3343 return SDValue();
3344
3345 SDLoc DL(N);
// NOTE(review): the start of the statement defining Mask is not visible in
// this listing; only its final argument, the output lane width, is shown.
// Presumably Mask keeps the low output-width bits of each input lane so that
// NARROW_U cannot saturate; confirm against the upstream file.
3347 OutVT.getScalarSizeInBits());
3348 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3349 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3350}
3351
// DAG combine, active only before legalization, for a bitcast from a
// fixed-length vector of i1 to a scalar integer:
//  * 2/4/8/16 lanes: the mask is sign-extended to 128 bits and fed to the
//    wasm bitmask intrinsic.
//  * 32/64 lanes: only when the source is a setcc with a CONCAT_VECTORS
//    operand; one bitmask is built per concat operand and the pieces are
//    merged with shifts and ORs.
// Returns an empty SDValue otherwise.
// NOTE(review): the lines carrying this function's name and parameters are
// not visible in this listing; N and DCI are used below.
3354 using namespace llvm::SDPatternMatch;
3355 auto &DAG = DCI.DAG;
3356 SDLoc DL(N);
3357 SDValue Src = N->getOperand(0);
3358 EVT VT = N->getValueType(0);
3359 EVT SrcVT = Src.getValueType();
3360
3361 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3362 SrcVT.isFixedLengthVectorOf(MVT::i1)))
3363 return SDValue();
3364
3365 unsigned NumElts = SrcVT.getVectorNumElements();
// Lane width that makes NumElts lanes fill a 128-bit vector. It is only used
// by the 2/4/8/16-lane path below.
3366 EVT Width = MVT::getIntegerVT(128 / NumElts);
3367
3368 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3369 // ==> bitmask
3370 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3371 return DAG.getZExtOrTrunc(
3372 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3373 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3374 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3375 SrcVT.changeVectorElementType(
3376 *DAG.getContext(), Width))}),
3377 DL, VT);
3378 }
3379
3380 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3381 if (NumElts == 32 || NumElts == 64) {
3382 SDValue Concat, SetCCVector;
3383 ISD::CondCode SetCond;
3384
// The matcher is commutative, so Concat may bind to either setcc operand;
// the check below then requires that the one bound to Concat really is a
// CONCAT_VECTORS.
3385 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3386 m_CondCode(SetCond)))))
3387 return SDValue();
3388 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3389 return SDValue();
3390
3391 // Reconstruct the wide bitmask from each CONCAT_VECTORS operand.
3392 // Derive the per-chunk mask/integer types from the actual operand type
3393 // instead of hardcoding v16i1 / i16 for every chunk.
3394 EVT ConcatOperandVT = Concat.getOperand(0).getValueType();
3395 unsigned ConcatOperandNumElts = ConcatOperandVT.getVectorNumElements();
3396
3397 EVT ConcatOperandMaskVT =
3398 EVT::getVectorVT(*DAG.getContext(), MVT::i1,
3399 ElementCount::getFixed(ConcatOperandNumElts));
3400 EVT ConcatOperandBitmaskVT =
3401 EVT::getIntegerVT(*DAG.getContext(), ConcatOperandNumElts);
3402 EVT ReturnVT = N->getValueType(0);
3403 SDValue ReconstructedBitmask = DAG.getConstant(0, DL, ReturnVT);
3404 // Example:
3405 // v32i16 = concat(v8i16, v8i16, v8i16, v8i16)
3406 // -> v8i1 + v8i1 + v8i1 + v8i1
3407 // -> i8 + i8 + i8 + i8
3408 // -> reconstructed i32 bitmask
3409 for (size_t I = 0; I < Concat->ops().size(); ++I) {
3410 SDValue ConcatOperand = Concat.getOperand(I);
3411 assert(ConcatOperand.getValueType() == ConcatOperandVT &&
3412 "concat_vectors operands must have the same type");
3413
// Take the 128-bit slice of the other setcc operand that lines up with this
// concat operand; give up if it does not have the same type.
3414 SDValue SetCCVectorOperand =
3415 extractSubVector(SetCCVector, I * ConcatOperandNumElts, DAG, DL, 128);
3416 if (!SetCCVectorOperand ||
3417 SetCCVectorOperand.getValueType() != ConcatOperandVT)
3418 return SDValue();
3419
3420 // Build the per-chunk mask using the correct chunk type:
3421 // v16i8 -> v16i1 -> i16
3422 // v8i16 -> v8i1 -> i8
3423 // v4i32 -> v4i1 -> i4
3424 // v2i64 -> v2i1 -> i2
3425 SDValue ConcatOperandMask = DAG.getSetCC(
3426 DL, ConcatOperandMaskVT, ConcatOperand, SetCCVectorOperand, SetCond);
3427 SDValue ConcatOperandBitmask =
3428 DAG.getBitcast(ConcatOperandBitmaskVT, ConcatOperandMask);
3429 SDValue ExtendedConcatOperandBitmask =
3430 DAG.getZExtOrTrunc(ConcatOperandBitmask, DL, ReturnVT);
3431
3432 // Shift the previously reconstructed bits to make room for this chunk.
// NOTE(review): because earlier chunks are shifted left as later ones are
// OR-ed in, chunk 0 ends up in the most-significant position of the result.
// Confirm this matches the bit layout expected for a <N x i1> -> iN bitcast
// (element 0 in bit 0 on a little-endian target).
3433 if (I != 0) {
3434 ReconstructedBitmask = DAG.getNode(
3435 ISD::SHL, DL, ReturnVT, ReconstructedBitmask,
3436 DAG.getShiftAmountConstant(ConcatOperandNumElts, ReturnVT, DL));
3437 }
3438
3439 // Merge disjoint partial bitmasks with OR.
3440 ReconstructedBitmask =
3441 DAG.getNode(ISD::OR, DL, ReturnVT, ReconstructedBitmask,
3442 ExtendedConcatOperandBitmask);
3443 }
3444
3445 return ReconstructedBitmask;
3446 }
3447
3448 return SDValue();
3449}
3450
// DAG combine for the wasm bitmask intrinsic: when its operand is a setcc
// against zero matching the pattern below, the setcc is dropped and the
// bitmask is taken directly from the compared value. Returns an empty SDValue
// when the node is a different intrinsic or the pattern does not match.
// NOTE(review): the declaration line of this function is not visible in this
// listing; N and DAG are used below.
3452 // bitmask (setcc <X>, 0, setlt) => bitmask X
3453 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3454 using namespace llvm::SDPatternMatch;
3455
// Operand 0 of the intrinsic node holds the intrinsic ID.
3456 if (N->getConstantOperandVal(0) != Intrinsic::wasm_bitmask)
3457 return SDValue();
3458
3459 SDValue LHS;
// NOTE(review): the condition-code part of this matcher is not visible in
// this listing; per the comment at the top it should require setlt. Confirm.
3460 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3462 return SDValue();
3463
3464 SDLoc DL(N);
3465 return DAG.getNode(
3466 ISD::INTRINSIC_WO_CHAIN, DL, N->getValueType(0),
3467 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32), LHS});
3468}
3469
// DAG combine for the wasm any_true / all_true intrinsics applied to a
// comparison against zero; the four rewrites are listed directly below.
// Returns an empty SDValue when none of them applies.
// NOTE(review): the declaration line of this function is not visible in this
// listing; N and DAG are used below.
3471 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3472 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3473 // any_true (setcc <X>, 0, ne) => (any_true X)
3474 // all_true (setcc <X>, 0, ne) => (all_true X)
3475 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3476 using namespace llvm::SDPatternMatch;
3477
3478 SDValue LHS;
// NOTE(review): the matcher expression on the next statement is truncated in
// this listing; it is expected to bind LHS to the compared vector.
3479 if (N->getNumOperands() < 2 ||
3480 !sd_match(N->getOperand(1),
3482 return SDValue();
// Bail out when the lanes are wider than a 128-bit vector with this many
// elements allows.
3483 EVT LT = LHS.getValueType();
3484 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3485 return SDValue();
3486
// Tries one rewrite: if N is intrinsic InPre applied to (setcc X, 0, SetType),
// emit intrinsic InPost on X, inverted with XOR 1 when SetType is SETEQ, and
// resized to N's result type. Returns an empty SDValue when it does not match.
3487 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3488 ISD::CondCode SetType,
3489 Intrinsic::WASMIntrinsics InPost) {
3490 if (N->getConstantOperandVal(0) != InPre)
3491 return SDValue();
3492
3493 SDValue LHS;
3494 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3495 m_SpecificCondCode(SetType))))
3496 return SDValue();
3497
3498 SDLoc DL(N);
3499 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3500 {DAG.getConstant(InPost, DL, MVT::i32), LHS});
3501 if (SetType == ISD::SETEQ)
3502 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3503 DAG.getConstant(1, DL, MVT::i32));
3504 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3505 };
3506
// Try the four rewrites in turn; the first that matches wins.
3507 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3508 Intrinsic::wasm_alltrue))
3509 return AnyTrueEQ;
3510 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3511 Intrinsic::wasm_anytrue))
3512 return AllTrueEQ;
3513 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3514 Intrinsic::wasm_anytrue))
3515 return AnyTrueNE;
3516 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3517 Intrinsic::wasm_alltrue))
3518 return AllTrueNE;
3519
3520 return SDValue();
3521}
3522
3528
// Reduces a small (2/4/8/16-lane, per the caller) i1 mask that feeds a
// bitcast + setcc into a single any_true/all_true intrinsic call. The mask is
// sign-extended so it fills a 128-bit vector, the intrinsic chosen by Info is
// applied, the i32 result is inverted when Info.Invert is set, and it is
// finally resized to the type of N.
// NOTE(review): the first line of the declaration (return type, name and
// leading parameters) is not visible in this listing; N and FromVT are used
// below, and the name is taken from the call site that passes NumElts of
// 2/4/8/16.
3530 unsigned NumElts,
3531 const MaskReduceInfo &Info,
3532 SelectionDAG &DAG) {
3533 EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
3534 MVT::getIntegerVT(128 / NumElts));
3535 assert(VecVT.getSizeInBits() == 128 &&
3536 "mask reduction should be widened to a 128-bit vector");
3537
3538 SDLoc DL(N);
// N is setcc(bitcast(mask), ...): operand 0 of operand 0 is the i1 mask.
3539 SDValue Mask = N->getOperand(0)->getOperand(0);
3540 SDValue Ret = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3541 {DAG.getConstant(Info.IID, DL, MVT::i32),
3542 DAG.getSExtOrTrunc(Mask, DL, VecVT)});
// The intrinsic result is treated as 0/1, so XOR with 1 is a logical negation.
3543 if (Info.Invert)
3544 Ret = DAG.getNode(ISD::XOR, DL, MVT::i32, Ret,
3545 DAG.getConstant(1, DL, MVT::i32));
3546 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3547}
3548
// Reduces a wide (32- or 64-lane) i1 mask by splitting it into 16-lane
// chunks, reducing each chunk with the intrinsic chosen by Info, and folding
// the per-chunk i32 results with Info.WideCombineOpcode. The result is
// inverted when Info.Invert is set and resized to the type of N.
// NOTE(review): the first line of the declaration (return type, name and
// leading parameters) is not visible in this listing; N, Mask and MaskVT are
// used below.
3550 unsigned NumElts,
3551 const MaskReduceInfo &Info,
3552 SelectionDAG &DAG) {
3553 assert((NumElts == 32 || NumElts == 64) &&
3554 "combineWideMaskReduction is only for wide masks");
3555 assert(MaskVT.isFixedLengthVector() &&
3556 MaskVT.getVectorElementType() == MVT::i1);
3557 SDLoc DL(N);
// Each chunk is a v16i1 that gets sign-extended to a 128-bit vector with
// 128/16 = 8-bit lanes.
3558 unsigned ChunkElts = 16;
3559 EVT ChunkMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
3560 ElementCount::getFixed(ChunkElts));
3561 EVT LegalVecVT = ChunkMaskVT.changeVectorElementType(
3562 *DAG.getContext(), MVT::getIntegerVT(128 / ChunkElts));
3563
3564 SmallVector<SDValue, 4> ChunkResults;
3565 // Split the wide mask into v16i1 chunks and reduce each chunk separately.
3566 // For example:
3567 // v32i1: [0..15] [16..31]
3568 // | |
3569 // v v
3570 // chunk0 chunk1
3571 //
3572 // v64i1: [0..15] [16..31] [32..47] [48..63]
3573 // | | | |
3574 // v v v v
3575 // chunk0 chunk1 chunk2 chunk3
3576 //
3577 // each chunk:
3578 // v16i1 -> v16i8 -> wasm_anytrue/alltrue -> i32 0/1
3579 for (unsigned I = 0; I < NumElts; I += ChunkElts) {
3580 SDValue ChunkMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ChunkMaskVT,
3581 Mask, DAG.getVectorIdxConstant(I, DL));
3582 SDValue LegalMask = DAG.getSExtOrTrunc(ChunkMask, DL, LegalVecVT);
3583 SDValue Reduced =
3584 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3585 DAG.getConstant(Info.IID, DL, MVT::i32), LegalMask);
3586 ChunkResults.push_back(Reduced);
3587 }
3588
// Fold the per-chunk results left to right with the combine opcode.
3589 SDValue Acc = ChunkResults[0];
3590 for (unsigned I = 1; I < ChunkResults.size(); ++I)
3591 Acc =
3592 DAG.getNode(Info.WideCombineOpcode, DL, MVT::i32, Acc, ChunkResults[I]);
3593
// XOR with 1 negates the combined 0/1 result.
3594 if (Info.Invert)
3595 Acc = DAG.getNode(ISD::XOR, DL, MVT::i32, Acc,
3596 DAG.getConstant(1, DL, MVT::i32));
3597
3598 return DAG.getZExtOrTrunc(Acc, DL, N->getValueType(0));
3599}
3600
3601static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
3602 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3603 if (!C)
3604 return std::nullopt;
3605
3606 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3607
3608 // setcc (bitcast mask), 0, ne -> any_true(mask)
3609 if (C->isZero() && CC == ISD::SETNE)
3610 return MaskReduceInfo{Intrinsic::wasm_anytrue, ISD::OR, false};
3611
3612 // setcc (bitcast mask), 0, eq -> !any_true(mask)
3613 if (C->isZero() && CC == ISD::SETEQ)
3614 return MaskReduceInfo{Intrinsic::wasm_anytrue, ISD::OR, true};
3615
3616 // setcc (bitcast mask), -1, eq -> all_true(mask)
3617 if (C->isAllOnes() && CC == ISD::SETEQ)
3618 return MaskReduceInfo{Intrinsic::wasm_alltrue, ISD::AND, false};
3619
3620 // setcc (bitcast mask), -1, ne -> !all_true(mask)
3621 if (C->isAllOnes() && CC == ISD::SETNE)
3622 return MaskReduceInfo{Intrinsic::wasm_alltrue, ISD::AND, true};
3623
3624 return std::nullopt;
3625}
3626
3627/// Try to convert a i128 comparison to a v16i8 comparison before type
3628/// legalization splits it up into chunks
// Returns an empty SDValue when the comparison is not an i128 integer
// equality/inequality, SIMD128 is unavailable, or either operand is not cheap
// to view as a vector (see IsVectorBitCastCheap below).
// NOTE(review): the line carrying this function's name and leading parameters
// is not visible in this listing; N and DCI are used below.
3629static SDValue
3631 const WebAssemblySubtarget *Subtarget) {
3632
3633 SDLoc DL(N);
3634 SDValue X = N->getOperand(0);
3635 SDValue Y = N->getOperand(1);
3636 EVT VT = N->getValueType(0);
3637 EVT OpVT = X.getValueType();
3638
3639 SelectionDAG &DAG = DCI.DAG;
// NOTE(review): the start of this condition is not visible in this listing;
// the visible part shows it tests for the NoImplicitFloat attribute, and the
// combine is skipped when the condition holds.
3641 Attribute::NoImplicitFloat))
3642 return SDValue();
3643
3644 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3645 // We're looking for an oversized integer equality comparison with SIMD
3646 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3647 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3648 return SDValue();
3649
3650 // Don't perform this combine if constructing the vector will be expensive.
// NOTE(review): one line of this lambda's body is not visible in this
// listing; the visible part accepts constants and loads.
3651 auto IsVectorBitCastCheap = [](SDValue X) {
3653 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3654 };
3655
3656 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3657 return SDValue();
3658
// Reinterpret both i128 operands as v16i8 and compare them lane-wise with
// the original condition code.
3659 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3660 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3661 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3662
// Equality holds only if every byte lane compared equal (all_true);
// inequality holds if any byte lane compared unequal (any_true).
3663 SDValue Intr =
3664 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3665 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3666 : Intrinsic::wasm_anytrue,
3667 DL, MVT::i32),
3668 Cmp});
3669
// Convert the i32 intrinsic result back to the setcc result type.
3670 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3671 ISD::SETNE);
3672}
3673
// DAG combine for scalar-integer SETCC nodes, active only before
// legalization. It first tries the i128 -> v16i8 equality rewrite, then looks
// for a comparison of a bitcast i1 vector against a constant and turns it
// into an any_true/all_true reduction (small masks directly, 32/64-lane masks
// in chunks). Returns an empty SDValue when nothing applies.
// NOTE(review): the lines carrying this function's name and leading
// parameters are not visible in this listing; N and DCI are used below.
3676 const WebAssemblySubtarget *Subtarget) {
3677 if (!DCI.isBeforeLegalize())
3678 return SDValue();
3679
3680 EVT VT = N->getValueType(0);
3681 if (!VT.isScalarInteger())
3682 return SDValue();
3683
3684 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3685 return V;
3686
// From here on only setcc(bitcast(<N x i1>), constant, cc) is of interest.
3687 SDValue LHS = N->getOperand(0);
3688 if (LHS->getOpcode() != ISD::BITCAST)
3689 return SDValue();
3690
3691 EVT FromVT = LHS->getOperand(0).getValueType();
3692 if (!FromVT.isFixedLengthVectorOf(MVT::i1))
3693 return SDValue();
3694
3695 unsigned NumElts = FromVT.getVectorNumElements();
// Decide which reduction (any_true / all_true, possibly inverted) the
// constant and condition code correspond to; bail out if none.
3696 auto Info = classifyMaskReduction(N);
3697 if (!Info)
3698 return SDValue();
3699
3700 auto &DAG = DCI.DAG;
3701 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16)
3702 return combineSmallMaskReduction(N, FromVT, NumElts, *Info, DAG);
3703
3704 if (NumElts == 32 || NumElts == 64)
3705 return combineWideMaskReduction(N, LHS.getOperand(0), FromVT, NumElts,
3706 *Info, DAG);
3707
3708 return SDValue();
3709}
3710
3712 EVT VT = N->getValueType(0);
     // Only v8i32 and v16i32 results are handled.
3713 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3714 return SDValue();
3715
3716 // Mul with extending inputs.
3717 SDValue LHS = N->getOperand(0);
3718 SDValue RHS = N->getOperand(1);
     // Both operands must use the same opcode ...
3719 if (LHS.getOpcode() != RHS.getOpcode())
3720 return SDValue();
3721
     // ... and that opcode must be a sign or zero extend.
3722 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3723 LHS.getOpcode() != ISD::ZERO_EXTEND)
3724 return SDValue();
3725
     // The two extend inputs must have the same type ...
3726 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3727 return SDValue();
3728
     // ... with i8 elements.
3729 EVT FromVT = LHS->getOperand(0).getValueType();
3730 EVT EltTy = FromVT.getVectorElementType();
3731 if (EltTy != MVT::i8)
3732 return SDValue();
3733
3734 // For an input DAG that looks like this
3735 // %a = input_type
3736 // %b = input_type
3737 // %lhs = extend %a to output_type
3738 // %rhs = extend %b to output_type
3739 // %mul = mul %lhs, %rhs
3740
3741 // input_type | output_type | instructions
3742 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3743 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3744 // | | %low_low = i32x4.ext_low_i16x8_ %low
3745 // | | %low_high = i32x4.ext_high_i16x8_ %low
3746 // | | %high_low = i32x4.ext_low_i16x8_ %high
3747 // | | %high_high = i32x4.ext_high_i16x8_ %high
3748 // | | %res = concat_vector(...)
3749 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3750 // | | %low_low = i32x4.ext_low_i16x8_ %low
3751 // | | %low_high = i32x4.ext_high_i16x8_ %low
3752 // | | %res = concat_vector(%low_low, %low_high)
3753
3754 SDLoc DL(N);
3755 unsigned NumElts = VT.getVectorNumElements();
3756 SDValue ExtendInLHS = LHS->getOperand(0);
3757 SDValue ExtendInRHS = RHS->getOperand(0);
     // The signedness of the original extend selects the signed or unsigned
     // flavour of the extend-low / extend-high nodes built below.
3758 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3759 unsigned ExtendLowOpc =
3760 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3761 unsigned ExtendHighOpc =
3762 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3763
     // Small builders for an extend-low / extend-high node of a given type.
3764 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3765 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3766 };
3767 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3768 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3769 };
3770
3771 if (NumElts == 16) {
     // 16 lanes: extend the low and high halves of both inputs to v8i16 and
     // multiply each pair, then extend each v8i16 product into two v4i32
     // pieces and concatenate the four pieces in low-to-high order.
3772 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3773 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3774 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3775 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3776 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3777 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3778 SDValue SubVectors[] = {
3779 GetExtendLow(MVT::v4i32, MulLow),
3780 GetExtendHigh(MVT::v4i32, MulLow),
3781 GetExtendLow(MVT::v4i32, MulHigh),
3782 GetExtendHigh(MVT::v4i32, MulHigh),
3783 };
3784 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3785 } else {
     // 8 lanes: re-apply the original extend opcode to reach v8i16, multiply
     // once, then split the product into low/high v4i32 halves.
3786 assert(NumElts == 8);
3787 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3788 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3789 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3790 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3791 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3792 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3793 }
     // NOTE(review): unreachable -- both branches above return.
3794 return SDValue();
3795}
3796
3799 assert(N->getOpcode() == ISD::MUL);
     // Only vector multiplies are considered.
3800 EVT VT = N->getValueType(0);
3801 if (!VT.isVector())
3802 return SDValue();
3803
     // Prefer the wide extending-multiply combine whenever it applies.
3804 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3805 return Res;
3806
3807 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3808 // extend them to v8i16.
3809 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3810 return SDValue();
3811
3812 SDLoc DL(N);
3813 SelectionDAG &DAG = DCI.DAG;
3814 SDValue LHS = N->getOperand(0);
3815 SDValue RHS = N->getOperand(1);
3816 EVT MulVT = MVT::v8i16;
3817
3818 if (VT == MVT::v8i8) {
     // Pad each v8i8 operand with an undef v8i8 to form a v16i8, apply
     // EXTEND_LOW_U to reach v8i16, multiply there, and view the product as
     // v16i8 again.
3819 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3820 DAG.getUNDEF(MVT::v8i8));
3821 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3822 DAG.getUNDEF(MVT::v8i8));
3823 SDValue LowLHS =
3824 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3825 SDValue LowRHS =
3826 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3827 SDValue MulLow = DAG.getBitcast(
3828 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3829 // Take the low byte of each lane.
     // The even byte indices are gathered into the first eight result lanes;
     // the -1 mask entries leave the remaining lanes as don't-care.
3830 SDValue Shuffle = DAG.getVectorShuffle(
3831 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3832 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
     // Presumably extracts the 64-bit subvector starting at index 0, i.e. the
     // v8i8 result -- confirm against extractSubVector's definition.
3833 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3834 } else {
     // v16i8: multiply the low and high halves separately in v8i16, then
     // gather the even bytes of both products with one two-input shuffle
     // (indices 0..14 pick from MulLow, 16..30 from MulHigh).
3835 assert(VT == MVT::v16i8 && "Expected v16i8");
3836 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3837 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3838 SDValue HighLHS =
3839 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3840 SDValue HighRHS =
3841 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3842
3843 SDValue MulLow =
3844 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3845 SDValue MulHigh =
3846 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3847
3848 // Take the low byte of each lane.
3849 return DAG.getVectorShuffle(
3850 VT, DL, MulLow, MulHigh,
3851 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3852 }
3853}
3854
// Widens In by concatenating it with a poison vector of the same type, which
// doubles the element count, and recurses on the result while the element
// count is still below RequiredNumElems. At least one doubling is always
// performed, even if In already has RequiredNumElems or more elements.
// Returns the widened vector.
3855SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3856 SelectionDAG &DAG) {
3857 SDLoc DL(In);
3858 LLVMContext &Ctx = *DAG.getContext();
3859 EVT InVT = In.getValueType();
     // Element count after this doubling step.
3860 unsigned NumElems = InVT.getVectorNumElements() * 2;
3861 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
     // In occupies the first half of the result; the second half is poison.
3862 SDValue Concat =
3863 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
     // Keep doubling until the requested element count is reached.
3864 if (NumElems < RequiredNumElems) {
3865 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3866 }
3867 return Concat;
3868}
3869
3871 EVT OutVT = N->getValueType(0);
     // Only vector results are handled ...
3872 if (!OutVT.isVector())
3873 return SDValue();
3874
     // ... with i8 or i16 elements ...
3875 EVT OutElTy = OutVT.getVectorElementType();
3876 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3877 return SDValue();
3878
     // ... and a power-of-two element count.
3879 unsigned NumElems = OutVT.getVectorNumElements();
3880 if (!isPowerOf2_32(NumElems))
3881 return SDValue();
3882
     // The source operand must have f32 elements.
3883 EVT FPVT = N->getOperand(0)->getValueType(0);
3884 if (FPVT.getVectorElementType() != MVT::f32)
3885 return SDValue();
3886
3887 SDLoc DL(N);
3888
3889 // First, convert to i32.
     // The same opcode as N is reused, but producing an i32 vector with the
     // same element count.
3890 LLVMContext &Ctx = *DAG.getContext();
3891 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3892 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
     // NOTE(review): the line that begins the next statement (source line
     // 3893) is missing from this listing; `Mask`, used below, is presumably
     // declared there -- confirm against the real file.
3894 OutVT.getScalarSizeInBits());
3895 // Mask out the top MSBs.
3896 SDValue Masked =
3897 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3898
3899 if (OutVT.getSizeInBits() < 128) {
3900 // Create a wide enough vector that we can use narrow.
     // The masked i32 vector is widened until it has as many elements as a
     // full v16i8 / v8i16, narrowed to that type, and then the leading
     // OutVT-sized part is extracted and bitcast to OutVT.
3901 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3902 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3903 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3904 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3905 return DAG.getBitcast(
3906 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3907 } else {
     // Results of 128 bits or more are narrowed directly to OutVT.
3908 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3909 }
     // NOTE(review): unreachable -- both branches above return.
3910 return SDValue();
3911}
3912
3913// Wide vector shift operations such as v8i32 with sign-extended
3914// operands cause Type Legalizer crashes because the target-specific
3915// extension nodes cannot be directly mapped to the 256-bit size.
3916//
3917// To resolve the crash and optimize performance, we intercept the
3918// illegal v8i32 shift in DAGCombine. We convert the shift amounts
3919// into multipliers and manually split the vector into two v4i32 halves.
3920//
3921// Before: t1: v8i32 = shl (sign_extend v8i16), const_vec
3922// After : t2: v4i32 = mul (ext_low_s v8i16), (ext_low_s narrow_vec)
3923// t3: v4i32 = mul (ext_high_s v8i16), (ext_high_s narrow_vec)
3924// t4: v8i32 = concat_vectors t2, t3
3927 SelectionDAG &DAG = DCI.DAG;
3928 assert(N->getOpcode() == ISD::SHL);
     // Only v8i32 shifts are rewritten.
3929 EVT VT = N->getValueType(0);
3930 if (VT != MVT::v8i32)
3931 return SDValue();
3932
     // The shifted value must be a sign or zero extend ...
3933 SDValue LHS = N->getOperand(0);
3934 SDValue RHS = N->getOperand(1);
3935 unsigned ExtOpc = LHS.getOpcode();
3936 if (ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND)
3937 return SDValue();
3938
     // ... and the shift amounts must come from a BUILD_VECTOR.
3939 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3940 return SDValue();
3941
     // The value being extended must be a v8i16.
3942 SDLoc DL(N);
3943 SDValue ExtendIn = LHS.getOperand(0);
3944 EVT FromVT = ExtendIn.getValueType();
3945 if (FromVT != MVT::v8i16)
3946 return SDValue();
3947
     // Each multiplier (1 << amount) is built at the element width of the
     // narrow input type and widened later with the same signedness as LHS.
     // In the signed case the largest accepted amount is one smaller than in
     // the unsigned case -- NOTE(review): presumably so the multiplier never
     // sets the narrow type's sign bit; confirm.
3948 unsigned NumElts = VT.getVectorNumElements();
3949 unsigned BitWidth = FromVT.getScalarSizeInBits();
3950 bool IsSigned = (ExtOpc == ISD::SIGN_EXTEND);
3951 unsigned MaxValidShift = IsSigned ? (BitWidth - 1) : BitWidth;
3952 SmallVector<SDValue, 16> MulConsts;
3953 for (unsigned I = 0; I < NumElts; ++I) {
     // Every shift amount must be a constant ...
3954 auto *C = dyn_cast<ConstantSDNode>(RHS.getOperand(I));
3955 if (!C)
3956 return SDValue();
3957
     // ... strictly below MaxValidShift; otherwise give up.
3958 const APInt &ShiftAmt = C->getAPIntValue();
3959 if (ShiftAmt.uge(MaxValidShift))
3960 return SDValue();
3961
     // Multiplier with only bit `amount` set, created as an opaque constant.
3962 APInt MulAmt = APInt::getOneBitSet(BitWidth, ShiftAmt.getZExtValue());
3963 MulConsts.push_back(DAG.getConstant(MulAmt, DL, FromVT.getScalarType(),
3964 /*isTarget=*/false, /*isOpaque=*/true));
3965 }
3966
     // Build the narrow multiplier vector and pick the extend-low/high opcodes
     // matching the original extend's signedness.
3967 SDValue NarrowConst = DAG.getBuildVector(FromVT, DL, MulConsts);
3968 unsigned ExtLowOpc =
3969 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3970 unsigned ExtHighOpc =
3971 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3972
     // Extend the low and high halves of both the input and the multiplier
     // vector to v4i32, multiply per half, and concatenate the two products
     // into the v8i32 result.
3973 EVT HalfVT = MVT::v4i32;
3974 SDValue LHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, ExtendIn);
3975 SDValue LHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, ExtendIn);
3976 SDValue RHSLo = DAG.getNode(ExtLowOpc, DL, HalfVT, NarrowConst);
3977 SDValue RHSHi = DAG.getNode(ExtHighOpc, DL, HalfVT, NarrowConst);
3978 SDValue MulLo = DAG.getNode(ISD::MUL, DL, HalfVT, LHSLo, RHSLo);
3979 SDValue MulHi = DAG.getNode(ISD::MUL, DL, HalfVT, LHSHi, RHSHi);
3980 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, MulLo, MulHi);
3981}
3982
// Target DAG-combine entry point: dispatches on N's opcode to the matching
// perform*Combine helper and returns that helper's result. Opcodes without a
// case return an empty SDValue.
//
// NOTE(review): this listing skips some source lines (the embedded numbering
// jumps 3992 -> 3994, 4003 -> 4006 -> 4008 and 4013 -> 4015), so the case
// labels that introduce performVECTOR_SHUFFLECombine, part of the
// performVectorTruncZeroCombine group, and the performBitmaskCombine group are
// not visible here -- consult the real file before editing this switch.
3983SDValue
3984WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3985 DAGCombinerInfo &DCI) const {
3986 switch (N->getOpcode()) {
3987 default:
3988 return SDValue();
3989 case ISD::BITCAST:
3990 return performBitcastCombine(N, DCI);
3991 case ISD::SETCC:
3992 return performSETCCCombine(N, DCI, Subtarget);
3994 return performVECTOR_SHUFFLECombine(N, DCI);
3995 case ISD::SIGN_EXTEND:
3996 case ISD::ZERO_EXTEND:
3997 return performVectorExtendCombine(N, DCI);
3998 case ISD::UINT_TO_FP:
     // Try the extend-to-FP combine first; fall back to the non-negative
     // combine when it produces nothing.
3999 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
4000 return ExtCombine;
4001 return performVectorNonNegToFPCombine(N, DCI);
4002 case ISD::SINT_TO_FP:
4003 return performVectorExtendToFPCombine(N, DCI);
4006 case ISD::FP_ROUND:
4008 return performVectorTruncZeroCombine(N, DCI);
4009 case ISD::FP_TO_SINT:
4010 case ISD::FP_TO_UINT:
4011 return performConvertFPCombine(N, DCI.DAG);
4012 case ISD::TRUNCATE:
4013 return performTruncateCombine(N, DCI);
     // Try the bitmask combine first; fall back to the any/all combine.
4015 if (SDValue V = performBitmaskCombine(N, DCI.DAG))
4016 return V;
4017 return performAnyAllCombine(N, DCI.DAG);
4018 }
4019 case ISD::MUL:
4020 return performMulCombine(N, DCI);
4021 case ISD::SHL:
4022 return performShiftCombine(N, DCI);
4023 }
4024}
static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget)
static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI)
return SDValue()
static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
#define X(NUM, ENUM, NAME)
Definition ELF.h:854
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Hexagon Common GEP
const HexagonInstrInfo * TII
#define _
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
MachineInstr unsigned OpIdx
static SDValue performVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI)
static SDValue combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget)
Try to map an integer comparison with size > XLEN to vector instructions before type legalization spl...
Contains matchers for matching SelectionDAG nodes and values.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static bool callingConvSupported(CallingConv::ID CallConv)
static MachineBasicBlock * LowerFPToInt(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool IsUnsigned, bool Int64, bool Float64, unsigned LoweredOpcode)
static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemcpy(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static std::optional< unsigned > IsWebAssemblyLocal(SDValue Op, SelectionDAG &DAG)
static SDValue performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue performVectorNonNegToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG)
static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG)
static MachineBasicBlock * LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB, const WebAssemblySubtarget *Subtarget, const TargetInstrInfo &TII)
static std::optional< MaskReduceInfo > classifyMaskReduction(SDNode *N)
static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT, SelectionDAG &DAG)
SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG)
static SDValue performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static bool IsWebAssemblyGlobal(SDValue Op)
static SDValue combineSmallMaskReduction(SDNode *N, EVT FromVT, unsigned NumElts, const MaskReduceInfo &Info, SelectionDAG &DAG)
static MachineBasicBlock * LowerMemset(MachineInstr &MI, DebugLoc DL, MachineBasicBlock *BB, const TargetInstrInfo &TII, bool Int64)
static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG)
SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems, SelectionDAG &DAG)
static SDValue performVectorExtendToFPCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get split up into scalar instr...
static SDValue performShiftCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG)
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, const SDLoc &DL, unsigned VectorWidth)
static SDValue performBitcastCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI)
static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL, SelectionDAG &DAG)
static SDValue combineWideMaskReduction(SDNode *N, SDValue Mask, EVT MaskVT, unsigned NumElts, const MaskReduceInfo &Info, SelectionDAG &DAG)
This file defines the interfaces that WebAssembly uses to lower LLVM code into a selection DAG.
This file provides WebAssembly-specific target descriptions.
This file declares WebAssembly-specific per-machine-function information.
This file declares the WebAssembly-specific subclass of TargetSubtarget.
This file declares the WebAssembly-specific subclass of TargetMachine.
This file contains the declaration of the WebAssembly-specific type parsing utility functions.
This file contains the declaration of the WebAssembly-specific utility functions.
X86 cmov Conversion
static constexpr int Concat[]
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
Class for arbitrary precision integers.
Definition APInt.h:78
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:124
Diagnostic information for unsupported feature in backend.
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:353
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
ThreadLocalMode getThreadLocalMode() const
Type * getValueType() const
unsigned getTargetFlags() const
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
Describe properties that are true of each instruction in the target description file.
void setNoStrip() const
Machine Value Type.
bool is128BitVector() const
Return true if this is a 128-bit vector type.
@ INVALID_SIMPLE_VALUE_TYPE
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
MVT changeVectorElementType(MVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
bool isFixedLengthVector() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
const char * createExternalSymbolName(StringRef Name)
Allocate a string and populate it with the given external symbol name.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFPImm(const ConstantFP *Val) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
mop_range defs()
Returns all explicit operands that are register definitions.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
mop_range uses()
Returns all operands which may be register uses.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI bool isKnownNeverLogicalZero(SDValue Op, const APInt &DemandedElts, unsigned Depth=0) const
Test whether the given floating point SDValue (or all elements of it, if it is a vector) is known to ...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getShiftAmountConstant(uint64_t Val, EVT VT, const SDLoc &DL)
LLVM_ABI SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align DstAlign, Align SrcAlign, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getBasicBlock(MachineBasicBlock *MBB)
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI bool isKnownNeverNaN(SDValue Op, const APInt &DemandedElts, bool SNaN=false, unsigned Depth=0) const
Test whether the given SDValue (or all elements of it, if it is a vector) is known to never be NaN in...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
MachineFunction & getMachineFunction() const
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
LegalizeTypeAction
This enum indicates whether types are legal for a target, and if not, what action should be used to...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const
Return the preferred vector type legalization action.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
virtual bool isBinOp(unsigned Opcode) const
Return true if the node is a math/logic binary operator.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:273
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:287
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI const Value * stripPointerCastsAndAliases() const
Strip off pointer casts, all-zero GEPs, address space casts, and aliases.
Definition Value.cpp:717
static std::optional< unsigned > getLocalForStackObject(MachineFunction &MF, int FrameIndex)
WebAssemblyTargetLowering(const TargetMachine &TM, const WebAssemblySubtarget &STI)
self_iterator getIterator()
Definition ilist_node.h:123
#define INT64_MIN
Definition DataTypes.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition CallingConv.h:63
@ CXX_FAST_TLS
Used for access functions.
Definition CallingConv.h:72
@ WASM_EmscriptenInvoke
For emscripten __invoke_* functions.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:827
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:787
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:888
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:918
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
@ FMULADD
FMULADD - Performs a * b + c, with, or without, intermediate rounding.
Definition ISDOpcodes.h:528
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:852
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ BR_CC
BR_CC - Conditional branch.
@ BRIND
BRIND - Indirect branch.
@ BR_JT
BR_JT - Jumptable branch.
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:858
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:819
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:907
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:896
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:986
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:934
@ TargetConstant
TargetConstant* - Like Constant*, but the DAG does not do any folding, simplification,...
Definition ISDOpcodes.h:179
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:967
@ CLEAR_CACHE
llvm.clear_cache intrinsic. Operands: Input Chain, Start Address, End Address. Outputs: Output Chain.
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:929
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:953
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:864
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:841
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
OperandFlags
These are flags set on operands, but should be considered private, all access should go through the M...
Definition MCInstrDesc.h:51
auto m_Value()
Match an arbitrary value and ignore it.
CastOperator_match< OpTy, Instruction::BitCast > m_BitCast(const OpTy &Op)
Matches BitCast.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
bool sd_match(SDNode *N, const SelectionDAG *DAG, Pattern &&P)
CondCode_match m_SpecificCondCode(ISD::CondCode CC)
Match a conditional code SDNode with a specific ISD::CondCode.
CondCode_match m_CondCode()
Match any conditional code SDNode.
TernaryOpc_match< T0_P, T1_P, T2_P, true, false > m_c_SetCC(const T0_P &LHS, const T1_P &RHS, const T2_P &CC)
MCSymbolWasm * getOrCreateFunctionTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __indirect_function_table, for use in call_indirect and in function bitcasts.
bool isWebAssemblyTableType(const Type *Ty)
Return true if the table represents a WebAssembly table type.
MCSymbolWasm * getOrCreateFuncrefCallTableSymbol(MCContext &Ctx, const WebAssemblySubtarget *Subtarget)
Returns the __funcref_call_table, for use in funcref calls when lowered to table.set + call_indirect.
bool isValidAddressSpace(unsigned AS)
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo, const LibcallLoweringInfo *libcallLowering)
bool canLowerReturn(size_t ResultSize, const WebAssemblySubtarget *Subtarget)
Returns true if the function's return value(s) can be lowered directly, i.e., not indirectly via a po...
MachineSDNode * getTLSBase(SelectionDAG &DAG, const SDLoc &DL, const WebAssemblySubtarget *Subtarget, const SDValue Chain=SDValue())
bool isWasmVarAddressSpace(unsigned AS)
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void computeSignatureVTs(const FunctionType *Ty, const Function *TargetFunc, const Function &ContextFunc, const TargetMachine &TM, SmallVectorImpl< MVT > &Params, SmallVectorImpl< MVT > &Results)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
SDValue peekThroughFreeze(SDValue V)
Return the non-frozen source operand of V if it exists.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Add
Sum of integers.
DWARFExpression::Operation Op
auto max_element(R &&Range)
Provide wrappers to std::max_element which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:2088
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
void computeLegalValueVTs(const WebAssemblyTargetLowering &TLI, LLVMContext &Ctx, const DataLayout &DL, Type *Ty, SmallVectorImpl< MVT > &ValueVTs)
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:266
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:230
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:475
bool isFixedLengthVector() const
Definition ValueTypes.h:199
bool isFixedLengthVectorOf(EVT EltVT) const
Return true if this is a fixed length vector with matching element type.
Definition ValueTypes.h:205
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:315
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:235
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:484
Align getNonZeroOrigAlign() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
Align getNonZeroByValAlign() const
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
Matching combinators.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
These are IR-level optimization flags that may be propagated to SDNodes.
This structure is used to pass arguments to makeLibCall function.