1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
32#include "llvm/IR/Function.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load count for memcmp expand optimization
52
53 // Booleans always contain 0 or 1.
55 // Except in SIMD vectors
57 // We don't know the microarchitecture here, so just reduce register pressure.
59 // Tell ISel that we have a stack pointer.
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(MVT::i32, &WebAssembly::I32RegClass);
64 addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
65 addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
66 addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
69 addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
70 addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
71 addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
72 addRegisterClass(MVT::v2i64, &WebAssembly::V128RegClass);
73 addRegisterClass(MVT::v2f64, &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(MVT::v8f16, &WebAssembly::V128RegClass);
77 }
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(MVT::externref, &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(MVT::funcref, &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(MVT::exnref, &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
91 setOperationAction(ISD::LOAD, T, Custom);
92 setOperationAction(ISD::STORE, T, Custom);
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
97 setOperationAction(ISD::LOAD, T, Custom);
98 setOperationAction(ISD::STORE, T, Custom);
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(ISD::LOAD, MVT::v8f16, Custom);
103 setOperationAction(ISD::STORE, MVT::v8f16, Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref, and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
109 setOperationAction(ISD::LOAD, T, Custom);
110 setOperationAction(ISD::STORE, T, Custom);
111 }
112 }
113
119 setOperationAction(ISD::BRIND, MVT::Other, Custom);
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we custom-lower it.
124 setOperationAction(ISD::VASTART, MVT::Other, Custom);
125 setOperationAction(ISD::VAARG, MVT::Other, Expand);
126 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
127 setOperationAction(ISD::VAEND, MVT::Other, Expand);
128
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
139 // Expand floating-point library function operators.
140 for (auto Op :
141 {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
143 // Note supported floating-point library function operators that otherwise
144 // default to expand.
145 for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
146 ISD::FRINT, ISD::FROUNDEVEN})
148 // Support minimum and maximum, which otherwise default to expand.
149 setOperationAction(ISD::FMINIMUM, T, Legal);
150 setOperationAction(ISD::FMAXIMUM, T, Legal);
151 // When experimental v8f16 support is enabled these instructions don't need
152 // to be expanded.
153 if (T != MVT::v8f16) {
154 setOperationAction(ISD::FP16_TO_FP, T, Expand);
155 setOperationAction(ISD::FP_TO_FP16, T, Expand);
156 }
158 setTruncStoreAction(T, MVT::f16, Expand);
159 }
160
161 // Expand unavailable integer operations.
162 for (auto Op :
166 for (auto T : {MVT::i32, MVT::i64})
168 if (Subtarget->hasSIMD128())
169 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
171 }
172
173 if (Subtarget->hasWideArithmetic()) {
179 }
180
181 if (Subtarget->hasNontrappingFPToInt())
183 for (auto T : {MVT::i32, MVT::i64})
185
186 if (Subtarget->hasRelaxedSIMD()) {
188 {ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM},
189 {MVT::v4f32, MVT::v2f64}, Legal);
190 }
191 // SIMD-specific configuration
192 if (Subtarget->hasSIMD128()) {
193
195
196 // Combine wide-vector muls with extended inputs into extmul_half.
198
199 // Combine vector mask reductions into alltrue/anytrue
201
203 // Convert vector-to-integer bitcasts to bitmask
203 setTargetDAGCombine(ISD::BITCAST);
204
205 // Hoist bitcasts out of shuffles
207
208 // Combine extends of extract_subvectors into widening ops
210
211 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
212 // conversion ops
215
216 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
217 // into conversion ops
221
223
224 // Support saturating add/sub for i8x16 and i16x8
226 for (auto T : {MVT::v16i8, MVT::v8i16})
228
229 // Support integer abs
230 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
232
233 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
234 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
235 MVT::v2f64})
237
238 if (Subtarget->hasFP16())
240
241 // We have custom shuffle lowering to expose the shuffle mask
242 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
243 MVT::v2f64})
245
246 if (Subtarget->hasFP16())
248
249 // Support splatting
250 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
251 MVT::v2f64})
253
254 setOperationAction(ISD::AVGCEILU, {MVT::v8i16, MVT::v16i8}, Legal);
255
256 // Custom lowering since wasm shifts must have a scalar shift amount
257 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
258 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
260
261 // Custom lower lane accesses to expand out variable indices
263 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
264 MVT::v2f64})
266
267 // There is no i8x16.mul instruction
268 setOperationAction(ISD::MUL, MVT::v16i8, Expand);
269
270 // There is no vector conditional select instruction
271 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
272 MVT::v2f64})
274
275 // Expand integer operations supported for scalars but not SIMD
276 for (auto Op :
278 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
280
281 // But we do have integer min and max operations
282 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
283 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
285
286 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
287 setOperationAction(ISD::CTPOP, MVT::v16i8, Legal);
288 setOperationAction(ISD::CTLZ, MVT::v16i8, Expand);
289 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
290
291 // Custom lower bit counting operations for other types to scalarize them.
292 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
293 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
295
296 // Expand float operations supported for scalars but not SIMD
297 for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
298 ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
299 for (auto T : {MVT::v4f32, MVT::v2f64})
301
302 // Unsigned comparison operations are unavailable for i64x2 vectors.
304 setCondCodeAction(CC, MVT::v2i64, Custom);
305
306 // 64x2 conversions are not in the spec
307 for (auto Op :
309 for (auto T : {MVT::v2i64, MVT::v2f64})
311
312 // But saturating fp_to_int conversions are
314 setOperationAction(Op, MVT::v4i32, Custom);
315 if (Subtarget->hasFP16()) {
316 setOperationAction(Op, MVT::v8i16, Custom);
317 }
318 }
319
320 // Support vector extending
325 }
326
327 if (Subtarget->hasFP16()) {
328 setOperationAction(ISD::FMA, MVT::v8f16, Legal);
329 }
330
331 if (Subtarget->hasRelaxedSIMD()) {
334 }
335
336 // Partial MLA reductions.
337 for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
338 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v16i8, Legal);
339 setPartialReduceMLAAction(Op, MVT::v4i32, MVT::v8i16, Legal);
340 }
341 }
342
343 // As a special case, these operators use the type to mean the type to
344 // sign-extend from.
346 if (!Subtarget->hasSignExt()) {
347 // Sign extends are legal only when extending a vector extract
348 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
349 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
351 }
354
355 // Dynamic stack allocation: use the default expansion.
356 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
357 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
358 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVTPtr, Expand);
359
363
364 // Expand these forms; we pattern-match the forms that we can handle in isel.
365 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
366 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
368
369 // We have custom switch handling.
370 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
371
372 // WebAssembly doesn't have:
373 // - Floating-point extending loads.
374 // - Floating-point truncating stores.
375 // - i1 extending loads.
376 // - truncating SIMD stores and most extending loads
377 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
378 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
379 for (auto T : MVT::integer_valuetypes())
380 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
381 setLoadExtAction(Ext, T, MVT::i1, Promote);
382 if (Subtarget->hasSIMD128()) {
383 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
384 MVT::v2f64}) {
385 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
386 if (MVT(T) != MemT) {
388 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
389 setLoadExtAction(Ext, T, MemT, Expand);
390 }
391 }
392 }
393 // But some vector extending loads are legal
394 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
395 setLoadExtAction(Ext, MVT::v8i16, MVT::v8i8, Legal);
396 setLoadExtAction(Ext, MVT::v4i32, MVT::v4i16, Legal);
397 setLoadExtAction(Ext, MVT::v2i64, MVT::v2i32, Legal);
398 }
399 setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Legal);
400 }
401
402 // Don't do anything clever with build_pairs
404
405 // Trap lowers to wasm unreachable
406 setOperationAction(ISD::TRAP, MVT::Other, Legal);
407 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
408
409 // Exception handling intrinsics
413
415
416 // Always convert switches to br_tables unless there is only one case, which
417 // is equivalent to a simple branch. This reduces code size for wasm, and we
418 // defer possible jump table optimizations to the VM.
420}
421
430
439
441WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
442 // We have wasm instructions for these
443 switch (AI->getOperation()) {
451 default:
452 break;
453 }
455}
456
457bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
458 // Implementation copied from X86TargetLowering.
459 unsigned Opc = VecOp.getOpcode();
460
461 // Assume target opcodes can't be scalarized.
462 // TODO - do we have any exceptions?
464 return false;
465
466 // If the vector op is not supported, try to convert to scalar.
467 EVT VecVT = VecOp.getValueType();
469 return true;
470
471 // If the vector op is supported, but the scalar op is not, the transform may
472 // not be worthwhile.
473 EVT ScalarVT = VecVT.getScalarType();
474 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
475}
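
// An illustrative case of the rule above (example, not exhaustive): wasm has
// no SIMD integer division, so for a node like
//   (extractelement (sdiv v2i64 %a, %b), 0)
// the vector sdiv is not supported and scalarizing it to a single i64.div_s
// is profitable, whereas for (extractelement (add v4i32 %a, %b), 0) both the
// vector and scalar adds are legal, so the vector form is kept.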
476
477FastISel *WebAssemblyTargetLowering::createFastISel(
478 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
479 return WebAssembly::createFastISel(FuncInfo, LibInfo);
480}
481
482MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
483 EVT VT) const {
484 unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1);
485 if (BitWidth > 1 && BitWidth < 8)
486 BitWidth = 8;
487
488 if (BitWidth > 64) {
489 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
490 // the count to be an i32.
491 BitWidth = 32;
493 "32-bit shift counts ought to be enough for anyone");
494 }
495
498 "Unable to represent scalar shift amount type");
499 return Result;
500}
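
// For example (illustrative): a shift of an i128 value is lowered to a
// compiler-rt libcall such as __ashlti3, whose shift-count parameter is a
// plain 32-bit int, which is why the shift-amount type is clamped to 32 bits
// for widths above 64.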
501
502// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
503// undefined result on invalid/overflow, to the WebAssembly opcode, which
504// traps on invalid/overflow.
507 const TargetInstrInfo &TII,
508 bool IsUnsigned, bool Int64,
509 bool Float64, unsigned LoweredOpcode) {
511
512 Register OutReg = MI.getOperand(0).getReg();
513 Register InReg = MI.getOperand(1).getReg();
514
515 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
516 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
517 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
518 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
519 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
520 unsigned Eqz = WebAssembly::EQZ_I32;
521 unsigned And = WebAssembly::AND_I32;
522 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
523 int64_t Substitute = IsUnsigned ? 0 : Limit;
524 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
525 auto &Context = BB->getParent()->getFunction().getContext();
526 Type *Ty = Float64 ? Type::getDoubleTy(Context) : Type::getFloatTy(Context);
527
528 const BasicBlock *LLVMBB = BB->getBasicBlock();
529 MachineFunction *F = BB->getParent();
530 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
531 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVMBB);
532 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
533
535 F->insert(It, FalseMBB);
536 F->insert(It, TrueMBB);
537 F->insert(It, DoneMBB);
538
539 // Transfer the remainder of BB and its successor edges to DoneMBB.
540 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
542
543 BB->addSuccessor(TrueMBB);
544 BB->addSuccessor(FalseMBB);
545 TrueMBB->addSuccessor(DoneMBB);
546 FalseMBB->addSuccessor(DoneMBB);
547
548 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
549 Tmp0 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
550 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
551 CmpReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
552 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
553 FalseReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
554 TrueReg = MRI.createVirtualRegister(MRI.getRegClass(OutReg));
555
556 MI.eraseFromParent();
557 // For signed numbers, we can do a single comparison to determine whether
558 // fabs(x) is within range.
559 if (IsUnsigned) {
560 Tmp0 = InReg;
561 } else {
562 BuildMI(BB, DL, TII.get(Abs), Tmp0).addReg(InReg);
563 }
564 BuildMI(BB, DL, TII.get(FConst), Tmp1)
565 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, CmpVal)));
566 BuildMI(BB, DL, TII.get(LT), CmpReg).addReg(Tmp0).addReg(Tmp1);
567
568 // For unsigned numbers, we have to do a separate comparison with zero.
569 if (IsUnsigned) {
570 Tmp1 = MRI.createVirtualRegister(MRI.getRegClass(InReg));
571 Register SecondCmpReg =
572 MRI.createVirtualRegister(&WebAssembly::I32RegClass);
573 Register AndReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
574 BuildMI(BB, DL, TII.get(FConst), Tmp1)
575 .addFPImm(cast<ConstantFP>(ConstantFP::get(Ty, 0.0)));
576 BuildMI(BB, DL, TII.get(GE), SecondCmpReg).addReg(Tmp0).addReg(Tmp1);
577 BuildMI(BB, DL, TII.get(And), AndReg).addReg(CmpReg).addReg(SecondCmpReg);
578 CmpReg = AndReg;
579 }
580
581 BuildMI(BB, DL, TII.get(Eqz), EqzReg).addReg(CmpReg);
582
583 // Create the CFG diamond to select between doing the conversion or using
584 // the substitute value.
585 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(TrueMBB).addReg(EqzReg);
586 BuildMI(FalseMBB, DL, TII.get(LoweredOpcode), FalseReg).addReg(InReg);
587 BuildMI(FalseMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
588 BuildMI(TrueMBB, DL, TII.get(IConst), TrueReg).addImm(Substitute);
589 BuildMI(*DoneMBB, DoneMBB->begin(), DL, TII.get(TargetOpcode::PHI), OutReg)
590 .addReg(FalseReg)
591 .addMBB(FalseMBB)
592 .addReg(TrueReg)
593 .addMBB(TrueMBB);
594
595 return DoneMBB;
596}
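
// The diamond built above, sketched for the signed f32 -> i32 case in
// pseudocode (register names follow the virtual registers created above):
//
//   BB:       Tmp0 = abs InReg
//             Tmp1 = f32.const 0x1p31          ;; CmpVal = -(double)INT32_MIN
//             CmpReg = f32.lt Tmp0, Tmp1       ;; is fabs(x) within range?
//             EqzReg = i32.eqz CmpReg
//             br_if TrueMBB, EqzReg            ;; out of range
//   FalseMBB: FalseReg = i32.trunc_f32_s InReg ;; the trapping conversion
//             br DoneMBB
//   TrueMBB:  TrueReg = i32.const INT32_MIN    ;; the substitute value
//   DoneMBB:  OutReg = phi [FalseReg, FalseMBB], [TrueReg, TrueMBB]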
597
598// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
599 // instruction to handle the zero-length case.
602 const TargetInstrInfo &TII, bool Int64) {
604
605 MachineOperand DstMem = MI.getOperand(0);
606 MachineOperand SrcMem = MI.getOperand(1);
607 MachineOperand Dst = MI.getOperand(2);
608 MachineOperand Src = MI.getOperand(3);
609 MachineOperand Len = MI.getOperand(4);
610
611 // If the length is a constant, we don't actually need the check.
612 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
613 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
614 Def->getOpcode() == WebAssembly::CONST_I64) {
615 if (Def->getOperand(1).getImm() == 0) {
616 // A zero-length memcpy is a no-op.
617 MI.eraseFromParent();
618 return BB;
619 }
620 // A non-zero-length memcpy doesn't need a zero check.
621 unsigned MemoryCopy =
622 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
623 BuildMI(*BB, MI, DL, TII.get(MemoryCopy))
624 .add(DstMem)
625 .add(SrcMem)
626 .add(Dst)
627 .add(Src)
628 .add(Len);
629 MI.eraseFromParent();
630 return BB;
631 }
632 }
633
634 // We're going to add an extra use to `Len` to test if it's zero; that
635 // use shouldn't be a kill, even if the original use is.
636 MachineOperand NoKillLen = Len;
637 NoKillLen.setIsKill(false);
638
639 // Decide on which `MachineInstr` opcode we're going to use.
640 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
641 unsigned MemoryCopy =
642 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
643
644 // Create two new basic blocks; one for the new `memory.copy` that we can
645 // branch over, and one for the rest of the instructions after the original
646 // `memory.copy`.
647 const BasicBlock *LLVMBB = BB->getBasicBlock();
648 MachineFunction *F = BB->getParent();
649 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
650 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
651
653 F->insert(It, TrueMBB);
654 F->insert(It, DoneMBB);
655
656 // Transfer the remainder of BB and its successor edges to DoneMBB.
657 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
659
660 // Connect the CFG edges.
661 BB->addSuccessor(TrueMBB);
662 BB->addSuccessor(DoneMBB);
663 TrueMBB->addSuccessor(DoneMBB);
664
665 // Create a virtual register for the `Eqz` result.
666 unsigned EqzReg;
667 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
668
669 // Erase the original `memory.copy`.
670 MI.eraseFromParent();
671
672 // Test if `Len` is zero.
673 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
674
675 // Insert a new `memory.copy`.
676 BuildMI(TrueMBB, DL, TII.get(MemoryCopy))
677 .add(DstMem)
678 .add(SrcMem)
679 .add(Dst)
680 .add(Src)
681 .add(Len);
682
683 // Create the CFG triangle.
684 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
685 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
686
687 return DoneMBB;
688}
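
// The resulting triangle, sketched in pseudocode: the zero-length case must
// remain a no-op to match LLVM's memcpy semantics, so the copy is branched
// over when `Len` is zero:
//
//   BB:       EqzReg = eqz Len
//             br_if DoneMBB, EqzReg     ;; skip the copy when Len == 0
//   TrueMBB:  memory.copy Dst, Src, Len
//             br DoneMBB
//   DoneMBB:  ;; remainder of the original block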
689
690// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
691 // instruction to handle the zero-length case.
694 const TargetInstrInfo &TII, bool Int64) {
696
697 MachineOperand Mem = MI.getOperand(0);
698 MachineOperand Dst = MI.getOperand(1);
699 MachineOperand Val = MI.getOperand(2);
700 MachineOperand Len = MI.getOperand(3);
701
702 // If the length is a constant, we don't actually need the check.
703 if (MachineInstr *Def = MRI.getVRegDef(Len.getReg())) {
704 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
705 Def->getOpcode() == WebAssembly::CONST_I64) {
706 if (Def->getOperand(1).getImm() == 0) {
707 // A zero-length memset is a no-op.
708 MI.eraseFromParent();
709 return BB;
710 }
711 // A non-zero-length memset doesn't need a zero check.
712 unsigned MemoryFill =
713 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
714 BuildMI(*BB, MI, DL, TII.get(MemoryFill))
715 .add(Mem)
716 .add(Dst)
717 .add(Val)
718 .add(Len);
719 MI.eraseFromParent();
720 return BB;
721 }
722 }
723
724 // We're going to add an extra use to `Len` to test if it's zero; that
725 // use shouldn't be a kill, even if the original use is.
726 MachineOperand NoKillLen = Len;
727 NoKillLen.setIsKill(false);
728
729 // Decide on which `MachineInstr` opcode we're going to use.
730 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
731 unsigned MemoryFill =
732 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
733
734 // Create two new basic blocks; one for the new `memory.fill` that we can
735 // branch over, and one for the rest of the instructions after the original
736 // `memory.fill`.
737 const BasicBlock *LLVMBB = BB->getBasicBlock();
738 MachineFunction *F = BB->getParent();
739 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(LLVMBB);
740 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(LLVMBB);
741
743 F->insert(It, TrueMBB);
744 F->insert(It, DoneMBB);
745
746 // Transfer the remainder of BB and its successor edges to DoneMBB.
747 DoneMBB->splice(DoneMBB->begin(), BB, std::next(MI.getIterator()), BB->end());
749
750 // Connect the CFG edges.
751 BB->addSuccessor(TrueMBB);
752 BB->addSuccessor(DoneMBB);
753 TrueMBB->addSuccessor(DoneMBB);
754
755 // Create a virtual register for the `Eqz` result.
756 unsigned EqzReg;
757 EqzReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
758
759 // Erase the original `memory.fill`.
760 MI.eraseFromParent();
761
762 // Test if `Len` is zero.
763 BuildMI(BB, DL, TII.get(Eqz), EqzReg).add(NoKillLen);
764
765 // Insert a new `memory.fill`.
766 BuildMI(TrueMBB, DL, TII.get(MemoryFill)).add(Mem).add(Dst).add(Val).add(Len);
767
768 // Create the CFG triangle.
769 BuildMI(BB, DL, TII.get(WebAssembly::BR_IF)).addMBB(DoneMBB).addReg(EqzReg);
770 BuildMI(TrueMBB, DL, TII.get(WebAssembly::BR)).addMBB(DoneMBB);
771
772 return DoneMBB;
773}
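
// LowerMemset mirrors LowerMemcpy above: the same eqz + br_if triangle, with
// memory.fill in place of memory.copy, keeps a zero-length memset a no-op.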
774
775static MachineBasicBlock *
777 const WebAssemblySubtarget *Subtarget,
778 const TargetInstrInfo &TII) {
779 MachineInstr &CallParams = *CallResults.getPrevNode();
780 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
781 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
782 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
783
784 bool IsIndirect =
785 CallParams.getOperand(0).isReg() || CallParams.getOperand(0).isFI();
786 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
787
788 bool IsFuncrefCall = false;
789 if (IsIndirect && CallParams.getOperand(0).isReg()) {
790 Register Reg = CallParams.getOperand(0).getReg();
791 const MachineFunction *MF = BB->getParent();
792 const MachineRegisterInfo &MRI = MF->getRegInfo();
793 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
794 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
795 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
796 }
797
798 unsigned CallOp;
799 if (IsIndirect && IsRetCall) {
800 CallOp = WebAssembly::RET_CALL_INDIRECT;
801 } else if (IsIndirect) {
802 CallOp = WebAssembly::CALL_INDIRECT;
803 } else if (IsRetCall) {
804 CallOp = WebAssembly::RET_CALL;
805 } else {
806 CallOp = WebAssembly::CALL;
807 }
808
809 MachineFunction &MF = *BB->getParent();
810 const MCInstrDesc &MCID = TII.get(CallOp);
811 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
812
813 // Move the function pointer to the end of the arguments for indirect calls
814 if (IsIndirect) {
815 auto FnPtr = CallParams.getOperand(0);
816 CallParams.removeOperand(0);
817
818 // For funcrefs, call_indirect is done through __funcref_call_table, and
819 // the funcref is always installed in slot 0 of that table. Therefore,
820 // instead of appending the function pointer to the end of the params
821 // list, we append a zero (the funcref's index in
822 // __funcref_call_table).
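    // An illustrative call sequence for a funcref callee (the table.set that
    // installs the funcref in slot 0 is emitted separately, in LowerCall):
    //
    //   i32.const 0                         ;; index into __funcref_call_table
    //   call_indirect __funcref_call_table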
823 if (IsFuncrefCall) {
824 Register RegZero =
825 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
826 MachineInstrBuilder MIBC0 =
827 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
828
829 BB->insert(CallResults.getIterator(), MIBC0);
830 MachineInstrBuilder(MF, CallParams).addReg(RegZero);
831 } else
832 CallParams.addOperand(FnPtr);
833 }
834
835 for (auto Def : CallResults.defs())
836 MIB.add(Def);
837
838 if (IsIndirect) {
839 // Placeholder for the type index.
840 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
841 MIB.addImm(0);
842 // The table into which this call_indirect indexes.
843 MCSymbolWasm *Table = IsFuncrefCall
845 MF.getContext(), Subtarget)
847 MF.getContext(), Subtarget);
848 if (Subtarget->hasCallIndirectOverlong()) {
849 MIB.addSym(Table);
850 } else {
851 // For the MVP there is at most one table whose number is 0, but we can't
852 // write a table symbol or issue relocations. Instead we just ensure the
853 // table is live and write a zero.
854 Table->setNoStrip();
855 MIB.addImm(0);
856 }
857 }
858
859 for (auto Use : CallParams.uses())
860 MIB.add(Use);
861
862 BB->insert(CallResults.getIterator(), MIB);
863 CallParams.eraseFromParent();
864 CallResults.eraseFromParent();
865
866 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
867 // table slot with ref.null upon call_indirect return.
868 //
869 // This generates the following code, which comes right after a call_indirect
870 // of a funcref:
871 //
872 // i32.const 0
873 // ref.null func
874 // table.set __funcref_call_table
875 if (IsIndirect && IsFuncrefCall) {
877 MF.getContext(), Subtarget);
878 Register RegZero =
879 MF.getRegInfo().createVirtualRegister(&WebAssembly::I32RegClass);
880 MachineInstr *Const0 =
881 BuildMI(MF, DL, TII.get(WebAssembly::CONST_I32), RegZero).addImm(0);
882 BB->insertAfter(MIB.getInstr()->getIterator(), Const0);
883
884 Register RegFuncref =
885 MF.getRegInfo().createVirtualRegister(&WebAssembly::FUNCREFRegClass);
886 MachineInstr *RefNull =
887 BuildMI(MF, DL, TII.get(WebAssembly::REF_NULL_FUNCREF), RegFuncref);
888 BB->insertAfter(Const0->getIterator(), RefNull);
889
890 MachineInstr *TableSet =
891 BuildMI(MF, DL, TII.get(WebAssembly::TABLE_SET_FUNCREF))
892 .addSym(Table)
893 .addReg(RegZero)
894 .addReg(RegFuncref);
895 BB->insertAfter(RefNull->getIterator(), TableSet);
896 }
897
898 return BB;
899}
900
901MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
902 MachineInstr &MI, MachineBasicBlock *BB) const {
903 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
904 DebugLoc DL = MI.getDebugLoc();
905
906 switch (MI.getOpcode()) {
907 default:
908 llvm_unreachable("Unexpected instr type to insert");
909 case WebAssembly::FP_TO_SINT_I32_F32:
910 return LowerFPToInt(MI, DL, BB, TII, false, false, false,
911 WebAssembly::I32_TRUNC_S_F32);
912 case WebAssembly::FP_TO_UINT_I32_F32:
913 return LowerFPToInt(MI, DL, BB, TII, true, false, false,
914 WebAssembly::I32_TRUNC_U_F32);
915 case WebAssembly::FP_TO_SINT_I64_F32:
916 return LowerFPToInt(MI, DL, BB, TII, false, true, false,
917 WebAssembly::I64_TRUNC_S_F32);
918 case WebAssembly::FP_TO_UINT_I64_F32:
919 return LowerFPToInt(MI, DL, BB, TII, true, true, false,
920 WebAssembly::I64_TRUNC_U_F32);
921 case WebAssembly::FP_TO_SINT_I32_F64:
922 return LowerFPToInt(MI, DL, BB, TII, false, false, true,
923 WebAssembly::I32_TRUNC_S_F64);
924 case WebAssembly::FP_TO_UINT_I32_F64:
925 return LowerFPToInt(MI, DL, BB, TII, true, false, true,
926 WebAssembly::I32_TRUNC_U_F64);
927 case WebAssembly::FP_TO_SINT_I64_F64:
928 return LowerFPToInt(MI, DL, BB, TII, false, true, true,
929 WebAssembly::I64_TRUNC_S_F64);
930 case WebAssembly::FP_TO_UINT_I64_F64:
931 return LowerFPToInt(MI, DL, BB, TII, true, true, true,
932 WebAssembly::I64_TRUNC_U_F64);
933 case WebAssembly::MEMCPY_A32:
934 return LowerMemcpy(MI, DL, BB, TII, false);
935 case WebAssembly::MEMCPY_A64:
936 return LowerMemcpy(MI, DL, BB, TII, true);
937 case WebAssembly::MEMSET_A32:
938 return LowerMemset(MI, DL, BB, TII, false);
939 case WebAssembly::MEMSET_A64:
940 return LowerMemset(MI, DL, BB, TII, true);
941 case WebAssembly::CALL_RESULTS:
942 case WebAssembly::RET_CALL_RESULTS:
943 return LowerCallResults(MI, DL, BB, Subtarget, TII);
944 }
945}
946
947std::pair<unsigned, const TargetRegisterClass *>
948WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
949 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
950 // First, see if this is a constraint that directly corresponds to a
951 // WebAssembly register class.
952 if (Constraint.size() == 1) {
953 switch (Constraint[0]) {
954 case 'r':
955 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
956 if (Subtarget->hasSIMD128() && VT.isVector()) {
957 if (VT.getSizeInBits() == 128)
958 return std::make_pair(0U, &WebAssembly::V128RegClass);
959 }
960 if (VT.isInteger() && !VT.isVector()) {
961 if (VT.getSizeInBits() <= 32)
962 return std::make_pair(0U, &WebAssembly::I32RegClass);
963 if (VT.getSizeInBits() <= 64)
964 return std::make_pair(0U, &WebAssembly::I64RegClass);
965 }
966 if (VT.isFloatingPoint() && !VT.isVector()) {
967 switch (VT.getSizeInBits()) {
968 case 32:
969 return std::make_pair(0U, &WebAssembly::F32RegClass);
970 case 64:
971 return std::make_pair(0U, &WebAssembly::F64RegClass);
972 default:
973 break;
974 }
975 }
976 break;
977 default:
978 break;
979 }
980 }
981
983}
984
985bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
986 // Assume ctz is a relatively cheap operation.
987 return true;
988}
989
990bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
991 // Assume clz is a relatively cheap operation.
992 return true;
993}
994
995bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
996 const AddrMode &AM,
997 Type *Ty, unsigned AS,
998 Instruction *I) const {
999 // WebAssembly offsets are added as unsigned without wrapping. The
1000 // isLegalAddressingMode interface gives us no way to determine whether
1001 // wrapping could happen, so we approximate by accepting only non-negative offsets.
1002 if (AM.BaseOffs < 0)
1003 return false;
1004
1005 // WebAssembly has no scale register operands.
1006 if (AM.Scale != 0)
1007 return false;
1008
1009 // Everything else is legal.
1010 return true;
1011}
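
// For example (illustrative): an access at `%p + 16` can be folded into the
// instruction as `i32.load offset=16`, because wasm load/store offsets are
// unsigned immediates, but `%p - 16` or `%p + 4*%i` cannot, since negative
// offsets and scaled-index forms have no wasm encoding and must be computed
// with explicit adds.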
1012
1013bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1014 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1015 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1016 // WebAssembly supports unaligned accesses, though loads and stores that
1017 // perform them should declare it with the p2align attribute, and there
1018 // may be a performance impact. We tell LLVM they're "fast" because
1019 // for the kinds of things that LLVM uses this for (merging adjacent stores
1020 // of constants, etc.), WebAssembly implementations will either want the
1021 // unaligned access or they'll split anyway.
1022 if (Fast)
1023 *Fast = 1;
1024 return true;
1025}
1026
1027bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1028 AttributeList Attr) const {
1029 // The current thinking is that wasm engines will perform this optimization,
1030 // so we can save on code size.
1031 return true;
1032}
1033
1034bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1035 EVT ExtT = ExtVal.getValueType();
1036 EVT MemT = cast<LoadSDNode>(ExtVal->getOperand(0))->getValueType(0);
1037 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1038 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1039 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1040}
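
// These are exactly the combinations backed by wasm's extending SIMD loads,
// e.g. a sign-extending v8i8 -> v8i16 load maps to a single v128.load8x8_s,
// so keeping the extension folded into the load is profitable.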
1041
1042bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1043 const GlobalAddressSDNode *GA) const {
1044 // Wasm doesn't support function addresses with offsets
1045 const GlobalValue *GV = GA->getGlobal();
1047}
1048
1049EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1050 LLVMContext &C,
1051 EVT VT) const {
1052 if (VT.isVector())
1054
1055 // So far, all branch instructions in Wasm take an I32 condition.
1056 // The default TargetLowering::getSetCCResultType returns the pointer size,
1057 // which would be useful to reduce instruction counts when testing
1058 // against 64-bit pointers/values if at some point Wasm supports that.
1059 return EVT::getIntegerVT(C, 32);
1060}
1061
1062bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1063 const CallInst &I,
1064 MachineFunction &MF,
1065 unsigned Intrinsic) const {
1066 switch (Intrinsic) {
1067 case Intrinsic::wasm_memory_atomic_notify:
1069 Info.memVT = MVT::i32;
1070 Info.ptrVal = I.getArgOperand(0);
1071 Info.offset = 0;
1072 Info.align = Align(4);
1073 // The atomic.notify instruction does not really load the memory specified by
1074 // this argument, but a MachineMemOperand must be either a load or a store, so
1075 // we set this to a load.
1076 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1077 // instructions are treated as volatiles in the backend, so we should be
1078 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1080 return true;
1081 case Intrinsic::wasm_memory_atomic_wait32:
1083 Info.memVT = MVT::i32;
1084 Info.ptrVal = I.getArgOperand(0);
1085 Info.offset = 0;
1086 Info.align = Align(4);
1088 return true;
1089 case Intrinsic::wasm_memory_atomic_wait64:
1091 Info.memVT = MVT::i64;
1092 Info.ptrVal = I.getArgOperand(0);
1093 Info.offset = 0;
1094 Info.align = Align(8);
1096 return true;
1097 case Intrinsic::wasm_loadf16_f32:
1099 Info.memVT = MVT::f16;
1100 Info.ptrVal = I.getArgOperand(0);
1101 Info.offset = 0;
1102 Info.align = Align(2);
1104 return true;
1105 case Intrinsic::wasm_storef16_f32:
1107 Info.memVT = MVT::f16;
1108 Info.ptrVal = I.getArgOperand(1);
1109 Info.offset = 0;
1110 Info.align = Align(2);
1112 return true;
1113 default:
1114 return false;
1115 }
1116}
1117
1118void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1119 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1120 const SelectionDAG &DAG, unsigned Depth) const {
1121 switch (Op.getOpcode()) {
1122 default:
1123 break;
1125 unsigned IntNo = Op.getConstantOperandVal(0);
1126 switch (IntNo) {
1127 default:
1128 break;
1129 case Intrinsic::wasm_bitmask: {
1130 unsigned BitWidth = Known.getBitWidth();
1131 EVT VT = Op.getOperand(1).getSimpleValueType();
1132 unsigned PossibleBits = VT.getVectorNumElements();
1133 APInt ZeroMask = APInt::getHighBitsSet(BitWidth, BitWidth - PossibleBits);
1134 Known.Zero |= ZeroMask;
1135 break;
1136 }
1137 }
1138 break;
1139 }
1140 case WebAssemblyISD::EXTEND_LOW_U:
1141 case WebAssemblyISD::EXTEND_HIGH_U: {
1142 // We know the high half of each destination vector element will be zero.
1143 SDValue SrcOp = Op.getOperand(0);
1144 EVT VT = SrcOp.getSimpleValueType();
1145 unsigned BitWidth = Known.getBitWidth();
1146 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1147 assert(BitWidth >= 8 && "Unexpected width!");
1149 Known.Zero |= Mask;
1150 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1151 assert(BitWidth >= 16 && "Unexpected width!");
1153 Known.Zero |= Mask;
1154 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1155 assert(BitWidth >= 32 && "Unexpected width!");
1157 Known.Zero |= Mask;
1158 }
1159 break;
1160 }
1161 // For 128-bit addition, if the upper input halves are both zero, the upper
1162 // half of the result can only hold the carry out of the low half, so all of
1163 // its bits except the first are known zero.
1164 case WebAssemblyISD::I64_ADD128:
1165 if (Op.getResNo() == 1) {
1166 SDValue LHS_HI = Op.getOperand(1);
1167 SDValue RHS_HI = Op.getOperand(3);
1168 if (isNullConstant(LHS_HI) && isNullConstant(RHS_HI))
1169 Known.Zero.setBitsFrom(1);
1170 }
1171 break;
1172 }
1173}
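
// For instance (illustrative): `wasm_bitmask` of a v16i8 input can set only
// the low 16 bits of its i32 result, so the upper 16 bits are reported as
// known zero; likewise EXTEND_LOW_U of a v16i8 source leaves bits 8 and above
// of each widened i16 lane known zero.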
1174
1176WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1177 if (VT.isFixedLengthVector()) {
1178 MVT EltVT = VT.getVectorElementType();
1179 // We have legal vector types with these lane types, so widening the
1180 // vector would let us use some of the lanes directly without having to
1181 // extend or truncate values.
1182 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1183 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1184 return TypeWidenVector;
1185 }
1186
1188}
1189
1190bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1191 const MachineFunction &MF, EVT VT) const {
1192 if (!Subtarget->hasFP16() || !VT.isVector())
1193 return false;
1194
1195 EVT ScalarVT = VT.getScalarType();
1196 if (!ScalarVT.isSimple())
1197 return false;
1198
1199 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1200}
1201
1202bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1203 SDValue Op, const TargetLoweringOpt &TLO) const {
1204 // The ISel process runs DAGCombiner after legalization; this step is called
1205 // the SelectionDAG optimization phase. This post-legalization combining
1206 // process runs DAGCombiner on each node and, if there was a change to be
1207 // made, re-runs legalization on the node and its users to make sure
1208 // everything is in a legalized state.
1209 //
1210 // The legalization calls lowering routines, and we do our custom lowering for
1211 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1212 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1213 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1214 // turns unused vector elements into undefs. This routine does not cooperate
1215 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros: the
1216 // combination can result in an infinite loop, in which undefs are converted
1217 // to zeros in legalization and back to undefs in combining.
1218 //
1219 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1220 // running for build_vectors.
1221 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1222 return false;
1223 return true;
1224}
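
// Spelled out, the ping-pong being avoided looks like this (illustrative):
//
//   build_vector (x, undef)  --LowerBUILD_VECTOR-->          build_vector (x, 0)
//   build_vector (x, 0)      --SimplifyDemandedVectorElts--> build_vector (x, undef)
//   ... and so on, since lane 1 is never demanded.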
1225
1226//===----------------------------------------------------------------------===//
1227// WebAssembly Lowering private implementation.
1228//===----------------------------------------------------------------------===//
1229
1230//===----------------------------------------------------------------------===//
1231// Lowering Code
1232//===----------------------------------------------------------------------===//
1233
1234static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1236 DAG.getContext()->diagnose(
1237 DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1238}
1239
1240// Test whether the given calling convention is supported.
1242 // We currently support the language-independent target-independent
1243 // conventions. We don't yet have a way to annotate calls with properties like
1244 // "cold", and we don't have any call-clobbered registers, so these are mostly
1245 // all handled the same.
1246 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1247 CallConv == CallingConv::Cold ||
1248 CallConv == CallingConv::PreserveMost ||
1249 CallConv == CallingConv::PreserveAll ||
1250 CallConv == CallingConv::CXX_FAST_TLS ||
1252 CallConv == CallingConv::Swift;
1253}
1254
1255SDValue
1256WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1257 SmallVectorImpl<SDValue> &InVals) const {
1258 SelectionDAG &DAG = CLI.DAG;
1259 SDLoc DL = CLI.DL;
1260 SDValue Chain = CLI.Chain;
1261 SDValue Callee = CLI.Callee;
1262 MachineFunction &MF = DAG.getMachineFunction();
1263 auto Layout = MF.getDataLayout();
1264
1265 CallingConv::ID CallConv = CLI.CallConv;
1266 if (!callingConvSupported(CallConv))
1267 fail(DL, DAG,
1268 "WebAssembly doesn't support language-specific or target-specific "
1269 "calling conventions yet");
1270 if (CLI.IsPatchPoint)
1271 fail(DL, DAG, "WebAssembly doesn't support patch point yet");
1272
1273 if (CLI.IsTailCall) {
1274 auto NoTail = [&](const char *Msg) {
1275 if (CLI.CB && CLI.CB->isMustTailCall())
1276 fail(DL, DAG, Msg);
1277 CLI.IsTailCall = false;
1278 };
1279
1280 if (!Subtarget->hasTailCall())
1281 NoTail("WebAssembly 'tail-call' feature not enabled");
1282
1283 // Varargs calls cannot be tail calls because the buffer is on the stack
1284 if (CLI.IsVarArg)
1285 NoTail("WebAssembly does not support varargs tail calls");
1286
1287 // Do not tail call unless caller and callee return types match
1288 const Function &F = MF.getFunction();
1289 const TargetMachine &TM = getTargetMachine();
1290 Type *RetTy = F.getReturnType();
1291 SmallVector<MVT, 4> CallerRetTys;
1292 SmallVector<MVT, 4> CalleeRetTys;
1293 computeLegalValueVTs(F, TM, RetTy, CallerRetTys);
1294 computeLegalValueVTs(F, TM, CLI.RetTy, CalleeRetTys);
1295 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1296 std::equal(CallerRetTys.begin(), CallerRetTys.end(),
1297 CalleeRetTys.begin());
1298 if (!TypesMatch)
1299 NoTail("WebAssembly tail call requires caller and callee return types to "
1300 "match");
1301
1302 // If pointers to local stack values are passed, we cannot tail call
1303 if (CLI.CB) {
1304 for (auto &Arg : CLI.CB->args()) {
1305 Value *Val = Arg.get();
1306 // Trace the value back through pointer operations
1307 while (true) {
1308 Value *Src = Val->stripPointerCastsAndAliases();
1309 if (auto *GEP = dyn_cast<GetElementPtrInst>(Src))
1310 Src = GEP->getPointerOperand();
1311 if (Val == Src)
1312 break;
1313 Val = Src;
1314 }
1315 if (isa<AllocaInst>(Val)) {
1316 NoTail(
1317 "WebAssembly does not support tail calling with stack arguments");
1318 break;
1319 }
1320 }
1321 }
1322 }
1323
1324 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1325 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1326 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1327
1328 // The generic code may have added an sret argument. If we're lowering an
1329 // invoke function, the ABI requires that the function pointer be the first
1330 // argument, so we may have to swap the arguments.
1331 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1332 Outs[0].Flags.isSRet()) {
1333 std::swap(Outs[0], Outs[1]);
1334 std::swap(OutVals[0], OutVals[1]);
1335 }
1336
1337 bool HasSwiftSelfArg = false;
1338 bool HasSwiftErrorArg = false;
1339 unsigned NumFixedArgs = 0;
1340 for (unsigned I = 0; I < Outs.size(); ++I) {
1341 const ISD::OutputArg &Out = Outs[I];
1342 SDValue &OutVal = OutVals[I];
1343 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1344 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1345 if (Out.Flags.isNest())
1346 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1347 if (Out.Flags.isInAlloca())
1348 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1349 if (Out.Flags.isInConsecutiveRegs())
1350 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1352 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1353 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1354 auto &MFI = MF.getFrameInfo();
1355 int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
1357 /*isSS=*/false);
1358 SDValue SizeNode =
1359 DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
1360 SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1361 Chain = DAG.getMemcpy(Chain, DL, FINode, OutVal, SizeNode,
1363 /*isVolatile*/ false, /*AlwaysInline=*/false,
1364 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
1365 MachinePointerInfo());
1366 OutVal = FINode;
1367 }
1368 // Count the number of fixed args *after* legalization.
1369 NumFixedArgs += !Out.Flags.isVarArg();
1370 }
1371
1372 bool IsVarArg = CLI.IsVarArg;
1373 auto PtrVT = getPointerTy(Layout);
1374
1375 // For swiftcc, emit additional swiftself and swifterror arguments if they
1376 // are not already present. These additional arguments are also added to the
1377 // callee signature; they are necessary to match callee and caller signatures
1378 // for indirect calls.
1379 if (CallConv == CallingConv::Swift) {
1380 Type *PtrTy = PointerType::getUnqual(*DAG.getContext());
1381 if (!HasSwiftSelfArg) {
1382 NumFixedArgs++;
1383 ISD::ArgFlagsTy Flags;
1384 Flags.setSwiftSelf();
1385 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1386 CLI.Outs.push_back(Arg);
1387 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1388 CLI.OutVals.push_back(ArgVal);
1389 }
1390 if (!HasSwiftErrorArg) {
1391 NumFixedArgs++;
1392 ISD::ArgFlagsTy Flags;
1393 Flags.setSwiftError();
1394 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1395 CLI.Outs.push_back(Arg);
1396 SDValue ArgVal = DAG.getUNDEF(PtrVT);
1397 CLI.OutVals.push_back(ArgVal);
1398 }
1399 }
1400
1401 // Analyze operands of the call, assigning locations to each operand.
1403 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1404
1405 if (IsVarArg) {
1406 // Outgoing non-fixed arguments are placed in a buffer. First
1407 // compute their offsets and the total amount of buffer space needed.
1408 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1409 const ISD::OutputArg &Out = Outs[I];
1410 SDValue &Arg = OutVals[I];
1411 EVT VT = Arg.getValueType();
1412 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1413 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
1414 Align Alignment =
1415 std::max(Out.Flags.getNonZeroOrigAlign(), Layout.getABITypeAlign(Ty));
1416 unsigned Offset =
1417 CCInfo.AllocateStack(Layout.getTypeAllocSize(Ty), Alignment);
1418 CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(),
1419 Offset, VT.getSimpleVT(),
1421 }
1422 }
1423
1424 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1425
1426 SDValue FINode;
1427 if (IsVarArg && NumBytes) {
1428 // For the non-fixed arguments, next emit stores writing the argument
1429 // values to the stack buffer at the offsets computed above.
1430 MaybeAlign StackAlign = Layout.getStackAlignment();
1431 assert(StackAlign && "data layout string is missing stack alignment");
1432 int FI = MF.getFrameInfo().CreateStackObject(NumBytes, *StackAlign,
1433 /*isSS=*/false);
1434 unsigned ValNo = 0;
1436 for (SDValue Arg : drop_begin(OutVals, NumFixedArgs)) {
1437 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1438 "ArgLocs should remain in order and only hold varargs args");
1439 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1440 FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
1441 SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, FINode,
1442 DAG.getConstant(Offset, DL, PtrVT));
1443 Chains.push_back(
1444 DAG.getStore(Chain, DL, Arg, Add,
1446 }
1447 if (!Chains.empty())
1448 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
1449 } else if (IsVarArg) {
1450 FINode = DAG.getIntPtrConstant(0, DL);
1451 }
1452
1453 if (Callee->getOpcode() == ISD::GlobalAddress) {
1454 // If the callee is a GlobalAddress node (quite common, since every direct
1455 // call is), turn it into a TargetGlobalAddress node so that
1456 // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct calls.
1457 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Callee);
1460 GA->getOffset());
1461 Callee = DAG.getNode(WebAssemblyISD::Wrapper, DL,
1462 getPointerTy(DAG.getDataLayout()), Callee);
1463 }
1464
1465 // Compute the operands for the CALLn node.
1467 Ops.push_back(Chain);
1468 Ops.push_back(Callee);
1469
1470 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1471 // isn't reliable.
1472 Ops.append(OutVals.begin(),
1473 IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1474 // Add a pointer to the vararg buffer.
1475 if (IsVarArg)
1476 Ops.push_back(FINode);
1477
1478 SmallVector<EVT, 8> InTys;
1479 for (const auto &In : Ins) {
1480 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1481 assert(!In.Flags.isNest() && "nest is not valid for return values");
1482 if (In.Flags.isInAlloca())
1483 fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values");
1484 if (In.Flags.isInConsecutiveRegs())
1485 fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values");
1486 if (In.Flags.isInConsecutiveRegsLast())
1487 fail(DL, DAG,
1488 "WebAssembly hasn't implemented cons regs last return values");
1489 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1490 // registers.
1491 InTys.push_back(In.VT);
1492 }
1493
1494 // Lastly, if this is a call to a funcref, we need to add a table.set
1495 // instruction to the chain and transform the call.
1497 CLI.CB->getCalledOperand()->getType())) {
1498 // In the absence of the function references proposal, where a funcref call
1499 // would be lowered to call_ref, with reference types we generate a table.set
1500 // to store the funcref in a special table used solely for this purpose,
1501 // followed by a call_indirect. Here we just generate the table.set, and
1502 // return the SDValue of the table.set so that LowerCall can finalize the
1503 // lowering by generating the call_indirect.
1504 SDValue Chain = Ops[0];
1505
1507 MF.getContext(), Subtarget);
1508 SDValue Sym = DAG.getMCSymbol(Table, PtrVT);
1509 SDValue TableSlot = DAG.getConstant(0, DL, MVT::i32);
1510 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1511 SDValue TableSet = DAG.getMemIntrinsicNode(
1512 WebAssemblyISD::TABLE_SET, DL, DAG.getVTList(MVT::Other), TableSetOps,
1513 MVT::funcref,
1514 // Machine Mem Operand args
1515 MachinePointerInfo(
1517 CLI.CB->getCalledOperand()->getPointerAlignment(DAG.getDataLayout()),
1519
1520 Ops[0] = TableSet; // The new chain is the TableSet itself
1521 }
1522
1523 if (CLI.IsTailCall) {
1524 // ret_calls do not return values to the current frame
1525 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1526 return DAG.getNode(WebAssemblyISD::RET_CALL, DL, NodeTys, Ops);
1527 }
1528
1529 InTys.push_back(MVT::Other);
1530 SDVTList InTyList = DAG.getVTList(InTys);
1531 SDValue Res = DAG.getNode(WebAssemblyISD::CALL, DL, InTyList, Ops);
1532
1533 for (size_t I = 0; I < Ins.size(); ++I)
1534 InVals.push_back(Res.getValue(I));
1535
1536 // Return the chain
1537 return Res.getValue(Ins.size());
1538}
1539
1540bool WebAssemblyTargetLowering::CanLowerReturn(
1541 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1542 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1543 const Type *RetTy) const {
1544 // WebAssembly can only handle returning tuples with multivalue enabled
1545 return WebAssembly::canLowerReturn(Outs.size(), Subtarget);
1546}
1547
1548SDValue WebAssemblyTargetLowering::LowerReturn(
1549 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1551 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1552 SelectionDAG &DAG) const {
1553 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1554 "MVP WebAssembly can only return up to one value");
1555 if (!callingConvSupported(CallConv))
1556 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1557
1558 SmallVector<SDValue, 4> RetOps(1, Chain);
1559 RetOps.append(OutVals.begin(), OutVals.end());
1560 Chain = DAG.getNode(WebAssemblyISD::RETURN, DL, MVT::Other, RetOps);
1561
1562 // Record the number and types of the return values.
1563 for (const ISD::OutputArg &Out : Outs) {
1564 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1565 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1566 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1567 if (Out.Flags.isInAlloca())
1568 fail(DL, DAG, "WebAssembly hasn't implemented inalloca results");
1569 if (Out.Flags.isInConsecutiveRegs())
1570 fail(DL, DAG, "WebAssembly hasn't implemented cons regs results");
1572 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results");
1573 }
1574
1575 return Chain;
1576}
1577
1578SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1579 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1580 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1581 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1582 if (!callingConvSupported(CallConv))
1583 fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
1584
1585 MachineFunction &MF = DAG.getMachineFunction();
1586 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1587
1588 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1589 // of the incoming values before they're represented by virtual registers.
1590 MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
1591
1592 bool HasSwiftErrorArg = false;
1593 bool HasSwiftSelfArg = false;
1594 for (const ISD::InputArg &In : Ins) {
1595 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1596 HasSwiftErrorArg |= In.Flags.isSwiftError();
1597 if (In.Flags.isInAlloca())
1598 fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments");
1599 if (In.Flags.isNest())
1600 fail(DL, DAG, "WebAssembly hasn't implemented nest arguments");
1601 if (In.Flags.isInConsecutiveRegs())
1602 fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments");
1603 if (In.Flags.isInConsecutiveRegsLast())
1604 fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
1605 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1606 // registers.
1607 InVals.push_back(In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT,
1608 DAG.getTargetConstant(InVals.size(),
1609 DL, MVT::i32))
1610 : DAG.getUNDEF(In.VT));
1611
1612 // Record the number and types of arguments.
1613 MFI->addParam(In.VT);
1614 }
1615
1616 // For swiftcc, emit additional swiftself and swifterror arguments if they
1617 // are not already present. These additional arguments are also added to the
1618 // callee signature; they are necessary to match callee and caller signatures
1619 // for indirect calls.
1620 auto PtrVT = getPointerTy(MF.getDataLayout());
1621 if (CallConv == CallingConv::Swift) {
1622 if (!HasSwiftSelfArg) {
1623 MFI->addParam(PtrVT);
1624 }
1625 if (!HasSwiftErrorArg) {
1626 MFI->addParam(PtrVT);
1627 }
1628 }
1629 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1630 // the buffer is passed as an argument.
1631 if (IsVarArg) {
1632 MVT PtrVT = getPointerTy(MF.getDataLayout());
1633 Register VarargVreg =
1635 MFI->setVarargBufferVreg(VarargVreg);
1636 Chain = DAG.getCopyToReg(
1637 Chain, DL, VarargVreg,
1638 DAG.getNode(WebAssemblyISD::ARGUMENT, DL, PtrVT,
1639 DAG.getTargetConstant(Ins.size(), DL, MVT::i32)));
1640 MFI->addParam(PtrVT);
1641 }
1642
1643 // Record the number and types of arguments and results.
1644 SmallVector<MVT, 4> Params;
1647 MF.getFunction(), DAG.getTarget(), Params, Results);
1648 for (MVT VT : Results)
1649 MFI->addResult(VT);
1650 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1651 // the param logic here with ComputeSignatureVTs
1652 assert(MFI->getParams().size() == Params.size() &&
1653 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1654 Params.begin()));
1655
1656 return Chain;
1657}
1658
1659void WebAssemblyTargetLowering::ReplaceNodeResults(
1661 switch (N->getOpcode()) {
1663 // Do not add any results, signifying that N should not be custom lowered
1664 // after all. This happens because simd128 turns on custom lowering for
1665 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1666 // illegal type.
1667 break;
 1668 case ISD::SIGN_EXTEND_VECTOR_INREG:
 1669 case ISD::ZERO_EXTEND_VECTOR_INREG:
 1670 // Do not add any results, signifying that N should not be custom lowered.
1671 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1672 break;
1673 case ISD::ADD:
1674 case ISD::SUB:
1675 Results.push_back(Replace128Op(N, DAG));
1676 break;
1677 default:
1679 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1680 }
1681}
1682
1683//===----------------------------------------------------------------------===//
1684// Custom lowering hooks.
1685//===----------------------------------------------------------------------===//
1686
1687SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1688 SelectionDAG &DAG) const {
1689 SDLoc DL(Op);
1690 switch (Op.getOpcode()) {
1691 default:
1692 llvm_unreachable("unimplemented operation lowering");
1693 return SDValue();
1694 case ISD::FrameIndex:
1695 return LowerFrameIndex(Op, DAG);
1696 case ISD::GlobalAddress:
1697 return LowerGlobalAddress(Op, DAG);
 1698 case ISD::GlobalTLSAddress:
 1699 return LowerGlobalTLSAddress(Op, DAG);
 1700 case ISD::ExternalSymbol:
 1701 return LowerExternalSymbol(Op, DAG);
1702 case ISD::JumpTable:
1703 return LowerJumpTable(Op, DAG);
1704 case ISD::BR_JT:
1705 return LowerBR_JT(Op, DAG);
1706 case ISD::VASTART:
1707 return LowerVASTART(Op, DAG);
1708 case ISD::BlockAddress:
1709 case ISD::BRIND:
1710 fail(DL, DAG, "WebAssembly hasn't implemented computed gotos");
1711 return SDValue();
1712 case ISD::RETURNADDR:
1713 return LowerRETURNADDR(Op, DAG);
1714 case ISD::FRAMEADDR:
1715 return LowerFRAMEADDR(Op, DAG);
1716 case ISD::CopyToReg:
1717 return LowerCopyToReg(Op, DAG);
 1718 case ISD::EXTRACT_VECTOR_ELT:
 1719 case ISD::INSERT_VECTOR_ELT:
 1720 return LowerAccessVectorElement(Op, DAG);
 1721 case ISD::INTRINSIC_VOID:
 1722 case ISD::INTRINSIC_WO_CHAIN:
 1723 case ISD::INTRINSIC_W_CHAIN:
 1724 return LowerIntrinsic(Op, DAG);
 1725 case ISD::SIGN_EXTEND_INREG:
 1726 return LowerSIGN_EXTEND_INREG(Op, DAG);
 1727 case ISD::ZERO_EXTEND_VECTOR_INREG:
 1728 case ISD::SIGN_EXTEND_VECTOR_INREG:
 1729 case ISD::ANY_EXTEND_VECTOR_INREG:
 1730 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1731 case ISD::BUILD_VECTOR:
1732 return LowerBUILD_VECTOR(Op, DAG);
 1733 case ISD::VECTOR_SHUFFLE:
 1734 return LowerVECTOR_SHUFFLE(Op, DAG);
1735 case ISD::SETCC:
1736 return LowerSETCC(Op, DAG);
1737 case ISD::SHL:
1738 case ISD::SRA:
1739 case ISD::SRL:
1740 return LowerShift(Op, DAG);
 1741 case ISD::FP_TO_SINT_SAT:
 1742 case ISD::FP_TO_UINT_SAT:
 1743 return LowerFP_TO_INT_SAT(Op, DAG);
1744 case ISD::LOAD:
1745 return LowerLoad(Op, DAG);
1746 case ISD::STORE:
1747 return LowerStore(Op, DAG);
1748 case ISD::CTPOP:
1749 case ISD::CTLZ:
1750 case ISD::CTTZ:
1751 return DAG.UnrollVectorOp(Op.getNode());
1752 case ISD::CLEAR_CACHE:
1753 report_fatal_error("llvm.clear_cache is not supported on wasm");
1754 case ISD::SMUL_LOHI:
1755 case ISD::UMUL_LOHI:
1756 return LowerMUL_LOHI(Op, DAG);
1757 case ISD::UADDO:
1758 return LowerUADDO(Op, DAG);
1759 }
1760}
1761
1762static bool IsWebAssemblyGlobal(SDValue Op) {
 1763 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
 1764 return WebAssembly::isWasmVarAddressSpace(GA->getAddressSpace());
 1765
1766 return false;
1767}
1768
1769static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1770 SelectionDAG &DAG) {
 1771 auto *FI = dyn_cast<FrameIndexSDNode>(Op);
 1772 if (!FI)
1773 return std::nullopt;
1774
1775 auto &MF = DAG.getMachineFunction();
 1776 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FI->getIndex());
1777}
1778
1779SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1780 SelectionDAG &DAG) const {
1781 SDLoc DL(Op);
1782 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
1783 const SDValue &Value = SN->getValue();
1784 const SDValue &Base = SN->getBasePtr();
1785 const SDValue &Offset = SN->getOffset();
1786
 1787 if (IsWebAssemblyGlobal(Base)) {
 1788 if (!Offset->isUndef())
1789 report_fatal_error("unexpected offset when storing to webassembly global",
1790 false);
1791
1792 SDVTList Tys = DAG.getVTList(MVT::Other);
1793 SDValue Ops[] = {SN->getChain(), Value, Base};
1794 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_SET, DL, Tys, Ops,
1795 SN->getMemoryVT(), SN->getMemOperand());
1796 }
1797
1798 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1799 if (!Offset->isUndef())
1800 report_fatal_error("unexpected offset when storing to webassembly local",
1801 false);
1802
1803 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1804 SDVTList Tys = DAG.getVTList(MVT::Other); // The chain.
1805 SDValue Ops[] = {SN->getChain(), Idx, Value};
1806 return DAG.getNode(WebAssemblyISD::LOCAL_SET, DL, Tys, Ops);
1807 }
1808
1811 "Encountered an unlowerable store to the wasm_var address space",
1812 false);
1813
1814 return Op;
1815}
1816
1817SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1818 SelectionDAG &DAG) const {
1819 SDLoc DL(Op);
1820 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
1821 const SDValue &Base = LN->getBasePtr();
1822 const SDValue &Offset = LN->getOffset();
1823
 1824 if (IsWebAssemblyGlobal(Base)) {
 1825 if (!Offset->isUndef())
 1826 report_fatal_error(
 1827 "unexpected offset when loading from webassembly global", false);
1828
1829 SDVTList Tys = DAG.getVTList(LN->getValueType(0), MVT::Other);
1830 SDValue Ops[] = {LN->getChain(), Base};
1831 return DAG.getMemIntrinsicNode(WebAssemblyISD::GLOBAL_GET, DL, Tys, Ops,
1832 LN->getMemoryVT(), LN->getMemOperand());
1833 }
1834
1835 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Base, DAG)) {
1836 if (!Offset->isUndef())
1838 "unexpected offset when loading from webassembly local", false);
1839
1840 SDValue Idx = DAG.getTargetConstant(*Local, Base, MVT::i32);
1841 EVT LocalVT = LN->getValueType(0);
1842 return DAG.getNode(WebAssemblyISD::LOCAL_GET, DL, {LocalVT, MVT::Other},
1843 {LN->getChain(), Idx});
1844 }
1845
1848 "Encountered an unlowerable load from the wasm_var address space",
1849 false);
1850
1851 return Op;
1852}
1853
1854SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1855 SelectionDAG &DAG) const {
1856 assert(Subtarget->hasWideArithmetic());
1857 assert(Op.getValueType() == MVT::i64);
1858 SDLoc DL(Op);
1859 unsigned Opcode;
1860 switch (Op.getOpcode()) {
1861 case ISD::UMUL_LOHI:
1862 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1863 break;
1864 case ISD::SMUL_LOHI:
1865 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1866 break;
1867 default:
1868 llvm_unreachable("unexpected opcode");
1869 }
1870 SDValue LHS = Op.getOperand(0);
1871 SDValue RHS = Op.getOperand(1);
1872 SDValue Lo =
1873 DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64), LHS, RHS);
1874 SDValue Hi(Lo.getNode(), 1);
1875 SDValue Ops[] = {Lo, Hi};
1876 return DAG.getMergeValues(Ops, DL);
1877}
1878
1879// Lowers `UADDO` nodes to an `i64.add128` instruction when wide arithmetic is
1880// enabled.
1881// This generates a single wasm instruction for the operation, where the
1882// upper halves of both operands are constant zeros. The upper half of the
1883// result then indicates whether the overflow happened.
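// An illustrative mapping of the lowering below:
//   (uaddo $x, $y)  ==>  (lo, hi) = i64.add128 $x, 0, $y, 0
// where `lo` is the i64 sum and `hi` (0 or 1) is the carry, i.e. the
// overflow bit, which is then truncated to i32.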
1884SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1885 SelectionDAG &DAG) const {
1886 assert(Subtarget->hasWideArithmetic());
1887 assert(Op.getValueType() == MVT::i64);
1888 assert(Op.getOpcode() == ISD::UADDO);
1889 SDLoc DL(Op);
1890 SDValue LHS = Op.getOperand(0);
1891 SDValue RHS = Op.getOperand(1);
1892 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
1893 SDValue Result =
1894 DAG.getNode(WebAssemblyISD::I64_ADD128, DL,
1895 DAG.getVTList(MVT::i64, MVT::i64), LHS, Zero, RHS, Zero);
1896 SDValue CarryI64(Result.getNode(), 1);
1897 SDValue CarryI32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, CarryI64);
1898 SDValue Ops[] = {Result, CarryI32};
1899 return DAG.getMergeValues(Ops, DL);
1900}
1901
1902SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1903 SelectionDAG &DAG) const {
1904 assert(Subtarget->hasWideArithmetic());
1905 assert(N->getValueType(0) == MVT::i128);
1906 SDLoc DL(N);
1907 unsigned Opcode;
1908 switch (N->getOpcode()) {
1909 case ISD::ADD:
1910 Opcode = WebAssemblyISD::I64_ADD128;
1911 break;
1912 case ISD::SUB:
1913 Opcode = WebAssemblyISD::I64_SUB128;
1914 break;
1915 default:
1916 llvm_unreachable("unexpected opcode");
1917 }
1918 SDValue LHS = N->getOperand(0);
1919 SDValue RHS = N->getOperand(1);
1920
1921 SDValue C0 = DAG.getConstant(0, DL, MVT::i64);
1922 SDValue C1 = DAG.getConstant(1, DL, MVT::i64);
1923 SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C0);
1924 SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, LHS, C1);
1925 SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C0);
1926 SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, RHS, C1);
1927 SDValue Result_LO = DAG.getNode(Opcode, DL, DAG.getVTList(MVT::i64, MVT::i64),
1928 LHS_0, LHS_1, RHS_0, RHS_1);
1929 SDValue Result_HI(Result_LO.getNode(), 1);
1930 return DAG.getNode(ISD::BUILD_PAIR, DL, N->getVTList(), Result_LO, Result_HI);
1931}
1932
1933SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1934 SelectionDAG &DAG) const {
1935 SDValue Src = Op.getOperand(2);
1936 if (isa<FrameIndexSDNode>(Src.getNode())) {
1937 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1938 // the FI to some LEA-like instruction, but since we don't have that, we
1939 // need to insert some kind of instruction that can take an FI operand and
 1940 // produce a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1941 // local.copy between Op and its FI operand.
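    // A sketch of the rewrite: (CopyToReg chain, %vreg, FI) becomes
    // (CopyToReg chain, %vreg, (COPY_I32 FI)), so that ISel can later fold
    // the frame index into the copy instruction.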
1942 SDValue Chain = Op.getOperand(0);
1943 SDLoc DL(Op);
1944 Register Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
1945 EVT VT = Src.getValueType();
1946 SDValue Copy(DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
1947 : WebAssembly::COPY_I64,
1948 DL, VT, Src),
1949 0);
1950 return Op.getNode()->getNumValues() == 1
1951 ? DAG.getCopyToReg(Chain, DL, Reg, Copy)
1952 : DAG.getCopyToReg(Chain, DL, Reg, Copy,
1953 Op.getNumOperands() == 4 ? Op.getOperand(3)
1954 : SDValue());
1955 }
1956 return SDValue();
1957}
1958
1959SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1960 SelectionDAG &DAG) const {
1961 int FI = cast<FrameIndexSDNode>(Op)->getIndex();
1962 return DAG.getTargetFrameIndex(FI, Op.getValueType());
1963}
1964
1965SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1966 SelectionDAG &DAG) const {
1967 SDLoc DL(Op);
1968
1969 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1970 fail(DL, DAG,
1971 "Non-Emscripten WebAssembly hasn't implemented "
1972 "__builtin_return_address");
1973 return SDValue();
1974 }
1975
1976 unsigned Depth = Op.getConstantOperandVal(0);
1977 MakeLibCallOptions CallOptions;
1978 return makeLibCall(DAG, RTLIB::RETURN_ADDRESS, Op.getValueType(),
1979 {DAG.getConstant(Depth, DL, MVT::i32)}, CallOptions, DL)
1980 .first;
1981}
1982
1983SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1984 SelectionDAG &DAG) const {
1985 // Non-zero depths are not supported by WebAssembly currently. Use the
1986 // legalizer's default expansion, which is to return 0 (what this function is
1987 // documented to do).
1988 if (Op.getConstantOperandVal(0) > 0)
1989 return SDValue();
1990
 1991 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
 1992 EVT VT = Op.getValueType();
1993 Register FP =
1994 Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
1995 return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(Op), FP, VT);
1996}
1997
1998SDValue
1999WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2000 SelectionDAG &DAG) const {
2001 SDLoc DL(Op);
2002 const auto *GA = cast<GlobalAddressSDNode>(Op);
2003
2004 MachineFunction &MF = DAG.getMachineFunction();
2005 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2006 report_fatal_error("cannot use thread-local storage without bulk memory",
2007 false);
2008
2009 const GlobalValue *GV = GA->getGlobal();
2010
2011 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2012 // on other targets, if we have thread-local storage, only the local-exec
2013 // model is possible.
2014 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2015 ? GV->getThreadLocalMode()
 2016 : GlobalValue::LocalExecTLSModel;
 2017
2018 // Unsupported TLS modes
 2019 assert(model != GlobalValue::NotThreadLocal);
 2020 assert(model != GlobalValue::InitialExecTLSModel);
 2021
2022 if (model == GlobalValue::LocalExecTLSModel ||
 2023 model == GlobalValue::LocalDynamicTLSModel ||
 2024 (model == GlobalValue::GeneralDynamicTLSModel &&
 2025 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2026 // For DSO-local TLS variables we use offset from __tls_base
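    // An illustrative sketch of the emitted sequence (wasm32 case):
    //   global.get __tls_base
    //   i32.const  <sym>@TLSREL
    //   i32.add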
2027
2028 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2029 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2030 : WebAssembly::GLOBAL_GET_I32;
2031 const char *BaseName = MF.createExternalSymbolName("__tls_base");
2032
 2033 SDValue BaseAddr(
 2034 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2035 DAG.getTargetExternalSymbol(BaseName, PtrVT)),
2036 0);
2037
2038 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2039 GV, DL, PtrVT, GA->getOffset(), WebAssemblyII::MO_TLS_BASE_REL);
2040 SDValue SymOffset =
2041 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, TLSOffset);
2042
2043 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymOffset);
2044 }
2045
 2046 assert(model == GlobalValue::GeneralDynamicTLSModel);
 2047
2048 EVT VT = Op.getValueType();
2049 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2050 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2051 GA->getOffset(),
 2052 WebAssemblyII::MO_GOT_TLS));
2053}
2054
2055SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2056 SelectionDAG &DAG) const {
2057 SDLoc DL(Op);
2058 const auto *GA = cast<GlobalAddressSDNode>(Op);
2059 EVT VT = Op.getValueType();
2060 assert(GA->getTargetFlags() == 0 &&
2061 "Unexpected target flags on generic GlobalAddressSDNode");
 2062 if (!WebAssembly::isValidAddressSpace(GA->getAddressSpace()))
 2063 fail(DL, DAG, "Invalid address space for WebAssembly target");
2064
2065 unsigned OperandFlags = 0;
2066 const GlobalValue *GV = GA->getGlobal();
 2067 // Since WebAssembly tables cannot yet be shared across modules, we don't
 2068 // need special treatment for tables in PIC mode.
2069 if (isPositionIndependent() &&
 2070 !WebAssembly::isWebAssemblyTableType(GV->getValueType())) {
 2071 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2072 MachineFunction &MF = DAG.getMachineFunction();
2073 MVT PtrVT = getPointerTy(MF.getDataLayout());
2074 const char *BaseName;
2075 if (GV->getValueType()->isFunctionTy()) {
2076 BaseName = MF.createExternalSymbolName("__table_base");
 2077 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
 2078 } else {
2079 BaseName = MF.createExternalSymbolName("__memory_base");
 2080 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
 2081 }
 2082 SDValue BaseAddr =
2083 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2084 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2085
2086 SDValue SymAddr = DAG.getNode(
2087 WebAssemblyISD::WrapperREL, DL, VT,
2088 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, GA->getOffset(),
2089 OperandFlags));
2090
2091 return DAG.getNode(ISD::ADD, DL, VT, BaseAddr, SymAddr);
2092 }
 2093 OperandFlags = WebAssemblyII::MO_GOT;
 2094 }
2095
2096 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2097 DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT,
2098 GA->getOffset(), OperandFlags));
2099}
2100
2101SDValue
2102WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2103 SelectionDAG &DAG) const {
2104 SDLoc DL(Op);
2105 const auto *ES = cast<ExternalSymbolSDNode>(Op);
2106 EVT VT = Op.getValueType();
2107 assert(ES->getTargetFlags() == 0 &&
2108 "Unexpected target flags on generic ExternalSymbolSDNode");
2109 return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
2110 DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
2111}
2112
2113SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2114 SelectionDAG &DAG) const {
2115 // There's no need for a Wrapper node because we always incorporate a jump
2116 // table operand into a BR_TABLE instruction, rather than ever
2117 // materializing it in a register.
2118 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2119 return DAG.getTargetJumpTable(JT->getIndex(), Op.getValueType(),
2120 JT->getTargetFlags());
2121}
2122
2123SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2124 SelectionDAG &DAG) const {
2125 SDLoc DL(Op);
2126 SDValue Chain = Op.getOperand(0);
2127 const auto *JT = cast<JumpTableSDNode>(Op.getOperand(1));
2128 SDValue Index = Op.getOperand(2);
2129 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2130
 2131 SmallVector<SDValue, 8> Ops;
 2132 Ops.push_back(Chain);
2133 Ops.push_back(Index);
2134
2135 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2136 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2137
2138 // Add an operand for each case.
2139 for (auto *MBB : MBBs)
2140 Ops.push_back(DAG.getBasicBlock(MBB));
2141
2142 // Add the first MBB as a dummy default target for now. This will be replaced
2143 // with the proper default target (and the preceding range check eliminated)
2144 // if possible by WebAssemblyFixBrTableDefaults.
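  // E.g. (illustrative) a jump table with cases [bb1, bb2, bb3] yields
  // (BR_TABLE chain, index, bb1, bb2, bb3, bb1), with the trailing bb1 acting
  // as the placeholder default target.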
2145 Ops.push_back(DAG.getBasicBlock(*MBBs.begin()));
2146 return DAG.getNode(WebAssemblyISD::BR_TABLE, DL, MVT::Other, Ops);
2147}
2148
2149SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2150 SelectionDAG &DAG) const {
2151 SDLoc DL(Op);
2152 EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout());
2153
2154 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2155 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
2156
2157 SDValue ArgN = DAG.getCopyFromReg(DAG.getEntryNode(), DL,
2158 MFI->getVarargBufferVreg(), PtrVT);
2159 return DAG.getStore(Op.getOperand(0), DL, ArgN, Op.getOperand(1),
2160 MachinePointerInfo(SV));
2161}
2162
2163SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2164 SelectionDAG &DAG) const {
2165 MachineFunction &MF = DAG.getMachineFunction();
2166 unsigned IntNo;
2167 switch (Op.getOpcode()) {
 2168 case ISD::INTRINSIC_VOID:
 2169 case ISD::INTRINSIC_W_CHAIN:
 2170 IntNo = Op.getConstantOperandVal(1);
2171 break;
 2172 case ISD::INTRINSIC_WO_CHAIN:
 2173 IntNo = Op.getConstantOperandVal(0);
2174 break;
2175 default:
2176 llvm_unreachable("Invalid intrinsic");
2177 }
2178 SDLoc DL(Op);
2179
2180 switch (IntNo) {
2181 default:
2182 return SDValue(); // Don't custom lower most intrinsics.
2183
2184 case Intrinsic::wasm_lsda: {
2185 auto PtrVT = getPointerTy(MF.getDataLayout());
2186 const char *SymName = MF.createExternalSymbolName(
2187 "GCC_except_table" + std::to_string(MF.getFunctionNumber()));
2188 if (isPositionIndependent()) {
 2189 SDValue Node = DAG.getTargetExternalSymbol(
 2190 SymName, PtrVT, WebAssemblyII::MO_MEMORY_BASE_REL);
2191 const char *BaseName = MF.createExternalSymbolName("__memory_base");
 2192 SDValue BaseAddr =
 2193 DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT,
2194 DAG.getTargetExternalSymbol(BaseName, PtrVT));
2195 SDValue SymAddr =
2196 DAG.getNode(WebAssemblyISD::WrapperREL, DL, PtrVT, Node);
2197 return DAG.getNode(ISD::ADD, DL, PtrVT, BaseAddr, SymAddr);
2198 }
2199 SDValue Node = DAG.getTargetExternalSymbol(SymName, PtrVT);
2200 return DAG.getNode(WebAssemblyISD::Wrapper, DL, PtrVT, Node);
2201 }
2202
2203 case Intrinsic::wasm_shuffle: {
2204 // Drop in-chain and replace undefs, but otherwise pass through unchanged
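    // E.g. (illustrative) a mask element that is undef or out of range, such
    // as 33, is rewritten to the constant 0, so the SHUFFLE node built below
    // always carries 16 well-defined lane indices.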
2205 SDValue Ops[18];
2206 size_t OpIdx = 0;
2207 Ops[OpIdx++] = Op.getOperand(1);
2208 Ops[OpIdx++] = Op.getOperand(2);
2209 while (OpIdx < 18) {
2210 const SDValue &MaskIdx = Op.getOperand(OpIdx + 1);
2211 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2212 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2213 Ops[OpIdx++] = DAG.getConstant(0, DL, MVT::i32, isTarget);
2214 } else {
2215 Ops[OpIdx++] = MaskIdx;
2216 }
2217 }
2218 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2219 }
2220
2221 case Intrinsic::thread_pointer: {
2222 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2223 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2224 : WebAssembly::GLOBAL_GET_I32;
2225 const char *TlsBase = MF.createExternalSymbolName("__tls_base");
2226 return SDValue(
2227 DAG.getMachineNode(GlobalGet, DL, PtrVT,
2228 DAG.getTargetExternalSymbol(TlsBase, PtrVT)),
2229 0);
2230 }
2231 }
2232}
2233
2234SDValue
2235WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2236 SelectionDAG &DAG) const {
2237 SDLoc DL(Op);
2238 // If sign extension operations are disabled, allow sext_inreg only if operand
2239 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2240 // extension operations, but allowing sext_inreg in this context lets us have
2241 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2242 // everywhere would be simpler in this file, but would necessitate large and
2243 // brittle patterns to undo the expansion and select extract_lane_s
2244 // instructions.
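  // E.g. (illustrative) (sext_inreg (extract_vector_elt (v16i8 $v), 0), i8)
  // is kept as-is here and later selects i8x16.extract_lane_s $v, 0.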
2245 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2246 if (Op.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2247 return SDValue();
2248
2249 const SDValue &Extract = Op.getOperand(0);
2250 MVT VecT = Extract.getOperand(0).getSimpleValueType();
2251 if (VecT.getVectorElementType().getSizeInBits() > 32)
2252 return SDValue();
2253 MVT ExtractedLaneT =
2254 cast<VTSDNode>(Op.getOperand(1).getNode())->getVT().getSimpleVT();
2255 MVT ExtractedVecT =
2256 MVT::getVectorVT(ExtractedLaneT, 128 / ExtractedLaneT.getSizeInBits());
2257 if (ExtractedVecT == VecT)
2258 return Op;
2259
2260 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2261 const SDNode *Index = Extract.getOperand(1).getNode();
2262 if (!isa<ConstantSDNode>(Index))
2263 return SDValue();
2264 unsigned IndexVal = Index->getAsZExtVal();
2265 unsigned Scale =
2266 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2267 assert(Scale > 1);
2268 SDValue NewIndex =
2269 DAG.getConstant(IndexVal * Scale, DL, Index->getValueType(0));
2270 SDValue NewExtract = DAG.getNode(
 2271 ISD::EXTRACT_VECTOR_ELT, DL, Extract.getValueType(),
 2272 DAG.getBitcast(ExtractedVecT, Extract.getOperand(0)), NewIndex);
2273 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Op.getValueType(), NewExtract,
2274 Op.getOperand(1));
2275}
2276
2277static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2278 SelectionDAG &DAG) {
2279 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2280 return SDValue();
2281
2282 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2283 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2284 "expected extend_low");
2285 auto *Shuffle = cast<ShuffleVectorSDNode>(Op.getNode());
2286
2287 ArrayRef<int> Mask = Shuffle->getMask();
2288 // Look for a shuffle which moves from the high half to the low half.
2289 size_t FirstIdx = Mask.size() / 2;
2290 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2291 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2292 return SDValue();
2293 }
2294 }
2295
2296 SDLoc DL(Op);
2297 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2298 ? WebAssemblyISD::EXTEND_HIGH_S
2299 : WebAssemblyISD::EXTEND_HIGH_U;
2300 return DAG.getNode(Opc, DL, VT, Shuffle->getOperand(0));
2301}
2302
2303SDValue
2304WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2305 SelectionDAG &DAG) const {
2306 SDLoc DL(Op);
2307 EVT VT = Op.getValueType();
2308 SDValue Src = Op.getOperand(0);
2309 EVT SrcVT = Src.getValueType();
2310
2311 if (SrcVT.getVectorElementType() == MVT::i1 ||
2312 SrcVT.getVectorElementType() == MVT::i64)
2313 return SDValue();
2314
2315 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2316 "Unexpected extension factor.");
2317 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2318
2319 if (Scale != 2 && Scale != 4 && Scale != 8)
2320 return SDValue();
2321
2322 unsigned Ext;
2323 switch (Op.getOpcode()) {
2324 default:
2325 llvm_unreachable("unexpected opcode");
 2326 case ISD::ANY_EXTEND_VECTOR_INREG:
 2327 case ISD::ZERO_EXTEND_VECTOR_INREG:
 2328 Ext = WebAssemblyISD::EXTEND_LOW_U;
2329 break;
 2330 case ISD::SIGN_EXTEND_VECTOR_INREG:
 2331 Ext = WebAssemblyISD::EXTEND_LOW_S;
2332 break;
2333 }
2334
2335 if (Scale == 2) {
2336 // See if we can use EXTEND_HIGH.
2337 if (auto ExtendHigh = GetExtendHigh(Op.getOperand(0), Ext, VT, DAG))
2338 return ExtendHigh;
2339 }
2340
2341 SDValue Ret = Src;
2342 while (Scale != 1) {
2343 Ret = DAG.getNode(Ext, DL,
2344 Ret.getValueType()
2345 .widenIntegerVectorElementType(*DAG.getContext())
2346 .getHalfNumVectorElementsVT(*DAG.getContext()),
2347 Ret);
2348 Scale /= 2;
2349 }
2350 assert(Ret.getValueType() == VT);
2351 return Ret;
2352}
2353
2354static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
 2355 SDLoc DL(Op);
2356 if (Op.getValueType() != MVT::v2f64)
2357 return SDValue();
2358
2359 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2360 unsigned &Index) -> bool {
2361 switch (Op.getOpcode()) {
2362 case ISD::SINT_TO_FP:
2363 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2364 break;
2365 case ISD::UINT_TO_FP:
2366 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2367 break;
2368 case ISD::FP_EXTEND:
2369 Opcode = WebAssemblyISD::PROMOTE_LOW;
2370 break;
2371 default:
2372 return false;
2373 }
2374
2375 auto ExtractVector = Op.getOperand(0);
2376 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2377 return false;
2378
2379 if (!isa<ConstantSDNode>(ExtractVector.getOperand(1).getNode()))
2380 return false;
2381
2382 SrcVec = ExtractVector.getOperand(0);
2383 Index = ExtractVector.getConstantOperandVal(1);
2384 return true;
2385 };
2386
2387 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2388 SDValue LHSSrcVec, RHSSrcVec;
2389 if (!GetConvertedLane(Op.getOperand(0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2390 !GetConvertedLane(Op.getOperand(1), RHSOpcode, RHSSrcVec, RHSIndex))
2391 return SDValue();
2392
2393 if (LHSOpcode != RHSOpcode)
2394 return SDValue();
2395
2396 MVT ExpectedSrcVT;
2397 switch (LHSOpcode) {
2398 case WebAssemblyISD::CONVERT_LOW_S:
2399 case WebAssemblyISD::CONVERT_LOW_U:
2400 ExpectedSrcVT = MVT::v4i32;
2401 break;
2402 case WebAssemblyISD::PROMOTE_LOW:
2403 ExpectedSrcVT = MVT::v4f32;
2404 break;
2405 }
2406 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2407 return SDValue();
2408
2409 auto Src = LHSSrcVec;
2410 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2411 // Shuffle the source vector so that the converted lanes are the low lanes.
2412 Src = DAG.getVectorShuffle(
2413 ExpectedSrcVT, DL, LHSSrcVec, RHSSrcVec,
2414 {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2415 }
2416 return DAG.getNode(LHSOpcode, DL, MVT::v2f64, Src);
2417}
2418
2419SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2420 SelectionDAG &DAG) const {
2421 MVT VT = Op.getSimpleValueType();
2422 if (VT == MVT::v8f16) {
 2423 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
 2424 // FP16 type, so cast them to I16s.
2425 MVT IVT = VT.changeVectorElementType(MVT::i16);
 2426 SmallVector<SDValue, 8> NewOps;
 2427 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2428 NewOps.push_back(DAG.getBitcast(MVT::i16, Op.getOperand(I)));
2429 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(), IVT, NewOps);
2430 return DAG.getBitcast(VT, Res);
2431 }
2432
2433 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2434 return ConvertLow;
2435
2436 SDLoc DL(Op);
2437 const EVT VecT = Op.getValueType();
2438 const EVT LaneT = Op.getOperand(0).getValueType();
2439 const size_t Lanes = Op.getNumOperands();
2440 bool CanSwizzle = VecT == MVT::v16i8;
2441
2442 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2443 // possible number of lanes at once followed by a sequence of replace_lane
2444 // instructions to individually initialize any remaining lanes.
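  // E.g. (an illustrative sketch) <x, x, x, y> : v4i32 becomes roughly
  //   i32x4.replace_lane (i32x4.splat x), 3, y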
2445
2446 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2447 // swizzled lanes should be given greater weight.
2448
2449 // TODO: Investigate looping rather than always extracting/replacing specific
2450 // lanes to fill gaps.
2451
2452 auto IsConstant = [](const SDValue &V) {
2453 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2454 };
2455
2456 // Returns the source vector and index vector pair if they exist. Checks for:
2457 // (extract_vector_elt
2458 // $src,
2459 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2460 // )
2461 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2462 auto Bail = std::make_pair(SDValue(), SDValue());
2463 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2464 return Bail;
2465 const SDValue &SwizzleSrc = Lane->getOperand(0);
2466 const SDValue &IndexExt = Lane->getOperand(1);
2467 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2468 return Bail;
2469 const SDValue &Index = IndexExt->getOperand(0);
2470 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2471 return Bail;
2472 const SDValue &SwizzleIndices = Index->getOperand(0);
2473 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2474 SwizzleIndices.getValueType() != MVT::v16i8 ||
2475 Index->getOperand(1)->getOpcode() != ISD::Constant ||
2476 Index->getConstantOperandVal(1) != I)
2477 return Bail;
2478 return std::make_pair(SwizzleSrc, SwizzleIndices);
2479 };
2480
2481 // If the lane is extracted from another vector at a constant index, return
2482 // that vector. The source vector must not have more lanes than the dest
2483 // because the shufflevector indices are in terms of the destination lanes and
2484 // would not be able to address the smaller individual source lanes.
2485 auto GetShuffleSrc = [&](const SDValue &Lane) {
2486 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2487 return SDValue();
2488 if (!isa<ConstantSDNode>(Lane->getOperand(1).getNode()))
2489 return SDValue();
2490 if (Lane->getOperand(0).getValueType().getVectorNumElements() >
2491 VecT.getVectorNumElements())
2492 return SDValue();
2493 return Lane->getOperand(0);
2494 };
2495
2496 using ValueEntry = std::pair<SDValue, size_t>;
2497 SmallVector<ValueEntry, 16> SplatValueCounts;
2498
2499 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2500 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2501
2502 using ShuffleEntry = std::pair<SDValue, size_t>;
2503 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2504
2505 auto AddCount = [](auto &Counts, const auto &Val) {
2506 auto CountIt =
2507 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2508 if (CountIt == Counts.end()) {
2509 Counts.emplace_back(Val, 1);
2510 } else {
2511 CountIt->second++;
2512 }
2513 };
2514
2515 auto GetMostCommon = [](auto &Counts) {
2516 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2517 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2518 return *CommonIt;
2519 };
2520
2521 size_t NumConstantLanes = 0;
2522
2523 // Count eligible lanes for each type of vector creation op
2524 for (size_t I = 0; I < Lanes; ++I) {
2525 const SDValue &Lane = Op->getOperand(I);
2526 if (Lane.isUndef())
2527 continue;
2528
2529 AddCount(SplatValueCounts, Lane);
2530
2531 if (IsConstant(Lane))
2532 NumConstantLanes++;
2533 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2534 AddCount(ShuffleCounts, ShuffleSrc);
2535 if (CanSwizzle) {
2536 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2537 if (SwizzleSrcs.first)
2538 AddCount(SwizzleCounts, SwizzleSrcs);
2539 }
2540 }
2541
2542 SDValue SplatValue;
2543 size_t NumSplatLanes;
2544 std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
2545
2546 SDValue SwizzleSrc;
2547 SDValue SwizzleIndices;
2548 size_t NumSwizzleLanes = 0;
2549 if (SwizzleCounts.size())
2550 std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
2551 NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2552
2553 // Shuffles can draw from up to two vectors, so find the two most common
2554 // sources.
2555 SDValue ShuffleSrc1, ShuffleSrc2;
2556 size_t NumShuffleLanes = 0;
2557 if (ShuffleCounts.size()) {
2558 std::tie(ShuffleSrc1, NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2559 llvm::erase_if(ShuffleCounts,
2560 [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2561 }
2562 if (ShuffleCounts.size()) {
2563 size_t AdditionalShuffleLanes;
2564 std::tie(ShuffleSrc2, AdditionalShuffleLanes) =
2565 GetMostCommon(ShuffleCounts);
2566 NumShuffleLanes += AdditionalShuffleLanes;
2567 }
2568
2569 // Predicate returning true if the lane is properly initialized by the
2570 // original instruction
2571 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
 2572 SDValue Result;
 2573 // Prefer swizzles over shuffles over vector consts over splats
2574 if (NumSwizzleLanes >= NumShuffleLanes &&
2575 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2576 Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
2577 SwizzleIndices);
2578 auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
2579 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2580 return Swizzled == GetSwizzleSrcs(I, Lane);
2581 };
2582 } else if (NumShuffleLanes >= NumConstantLanes &&
2583 NumShuffleLanes >= NumSplatLanes) {
2584 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2585 size_t DestLaneCount = VecT.getVectorNumElements();
2586 size_t Scale1 = 1;
2587 size_t Scale2 = 1;
2588 SDValue Src1 = ShuffleSrc1;
2589 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VecT);
2590 if (Src1.getValueType() != VecT) {
2591 size_t LaneSize =
 2592 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
 2593 assert(LaneSize > DestLaneSize);
2594 Scale1 = LaneSize / DestLaneSize;
2595 Src1 = DAG.getBitcast(VecT, Src1);
2596 }
2597 if (Src2.getValueType() != VecT) {
2598 size_t LaneSize =
 2599 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
 2600 assert(LaneSize > DestLaneSize);
2601 Scale2 = LaneSize / DestLaneSize;
2602 Src2 = DAG.getBitcast(VecT, Src2);
2603 }
2604
2605 int Mask[16];
2606 assert(DestLaneCount <= 16);
2607 for (size_t I = 0; I < DestLaneCount; ++I) {
2608 const SDValue &Lane = Op->getOperand(I);
2609 SDValue Src = GetShuffleSrc(Lane);
2610 if (Src == ShuffleSrc1) {
2611 Mask[I] = Lane->getConstantOperandVal(1) * Scale1;
2612 } else if (Src && Src == ShuffleSrc2) {
2613 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(1) * Scale2;
2614 } else {
2615 Mask[I] = -1;
2616 }
2617 }
2618 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2619 Result = DAG.getVectorShuffle(VecT, DL, Src1, Src2, MaskRef);
2620 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2621 auto Src = GetShuffleSrc(Lane);
2622 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2623 };
2624 } else if (NumConstantLanes >= NumSplatLanes) {
2625 SmallVector<SDValue, 16> ConstLanes;
2626 for (const SDValue &Lane : Op->op_values()) {
2627 if (IsConstant(Lane)) {
2628 // Values may need to be fixed so that they will sign extend to be
2629 // within the expected range during ISel. Check whether the value is in
2630 // bounds based on the lane bit width and if it is out of bounds, lop
2631 // off the extra bits.
2632 uint64_t LaneBits = 128 / Lanes;
2633 if (auto *Const = dyn_cast<ConstantSDNode>(Lane.getNode())) {
2634 ConstLanes.push_back(DAG.getConstant(
2635 Const->getAPIntValue().trunc(LaneBits).getZExtValue(),
2636 SDLoc(Lane), LaneT));
2637 } else {
2638 ConstLanes.push_back(Lane);
2639 }
2640 } else if (LaneT.isFloatingPoint()) {
2641 ConstLanes.push_back(DAG.getConstantFP(0, DL, LaneT));
2642 } else {
2643 ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
2644 }
2645 }
2646 Result = DAG.getBuildVector(VecT, DL, ConstLanes);
2647 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2648 return IsConstant(Lane);
2649 };
2650 } else {
2651 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2652 if (NumSplatLanes == 1 && Op->getOperand(0) == SplatValue &&
2653 (DestLaneSize == 32 || DestLaneSize == 64)) {
2654 // Could be selected to load_zero.
2655 Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecT, SplatValue);
2656 } else {
2657 // Use a splat (which might be selected as a load splat)
2658 Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
2659 }
2660 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2661 return Lane == SplatValue;
2662 };
2663 }
2664
2665 assert(Result);
2666 assert(IsLaneConstructed);
2667
2668 // Add replace_lane instructions for any unhandled values
2669 for (size_t I = 0; I < Lanes; ++I) {
2670 const SDValue &Lane = Op->getOperand(I);
2671 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2672 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
2673 DAG.getConstant(I, DL, MVT::i32));
2674 }
2675
2676 return Result;
2677}
2678
2679SDValue
2680WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2681 SelectionDAG &DAG) const {
2682 SDLoc DL(Op);
2683 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op.getNode())->getMask();
2684 MVT VecType = Op.getOperand(0).getSimpleValueType();
2685 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2686 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2687
2688 // Space for two vector args and sixteen mask indices
2689 SDValue Ops[18];
2690 size_t OpIdx = 0;
2691 Ops[OpIdx++] = Op.getOperand(0);
2692 Ops[OpIdx++] = Op.getOperand(1);
2693
2694 // Expand mask indices to byte indices and materialize them as operands
2695 for (int M : Mask) {
2696 for (size_t J = 0; J < LaneBytes; ++J) {
 2697 // Lower undefs (represented by -1 in the mask) to {0..J}, which reuses a
 2698 // whole lane of the first vector input, allowing further reduction in the
 2699 // VM, e.g. matching an 8x16 byte shuffle to a cheaper 32x4 shuffle.
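      // E.g. (illustrative, LaneBytes == 4) mask element 1 of a 32x4 shuffle
      // expands to the byte indices 4, 5, 6, 7.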
2700 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2701 Ops[OpIdx++] = DAG.getConstant(ByteIndex, DL, MVT::i32);
2702 }
2703 }
2704
2705 return DAG.getNode(WebAssemblyISD::SHUFFLE, DL, Op.getValueType(), Ops);
2706}
2707
2708SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2709 SelectionDAG &DAG) const {
2710 SDLoc DL(Op);
2711 // The legalizer does not know how to expand the unsupported comparison modes
2712 // of i64x2 vectors, so we manually unroll them here.
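  // A sketch of the unrolling: (v2i64 (setcc $a, $b, cc)) becomes
  // (build_vector (select_cc a0, b0, -1, 0, cc),
  //               (select_cc a1, b1, -1, 0, cc)).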
2713 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
 2714 SmallVector<SDValue, 2> LHS, RHS;
 2715 DAG.ExtractVectorElements(Op->getOperand(0), LHS);
2716 DAG.ExtractVectorElements(Op->getOperand(1), RHS);
2717 const SDValue &CC = Op->getOperand(2);
2718 auto MakeLane = [&](unsigned I) {
2719 return DAG.getNode(ISD::SELECT_CC, DL, MVT::i64, LHS[I], RHS[I],
2720 DAG.getConstant(uint64_t(-1), DL, MVT::i64),
2721 DAG.getConstant(uint64_t(0), DL, MVT::i64), CC);
2722 };
2723 return DAG.getBuildVector(Op->getValueType(0), DL,
2724 {MakeLane(0), MakeLane(1)});
2725}
2726
2727SDValue
2728WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2729 SelectionDAG &DAG) const {
2730 // Allow constant lane indices, expand variable lane indices
2731 SDNode *IdxNode = Op.getOperand(Op.getNumOperands() - 1).getNode();
2732 if (isa<ConstantSDNode>(IdxNode)) {
2733 // Ensure the index type is i32 to match the tablegen patterns
2734 uint64_t Idx = IdxNode->getAsZExtVal();
2735 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2736 Ops[Op.getNumOperands() - 1] =
2737 DAG.getConstant(Idx, SDLoc(IdxNode), MVT::i32);
2738 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Ops);
2739 }
2740 // Perform default expansion
2741 return SDValue();
2742}
2743
2744static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
 2745 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2746 // 32-bit and 64-bit unrolled shifts will have proper semantics
2747 if (LaneT.bitsGE(MVT::i32))
2748 return DAG.UnrollVectorOp(Op.getNode());
2749 // Otherwise mask the shift value to get proper semantics from 32-bit shift
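  // E.g. (illustrative, LaneT == i8): each scalar shift amount is masked with
  // 7, so shifting by 9 behaves like shifting by 1, matching wasm's modular
  // shift-amount semantics.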
2750 SDLoc DL(Op);
2751 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2752 SDValue Mask = DAG.getConstant(LaneT.getSizeInBits() - 1, DL, MVT::i32);
2753 unsigned ShiftOpcode = Op.getOpcode();
2754 SmallVector<SDValue, 16> ShiftedElements;
2755 DAG.ExtractVectorElements(Op.getOperand(0), ShiftedElements, 0, 0, MVT::i32);
2756 SmallVector<SDValue, 16> ShiftElements;
2757 DAG.ExtractVectorElements(Op.getOperand(1), ShiftElements, 0, 0, MVT::i32);
2758 SmallVector<SDValue, 16> UnrolledOps;
2759 for (size_t i = 0; i < NumLanes; ++i) {
2760 SDValue MaskedShiftValue =
2761 DAG.getNode(ISD::AND, DL, MVT::i32, ShiftElements[i], Mask);
2762 SDValue ShiftedValue = ShiftedElements[i];
2763 if (ShiftOpcode == ISD::SRA)
2764 ShiftedValue = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32,
2765 ShiftedValue, DAG.getValueType(LaneT));
2766 UnrolledOps.push_back(
2767 DAG.getNode(ShiftOpcode, DL, MVT::i32, ShiftedValue, MaskedShiftValue));
2768 }
2769 return DAG.getBuildVector(Op.getValueType(), DL, UnrolledOps);
2770}
2771
2772SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2773 SelectionDAG &DAG) const {
2774 SDLoc DL(Op);
2775
2776 // Only manually lower vector shifts
2777 assert(Op.getSimpleValueType().isVector());
2778
2779 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2780 auto ShiftVal = Op.getOperand(1);
2781
 2782 // Try to skip the bitmask operation, since it is implied by the shift instruction
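  // E.g. (illustrative) (shl $v, (and (splat $x), 7)) on v16i8 can shift by
  // $x directly, because i8x16.shl already interprets the amount modulo 8.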
2783 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2784 if (MaskOp.getOpcode() != ISD::AND)
2785 return MaskOp;
2786 SDValue LHS = MaskOp.getOperand(0);
2787 SDValue RHS = MaskOp.getOperand(1);
2788 if (MaskOp.getValueType().isVector()) {
2789 APInt MaskVal;
2790 if (!ISD::isConstantSplatVector(RHS.getNode(), MaskVal))
2791 std::swap(LHS, RHS);
2792
2793 if (ISD::isConstantSplatVector(RHS.getNode(), MaskVal) &&
2794 MaskVal == MaskBits)
2795 MaskOp = LHS;
2796 } else {
2797 if (!isa<ConstantSDNode>(RHS.getNode()))
2798 std::swap(LHS, RHS);
2799
2800 auto ConstantRHS = dyn_cast<ConstantSDNode>(RHS.getNode());
2801 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2802 MaskOp = LHS;
2803 }
2804
2805 return MaskOp;
2806 };
2807
2808 // Skip vector and operation
2809 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2810 ShiftVal = DAG.getSplatValue(ShiftVal);
2811 if (!ShiftVal)
2812 return unrollVectorShift(Op, DAG);
2813
2814 // Skip scalar and operation
2815 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2816 // Use anyext because none of the high bits can affect the shift
2817 ShiftVal = DAG.getAnyExtOrTrunc(ShiftVal, DL, MVT::i32);
2818
2819 unsigned Opcode;
2820 switch (Op.getOpcode()) {
2821 case ISD::SHL:
2822 Opcode = WebAssemblyISD::VEC_SHL;
2823 break;
2824 case ISD::SRA:
2825 Opcode = WebAssemblyISD::VEC_SHR_S;
2826 break;
2827 case ISD::SRL:
2828 Opcode = WebAssemblyISD::VEC_SHR_U;
2829 break;
2830 default:
2831 llvm_unreachable("unexpected opcode");
2832 }
2833
2834 return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(0), ShiftVal);
2835}
2836
2837SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2838 SelectionDAG &DAG) const {
2839 EVT ResT = Op.getValueType();
2840 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
2841
2842 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2843 (SatVT == MVT::i32 || SatVT == MVT::i64))
2844 return Op;
2845
2846 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2847 return Op;
2848
2849 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2850 return Op;
2851
2852 return SDValue();
2853}
2854
2855//===----------------------------------------------------------------------===//
2856// Custom DAG combine hooks
2857//===----------------------------------------------------------------------===//
2858static SDValue
2859performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
 2860 auto &DAG = DCI.DAG;
2861 auto Shuffle = cast<ShuffleVectorSDNode>(N);
2862
2863 // Hoist vector bitcasts that don't change the number of lanes out of unary
2864 // shuffles, where they are less likely to get in the way of other combines.
2865 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2866 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2867 SDValue Bitcast = N->getOperand(0);
2868 if (Bitcast.getOpcode() != ISD::BITCAST)
2869 return SDValue();
2870 if (!N->getOperand(1).isUndef())
2871 return SDValue();
2872 SDValue CastOp = Bitcast.getOperand(0);
2873 EVT SrcType = CastOp.getValueType();
2874 EVT DstType = Bitcast.getValueType();
2875 if (!SrcType.is128BitVector() ||
2876 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2877 return SDValue();
2878 SDValue NewShuffle = DAG.getVectorShuffle(
2879 SrcType, SDLoc(N), CastOp, DAG.getUNDEF(SrcType), Shuffle->getMask());
2880 return DAG.getBitcast(DstType, NewShuffle);
2881}
2882
2883/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2884/// split up into scalar instructions during legalization, and the vector
2885/// extending instructions are selected in performVectorExtendCombine below.
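/// For example (an illustrative instance): (v4f32 (sint_to_fp (v4i8 $x)))
/// becomes (v4f32 (sint_to_fp (v4i32 (sign_extend $x)))), letting the vector
/// extend be selected as SIMD extending instructions rather than scalarizing
/// the conversion.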
2886static SDValue
2887performVectorExtendToFPCombine(SDNode *N,
 2888 TargetLowering::DAGCombinerInfo &DCI) {
 2889 auto &DAG = DCI.DAG;
2890 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2891 N->getOpcode() == ISD::SINT_TO_FP);
2892
2893 EVT InVT = N->getOperand(0)->getValueType(0);
2894 EVT ResVT = N->getValueType(0);
2895 MVT ExtVT;
2896 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2897 ExtVT = MVT::v4i32;
2898 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2899 ExtVT = MVT::v2i32;
2900 else
2901 return SDValue();
2902
2903 unsigned Op =
 2904 N->getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
 2905 SDValue Conv = DAG.getNode(Op, SDLoc(N), ExtVT, N->getOperand(0));
2906 return DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, Conv);
2907}
2908
2909static SDValue
2910performVectorNonNegToFPCombine(SDNode *N,
 2911 TargetLowering::DAGCombinerInfo &DCI) {
 2912 auto &DAG = DCI.DAG;
2913
2914 SDNodeFlags Flags = N->getFlags();
2915 SDValue Op0 = N->getOperand(0);
2916 EVT VT = N->getValueType(0);
2917
2918 // Optimize uitofp to sitofp when the sign bit is known to be zero.
2919 // Depending on the target (runtime) backend, this might be performance
2920 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
2921 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op0))) {
2922 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
2923 }
2924
2925 return SDValue();
2926}
2927
2928static SDValue
2929performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
 2930 auto &DAG = DCI.DAG;
2931 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2932 N->getOpcode() == ISD::ZERO_EXTEND);
2933
2934 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2935 // possible before the extract_subvector can be expanded.
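  // E.g. (illustrative) (v8i16 (zero_extend (v8i8 (extract_subvector
  // (v16i8 $x), 8)))) becomes (v8i16 (extend_high_i8x16_u $x)).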
2936 auto Extract = N->getOperand(0);
2937 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2938 return SDValue();
2939 auto Source = Extract.getOperand(0);
2940 auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
2941 if (IndexNode == nullptr)
2942 return SDValue();
2943 auto Index = IndexNode->getZExtValue();
2944
2945 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2946 // extracted subvector is the low or high half of its source.
2947 EVT ResVT = N->getValueType(0);
2948 if (ResVT == MVT::v8i16) {
2949 if (Extract.getValueType() != MVT::v8i8 ||
2950 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2951 return SDValue();
2952 } else if (ResVT == MVT::v4i32) {
2953 if (Extract.getValueType() != MVT::v4i16 ||
2954 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2955 return SDValue();
2956 } else if (ResVT == MVT::v2i64) {
2957 if (Extract.getValueType() != MVT::v2i32 ||
2958 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2959 return SDValue();
2960 } else {
2961 return SDValue();
2962 }
2963
2964 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2965 bool IsLow = Index == 0;
2966
2967 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2968 : WebAssemblyISD::EXTEND_HIGH_S)
2969 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2970 : WebAssemblyISD::EXTEND_HIGH_U);
2971
2972 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
2973}
2974
2975static SDValue
2976performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
 2977 auto &DAG = DCI.DAG;
2978
2979 auto GetWasmConversionOp = [](unsigned Op) {
2980 switch (Op) {
 2981 case ISD::FP_TO_SINT_SAT:
 2982 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
 2983 case ISD::FP_TO_UINT_SAT:
 2984 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2985 case ISD::FP_ROUND:
2986 return WebAssemblyISD::DEMOTE_ZERO;
2987 }
2988 llvm_unreachable("unexpected op");
2989 };
2990
2991 auto IsZeroSplat = [](SDValue SplatVal) {
2992 auto *Splat = dyn_cast<BuildVectorSDNode>(SplatVal.getNode());
2993 APInt SplatValue, SplatUndef;
2994 unsigned SplatBitSize;
2995 bool HasAnyUndefs;
2996 // Endianness doesn't matter in this context because we are looking for
2997 // an all-zero value.
2998 return Splat &&
2999 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3000 HasAnyUndefs) &&
3001 SplatValue == 0;
3002 };
3003
3004 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3005 // Combine this:
3006 //
3007 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3008 //
3009 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3010 //
3011 // Or this:
3012 //
3013 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3014 //
3015 // into (f32x4.demote_zero_f64x2 $x).
3016 EVT ResVT;
3017 EVT ExpectedConversionType;
3018 auto Conversion = N->getOperand(0);
3019 auto ConversionOp = Conversion.getOpcode();
3020 switch (ConversionOp) {
 3021 case ISD::FP_TO_SINT_SAT:
 3022 case ISD::FP_TO_UINT_SAT:
 3023 ResVT = MVT::v4i32;
3024 ExpectedConversionType = MVT::v2i32;
3025 break;
3026 case ISD::FP_ROUND:
3027 ResVT = MVT::v4f32;
3028 ExpectedConversionType = MVT::v2f32;
3029 break;
3030 default:
3031 return SDValue();
3032 }
3033
3034 if (N->getValueType(0) != ResVT)
3035 return SDValue();
3036
3037 if (Conversion.getValueType() != ExpectedConversionType)
3038 return SDValue();
3039
3040 auto Source = Conversion.getOperand(0);
3041 if (Source.getValueType() != MVT::v2f64)
3042 return SDValue();
3043
3044 if (!IsZeroSplat(N->getOperand(1)) ||
3045 N->getOperand(1).getValueType() != ExpectedConversionType)
3046 return SDValue();
3047
3048 unsigned Op = GetWasmConversionOp(ConversionOp);
3049 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3050 }
3051
3052 // Combine this:
3053 //
3054 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3055 //
3056 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3057 //
3058 // Or this:
3059 //
3060 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3061 //
3062 // into (f32x4.demote_zero_f64x2 $x).
3063 EVT ResVT;
3064 auto ConversionOp = N->getOpcode();
3065 switch (ConversionOp) {
 3066 case ISD::FP_TO_SINT_SAT:
 3067 case ISD::FP_TO_UINT_SAT:
 3068 ResVT = MVT::v4i32;
3069 break;
3070 case ISD::FP_ROUND:
3071 ResVT = MVT::v4f32;
3072 break;
3073 default:
3074 llvm_unreachable("unexpected op");
3075 }
3076
3077 if (N->getValueType(0) != ResVT)
3078 return SDValue();
3079
3080 auto Concat = N->getOperand(0);
3081 if (Concat.getValueType() != MVT::v4f64)
3082 return SDValue();
3083
3084 auto Source = Concat.getOperand(0);
3085 if (Source.getValueType() != MVT::v2f64)
3086 return SDValue();
3087
3088 if (!IsZeroSplat(Concat.getOperand(1)) ||
3089 Concat.getOperand(1).getValueType() != MVT::v2f64)
3090 return SDValue();
3091
3092 unsigned Op = GetWasmConversionOp(ConversionOp);
3093 return DAG.getNode(Op, SDLoc(N), ResVT, Source);
3094}
3095
3096// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3097static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3098 const SDLoc &DL, unsigned VectorWidth) {
3099 EVT VT = Vec.getValueType();
3100 EVT ElVT = VT.getVectorElementType();
3101 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3102 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
3103 VT.getVectorNumElements() / Factor);
3104
3105 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3106 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3107 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3108
3109 // This is the index of the first element of the VectorWidth-bit chunk
 3110 // we want. Since ElemsPerChunk is a power of 2, we just need to clear bits.
3111 IdxVal &= ~(ElemsPerChunk - 1);
3112
3113 // If the input is a buildvector just emit a smaller one.
3114 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3115 return DAG.getBuildVector(ResultVT, DL,
3116 Vec->ops().slice(IdxVal, ElemsPerChunk));
3117
3118 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, DL);
3119 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResultVT, Vec, VecIdx);
3120}
3121
3122// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3123// is the expected destination value type after recursion. In is the initial
3124// input. Note that the input should have enough leading zero bits to prevent
3125// NARROW_U from saturating results.
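// For example (an illustrative case): truncating v8i32 -> v8i16 splits the
// 256-bit input into two v4i32 halves and narrows them with a single
// i16x8.narrow_i32x4_u, relying on the masked-off high bits to avoid
// saturation.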
3126static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
 3127 SelectionDAG &DAG) {
3128 EVT SrcVT = In.getValueType();
3129
3130 // No truncation required, we might get here due to recursive calls.
3131 if (SrcVT == DstVT)
3132 return In;
3133
3134 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3135 unsigned NumElems = SrcVT.getVectorNumElements();
3136 if (!isPowerOf2_32(NumElems))
3137 return SDValue();
3138 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3139 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3140
3141 LLVMContext &Ctx = *DAG.getContext();
3142 EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);
3143
3144 // Narrow to the largest type possible:
3145 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3146 EVT InVT = MVT::i16, OutVT = MVT::i8;
3147 if (SrcVT.getScalarSizeInBits() > 16) {
3148 InVT = MVT::i32;
3149 OutVT = MVT::i16;
3150 }
3151 unsigned SubSizeInBits = SrcSizeInBits / 2;
3152 InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
3153 OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());
3154
3155 // Split lower/upper subvectors.
3156 SDValue Lo = extractSubVector(In, 0, DAG, DL, SubSizeInBits);
3157 SDValue Hi = extractSubVector(In, NumElems / 2, DAG, DL, SubSizeInBits);
3158
3159 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3160 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3161 Lo = DAG.getBitcast(InVT, Lo);
3162 Hi = DAG.getBitcast(InVT, Hi);
3163 SDValue Res = DAG.getNode(WebAssemblyISD::NARROW_U, DL, OutVT, Lo, Hi);
3164 return DAG.getBitcast(DstVT, Res);
3165 }
3166
3167 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3168 EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
3169 Lo = truncateVectorWithNARROW(PackedVT, Lo, DL, DAG);
3170 Hi = truncateVectorWithNARROW(PackedVT, Hi, DL, DAG);
3171
3172 PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
3173 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
3174 return truncateVectorWithNARROW(DstVT, Res, DL, DAG);
3175}
3176
3177static SDValue performTruncateCombine(SDNode *N,
 3178 TargetLowering::DAGCombinerInfo &DCI) {
 3179 auto &DAG = DCI.DAG;
3180
3181 SDValue In = N->getOperand(0);
3182 EVT InVT = In.getValueType();
3183 if (!InVT.isSimple())
3184 return SDValue();
3185
3186 EVT OutVT = N->getValueType(0);
3187 if (!OutVT.isVector())
3188 return SDValue();
3189
3190 EVT OutSVT = OutVT.getVectorElementType();
3191 EVT InSVT = InVT.getVectorElementType();
3192 // Currently only cover truncate to v16i8 or v8i16.
3193 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3194 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3195 return SDValue();
3196
3197 SDLoc DL(N);
 3198 APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(),
 3199 OutVT.getScalarSizeInBits());
3200 In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT));
3201 return truncateVectorWithNARROW(OutVT, In, DL, DAG);
3202}
3203
3204static SDValue performBitcastCombine(SDNode *N,
 3205 TargetLowering::DAGCombinerInfo &DCI) {
 3206 using namespace llvm::SDPatternMatch;
3207 auto &DAG = DCI.DAG;
3208 SDLoc DL(N);
3209 SDValue Src = N->getOperand(0);
3210 EVT VT = N->getValueType(0);
3211 EVT SrcVT = Src.getValueType();
3212
3213 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3214 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3215 return SDValue();
3216
3217 unsigned NumElts = SrcVT.getVectorNumElements();
3218 EVT Width = MVT::getIntegerVT(128 / NumElts);
3219
3220 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3221 // ==> bitmask
3222 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3223 return DAG.getZExtOrTrunc(
3224 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3225 {DAG.getConstant(Intrinsic::wasm_bitmask, DL, MVT::i32),
3226 DAG.getSExtOrTrunc(N->getOperand(0), DL,
3227 SrcVT.changeVectorElementType(Width))}),
3228 DL, VT);
3229 }
3230
3231 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3232 if (NumElts == 32 || NumElts == 64) {
 3233 // Strategy: setcc each 16-lane chunk separately as v16i8 -> v16i1,
 3234 // bitcast each result to i16 and extend it to either i32 or i64, then
 3235 // accumulate them, shifting left by 16 each time.
3236 SDValue Concat, SetCCVector;
3237 ISD::CondCode SetCond;
3238
3239 if (!sd_match(N, m_BitCast(m_c_SetCC(m_Value(Concat), m_Value(SetCCVector),
3240 m_CondCode(SetCond)))))
3241 return SDValue();
3242 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3243 return SDValue();
3244
3245 uint64_t ElementWidth =
 3246 SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
 3247
3248 SmallVector<SDValue> VectorsToShuffle;
3249 for (size_t I = 0; I < Concat->ops().size(); I++) {
3250 VectorsToShuffle.push_back(DAG.getBitcast(
3251 MVT::i16,
3252 DAG.getSetCC(DL, MVT::v16i1, Concat->ops()[I],
3253 extractSubVector(SetCCVector, I * (128 / ElementWidth),
3254 DAG, DL, 128),
3255 SetCond)));
3256 }
3257
3258 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3259 SDValue ReturningInteger = DAG.getConstant(0, DL, ReturnType);
3260
3261 for (SDValue V : VectorsToShuffle) {
3262 ReturningInteger = DAG.getNode(
3263 ISD::SHL, DL, ReturnType,
3264 {DAG.getShiftAmountConstant(16, ReturnType, DL), ReturningInteger});
3265
3266 SDValue ExtendedV = DAG.getZExtOrTrunc(V, DL, ReturnType);
3267 ReturningInteger =
3268 DAG.getNode(ISD::ADD, DL, ReturnType, {ReturningInteger, ExtendedV});
3269 }
3270
3271 return ReturningInteger;
3272 }
3273
3274 return SDValue();
3275}
3276
3277static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
 3278 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3279 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3280 // any_true (setcc <X>, 0, ne) => (any_true X)
3281 // all_true (setcc <X>, 0, ne) => (all_true X)
3282 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3283 using namespace llvm::SDPatternMatch;
3284
3285 SDValue LHS;
3286 if (N->getNumOperands() < 2 ||
3287 !sd_match(N->getOperand(1),
 3288 m_c_SetCC(m_Value(LHS), m_Zero(), m_CondCode())))
 3289 return SDValue();
3290 EVT LT = LHS.getValueType();
3291 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3292 return SDValue();
3293
3294 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3295 ISD::CondCode SetType,
3296 Intrinsic::WASMIntrinsics InPost) {
3297 if (N->getConstantOperandVal(0) != InPre)
3298 return SDValue();
3299
3300 SDValue LHS;
3301 if (!sd_match(N->getOperand(1), m_c_SetCC(m_Value(LHS), m_Zero(),
3302 m_SpecificCondCode(SetType))))
3303 return SDValue();
3304
3305 SDLoc DL(N);
3306 SDValue Ret = DAG.getZExtOrTrunc(
3307 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3308 {DAG.getConstant(InPost, DL, MVT::i32), LHS}),
3309 DL, MVT::i1);
3310 if (SetType == ISD::SETEQ)
3311 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3312 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3313 };
3314
3315 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3316 Intrinsic::wasm_alltrue))
3317 return AnyTrueEQ;
3318 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3319 Intrinsic::wasm_anytrue))
3320 return AllTrueEQ;
3321 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3322 Intrinsic::wasm_anytrue))
3323 return AnyTrueNE;
3324 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3325 Intrinsic::wasm_alltrue))
3326 return AllTrueNE;
3327
3328 return SDValue();
3329}
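// Example of the rewrite above (illustrative): a guard such as
//   @llvm.wasm.anytrue(icmp eq <4 x i32> %v, zeroinitializer)
// asks "is any lane zero?", i.e. "not all lanes are nonzero", so it becomes
//   xor (@llvm.wasm.alltrue %v), 1
// and the vector compare against zero disappears entirely.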
3330
3331template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3332 Intrinsic::ID Intrin>
3333static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3334 SDValue LHS = N->getOperand(0);
3335 SDValue RHS = N->getOperand(1);
3336 SDValue Cond = N->getOperand(2);
3337 if (MatchCond != cast<CondCodeSDNode>(Cond)->get())
3338 return SDValue();
3339
3340 if (MatchRHS != cast<ConstantSDNode>(RHS)->getSExtValue())
3341 return SDValue();
3342
3343 SDLoc DL(N);
3344 SDValue Ret = DAG.getZExtOrTrunc(
3345 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3346 {DAG.getConstant(Intrin, DL, MVT::i32),
3347 DAG.getSExtOrTrunc(LHS->getOperand(0), DL, VecVT)}),
3348 DL, MVT::i1);
3349 if (RequiresNegate)
3350 Ret = DAG.getNOT(DL, Ret, MVT::i1);
3351 return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
3352}
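// For instance (illustrative): instantiated as
//   TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>
// this helper rewrites (setcc (iN (bitcast (vNi1 X))), -1, eq) into a single
// all_true over X sign-extended to the 128-bit vector type VecVT.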
3353
3354/// Try to convert an i128 comparison to a v16i8 comparison before type
3355/// legalization splits it up into chunks
3356static SDValue
3357combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3358 const WebAssemblySubtarget *Subtarget) {
3359
3360 SDLoc DL(N);
3361 SDValue X = N->getOperand(0);
3362 SDValue Y = N->getOperand(1);
3363 EVT VT = N->getValueType(0);
3364 EVT OpVT = X.getValueType();
3365
3366 SelectionDAG &DAG = DCI.DAG;
3367 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
3368 Attribute::NoImplicitFloat))
3369 return SDValue();
3370
3371 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
3372 // We're looking for an oversized integer equality comparison with SIMD
3373 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3374 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(CC))
3375 return SDValue();
3376
3377 // Don't perform this combine if constructing the vector will be expensive.
3378 auto IsVectorBitCastCheap = [](SDValue X) {
3379 X = peekThroughBitcasts(X);
3380 return isa<ConstantSDNode>(X) || X.getOpcode() == ISD::LOAD;
3381 };
3382
3383 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3384 return SDValue();
3385
3386 SDValue VecX = DAG.getBitcast(MVT::v16i8, X);
3387 SDValue VecY = DAG.getBitcast(MVT::v16i8, Y);
3388 SDValue Cmp = DAG.getSetCC(DL, MVT::v16i8, VecX, VecY, CC);
3389
3390 SDValue Intr =
3391 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
3392 {DAG.getConstant(CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3393 : Intrinsic::wasm_anytrue,
3394 DL, MVT::i32),
3395 Cmp});
3396
3397 return DAG.getSetCC(DL, VT, Intr, DAG.getConstant(0, DL, MVT::i32),
3398 ISD::SETNE);
3399}
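// Rough shape of the output (illustrative): for (icmp eq i128 %x, %y) where
// both operands are loads or constants, the scalar compare becomes
//   %eq  = i8x16.eq (v128 %x), (v128 %y)
//   %res = i8x16.all_true %eq
// while an i128 SETNE instead emits i8x16.ne followed by i8x16.any_true.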
3400
3401static SDValue performSETCCCombine(SDNode *N,
3402 TargetLowering::DAGCombinerInfo &DCI,
3403 const WebAssemblySubtarget *Subtarget) {
3404 if (!DCI.isBeforeLegalize())
3405 return SDValue();
3406
3407 EVT VT = N->getValueType(0);
3408 if (!VT.isScalarInteger())
3409 return SDValue();
3410
3411 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3412 return V;
3413
3414 SDValue LHS = N->getOperand(0);
3415 if (LHS->getOpcode() != ISD::BITCAST)
3416 return SDValue();
3417
3418 EVT FromVT = LHS->getOperand(0).getValueType();
3419 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3420 return SDValue();
3421
3422 unsigned NumElts = FromVT.getVectorNumElements();
3423 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3424 return SDValue();
3425
3426 if (!isa<ConstantSDNode>(N->getOperand(1)))
3427 return SDValue();
3428
3429 EVT VecVT = FromVT.changeVectorElementType(MVT::getIntegerVT(128 / NumElts));
3430 auto &DAG = DCI.DAG;
3431 // setcc (iN (bitcast (vNi1 X))), 0, ne
3432 // ==> any_true (vNi1 X)
3433 if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3434 N, VecVT, DAG)) {
3435 return Match;
3436 }
3437 // setcc (iN (bitcast (vNi1 X))), 0, eq
3438 // ==> xor (any_true (vNi1 X)), -1
3439 if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3440 N, VecVT, DAG)) {
3441 return Match;
3442 }
3443 // setcc (iN (bitcast (vNi1 X))), -1, eq
3444 // ==> all_true (vNi1 X)
3445 if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3446 N, VecVT, DAG)) {
3447 return Match;
3448 }
3449 // setcc (iN (bitcast (vNi1 X))), -1, ne
3450 // ==> xor (all_true (vNi1 X)), -1
3451 if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3452 N, VecVT, DAG)) {
3453 return Match;
3454 }
3455 return SDValue();
3456}
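// Putting it together (illustrative): a scalar test of a mask such as
//   %m = bitcast <16 x i1> %v to i16 ; icmp ne i16 %m, 0
// folds to one i8x16.any_true over the sign-extended vector instead of
// materializing the mask with i8x16.bitmask and comparing it to zero.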
3457
3458static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3459 EVT VT = N->getValueType(0);
3460 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3461 return SDValue();
3462
3463 // Mul with extending inputs.
3464 SDValue LHS = N->getOperand(0);
3465 SDValue RHS = N->getOperand(1);
3466 if (LHS.getOpcode() != RHS.getOpcode())
3467 return SDValue();
3468
3469 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3470 LHS.getOpcode() != ISD::ZERO_EXTEND)
3471 return SDValue();
3472
3473 if (LHS->getOperand(0).getValueType() != RHS->getOperand(0).getValueType())
3474 return SDValue();
3475
3476 EVT FromVT = LHS->getOperand(0).getValueType();
3477 EVT EltTy = FromVT.getVectorElementType();
3478 if (EltTy != MVT::i8)
3479 return SDValue();
3480
3481 // For an input DAG that looks like this
3482 // %a = input_type
3483 // %b = input_type
3484 // %lhs = extend %a to output_type
3485 // %rhs = extend %b to output_type
3486 // %mul = mul %lhs, %rhs
3487
3488 // input_type | output_type | instructions (trailing '_' is '_s' or '_u')
3489 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3490 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3491 // | | %low_low = i32x4.ext_low_i16x8_ %low
3492 // | | %low_high = i32x4.ext_high_i16x8_ %low
3493 // | | %high_low = i32x4.ext_low_i16x8_ %high
3494 // | | %high_high = i32x4.ext_high_i16x8_ %high
3495 // | | %res = concat_vector(...)
3496 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3497 // | | %low_low = i32x4.ext_low_i16x8_ %low
3498 // | | %low_high = i32x4.ext_high_i16x8_ %low
3499 // | | %res = concat_vector(%low_low, %low_high)
3500
3501 SDLoc DL(N);
3502 unsigned NumElts = VT.getVectorNumElements();
3503 SDValue ExtendInLHS = LHS->getOperand(0);
3504 SDValue ExtendInRHS = RHS->getOperand(0);
3505 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3506 unsigned ExtendLowOpc =
3507 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3508 unsigned ExtendHighOpc =
3509 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3510
3511 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3512 return DAG.getNode(ExtendLowOpc, DL, VT, Op);
3513 };
3514 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3515 return DAG.getNode(ExtendHighOpc, DL, VT, Op);
3516 };
3517
3518 if (NumElts == 16) {
3519 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3520 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3521 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3522 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3523 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3524 SDValue MulHigh = DAG.getNode(ISD::MUL, DL, MVT::v8i16, HighLHS, HighRHS);
3525 SDValue SubVectors[] = {
3526 GetExtendLow(MVT::v4i32, MulLow),
3527 GetExtendHigh(MVT::v4i32, MulLow),
3528 GetExtendLow(MVT::v4i32, MulHigh),
3529 GetExtendHigh(MVT::v4i32, MulHigh),
3530 };
3531 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubVectors);
3532 } else {
3533 assert(NumElts == 8);
3534 SDValue LowLHS = DAG.getNode(LHS->getOpcode(), DL, MVT::v8i16, ExtendInLHS);
3535 SDValue LowRHS = DAG.getNode(RHS->getOpcode(), DL, MVT::v8i16, ExtendInRHS);
3536 SDValue MulLow = DAG.getNode(ISD::MUL, DL, MVT::v8i16, LowLHS, LowRHS);
3537 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3538 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3539 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
3540 }
3541 return SDValue();
3542}
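// Usage sketch (illustrative): mul (zext <16 x i8> %a to <16 x i32>),
//                                  (zext <16 x i8> %b to <16 x i32>)
// follows the v16i8 row of the table above: one extmul_low/extmul_high pair
// in i16x8, four i32x4 extends of the two products, and a final concat.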
3543
3544static SDValue performMulCombine(SDNode *N,
3545 TargetLowering::DAGCombinerInfo &DCI) {
3546 assert(N->getOpcode() == ISD::MUL);
3547 EVT VT = N->getValueType(0);
3548 if (!VT.isVector())
3549 return SDValue();
3550
3551 if (auto Res = TryWideExtMulCombine(N, DCI.DAG))
3552 return Res;
3553
3554 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3555 // extend them to v8i16. Only do this before legalization in case a narrow
3556 // vector is widened and may be simplified later.
3557 if (!DCI.isBeforeLegalize() || (VT != MVT::v8i8 && VT != MVT::v16i8))
3558 return SDValue();
3559
3560 SDLoc DL(N);
3561 SelectionDAG &DAG = DCI.DAG;
3562 SDValue LHS = N->getOperand(0);
3563 SDValue RHS = N->getOperand(1);
3564 EVT MulVT = MVT::v8i16;
3565
3566 if (VT == MVT::v8i8) {
3567 SDValue PromotedLHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, LHS,
3568 DAG.getUNDEF(MVT::v8i8));
3569 SDValue PromotedRHS = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, RHS,
3570 DAG.getUNDEF(MVT::v8i8));
3571 SDValue LowLHS =
3572 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedLHS);
3573 SDValue LowRHS =
3574 DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, PromotedRHS);
3575 SDValue MulLow = DAG.getBitcast(
3576 MVT::v16i8, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3577 // Take the low byte of each lane.
3578 SDValue Shuffle = DAG.getVectorShuffle(
3579 MVT::v16i8, DL, MulLow, DAG.getUNDEF(MVT::v16i8),
3580 {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3581 return extractSubVector(Shuffle, 0, DAG, DL, 64);
3582 } else {
3583 assert(VT == MVT::v16i8 && "Expected v16i8");
3584 SDValue LowLHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, LHS);
3585 SDValue LowRHS = DAG.getNode(WebAssemblyISD::EXTEND_LOW_U, DL, MulVT, RHS);
3586 SDValue HighLHS =
3587 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, LHS);
3588 SDValue HighRHS =
3589 DAG.getNode(WebAssemblyISD::EXTEND_HIGH_U, DL, MulVT, RHS);
3590
3591 SDValue MulLow =
3592 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, LowLHS, LowRHS));
3593 SDValue MulHigh =
3594 DAG.getBitcast(VT, DAG.getNode(ISD::MUL, DL, MulVT, HighLHS, HighRHS));
3595
3596 // Take the low byte of each lane.
3597 return DAG.getVectorShuffle(
3598 VT, DL, MulLow, MulHigh,
3599 {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3600 }
3601}
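// Illustrative example: for (mul <16 x i8> %a, %b) the combine emits
//   %lo = i16x8.mul (extend_low_u %a), (extend_low_u %b)
//   %hi = i16x8.mul (extend_high_u %a), (extend_high_u %b)
// and shuffles the low byte of every i16 lane back into one v16i8. Using the
// unsigned extends is safe because only the low 8 bits of each product survive.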
3602
3603SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3604 SelectionDAG &DAG) {
3605 SDLoc DL(In);
3606 LLVMContext &Ctx = *DAG.getContext();
3607 EVT InVT = In.getValueType();
3608 unsigned NumElems = InVT.getVectorNumElements() * 2;
3609 EVT OutVT = EVT::getVectorVT(Ctx, InVT.getVectorElementType(), NumElems);
3610 SDValue Concat =
3611 DAG.getNode(ISD::CONCAT_VECTORS, DL, OutVT, In, DAG.getPOISON(InVT));
3612 if (NumElems < RequiredNumElems) {
3613 return DoubleVectorWidth(Concat, RequiredNumElems, DAG);
3614 }
3615 return Concat;
3616}
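// For example (illustrative): DoubleVectorWidth(v4i32 %v, 16, DAG) pads with
// poison twice, growing v4i32 -> v8i32 -> v16i32, so that a value with too
// few lanes gains enough elements for the 128-bit NARROW nodes used below.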
3617
3618SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3619 EVT OutVT = N->getValueType(0);
3620 if (!OutVT.isVector())
3621 return SDValue();
3622
3623 EVT OutElTy = OutVT.getVectorElementType();
3624 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3625 return SDValue();
3626
3627 unsigned NumElems = OutVT.getVectorNumElements();
3628 if (!isPowerOf2_32(NumElems))
3629 return SDValue();
3630
3631 EVT FPVT = N->getOperand(0)->getValueType(0);
3632 if (FPVT.getVectorElementType() != MVT::f32)
3633 return SDValue();
3634
3635 SDLoc DL(N);
3636
3637 // First, convert to i32.
3638 LLVMContext &Ctx = *DAG.getContext();
3639 EVT IntVT = EVT::getVectorVT(Ctx, MVT::i32, NumElems);
3640 SDValue ToInt = DAG.getNode(N->getOpcode(), DL, IntVT, N->getOperand(0));
3641 APInt Mask = APInt::getLowBitsSet(IntVT.getScalarSizeInBits(),
3642 OutVT.getScalarSizeInBits());
3643 // Mask out the upper bits, keeping only OutVT's low bits in each lane.
3644 SDValue Masked =
3645 DAG.getNode(ISD::AND, DL, IntVT, ToInt, DAG.getConstant(Mask, DL, IntVT));
3646
3647 if (OutVT.getSizeInBits() < 128) {
3648 // Create a wide enough vector that we can use narrow.
3649 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3650 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3651 SDValue WideVector = DoubleVectorWidth(Masked, NumRequiredElems, DAG);
3652 SDValue Trunc = truncateVectorWithNARROW(NarrowedVT, WideVector, DL, DAG);
3653 return DAG.getBitcast(
3654 OutVT, extractSubVector(Trunc, 0, DAG, DL, OutVT.getSizeInBits()));
3655 } else {
3656 return truncateVectorWithNARROW(OutVT, Masked, DL, DAG);
3657 }
3658 return SDValue();
3659}
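// End-to-end sketch (illustrative): (fptoui <4 x float> %f to <4 x i8>)
// becomes, roughly,
//   %i = fptoui %f to <4 x i32>          ;; i32x4.trunc_sat_f32x4_u
//   %m = and %i, splat(i32 255)          ;; keep the low 8 bits per lane
// after which %m is widened (DoubleVectorWidth above), narrowed through
// i16x8.narrow/i8x16.narrow, and the low 32 bits are extracted as <4 x i8>.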
3660
3661SDValue
3662WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3663 DAGCombinerInfo &DCI) const {
3664 switch (N->getOpcode()) {
3665 default:
3666 return SDValue();
3667 case ISD::BITCAST:
3668 return performBitcastCombine(N, DCI);
3669 case ISD::SETCC:
3670 return performSETCCCombine(N, DCI, Subtarget);
3671 case ISD::VECTOR_SHUFFLE:
3672 return performVECTOR_SHUFFLECombine(N, DCI);
3673 case ISD::SIGN_EXTEND:
3674 case ISD::ZERO_EXTEND:
3675 return performVectorExtendCombine(N, DCI);
3676 case ISD::UINT_TO_FP:
3677 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
3678 return ExtCombine;
3679 return performVectorNonNegToFPCombine(N, DCI);
3680 case ISD::SINT_TO_FP:
3681 return performVectorExtendToFPCombine(N, DCI);
3682 case ISD::FP_TO_SINT_SAT:
3683 case ISD::FP_TO_UINT_SAT:
3684 case ISD::FP_ROUND:
3685 case ISD::CONCAT_VECTORS:
3686 return performVectorTruncZeroCombine(N, DCI);
3687 case ISD::FP_TO_SINT:
3688 case ISD::FP_TO_UINT:
3689 return performConvertFPCombine(N, DCI.DAG);
3690 case ISD::TRUNCATE:
3691 return performTruncateCombine(N, DCI);
3692 case ISD::INTRINSIC_WO_CHAIN:
3693 return performAnyAllCombine(N, DCI.DAG);
3694 case ISD::MUL:
3695 return performMulCombine(N, DCI);
3696 }
3697}