//===-- WebAssemblyTargetTransformInfo.cpp - WebAssembly-specific TTI -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file defines the WebAssembly-specific TargetTransformInfo
/// implementation.
///
//===----------------------------------------------------------------------===//

#include "WebAssemblyTargetTransformInfo.h"

#include "llvm/CodeGen/CostTable.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"

using namespace llvm;

#define DEBUG_TYPE "wasmtti"

TTI::PopcntSupportKind
WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
  return TargetTransformInfo::PSK_FastHardware;
}

unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
  unsigned Result = BaseT::getNumberOfRegisters(ClassID);

  // For SIMD, use at least 16 registers, as a rough guess.
  bool Vector = (ClassID == 1);
  if (Vector)
    Result = std::max(Result, 16u);

  return Result;
}

TypeSize WebAssemblyTTIImpl::getRegisterBitWidth(
    TargetTransformInfo::RegisterKind K) const {
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(64);
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(getST()->hasSIMD128() ? 128 : 64);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost WebAssemblyTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
    TTI::OperandValueInfo Op1Info, TTI::OperandValueInfo Op2Info,
    ArrayRef<const Value *> Args, const Instruction *CxtI) const {

  InstructionCost Cost =
      BasicTTIImplBase<WebAssemblyTTIImpl>::getArithmeticInstrCost(
          Opcode, Ty, CostKind, Op1Info, Op2Info);

  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
    switch (Opcode) {
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::Shl:
      // SIMD128's shifts currently only accept a scalar shift count. For each
      // element, we'll need to extract, op, insert. The following is a rough
      // approximation.
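      // (Illustrative example: a non-uniform shift of a <4 x i32> is costed
      // below as 4 * (extract + scalar shift + insert).)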
      if (!Op2Info.isUniform())
        Cost =
            cast<FixedVectorType>(VTy)->getNumElements() *
            (TargetTransformInfo::TCC_Basic +
             getArithmeticInstrCost(Opcode, VTy->getElementType(), CostKind) +
             TargetTransformInfo::TCC_Basic);
      break;
    }
  }
  return Cost;
}

InstructionCost WebAssemblyTTIImpl::getCastInstrCost(
    unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH,
    TTI::TargetCostKind CostKind, const Instruction *I) const {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  auto SrcTy = TLI->getValueType(DL, Src);
  auto DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  if (!ST->hasSIMD128()) {
    return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
  }

  auto DstVT = DstTy.getSimpleVT();
  auto SrcVT = SrcTy.getSimpleVT();

  if (I && I->hasOneUser()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    int UserISD = TLI->InstructionOpcodeToISD(SingleUser->getOpcode());

    // extmul_low support
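    // (For example, a sign-extend from <8 x i8> to <8 x i16> whose only use
    // is a multiply can select to i16x8.extmul_low_i8x16_s, so the extension
    // itself adds no cost.)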
    if (UserISD == ISD::MUL &&
        (ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND)) {
      // Free low extensions.
      if ((SrcVT == MVT::v8i8 && DstVT == MVT::v8i16) ||
          (SrcVT == MVT::v4i16 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i32 && DstVT == MVT::v2i64)) {
        return 0;
      }
      // Will require an additional extlow operation for the intermediate
      // i16/i32 value.
      if ((SrcVT == MVT::v4i8 && DstVT == MVT::v4i32) ||
          (SrcVT == MVT::v2i16 && DstVT == MVT::v2i64)) {
        return 1;
      }
    }
  }

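  // Each entry below is {ISD opcode, destination MVT, source MVT, cost}; the
  // cost roughly counts the SIMD instructions in the expected lowering, as
  // hinted by the per-group comments.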
  static constexpr TypeConversionCostTblEntry ConversionTbl[] = {
      // extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1},
      {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1},
      // 2 x extend_low
      {ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2},
      {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2},
      // extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2},
      {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2},
      {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2},
      // 2x extend_low, extend_high
      {ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 4},
      {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4},
      // shuffle
      {ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 2},
      {ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 4},
      {ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 2},
      {ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 4},
      // narrow, and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2},
      {ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2},
      // narrow, 2x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3},
      // 3x narrow, 4x and
      {ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 7},
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7},
      // 7x narrow, 8x and
      {ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 15},
      // convert_i32x4
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1},
      // extend_low, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2},
      // extend_low x 2, convert
      {ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3},
      {ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      {ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3},
      // several shuffles
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10},
      {ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      {ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 10},
      /// trunc_sat, const, and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 6},
      {ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 6},
      {ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 6},
      /// trunc_sat, const, and, narrow
      {ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 4},
      {ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4},
      {ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4},
      // 2x trunc_sat, const, 2x and, 3x narrow
      {ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 8},
      {ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f32, 8},
      // 2x trunc_sat, const, 2x and, narrow
      {ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 6},
      {ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 6},
  };

  if (const auto *Entry =
          ConvertCostTableLookup(ConversionTbl, ISD, DstVT, SrcVT)) {
    return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}

TTI::MemCmpExpansionOptions
WebAssemblyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
  TTI::MemCmpExpansionOptions Options;

  Options.AllowOverlappingLoads = true;

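  // Load widths the expansion may use, largest first; the 16-byte v128 load
  // is only offered when SIMD128 is available.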
  if (ST->hasSIMD128())
    Options.LoadSizes.push_back(16);

  Options.LoadSizes.append({8, 4, 2, 1});
  Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
  Options.NumLoadsPerBlock = Options.MaxNumLoads;

  return Options;
}

InstructionCost WebAssemblyTTIImpl::getMemoryOpCost(
    unsigned Opcode, Type *Ty, Align Alignment, unsigned AddressSpace,
    TTI::TargetCostKind CostKind, TTI::OperandValueInfo OpInfo,
    const Instruction *I) const {
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(Ty)) {
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);
  }

  EVT VT = TLI->getValueType(DL, Ty, true);
  // Type legalization can't handle structs
  if (VT == MVT::Other)
    return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
                                  CostKind);

  auto LT = getTypeLegalizationCost(Ty);
  if (!LT.first.isValid())
    return InstructionCost::getInvalid();

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  unsigned width = VT.getSizeInBits();
  if (ISD == ISD::LOAD) {
    // 128-bit loads are a single instruction. 32-bit and 64-bit vector loads
    // can be lowered to load32_zero and load64_zero respectively. Assume SIMD
    // loads are twice as expensive as scalar.
    switch (width) {
    default:
      break;
    case 32:
    case 64:
    case 128:
      return 2;
    }
  } else if (ISD == ISD::STORE) {
    // For stores, we can use store lane operations.
    switch (width) {
    default:
      break;
    case 8:
    case 16:
    case 32:
    case 64:
    case 128:
      return 2;
    }
  }

  return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, CostKind);
}

InstructionCost WebAssemblyTTIImpl::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *Ty, unsigned Factor, ArrayRef<unsigned> Indices,
    Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
    bool UseMaskForCond, bool UseMaskForGaps) const {
  assert(Factor >= 2 && "Invalid interleave factor");

  auto *VecTy = cast<VectorType>(Ty);
  if (!ST->hasSIMD128() || !isa<FixedVectorType>(VecTy)) {
    return InstructionCost::getInvalid();
  }

  if (UseMaskForCond || UseMaskForGaps)
    return BaseT::getInterleavedMemoryOpCost(Opcode, Ty, Factor, Indices,
                                             Alignment, AddressSpace, CostKind,
                                             UseMaskForCond, UseMaskForGaps);

  constexpr unsigned MaxInterleaveFactor = 4;
  if (Factor <= MaxInterleaveFactor) {
    unsigned MinElts = VecTy->getElementCount().getKnownMinValue();
    // Ensure the number of vector elements is greater than 1.
    if (MinElts < 2 || MinElts % Factor != 0)
      return InstructionCost::getInvalid();

    unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
    // Ensure the element type is legal.
    if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
      return InstructionCost::getInvalid();

    auto *SubVecTy =
        VectorType::get(VecTy->getElementType(),
                        VecTy->getElementCount().divideCoefficientBy(Factor));
    InstructionCost MemCost =
        getMemoryOpCost(Opcode, SubVecTy, Alignment, AddressSpace, CostKind);

    unsigned VecSize = DL.getTypeSizeInBits(SubVecTy);
    unsigned MaxVecSize = 128;
    unsigned NumAccesses =
        std::max<unsigned>(1, (MinElts * ElSize + MaxVecSize - 1) / VecSize);
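    // (Worked example: a factor-2 deinterleave of <8 x i32> uses <4 x i32>
    // subvectors; MinElts * ElSize is 256 bits, so NumAccesses is 2 and the
    // result below is Entry->Cost + 2 * MemCost.)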

    // A stride of two is commonly supported via dedicated instructions, so it
    // should be relatively cheap for all element sizes. A stride of four is
    // more expensive as it will likely require more shuffles. Using two
    // simd128 inputs is considered more expensive and we mainly account for
    // shuffling two inputs (32 bytes), but we do model 4 x v4i32 to enable
    // arithmetic kernels.
    static const CostTblEntry ShuffleCostTbl[] = {
        // One reg.
        {2, MVT::v2i8, 1},  // interleave 2 x 2i8 into 4i8
        {2, MVT::v4i8, 1},  // interleave 2 x 4i8 into 8i8
        {2, MVT::v8i8, 1},  // interleave 2 x 8i8 into 16i8
        {2, MVT::v2i16, 1}, // interleave 2 x 2i16 into 4i16
        {2, MVT::v4i16, 1}, // interleave 2 x 4i16 into 8i16
        {2, MVT::v2i32, 1}, // interleave 2 x 2i32 into 4i32

        // Two regs.
        {2, MVT::v16i8, 2}, // interleave 2 x 16i8 into 32i8
        {2, MVT::v8i16, 2}, // interleave 2 x 8i16 into 16i16
        {2, MVT::v4i32, 2}, // interleave 2 x 4i32 into 8i32

        // One reg.
        {4, MVT::v2i8, 4},  // interleave 4 x 2i8 into 8i8
        {4, MVT::v4i8, 4},  // interleave 4 x 4i8 into 16i8
        {4, MVT::v2i16, 4}, // interleave 4 x 2i16 into 8i16

        // Two regs.
        {4, MVT::v8i8, 16}, // interleave 4 x 8i8 into 32i8
        {4, MVT::v4i16, 8}, // interleave 4 x 4i16 into 16i16
        {4, MVT::v2i32, 4}, // interleave 4 x 2i32 into 8i32

        // Four regs.
        {4, MVT::v4i32, 16}, // interleave 4 x 4i32 into 16i32
    };

    EVT ETy = TLI->getValueType(DL, SubVecTy);
    if (const auto *Entry =
            CostTableLookup(ShuffleCostTbl, Factor, ETy.getSimpleVT()))
      return Entry->Cost + (NumAccesses * MemCost);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace, CostKind,
                                           UseMaskForCond, UseMaskForGaps);
}

InstructionCost WebAssemblyTTIImpl::getVectorInstrCost(
    unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index,
    const Value *Op0, const Value *Op1) const {
  InstructionCost Cost = BasicTTIImplBase::getVectorInstrCost(
      Opcode, Val, CostKind, Index, Op0, Op1);

  // SIMD128's insert/extract currently only take constant indices.
  if (Index == -1u)
    return Cost + 25 * TargetTransformInfo::TCC_Expensive;

  return Cost;
}

InstructionCost WebAssemblyTTIImpl::getPartialReductionCost(
    unsigned Opcode, Type *InputTypeA, Type *InputTypeB, Type *AccumType,
    ElementCount VF, TTI::PartialReductionExtendKind OpAExtend,
    TTI::PartialReductionExtendKind OpBExtend, std::optional<unsigned> BinOp,
    TTI::TargetCostKind CostKind) const {
  InstructionCost Invalid = InstructionCost::getInvalid();
  if (!VF.isFixed() || !ST->hasSIMD128())
    return Invalid;

  if (CostKind != TTI::TCK_RecipThroughput)
    return Invalid;

  if (Opcode != Instruction::Add)
    return Invalid;

  EVT AccumEVT = EVT::getEVT(AccumType);
  // TODO: Add i64 accumulator.
  if (AccumEVT != MVT::i32)
    return Invalid;

  // Possible options:
  // - i16x8.extadd_pairwise_i8x16_sx
  // - i32x4.extadd_pairwise_i16x8_sx
  // - i32x4.dot_i16x8_s
  // Only try to support dot, for now.
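  // (For instance, a sign-extended i16x8 multiply-accumulate maps onto
  // i32x4.dot_i16x8_s directly; the other supported shapes below pay a
  // multiplier for the extra widening work.)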

  EVT InputEVT = EVT::getEVT(InputTypeA);
  if (!((InputEVT == MVT::i16 && VF.getFixedValue() == 8) ||
        (InputEVT == MVT::i8 && VF.getFixedValue() == 16))) {
    return Invalid;
  }

  if (OpAExtend == TTI::PR_None)
    return Invalid;

  InstructionCost Cost(TTI::TCC_Basic);
  if (!BinOp)
    return Cost;

  if (OpAExtend != OpBExtend)
    return Invalid;

  if (*BinOp != Instruction::Mul)
    return Invalid;

  if (InputTypeA != InputTypeB)
    return Invalid;

  // Signed inputs can lower to dot
  if (InputEVT == MVT::i16 && VF.getFixedValue() == 8)
    return OpAExtend == TTI::PR_SignExtend ? Cost : Cost * 2;

  // Double the size of the lowered sequence.
  if (InputEVT == MVT::i8 && VF.getFixedValue() == 16)
    return OpAExtend == TTI::PR_SignExtend ? Cost * 2 : Cost * 4;

  return Invalid;
}

TTI::ReductionShuffle WebAssemblyTTIImpl::getPreferredExpandedReductionShuffle(
    const IntrinsicInst *II) const {

  switch (II->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::vector_reduce_fadd:
    return TTI::ReductionShuffle::Pairwise;
  }
  return TTI::ReductionShuffle::SplitHalf;
}

void WebAssemblyTTIImpl::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP,
    OptimizationRemarkEmitter *ORE) const {
  // Scan the loop: don't unroll loops with calls. This is a standard approach
  // for most (all?) targets.
  for (BasicBlock *BB : L->blocks())
    for (Instruction &I : *BB)
      if (isa<CallInst>(I) || isa<InvokeInst>(I))
        if (const Function *F = cast<CallBase>(I).getCalledFunction())
          if (isLoweredToCall(F))
            return;

  // The chosen threshold is within the range of 'LoopMicroOpBufferSize' of
  // the various microarchitectures that use the BasicTTI implementation and
  // has been selected through heuristics across multiple cores and runtimes.
  UP.Partial = UP.Runtime = UP.UpperBound = true;
  UP.PartialThreshold = 30;

  // Avoid unrolling when optimizing for size.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;

  // Set number of instructions optimized when "back edge"
  // becomes "fall through" to default value of 2.
  UP.BEInsns = 2;
}

bool WebAssemblyTTIImpl::supportsTailCalls() const {
  return getST()->hasTailCall();
}

bool WebAssemblyTTIImpl::isProfitableToSinkOperands(
    Instruction *I, SmallVectorImpl<Use *> &Ops) const {
  using namespace llvm::PatternMatch;

  if (!I->getType()->isVectorTy() || !I->isShift())
    return false;

  Value *V = I->getOperand(1);
  // We don't need to sink a constant splat.
  if (isa<Constant>(V))
    return false;

  if (match(V, m_Shuffle(m_InsertElt(m_Value(), m_Value(), m_ZeroInt()),
                         m_Value(), m_ZeroMask()))) {
    // Sink insert
    Ops.push_back(&cast<Instruction>(V)->getOperandUse(0));
    // Sink shuffle
    Ops.push_back(&I->getOperandUse(1));
    return true;
  }

  return false;
}

/// Attempt to convert [relaxed_]swizzle to shufflevector if the mask is
/// constant.
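/// (For example, a call to llvm.wasm.swizzle with an all-in-range constant
/// mask becomes a single shufflevector of its first operand.)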
static Value *simplifyWasmSwizzle(const IntrinsicInst &II,
                                  InstCombiner::BuilderTy &Builder,
                                  bool IsRelaxed) {
  auto *V = dyn_cast<Constant>(II.getArgOperand(1));
  if (!V)
    return nullptr;

  auto *VecTy = cast<FixedVectorType>(II.getType());
  unsigned NumElts = VecTy->getNumElements();
  assert(NumElts == 16);

  // Construct a shuffle mask from constant integers or UNDEFs.
  int Indexes[16];
  bool AnyOutOfBounds = false;

  for (unsigned I = 0; I < NumElts; ++I) {
    Constant *COp = V->getAggregateElement(I);
    if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
      return nullptr;

    if (isa<UndefValue>(COp)) {
      Indexes[I] = -1;
      continue;
    }

    if (IsRelaxed && cast<ConstantInt>(COp)->getSExtValue() >= NumElts) {
      // The relaxed_swizzle operation always returns 0 if the lane index is
      // less than 0 when interpreted as a signed value. For lane indices
      // above 15, however, it can choose between returning 0 and the lane at
      // `Index % 16`, and the choice must be made consistently. As the
      // WebAssembly spec states:
      //
      // "The result of relaxed operators are implementation-dependent, because
      // the set of possible results may depend on properties of the host
      // environment, such as its hardware. Technically, their behaviour is
      // controlled by a set of global parameters to the semantics that an
      // implementation can instantiate in different ways. These choices are
      // fixed, that is, parameters are constant during the execution of any
      // given program."
      //
      // The WebAssembly runtime may choose differently from us, so we can't
      // optimize a relaxed swizzle with lane indices above 15.
      return nullptr;
    }

    uint64_t Index = cast<ConstantInt>(COp)->getZExtValue();
    if (Index >= NumElts) {
      AnyOutOfBounds = true;
      // If there are out-of-bounds indices, the swizzle instruction returns
      // zeroes in those lanes. We'll provide an all-zeroes vector as the
      // second argument to shufflevector and read the first element from it.
      Indexes[I] = NumElts;
      continue;
    }

    Indexes[I] = Index;
  }

  auto *V1 = II.getArgOperand(0);
  auto *V2 =
      AnyOutOfBounds ? Constant::getNullValue(VecTy) : PoisonValue::get(VecTy);

  return Builder.CreateShuffleVector(V1, V2, ArrayRef(Indexes, NumElts));
}

std::optional<Instruction *>
WebAssemblyTTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                         IntrinsicInst &II) const {
  Intrinsic::ID IID = II.getIntrinsicID();
  switch (IID) {
  case Intrinsic::wasm_swizzle:
  case Intrinsic::wasm_relaxed_swizzle:
    if (Value *V = simplifyWasmSwizzle(
            II, IC.Builder, IID == Intrinsic::wasm_relaxed_swizzle)) {
      return IC.replaceInstUsesWith(II, V);
    }
    break;
  }

  return std::nullopt;
}