LLVM 20.0.0git
ScalarizeMaskedMemIntrin.cpp
Go to the documentation of this file.
1//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
2// intrinsics
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass replaces masked memory intrinsics - when unsupported by the target
11// - with a chain of basic blocks, that deal with the elements one-by-one if the
12// appropriate mask bit is set.
13//
14//===----------------------------------------------------------------------===//
15
17#include "llvm/ADT/Twine.h"
20#include "llvm/IR/BasicBlock.h"
21#include "llvm/IR/Constant.h"
22#include "llvm/IR/Constants.h"
24#include "llvm/IR/Dominators.h"
25#include "llvm/IR/Function.h"
26#include "llvm/IR/IRBuilder.h"
27#include "llvm/IR/Instruction.h"
30#include "llvm/IR/Type.h"
31#include "llvm/IR/Value.h"
33#include "llvm/Pass.h"
37#include <cassert>
38#include <optional>
39
40using namespace llvm;
41
42#define DEBUG_TYPE "scalarize-masked-mem-intrin"
43
44namespace {
45
// Legacy pass manager wrapper for the masked-memory-intrinsic scalarization
// implemented in this file. It is a FunctionPass; the per-function work is
// done by runOnFunction(), which is only declared here.
46class ScalarizeMaskedMemIntrinLegacyPass : public FunctionPass {
47public:
48 static char ID; // Pass identification, replacement for typeid
49
// Constructs the pass, handing ID to the FunctionPass base.
// NOTE(review): the listing numbers jump from 50 to 53 here, so statements
// from the constructor body may be missing from this copy - confirm against
// the upstream file.
50 explicit ScalarizeMaskedMemIntrinLegacyPass() : FunctionPass(ID) {
53 }
54
// Per-function entry point of the pass.
55 bool runOnFunction(Function &F) override;
56
// Human-readable name of the pass.
57 StringRef getPassName() const override {
58 return "Scalarize Masked Memory Intrinsics";
59 }
60
// Declares the analyses this pass uses/preserves.
// NOTE(review): the listing numbers jump from 61 to 64 here, so the body is
// probably incomplete in this copy - confirm against the upstream file.
61 void getAnalysisUsage(AnalysisUsage &AU) const override {
64 }
65};
66
67} // end anonymous namespace
68
69static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
70 const TargetTransformInfo &TTI, const DataLayout &DL,
71 DomTreeUpdater *DTU);
72static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
74 const DataLayout &DL, DomTreeUpdater *DTU);
75
76char ScalarizeMaskedMemIntrinLegacyPass::ID = 0;
77
78INITIALIZE_PASS_BEGIN(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
79 "Scalarize unsupported masked memory intrinsics", false,
80 false)
83INITIALIZE_PASS_END(ScalarizeMaskedMemIntrinLegacyPass, DEBUG_TYPE,
84 "Scalarize unsupported masked memory intrinsics", false,
85 false)
86
88 return new ScalarizeMaskedMemIntrinLegacyPass();
89}
90
91static bool isConstantIntVector(Value *Mask) {
92 Constant *C = dyn_cast<Constant>(Mask);
93 if (!C)
94 return false;
95
96 unsigned NumElts = cast<FixedVectorType>(Mask->getType())->getNumElements();
97 for (unsigned i = 0; i != NumElts; ++i) {
98 Constant *CElt = C->getAggregateElement(i);
99 if (!CElt || !isa<ConstantInt>(CElt))
100 return false;
101 }
102
103 return true;
104}
105
106static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth,
107 unsigned Idx) {
108 return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
109}
110
111// Translate a masked load intrinsic like
112// <16 x i32 > @llvm.masked.load( <16 x i32>* %addr, i32 align,
113// <16 x i1> %mask, <16 x i32> %passthru)
114// to a chain of basic blocks, with loading element one-by-one if
115// the appropriate mask bit is set
116//
117// %1 = bitcast i8* %addr to i32*
118// %2 = extractelement <16 x i1> %mask, i32 0
119// br i1 %2, label %cond.load, label %else
120//
121// cond.load: ; preds = %0
122// %3 = getelementptr i32* %1, i32 0
123// %4 = load i32* %3
124// %5 = insertelement <16 x i32> %passthru, i32 %4, i32 0
125// br label %else
126//
127// else: ; preds = %0, %cond.load
128// %res.phi.else = phi <16 x i32> [ %5, %cond.load ], [ %passthru, %0 ]
129// %6 = extractelement <16 x i1> %mask, i32 1
130// br i1 %6, label %cond.load1, label %else2
131//
132// cond.load1: ; preds = %else
133// %7 = getelementptr i32* %1, i32 1
134// %8 = load i32* %7
135// %9 = insertelement <16 x i32> %res.phi.else, i32 %8, i32 1
136// br label %else2
137//
138// else2: ; preds = %else, %cond.load1
139// %res.phi.else3 = phi <16 x i32> [ %9, %cond.load1 ], [ %res.phi.else, %else ]
140// %10 = extractelement <16 x i1> %mask, i32 2
141// br i1 %10, label %cond.load4, label %else5
142//
144 DomTreeUpdater *DTU, bool &ModifiedDT) {
145 Value *Ptr = CI->getArgOperand(0);
146 Value *Alignment = CI->getArgOperand(1);
147 Value *Mask = CI->getArgOperand(2);
148 Value *Src0 = CI->getArgOperand(3);
149
150 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
151 VectorType *VecType = cast<FixedVectorType>(CI->getType());
152
153 Type *EltTy = VecType->getElementType();
154
155 IRBuilder<> Builder(CI->getContext());
156 Instruction *InsertPt = CI;
157 BasicBlock *IfBlock = CI->getParent();
158
159 Builder.SetInsertPoint(InsertPt);
161
162 // Short-cut if the mask is all-true.
163 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
164 Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
165 CI->replaceAllUsesWith(NewI);
166 CI->eraseFromParent();
167 return;
168 }
169
170 // Adjust alignment for the scalar instruction.
171 const Align AdjustedAlignVal =
172 commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
173 unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
174
175 // The result vector
176 Value *VResult = Src0;
177
178 if (isConstantIntVector(Mask)) {
179 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
180 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
181 continue;
182 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
183 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
184 VResult = Builder.CreateInsertElement(VResult, Load, Idx);
185 }
186 CI->replaceAllUsesWith(VResult);
187 CI->eraseFromParent();
188 return;
189 }
190
191 // If the mask is not v1i1, use scalar bit test operations. This generates
192 // better results on X86 at least.
193 Value *SclrMask;
194 if (VectorWidth != 1) {
195 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
196 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
197 }
198
199 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
200 // Fill the "else" block, created in the previous iteration
201 //
202 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
203 // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
204 // %cond = icmp ne i16 %mask_1, 0
205 // br i1 %mask_1, label %cond.load, label %else
206 //
207 Value *Predicate;
208 if (VectorWidth != 1) {
209 Value *Mask = Builder.getInt(APInt::getOneBitSet(
210 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
211 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
212 Builder.getIntN(VectorWidth, 0));
213 } else {
214 Predicate = Builder.CreateExtractElement(Mask, Idx);
215 }
216
217 // Create "cond" block
218 //
219 // %EltAddr = getelementptr i32* %1, i32 0
220 // %Elt = load i32* %EltAddr
221 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
222 //
223 Instruction *ThenTerm =
224 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
225 /*BranchWeights=*/nullptr, DTU);
226
227 BasicBlock *CondBlock = ThenTerm->getParent();
228 CondBlock->setName("cond.load");
229
230 Builder.SetInsertPoint(CondBlock->getTerminator());
231 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
232 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AdjustedAlignVal);
233 Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
234
235 // Create "else" block, fill it in the next iteration
236 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
237 NewIfBlock->setName("else");
238 BasicBlock *PrevIfBlock = IfBlock;
239 IfBlock = NewIfBlock;
240
241 // Create the phi to join the new and previous value.
242 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
243 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
244 Phi->addIncoming(NewVResult, CondBlock);
245 Phi->addIncoming(VResult, PrevIfBlock);
246 VResult = Phi;
247 }
248
249 CI->replaceAllUsesWith(VResult);
250 CI->eraseFromParent();
251
252 ModifiedDT = true;
253}
254
255// Translate a masked store intrinsic, like
256// void @llvm.masked.store(<16 x i32> %src, <16 x i32>* %addr, i32 align,
257// <16 x i1> %mask)
258// to a chain of basic blocks, that stores element one-by-one if
259// the appropriate mask bit is set
260//
261// %1 = bitcast i8* %addr to i32*
262// %2 = extractelement <16 x i1> %mask, i32 0
263// br i1 %2, label %cond.store, label %else
264//
265// cond.store: ; preds = %0
266// %3 = extractelement <16 x i32> %val, i32 0
267// %4 = getelementptr i32* %1, i32 0
268// store i32 %3, i32* %4
269// br label %else
270//
271// else: ; preds = %0, %cond.store
272// %5 = extractelement <16 x i1> %mask, i32 1
273// br i1 %5, label %cond.store1, label %else2
274//
275// cond.store1: ; preds = %else
276// %6 = extractelement <16 x i32> %val, i32 1
277// %7 = getelementptr i32* %1, i32 1
278// store i32 %6, i32* %7
279// br label %else2
280// . . .
282 DomTreeUpdater *DTU, bool &ModifiedDT) {
283 Value *Src = CI->getArgOperand(0);
284 Value *Ptr = CI->getArgOperand(1);
285 Value *Alignment = CI->getArgOperand(2);
286 Value *Mask = CI->getArgOperand(3);
287
288 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
289 auto *VecType = cast<VectorType>(Src->getType());
290
291 Type *EltTy = VecType->getElementType();
292
293 IRBuilder<> Builder(CI->getContext());
294 Instruction *InsertPt = CI;
295 Builder.SetInsertPoint(InsertPt);
297
298 // Short-cut if the mask is all-true.
299 if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
300 Builder.CreateAlignedStore(Src, Ptr, AlignVal);
301 CI->eraseFromParent();
302 return;
303 }
304
305 // Adjust alignment for the scalar instruction.
306 const Align AdjustedAlignVal =
307 commonAlignment(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
308 unsigned VectorWidth = cast<FixedVectorType>(VecType)->getNumElements();
309
310 if (isConstantIntVector(Mask)) {
311 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
312 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
313 continue;
314 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
315 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
316 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
317 }
318 CI->eraseFromParent();
319 return;
320 }
321
322 // If the mask is not v1i1, use scalar bit test operations. This generates
323 // better results on X86 at least.
324 Value *SclrMask;
325 if (VectorWidth != 1) {
326 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
327 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
328 }
329
330 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
331 // Fill the "else" block, created in the previous iteration
332 //
333 // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
334 // %cond = icmp ne i16 %mask_1, 0
335 // br i1 %mask_1, label %cond.store, label %else
336 //
337 Value *Predicate;
338 if (VectorWidth != 1) {
339 Value *Mask = Builder.getInt(APInt::getOneBitSet(
340 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
341 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
342 Builder.getIntN(VectorWidth, 0));
343 } else {
344 Predicate = Builder.CreateExtractElement(Mask, Idx);
345 }
346
347 // Create "cond" block
348 //
349 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
350 // %EltAddr = getelementptr i32* %1, i32 0
351 // %store i32 %OneElt, i32* %EltAddr
352 //
353 Instruction *ThenTerm =
354 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
355 /*BranchWeights=*/nullptr, DTU);
356
357 BasicBlock *CondBlock = ThenTerm->getParent();
358 CondBlock->setName("cond.store");
359
360 Builder.SetInsertPoint(CondBlock->getTerminator());
361 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
362 Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, Idx);
363 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
364
365 // Create "else" block, fill it in the next iteration
366 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
367 NewIfBlock->setName("else");
368
369 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
370 }
371 CI->eraseFromParent();
372
373 ModifiedDT = true;
374}
375
376// Translate a masked gather intrinsic like
377// <16 x i32 > @llvm.masked.gather.v16i32( <16 x i32*> %Ptrs, i32 4,
378// <16 x i1> %Mask, <16 x i32> %Src)
379// to a chain of basic blocks, with loading element one-by-one if
380// the appropriate mask bit is set
381//
382// %Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
383// %Mask0 = extractelement <16 x i1> %Mask, i32 0
384// br i1 %Mask0, label %cond.load, label %else
385//
386// cond.load:
387// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
388// %Load0 = load i32, i32* %Ptr0, align 4
389// %Res0 = insertelement <16 x i32> %Src, i32 %Load0, i32 0
390// br label %else
391//
392// else:
393// %res.phi.else = phi <16 x i32>[%Res0, %cond.load], [%Src, %0]
394// %Mask1 = extractelement <16 x i1> %Mask, i32 1
395// br i1 %Mask1, label %cond.load1, label %else2
396//
397// cond.load1:
398// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
399// %Load1 = load i32, i32* %Ptr1, align 4
400// %Res1 = insertelement <16 x i32> %res.phi.else, i32 %Load1, i32 1
401// br label %else2
402// . . .
403// %Result is the last %res.phi.else; no select is emitted because the masked-off lanes already carry %Src
404// ret <16 x i32> %Result
406 DomTreeUpdater *DTU, bool &ModifiedDT) {
407 Value *Ptrs = CI->getArgOperand(0);
408 Value *Alignment = CI->getArgOperand(1);
409 Value *Mask = CI->getArgOperand(2);
410 Value *Src0 = CI->getArgOperand(3);
411
412 auto *VecType = cast<FixedVectorType>(CI->getType());
413 Type *EltTy = VecType->getElementType();
414
415 IRBuilder<> Builder(CI->getContext());
416 Instruction *InsertPt = CI;
417 BasicBlock *IfBlock = CI->getParent();
418 Builder.SetInsertPoint(InsertPt);
419 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
420
422
423 // The result vector
424 Value *VResult = Src0;
425 unsigned VectorWidth = VecType->getNumElements();
426
427 // Shorten the way if the mask is a vector of constants.
428 if (isConstantIntVector(Mask)) {
429 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
430 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
431 continue;
432 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
433 LoadInst *Load =
434 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
435 VResult =
436 Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
437 }
438 CI->replaceAllUsesWith(VResult);
439 CI->eraseFromParent();
440 return;
441 }
442
443 // If the mask is not v1i1, use scalar bit test operations. This generates
444 // better results on X86 at least.
445 Value *SclrMask;
446 if (VectorWidth != 1) {
447 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
448 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
449 }
450
451 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
452 // Fill the "else" block, created in the previous iteration
453 //
454 // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
455 // %cond = icmp ne i16 %mask_1, 0
456 // br i1 %Mask1, label %cond.load, label %else
457 //
458
459 Value *Predicate;
460 if (VectorWidth != 1) {
461 Value *Mask = Builder.getInt(APInt::getOneBitSet(
462 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
463 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
464 Builder.getIntN(VectorWidth, 0));
465 } else {
466 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
467 }
468
469 // Create "cond" block
470 //
471 // %EltAddr = getelementptr i32* %1, i32 0
472 // %Elt = load i32* %EltAddr
473 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
474 //
475 Instruction *ThenTerm =
476 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
477 /*BranchWeights=*/nullptr, DTU);
478
479 BasicBlock *CondBlock = ThenTerm->getParent();
480 CondBlock->setName("cond.load");
481
482 Builder.SetInsertPoint(CondBlock->getTerminator());
483 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
484 LoadInst *Load =
485 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
486 Value *NewVResult =
487 Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
488
489 // Create "else" block, fill it in the next iteration
490 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
491 NewIfBlock->setName("else");
492 BasicBlock *PrevIfBlock = IfBlock;
493 IfBlock = NewIfBlock;
494
495 // Create the phi to join the new and previous value.
496 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
497 PHINode *Phi = Builder.CreatePHI(VecType, 2, "res.phi.else");
498 Phi->addIncoming(NewVResult, CondBlock);
499 Phi->addIncoming(VResult, PrevIfBlock);
500 VResult = Phi;
501 }
502
503 CI->replaceAllUsesWith(VResult);
504 CI->eraseFromParent();
505
506 ModifiedDT = true;
507}
508
509// Translate a masked scatter intrinsic, like
510// void @llvm.masked.scatter.v16i32(<16 x i32> %Src, <16 x i32*> %Ptrs, i32 4,
511// <16 x i1> %Mask)
512// to a chain of basic blocks, that stores element one-by-one if
513// the appropriate mask bit is set.
514//
515// %Ptrs = getelementptr i32, i32* %ptr, <16 x i64> %ind
516// %Mask0 = extractelement <16 x i1> %Mask, i32 0
517// br i1 %Mask0, label %cond.store, label %else
518//
519// cond.store:
520// %Elt0 = extractelement <16 x i32> %Src, i32 0
521// %Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
522// store i32 %Elt0, i32* %Ptr0, align 4
523// br label %else
524//
525// else:
526// %Mask1 = extractelement <16 x i1> %Mask, i32 1
527// br i1 %Mask1, label %cond.store1, label %else2
528//
529// cond.store1:
530// %Elt1 = extractelement <16 x i32> %Src, i32 1
531// %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
532// store i32 %Elt1, i32* %Ptr1, align 4
533// br label %else2
534// . . .
536 DomTreeUpdater *DTU, bool &ModifiedDT) {
537 Value *Src = CI->getArgOperand(0);
538 Value *Ptrs = CI->getArgOperand(1);
539 Value *Alignment = CI->getArgOperand(2);
540 Value *Mask = CI->getArgOperand(3);
541
542 auto *SrcFVTy = cast<FixedVectorType>(Src->getType());
543
544 assert(
545 isa<VectorType>(Ptrs->getType()) &&
546 isa<PointerType>(cast<VectorType>(Ptrs->getType())->getElementType()) &&
547 "Vector of pointers is expected in masked scatter intrinsic");
548
549 IRBuilder<> Builder(CI->getContext());
550 Instruction *InsertPt = CI;
551 Builder.SetInsertPoint(InsertPt);
553
554 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
555 unsigned VectorWidth = SrcFVTy->getNumElements();
556
557 // Shorten the way if the mask is a vector of constants.
558 if (isConstantIntVector(Mask)) {
559 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
560 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
561 continue;
562 Value *OneElt =
563 Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
564 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
565 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
566 }
567 CI->eraseFromParent();
568 return;
569 }
570
571 // If the mask is not v1i1, use scalar bit test operations. This generates
572 // better results on X86 at least.
573 Value *SclrMask;
574 if (VectorWidth != 1) {
575 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
576 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
577 }
578
579 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
580 // Fill the "else" block, created in the previous iteration
581 //
582 // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
583 // %cond = icmp ne i16 %mask_1, 0
584 // br i1 %Mask1, label %cond.store, label %else
585 //
586 Value *Predicate;
587 if (VectorWidth != 1) {
588 Value *Mask = Builder.getInt(APInt::getOneBitSet(
589 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
590 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
591 Builder.getIntN(VectorWidth, 0));
592 } else {
593 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
594 }
595
596 // Create "cond" block
597 //
598 // %Elt1 = extractelement <16 x i32> %Src, i32 1
599 // %Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
600 // %store i32 %Elt1, i32* %Ptr1
601 //
602 Instruction *ThenTerm =
603 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
604 /*BranchWeights=*/nullptr, DTU);
605
606 BasicBlock *CondBlock = ThenTerm->getParent();
607 CondBlock->setName("cond.store");
608
609 Builder.SetInsertPoint(CondBlock->getTerminator());
610 Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
611 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
612 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
613
614 // Create "else" block, fill it in the next iteration
615 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
616 NewIfBlock->setName("else");
617
618 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
619 }
620 CI->eraseFromParent();
621
622 ModifiedDT = true;
623}
624
626 DomTreeUpdater *DTU, bool &ModifiedDT) {
627 Value *Ptr = CI->getArgOperand(0);
628 Value *Mask = CI->getArgOperand(1);
629 Value *PassThru = CI->getArgOperand(2);
630 Align Alignment = CI->getParamAlign(0).valueOrOne();
631
632 auto *VecType = cast<FixedVectorType>(CI->getType());
633
634 Type *EltTy = VecType->getElementType();
635
636 IRBuilder<> Builder(CI->getContext());
637 Instruction *InsertPt = CI;
638 BasicBlock *IfBlock = CI->getParent();
639
640 Builder.SetInsertPoint(InsertPt);
642
643 unsigned VectorWidth = VecType->getNumElements();
644
645 // The result vector
646 Value *VResult = PassThru;
647
648 // Adjust alignment for the scalar instruction.
649 const Align AdjustedAlignment =
650 commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
651
652 // Shorten the way if the mask is a vector of constants.
653 // Create a build_vector pattern, with loads/poisons as necessary and then
654 // shuffle blend with the pass through value.
655 if (isConstantIntVector(Mask)) {
656 unsigned MemIndex = 0;
657 VResult = PoisonValue::get(VecType);
658 SmallVector<int, 16> ShuffleMask(VectorWidth, PoisonMaskElem);
659 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
660 Value *InsertElt;
661 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue()) {
662 InsertElt = PoisonValue::get(EltTy);
663 ShuffleMask[Idx] = Idx + VectorWidth;
664 } else {
665 Value *NewPtr =
666 Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
667 InsertElt = Builder.CreateAlignedLoad(EltTy, NewPtr, AdjustedAlignment,
668 "Load" + Twine(Idx));
669 ShuffleMask[Idx] = Idx;
670 ++MemIndex;
671 }
672 VResult = Builder.CreateInsertElement(VResult, InsertElt, Idx,
673 "Res" + Twine(Idx));
674 }
675 VResult = Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
676 CI->replaceAllUsesWith(VResult);
677 CI->eraseFromParent();
678 return;
679 }
680
681 // If the mask is not v1i1, use scalar bit test operations. This generates
682 // better results on X86 at least.
683 Value *SclrMask;
684 if (VectorWidth != 1) {
685 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
686 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
687 }
688
689 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
690 // Fill the "else" block, created in the previous iteration
691 //
692 // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
693 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
694 // br i1 %mask_1, label %cond.load, label %else
695 //
696
697 Value *Predicate;
698 if (VectorWidth != 1) {
699 Value *Mask = Builder.getInt(APInt::getOneBitSet(
700 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
701 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
702 Builder.getIntN(VectorWidth, 0));
703 } else {
704 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
705 }
706
707 // Create "cond" block
708 //
709 // %EltAddr = getelementptr i32* %1, i32 0
710 // %Elt = load i32* %EltAddr
711 // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
712 //
713 Instruction *ThenTerm =
714 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
715 /*BranchWeights=*/nullptr, DTU);
716
717 BasicBlock *CondBlock = ThenTerm->getParent();
718 CondBlock->setName("cond.load");
719
720 Builder.SetInsertPoint(CondBlock->getTerminator());
721 LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, AdjustedAlignment);
722 Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
723
724 // Move the pointer if there are more blocks to come.
725 Value *NewPtr;
726 if ((Idx + 1) != VectorWidth)
727 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
728
729 // Create "else" block, fill it in the next iteration
730 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
731 NewIfBlock->setName("else");
732 BasicBlock *PrevIfBlock = IfBlock;
733 IfBlock = NewIfBlock;
734
735 // Create the phi to join the new and previous value.
736 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
737 PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
738 ResultPhi->addIncoming(NewVResult, CondBlock);
739 ResultPhi->addIncoming(VResult, PrevIfBlock);
740 VResult = ResultPhi;
741
742 // Add a PHI for the pointer if this isn't the last iteration.
743 if ((Idx + 1) != VectorWidth) {
744 PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
745 PtrPhi->addIncoming(NewPtr, CondBlock);
746 PtrPhi->addIncoming(Ptr, PrevIfBlock);
747 Ptr = PtrPhi;
748 }
749 }
750
751 CI->replaceAllUsesWith(VResult);
752 CI->eraseFromParent();
753
754 ModifiedDT = true;
755}
756
758 DomTreeUpdater *DTU,
759 bool &ModifiedDT) {
760 Value *Src = CI->getArgOperand(0);
761 Value *Ptr = CI->getArgOperand(1);
762 Value *Mask = CI->getArgOperand(2);
763 Align Alignment = CI->getParamAlign(1).valueOrOne();
764
765 auto *VecType = cast<FixedVectorType>(Src->getType());
766
767 IRBuilder<> Builder(CI->getContext());
768 Instruction *InsertPt = CI;
769 BasicBlock *IfBlock = CI->getParent();
770
771 Builder.SetInsertPoint(InsertPt);
773
774 Type *EltTy = VecType->getElementType();
775
776 // Adjust alignment for the scalar instruction.
777 const Align AdjustedAlignment =
778 commonAlignment(Alignment, EltTy->getPrimitiveSizeInBits() / 8);
779
780 unsigned VectorWidth = VecType->getNumElements();
781
782 // Shorten the way if the mask is a vector of constants.
783 if (isConstantIntVector(Mask)) {
784 unsigned MemIndex = 0;
785 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
786 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
787 continue;
788 Value *OneElt =
789 Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
790 Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
791 Builder.CreateAlignedStore(OneElt, NewPtr, AdjustedAlignment);
792 ++MemIndex;
793 }
794 CI->eraseFromParent();
795 return;
796 }
797
798 // If the mask is not v1i1, use scalar bit test operations. This generates
799 // better results on X86 at least.
800 Value *SclrMask;
801 if (VectorWidth != 1) {
802 Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
803 SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
804 }
805
806 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
807 // Fill the "else" block, created in the previous iteration
808 //
809 // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
810 // br i1 %mask_1, label %cond.store, label %else
811 //
812 Value *Predicate;
813 if (VectorWidth != 1) {
814 Value *Mask = Builder.getInt(APInt::getOneBitSet(
815 VectorWidth, adjustForEndian(DL, VectorWidth, Idx)));
816 Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
817 Builder.getIntN(VectorWidth, 0));
818 } else {
819 Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
820 }
821
822 // Create "cond" block
823 //
824 // %OneElt = extractelement <16 x i32> %Src, i32 Idx
825 // %EltAddr = getelementptr i32* %1, i32 0
826 // %store i32 %OneElt, i32* %EltAddr
827 //
828 Instruction *ThenTerm =
829 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
830 /*BranchWeights=*/nullptr, DTU);
831
832 BasicBlock *CondBlock = ThenTerm->getParent();
833 CondBlock->setName("cond.store");
834
835 Builder.SetInsertPoint(CondBlock->getTerminator());
836 Value *OneElt = Builder.CreateExtractElement(Src, Idx);
837 Builder.CreateAlignedStore(OneElt, Ptr, AdjustedAlignment);
838
839 // Move the pointer if there are more blocks to come.
840 Value *NewPtr;
841 if ((Idx + 1) != VectorWidth)
842 NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
843
844 // Create "else" block, fill it in the next iteration
845 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
846 NewIfBlock->setName("else");
847 BasicBlock *PrevIfBlock = IfBlock;
848 IfBlock = NewIfBlock;
849
850 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
851
852 // Add a PHI for the pointer if this isn't the last iteration.
853 if ((Idx + 1) != VectorWidth) {
854 PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
855 PtrPhi->addIncoming(NewPtr, CondBlock);
856 PtrPhi->addIncoming(Ptr, PrevIfBlock);
857 Ptr = PtrPhi;
858 }
859 }
860 CI->eraseFromParent();
861
862 ModifiedDT = true;
863}
864
866 DomTreeUpdater *DTU,
867 bool &ModifiedDT) {
868 // If we extend histogram to return a result someday (like the updated vector)
869 // then we'll need to support it here.
870 assert(CI->getType()->isVoidTy() && "Histogram with non-void return.");
871 Value *Ptrs = CI->getArgOperand(0);
872 Value *Inc = CI->getArgOperand(1);
873 Value *Mask = CI->getArgOperand(2);
874
875 auto *AddrType = cast<FixedVectorType>(Ptrs->getType());
876 Type *EltTy = Inc->getType();
877
878 IRBuilder<> Builder(CI->getContext());
879 Instruction *InsertPt = CI;
880 Builder.SetInsertPoint(InsertPt);
881
883
884 // FIXME: Do we need to add an alignment parameter to the intrinsic?
885 unsigned VectorWidth = AddrType->getNumElements();
886
887 // Shorten the way if the mask is a vector of constants.
888 if (isConstantIntVector(Mask)) {
889 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
890 if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
891 continue;
892 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
893 LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
894 Value *Add = Builder.CreateAdd(Load, Inc);
895 Builder.CreateStore(Add, Ptr);
896 }
897 CI->eraseFromParent();
898 return;
899 }
900
901 for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
902 Value *Predicate =
903 Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
904
905 Instruction *ThenTerm =
906 SplitBlockAndInsertIfThen(Predicate, InsertPt, /*Unreachable=*/false,
907 /*BranchWeights=*/nullptr, DTU);
908
909 BasicBlock *CondBlock = ThenTerm->getParent();
910 CondBlock->setName("cond.histogram.update");
911
912 Builder.SetInsertPoint(CondBlock->getTerminator());
913 Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
914 LoadInst *Load = Builder.CreateLoad(EltTy, Ptr, "Load" + Twine(Idx));
915 Value *Add = Builder.CreateAdd(Load, Inc);
916 Builder.CreateStore(Add, Ptr);
917
918 // Create "else" block, fill it in the next iteration
919 BasicBlock *NewIfBlock = ThenTerm->getSuccessor(0);
920 NewIfBlock->setName("else");
921 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->begin());
922 }
923
924 CI->eraseFromParent();
925 ModifiedDT = true;
926}
927
929 DominatorTree *DT) {
930 std::optional<DomTreeUpdater> DTU;
931 if (DT)
932 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
933
934 bool EverMadeChange = false;
935 bool MadeChange = true;
936 auto &DL = F.getDataLayout();
937 while (MadeChange) {
938 MadeChange = false;
940 bool ModifiedDTOnIteration = false;
941 MadeChange |= optimizeBlock(BB, ModifiedDTOnIteration, TTI, DL,
942 DTU ? &*DTU : nullptr);
943
944 // Restart BB iteration if the dominator tree of the Function was changed
945 if (ModifiedDTOnIteration)
946 break;
947 }
948
949 EverMadeChange |= MadeChange;
950 }
951 return EverMadeChange;
952}
953
954bool ScalarizeMaskedMemIntrinLegacyPass::runOnFunction(Function &F) {
955 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
956 DominatorTree *DT = nullptr;
957 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
958 DT = &DTWP->getDomTree();
959 return runImpl(F, TTI, DT);
960}
961
964 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
966 if (!runImpl(F, TTI, DT))
967 return PreservedAnalyses::all();
971 return PA;
972}
973
974static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT,
975 const TargetTransformInfo &TTI, const DataLayout &DL,
976 DomTreeUpdater *DTU) {
977 bool MadeChange = false;
978
979 BasicBlock::iterator CurInstIterator = BB.begin();
980 while (CurInstIterator != BB.end()) {
981 if (CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
982 MadeChange |= optimizeCallInst(CI, ModifiedDT, TTI, DL, DTU);
983 if (ModifiedDT)
984 return true;
985 }
986
987 return MadeChange;
988}
989
990static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT,
992 const DataLayout &DL, DomTreeUpdater *DTU) {
993 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
994 if (II) {
995 // The scalarization code below does not work for scalable vectors.
996 if (isa<ScalableVectorType>(II->getType()) ||
997 any_of(II->args(),
998 [](Value *V) { return isa<ScalableVectorType>(V->getType()); }))
999 return false;
1000
1001 switch (II->getIntrinsicID()) {
1002 default:
1003 break;
1004 case Intrinsic::experimental_vector_histogram_add:
1006 CI->getArgOperand(1)->getType()))
1007 return false;
1008 scalarizeMaskedVectorHistogram(DL, CI, DTU, ModifiedDT);
1009 return true;
1010 case Intrinsic::masked_load:
1011 // Scalarize unsupported vector masked load
1013 CI->getType(),
1014 cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue()))
1015 return false;
1016 scalarizeMaskedLoad(DL, CI, DTU, ModifiedDT);
1017 return true;
1018 case Intrinsic::masked_store:
1020 CI->getArgOperand(0)->getType(),
1021 cast<ConstantInt>(CI->getArgOperand(2))->getAlignValue()))
1022 return false;
1023 scalarizeMaskedStore(DL, CI, DTU, ModifiedDT);
1024 return true;
1025 case Intrinsic::masked_gather: {
1026 MaybeAlign MA =
1027 cast<ConstantInt>(CI->getArgOperand(1))->getMaybeAlignValue();
1028 Type *LoadTy = CI->getType();
1029 Align Alignment = DL.getValueOrABITypeAlignment(MA,
1030 LoadTy->getScalarType());
1031 if (TTI.isLegalMaskedGather(LoadTy, Alignment) &&
1032 !TTI.forceScalarizeMaskedGather(cast<VectorType>(LoadTy), Alignment))
1033 return false;
1034 scalarizeMaskedGather(DL, CI, DTU, ModifiedDT);
1035 return true;
1036 }
1037 case Intrinsic::masked_scatter: {
1038 MaybeAlign MA =
1039 cast<ConstantInt>(CI->getArgOperand(2))->getMaybeAlignValue();
1040 Type *StoreTy = CI->getArgOperand(0)->getType();
1041 Align Alignment = DL.getValueOrABITypeAlignment(MA,
1042 StoreTy->getScalarType());
1043 if (TTI.isLegalMaskedScatter(StoreTy, Alignment) &&
1044 !TTI.forceScalarizeMaskedScatter(cast<VectorType>(StoreTy),
1045 Alignment))
1046 return false;
1047 scalarizeMaskedScatter(DL, CI, DTU, ModifiedDT);
1048 return true;
1049 }
1050 case Intrinsic::masked_expandload:
1052 CI->getType(),
1054 return false;
1055 scalarizeMaskedExpandLoad(DL, CI, DTU, ModifiedDT);
1056 return true;
1057 case Intrinsic::masked_compressstore:
1059 CI->getArgOperand(0)->getType(),
1061 return false;
1062 scalarizeMaskedCompressStore(DL, CI, DTU, ModifiedDT);
1063 return true;
1064 }
1065 }
1066
1067 return false;
1068}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Error unsupported(const char *Str, const Triple &T)
Definition: MachO.cpp:71
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
static bool runImpl(Function &F, const TargetLowering &TLI)
#define F(x, y, z)
Definition: MD5.cpp:55
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void scalarizeMaskedVectorHistogram(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static unsigned adjustForEndian(const DataLayout &DL, unsigned VectorWidth, unsigned Idx)
static void scalarizeMaskedScatter(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static void scalarizeMaskedExpandLoad(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static void scalarizeMaskedCompressStore(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static void scalarizeMaskedStore(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool runImpl(Function &F, const TargetTransformInfo &TTI, DominatorTree *DT)
static void scalarizeMaskedLoad(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static void scalarizeMaskedGather(const DataLayout &DL, CallInst *CI, DomTreeUpdater *DTU, bool &ModifiedDT)
static bool isConstantIntVector(Value *Mask)
#define DEBUG_TYPE
Scalarize unsupported masked memory intrinsics
This pass exposes codegen information to IR-level passes.
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:219
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
AttributeSet getParamAttrs(unsigned ArgNo) const
The attributes for the argument or parameter at the given index are returned.
MaybeAlign getAlignment() const
Definition: Attributes.cpp:926
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
Definition: InstrTypes.h:1838
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1542
This class represents a function call, abstracting a target machine's calling convention.
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string and methods for querying it.
Definition: DataLayout.h:109
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:317
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2480
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:2468
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:536
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1812
Value * CreateConstInBoundsGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1894
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2253
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2405
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2135
ConstantInt * getIntN(unsigned N, uint64_t C)
Get a constant N-bit value, zero extended or truncated from a 64-bit value.
Definition: IRBuilder.h:494
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1795
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2502
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1480
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition: IRBuilder.h:1808
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1332
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1831
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:499
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2674
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
An instruction for reading from memory.
Definition: Instructions.h:174
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isLegalMaskedScatter(Type *DataType, Align Alignment) const
Return true if the target supports masked scatter.
bool isLegalMaskedExpandLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked expand load.
bool isLegalMaskedGather(Type *DataType, Align Alignment) const
Return true if the target supports masked gather.
bool forceScalarizeMaskedGather(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.gather intrinsics.
bool isLegalMaskedCompressStore(Type *DataType, Align Alignment) const
Return true if the target supports masked compress store.
bool isLegalMaskedStore(Type *DataType, Align Alignment) const
Return true if the target supports masked store.
bool isLegalMaskedVectorHistogram(Type *AddrType, Type *DataType) const
bool forceScalarizeMaskedScatter(VectorType *Type, Align Alignment) const
Return true if the target forces scalarizing of llvm.masked.scatter intrinsics.
bool isLegalMaskedLoad(Type *DataType, Align Alignment) const
Return true if the target supports masked load.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:139
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
const ParentTy * getParent() const
Definition: ilist_node.h:32
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
FunctionPass * createScalarizeMaskedMemIntrinLegacyPass()
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
void initializeScalarizeMaskedMemIntrinLegacyPassPass(PassRegistry &)
constexpr int PoisonMaskElem
@ Add
Sum of integers.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
Definition: Alignment.h:141
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)