Go to the documentation of this file.
48 #include "llvm/IR/IntrinsicsX86.h"
56 using namespace PatternMatch;
58 #define DEBUG_TYPE "pre-amx-config"
62 if (Operand->getType()->isX86_AMXTy())
68 return II->
getIntrinsicID() == Intrinsic::x86_tileloadd64_internal ||
73 return II->
getIntrinsicID() == Intrinsic::x86_tilestored64_internal;
79 if (Operand->getType()->isX86_AMXTy())
86 if ((isa<CallInst>(
I) && !isa<IntrinsicInst>(
I)) ||
I->isTerminator())
93 class X86PreAMXConfig {
100 bool preTileConfig();
102 bool findConfigShapes(PosAndShapesMap &PosAndShapes);
163 Value *PalettePos =
Builder.CreateGEP(I8Ty, I8Ptr, PaletteOffset);
164 Builder.CreateStore(PaletteValue, PalettePos);
166 for (
int I = 0,
E = Shapes.size() / 2;
I <
E;
I++) {
169 const std::string ShapeName =
"amx.tmm." + itostr(
I);
170 Value *RowPos =
Builder.CreateGEP(I8Ty, I8Ptr, RowOffset,
171 ShapeName +
".shape.row");
172 Value *ColPos =
Builder.CreateGEP(I8Ty, I8Ptr, ColOffset);
174 ShapeName +
".shape.col");
175 Value *Row = Shapes[
I * 2];
176 Value *Col = Shapes[
I * 2 + 1];
177 Row =
Builder.CreateTrunc(Row, I8Ty);
178 Builder.CreateStore(Row, RowPos);
179 Builder.CreateStore(Col, ColPos);
183 void X86PreAMXConfig::addTileConfig(
Instruction *ModelStart,
188 unsigned AddrSpace =
DL.getAllocaAddrSpace();
194 new AllocaInst(V512Ty, AddrSpace,
"", &
F.getEntryBlock().front());
195 Addr->setAlignment(Alignment);
200 preWriteTileCfg(I8Ptr,
Builder, Shapes);
202 Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, std::nullopt,
219 if (
Op->getType()->isX86_AMXTy())
226 return Loads.
empty() && (
ST == cast<Value>(KeyAMX));
233 if (!
Op->getType()->isX86_AMXTy())
237 "All KeyAMX's tile definiation should comes from TileLoad!");
245 return Shapes.size() != 0;
268 for (
auto I = Iter,
E =
BB->end();
I !=
E; ++
I) {
277 if (!checkVolatileModel(Loads, II, KeyAMX))
282 assert(!KeyAMX &&
"Too many key amx intrinsic!");
286 assert(PosEnd !=
BB->end() &&
"Not find TileStore!");
290 KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd);
293 assert(Shapes.empty() &&
"Shapes should be clean.");
294 getKeyAMXShapes(KeyAMX, Shapes);
310 bool X86PreAMXConfig::findConfigShapes(PosAndShapesMap &PosAndShapes) {
321 I = getShapesAndConfigPosEnd(
I, PosAndShapes[&*
I]);
358 bool X86PreAMXConfig::preTileConfig() {
359 PosAndShapesMap PosAndShapes;
360 bool NeedCfg = findConfigShapes(PosAndShapes);
363 for (
auto &IPAndShapes : PosAndShapes)
364 addTileConfig(IPAndShapes.first, IPAndShapes.second);
391 X86PreAMXConfig PCFG(
F);
392 C = PCFG.preTileConfig();
406 static const char PassName[] =
"Pre AMX Tile Config";
413 return new X86PreAMXConfigPass();
This is an optimization pass for GlobalISel generic memory operations.
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
A parsed version of the target data layout string, and methods for querying it.
InstListType::iterator iterator
Instruction iterators...
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
static bool isAMXIntrinsic(IntrinsicInst *II)
The instances of the Type class are immutable: once they are created, they are never changed.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
This class implements a map that also provides access to all stored values in a deterministic order.
static IntegerType * getInt8Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
LLVM Basic Block Representation.
static bool onlyTileDef(IntrinsicInst *II)
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at application launch and destroyed by llvm_shutdown.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
(vector float) vec_cmpeq(*A, *B) C
Represent the analysis usage information of a pass.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
This struct is a compact representation of a valid (non-zero power of two) alignment.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
static bool isTileStore(IntrinsicInst *II)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Target-Independent Code Generator Pass Configuration Options.
static bool isTileLoad(IntrinsicInst *II)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
This is an important class for using LLVM in a threaded context.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Primary interface to the complete machine description for the target machine.
A Module instance is used to store all the information related to an LLVM module.
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
FunctionPass * createX86PreAMXConfigPass()
The pass inserts tile config intrinsics for AMX fast register allocation.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Type * getType() const
All values are typed, get the type of this value.
Common base class shared among various IRBuilders.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool runOnFunction(Function &F, bool PostInlining)
static IntegerType * getInt64Ty(LLVMContext &C)
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
static const char PassName[]
static bool brokenVolatile(Instruction *I)
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
A wrapper class for inspecting calls to intrinsic functions.
unsigned getNumOperands() const
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
static IntegerType * getInt16Ty(LLVMContext &C)
const char LLVMTargetMachineRef TM
FunctionPass class - This class is used to implement most global optimizations.
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
AnalysisUsage & addRequired()
an instruction to allocate memory on the stack
Value * getOperand(unsigned i) const
LLVM Value Representation.
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
void initializeX86PreAMXConfigPassPass(PassRegistry &)
bool isX86_AMXTy() const
Return true if this is X86 AMX.
bool contains(const T &V) const
Check if the SmallSet contains the given element.