49#include "llvm/IR/IntrinsicsX86.h"
57using namespace PatternMatch;
59#define DEBUG_TYPE "pre-amx-config"
63 if (Operand->getType()->isX86_AMXTy())
69 return II->
getIntrinsicID() == Intrinsic::x86_tileloadd64_internal ||
74 return II->
getIntrinsicID() == Intrinsic::x86_tilestored64_internal;
80 if (Operand->getType()->isX86_AMXTy())
87 if ((isa<CallInst>(
I) && !isa<IntrinsicInst>(
I)) ||
I->isTerminator())
94class X86PreAMXConfig {
101 bool preTileConfig();
103 bool findConfigShapes(PosAndShapesMap &PosAndShapes);
164 Value *PalettePos =
Builder.CreateGEP(I8Ty, I8Ptr, PaletteOffset);
165 Builder.CreateStore(PaletteValue, PalettePos);
167 for (
int I = 0,
E = Shapes.
size() / 2;
I <
E;
I++) {
170 const std::string ShapeName =
"amx.tmm." + itostr(
I);
171 Value *RowPos =
Builder.CreateGEP(I8Ty, I8Ptr, RowOffset,
172 ShapeName +
".shape.row");
173 Value *ColPos =
Builder.CreateGEP(I8Ty, I8Ptr, ColOffset);
174 ColPos =
Builder.CreateBitCast(ColPos, PointerType::get(I16Ty, 0),
175 ShapeName +
".shape.col");
176 Value *Row = Shapes[
I * 2];
177 Value *Col = Shapes[
I * 2 + 1];
178 Row =
Builder.CreateTrunc(Row, I8Ty);
179 Builder.CreateStore(Row, RowPos);
180 Builder.CreateStore(Col, ColPos);
184void X86PreAMXConfig::addTileConfig(
Instruction *ModelStart,
189 unsigned AddrSpace =
DL.getAllocaAddrSpace();
191 Type *V512Ty = VectorType::get(
Builder.getInt32Ty(), 16,
false);
195 new AllocaInst(V512Ty, AddrSpace,
"", &
F.getEntryBlock().front());
196 Addr->setAlignment(Alignment);
201 preWriteTileCfg(I8Ptr, Builder, Shapes);
203 Builder.CreateIntrinsic(Intrinsic::x86_ldtilecfg_internal, std::nullopt,
220 if (
Op->getType()->isX86_AMXTy())
227 return Loads.
empty() && (
ST == cast<Value>(KeyAMX));
234 if (!
Op->getType()->isX86_AMXTy())
238 "All KeyAMX's tile definiation should comes from TileLoad!");
246 return Shapes.
size() != 0;
269 for (
auto I = Iter,
E = BB->
end();
I !=
E; ++
I) {
278 if (!checkVolatileModel(Loads, II, KeyAMX))
283 assert(!KeyAMX &&
"Too many key amx intrinsic!");
287 assert(PosEnd != BB->
end() &&
"Not find TileStore!");
291 KeyAMX = dyn_cast<IntrinsicInst>(&*PosEnd);
294 assert(Shapes.
empty() &&
"Shapes should be clean.");
295 getKeyAMXShapes(KeyAMX, Shapes);
311bool X86PreAMXConfig::findConfigShapes(PosAndShapesMap &PosAndShapes) {
322 I = getShapesAndConfigPosEnd(
I, PosAndShapes[&*
I]);
359bool X86PreAMXConfig::preTileConfig() {
360 PosAndShapesMap PosAndShapes;
361 bool NeedCfg = findConfigShapes(PosAndShapes);
364 for (
auto &IPAndShapes : PosAndShapes)
365 addTileConfig(IPAndShapes.first, IPAndShapes.second);
392 X86PreAMXConfig PCFG(
F);
393 C = PCFG.preTileConfig();
407static const char PassName[] =
"Pre AMX Tile Config";
408char X86PreAMXConfigPass::ID = 0;
414 return new X86PreAMXConfigPass();
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static const T * Find(StringRef S, ArrayRef< T > A)
Find KV in array using binary search.
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
Target-Independent Code Generator Pass Configuration Options pass.
static bool brokenVolatile(Instruction *I)
static bool onlyTileDef(IntrinsicInst *II)
static bool isAMXIntrinsic(IntrinsicInst *II)
static bool isTileLoad(IntrinsicInst *II)
static bool isTileStore(IntrinsicInst *II)
static const char PassName[]
an instruction to allocate memory on the stack
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
This is an important class for using LLVM in a threaded context.
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
bool contains(const T &V) const
Check if the SmallSet contains the given element.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getInt16Ty(LLVMContext &C)
static IntegerType * getInt8Ty(LLVMContext &C)
bool isX86_AMXTy() const
Return true if this is X86 AMX.
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
void initializeX86PreAMXConfigPassPass(PassRegistry &)
FunctionPass * createX86PreAMXConfigPass()
The pass inserts tile config intrinsics for AMX fast register allocation.
This struct is a compact representation of a valid (non-zero power of two) alignment.