#ifndef LLVM_ANALYSIS_INTERVALITERATOR_H
#define LLVM_ANALYSIS_INTERVALITERATOR_H

#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Interval.h"
#include "llvm/Analysis/IntervalPartition.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <iterator>
#include <set>
#include <utility>
#include <vector>

namespace llvm {

// getNodeHeader - Return the header BasicBlock of a source graph node.
inline BasicBlock *getNodeHeader(BasicBlock *BB) { return BB; }
inline BasicBlock *getNodeHeader(Interval *I) { return I->getHeaderNode(); }

// getSourceGraphNode - Map a header BasicBlock back to its source graph node.
inline BasicBlock *getSourceGraphNode(Function *, BasicBlock *BB) { return BB; }
inline Interval *getSourceGraphNode(IntervalPartition *IP, BasicBlock *BB) {
  return IP->getBlockInterval(BB);
}

// addNodeToInterval - Add a node to the new interval: the BasicBlock itself
// for a CFG source graph, or all member blocks for an interval source graph.
inline void addNodeToInterval(Interval *Int, BasicBlock *BB) {
  Int->Nodes.push_back(BB);
}
inline void addNodeToInterval(Interval *Int, Interval *I) {
  append_range(Int->Nodes, I->Nodes);
}

template <class NodeTy, class OrigContainer_t,
          class GT = GraphTraits<NodeTy *>,
          class IGT = GraphTraits<Inverse<NodeTy *>>>
class IntervalIterator {
  std::vector<std::pair<Interval *, typename Interval::succ_iterator>> IntStack;
  std::set<BasicBlock *> Visited;
  OrigContainer_t *OrigContainer;
  bool IOwnMem; // If true, delete intervals when done with them.

public:
  using iterator_category = std::forward_iterator_tag;

  IntervalIterator() = default; // End iterator: empty stack.

  IntervalIterator(Function *M, bool OwnMemory) : IOwnMem(OwnMemory) {
    OrigContainer = M;
    if (!ProcessInterval(&M->front()))
      llvm_unreachable("ProcessInterval should never fail for first interval!");
  }

  IntervalIterator(IntervalIterator &&x)
      : IntStack(std::move(x.IntStack)), Visited(std::move(x.Visited)),
        OrigContainer(x.OrigContainer), IOwnMem(x.IOwnMem) {
    x.IOwnMem = false;
  }

  IntervalIterator(IntervalPartition &IP, bool OwnMemory) : IOwnMem(OwnMemory) {
    OrigContainer = &IP;
    if (!ProcessInterval(IP.getRootInterval()))
      llvm_unreachable("ProcessInterval should never fail for first interval!");
  }

  ~IntervalIterator() {
    if (IOwnMem)
      while (!IntStack.empty()) {
        delete operator*();
        IntStack.pop_back();
      }
  }

  bool operator==(const IntervalIterator &x) const {
    return IntStack == x.IntStack;
  }
  bool operator!=(const IntervalIterator &x) const { return !(*this == x); }

  const Interval *operator*() const { return IntStack.back().first; }
  const Interval *operator->() const { return operator*(); }

  IntervalIterator &operator++() { // Preincrement
    assert(!IntStack.empty() && "Attempting to use interval iterator at end!");
    do {
      // Visit the remaining successors of the interval on top of the stack.
      Interval::succ_iterator &SuccIt = IntStack.back().second,
                              EndIt = succ_end(IntStack.back().first);
      while (SuccIt != EndIt) {
        bool Done = ProcessInterval(getSourceGraphNode(OrigContainer, *SuccIt));
        ++SuccIt;
        if (Done)
          return *this; // Found a new interval: use it.
      }
      // This interval is exhausted: free it if we own it, then pop it.
      if (IOwnMem)
        delete IntStack.back().first;
      IntStack.pop_back();
    } while (!IntStack.empty());
    return *this;
  }

  IntervalIterator operator++(int) { // Postincrement
    IntervalIterator tmp = *this;
    ++*this;
    return tmp;
  }

private:
  // ProcessInterval - Build a new interval rooted at Node by walking the
  // source graph.  Returns false if the header was already visited.
  bool ProcessInterval(NodeTy *Node) {
    BasicBlock *Header = getNodeHeader(Node);
    if (!Visited.insert(Header).second)
      return false;

    Interval *Int = new Interval(Header);
    // Check all of our successors to see if they belong in the interval.
    for (typename GT::ChildIteratorType I = GT::child_begin(Node),
                                        E = GT::child_end(Node);
         I != E; ++I)
      ProcessNode(Int, getSourceGraphNode(OrigContainer, *I));

    IntStack.push_back(std::make_pair(Int, succ_begin(Int)));
    return true;
  }

  // ProcessNode - Add Node to the current interval if all of its predecessors
  // are already inside it; otherwise record it as an interval successor.
  void ProcessNode(Interval *Int, NodeTy *Node) {
    assert(Int && "Null interval == bad!");
    assert(Node && "Null Node == bad!");

    BasicBlock *NodeHeader = getNodeHeader(Node);
    if (Visited.count(NodeHeader)) {     // Node already been visited?
      if (Int->contains(NodeHeader))     // Already in this interval: done.
        return;
      if (!Int->isSuccessor(NodeHeader)) // In another interval: add successor.
        Int->Successors.push_back(NodeHeader);
      return;
    }

    // If any predecessor lies outside the interval, Node cannot be added yet.
    for (typename IGT::ChildIteratorType I = IGT::child_begin(Node),
                                         E = IGT::child_end(Node);
         I != E; ++I) {
      if (!Int->contains(*I)) {
        if (!Int->isSuccessor(NodeHeader))
          Int->Successors.push_back(NodeHeader);
        return;
      }
    }

    // All predecessors are in the interval, so Node belongs to it as well.
    addNodeToInterval(Int, Node);
    Visited.insert(NodeHeader);
    if (Int->isSuccessor(NodeHeader))
      erase_value(Int->Successors, NodeHeader); // No longer a mere successor.

    // Now that Node is in the interval, some of its successors may be too.
    for (typename GT::ChildIteratorType It = GT::child_begin(Node),
                                        End = GT::child_end(Node);
         It != End; ++It)
      ProcessNode(Int, getSourceGraphNode(OrigContainer, *It));
  }
};

using function_interval_iterator = IntervalIterator<BasicBlock, Function>;
using interval_part_interval_iterator =
    IntervalIterator<Interval, IntervalPartition>;

inline function_interval_iterator intervals_begin(Function *F,
                                                  bool DeleteInts = true) {
  return function_interval_iterator(F, DeleteInts);
}
inline function_interval_iterator intervals_end(Function *) {
  return function_interval_iterator();
}

} // end namespace llvm

#endif // LLVM_ANALYSIS_INTERVALITERATOR_H
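The iterator is normally driven through the intervals_begin/intervals_end helpers. Below is a minimal usage sketch, not part of the header: it assumes a valid, non-empty llvm::Function, and printIntervals is an illustrative name rather than an LLVM API.

#include "llvm/Analysis/IntervalIterator.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Illustrative helper: print the first-level intervals of a function.
static void printIntervals(Function &F) {
  // DeleteInts = true: the iterator owns the Interval objects it builds and
  // frees each one once iteration has moved past it.
  for (function_interval_iterator I = intervals_begin(&F, /*DeleteInts=*/true),
                                  E = intervals_end(&F);
       I != E; ++I) {
    const Interval *Int = *I;
    errs() << "Interval headed by " << Int->getHeaderNode()->getName()
           << " contains " << Int->Nodes.size() << " block(s)\n";
  }
}

Pass DeleteInts = false instead when something else, such as an IntervalPartition, will take ownership of the intervals as they are produced.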
const Interval * operator*() const
Interval::succ_iterator succ_end(Interval *I)
bool operator!=(const IntervalIterator &x) const
IntervalIterator operator++(int)
BasicBlock * getSourceGraphNode(Function *, BasicBlock *BB)
void addNodeToInterval(Interval *Int, BasicBlock *BB)
IntervalIterator< Interval, IntervalPartition > interval_part_interval_iterator
BasicBlock - LLVM Basic Block Representation.
IntervalIterator< BasicBlock, Function > function_interval_iterator
void erase_value(Container &C, ValueType V)
Wrapper function to remove a value from a container.
Interval Class - An Interval is a set of nodes defined such that every node in the interval has all of its predecessors in the interval (except for the header).
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the succ_begin/succ_end functions.
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
const Interval * operator->() const
IntervalIterator(IntervalPartition &IP, bool OwnMemory)
IntervalIterator()=default
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
function_interval_iterator intervals_end(Function *)
IntervalIterator & operator++()
bool operator==(const IntervalIterator &x) const
function_interval_iterator intervals_begin(Function *F, bool DeleteInts=true)
std::vector< BasicBlock * >::iterator succ_iterator
BasicBlock * getNodeHeader(BasicBlock *BB)
IntervalIterator(Function *M, bool OwnMemory)
std::forward_iterator_tag iterator_category
IntervalIterator(IntervalIterator &&x)
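Because interval_part_interval_iterator walks an existing IntervalPartition, it can enumerate the second-level intervals of a function. Another small sketch, also not part of the header: it assumes an IntervalPartition that has already been computed (for example by the IntervalPartition analysis pass), and countSecondLevelIntervals is an illustrative name.

#include "llvm/Analysis/IntervalIterator.h"
#include "llvm/Analysis/IntervalPartition.h"

using namespace llvm;

// Illustrative helper: count the intervals of an existing interval partition.
static unsigned countSecondLevelIntervals(IntervalPartition &IP) {
  unsigned N = 0;
  // A default-constructed iterator is the end iterator; OwnMemory = true makes
  // the iterator delete each Interval it creates once it has moved past it.
  for (interval_part_interval_iterator I(IP, /*OwnMemory=*/true), E;
       I != E; ++I)
    ++N;
  return N;
}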