LLVM 23.0.0git
IRSimilarityIdentifier.cpp
Go to the documentation of this file.
1//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Implementation file for the IRSimilarityIdentifier for identifying
11// similarities in IR including the IRInstructionMapper.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/User.h"
23
24using namespace llvm;
25using namespace IRSimilarity;
26
27namespace llvm {
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
33
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
37 cl::desc("disable outlining indirect calls."));
38
39static cl::opt<bool>
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
43
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46 cl::desc("Don't match or outline intrinsics"));
47} // namespace llvm
48
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
53}
54
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
61 if (Predicate != C->getPredicate())
62 RevisedPredicate = Predicate;
63 }
64
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use &OI : Inst->operands()) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals.insert(OperVals.begin(), OI.get());
72 continue;
73 }
74
75 OperVals.push_back(OI.get());
76 }
77
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
81 llvm::append_range(OperVals, PN->blocks());
82}
83
86
88 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89 assert((isa<UncondBrInst, CondBrInst>(Inst)) && "Instruction must be branch");
90
92
93 BBNumIt = BasicBlockToInteger.find(Inst->getParent());
94 assert(BBNumIt != BasicBlockToInteger.end() &&
95 "Could not find location for BasicBlock!");
96
97 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
98
99 for (Value *V : getBlockOperVals()) {
101 BBNumIt = BasicBlockToInteger.find(Successor);
102 assert(BBNumIt != BasicBlockToInteger.end() &&
103 "Could not find number for BasicBlock!");
104 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
105
106 int Relative = OtherBlockNumber - CurrentBlockNumber;
107 RelativeBlockLocations.push_back(Relative);
108 }
109}
110
113 return OperVals;
115 return ArrayRef<Value *>(OperVals).drop_front(1);
116
117 if (PHINode *PN = dyn_cast<PHINode>(Inst))
118 return ArrayRef<Value *>(
119 std::next(OperVals.begin(), PN->getNumIncomingValues()),
120 OperVals.end()
121 );
122
123 llvm_unreachable("Instruction must be branch or PHINode");
124}
125
126void IRInstructionData::setCalleeName(bool MatchByName) {
128 assert(CI && "Instruction must be call");
129
130 CalleeName = "";
132 // To hash intrinsics, we use the opcode, and types like the other
133 // instructions, but also, the Intrinsic ID, and the Name of the
134 // intrinsic.
135 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
136 FunctionType *FT = II->getFunctionType();
137 // If there is an overloaded name, we have to use the complex version
138 // of getName to get the entire string.
139 if (Intrinsic::isOverloaded(IntrinsicID))
140 CalleeName =
141 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
142 // If there is not an overloaded name, we only need to use this version.
143 else
144 CalleeName = Intrinsic::getName(IntrinsicID).str();
145
146 return;
147 }
148
149 if (!CI->isIndirectCall() && MatchByName)
151}
152
154 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
155 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
156
159
160 BBNumIt = BasicBlockToInteger.find(PN->getParent());
161 assert(BBNumIt != BasicBlockToInteger.end() &&
162 "Could not find location for BasicBlock!");
163
164 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
165
166 // Convert the incoming blocks of the PHINode to an integer value, based on
167 // the relative distances between the current block and the incoming block.
168 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
169 BasicBlock *Incoming = PN->getIncomingBlock(Idx);
170 BBNumIt = BasicBlockToInteger.find(Incoming);
171 assert(BBNumIt != BasicBlockToInteger.end() &&
172 "Could not find number for BasicBlock!");
173 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
174
175 int Relative = OtherBlockNumber - CurrentBlockNumber;
176 RelativeBlockLocations.push_back(Relative);
177 }
178}
179
181 switch (CI->getPredicate()) {
190 return CI->getSwappedPredicate();
191 default:
192 return CI->getPredicate();
193 }
194}
195
198 "Can only get a predicate from a compare instruction");
199
201 return *RevisedPredicate;
202
203 return cast<CmpInst>(Inst)->getPredicate();
204}
205
208 "Can only get a name from a call instruction");
209
210 assert(CalleeName && "CalleeName has not been set");
211
212 return *CalleeName;
213}
214
216 const IRInstructionData &B) {
217
218 if (!A.Legal || !B.Legal)
219 return false;
220
221 // Check if we are performing the same sort of operation on the same types
222 // but not on the same values.
223 if (!A.Inst->isSameOperationAs(B.Inst)) {
224 // If there is a predicate, this means that either there is a swapped
225 // predicate, or that the types are different, we want to make sure that
226 // the predicates are equivalent via swapping.
227 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
228
229 if (A.getPredicate() != B.getPredicate())
230 return false;
231
232 // If the predicates are the same via swap, make sure that the types are
233 // still the same.
234 auto ZippedTypes = zip(A.OperVals, B.OperVals);
235
236 return all_of(
237 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
238 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
239 });
240 }
241
242 return false;
243 }
244
245 // Since any GEP Instruction operands after the first operand cannot be
246 // defined by a register, we must make sure that the operands after the first
247 // are the same in the two instructions
248 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
249 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
250
251 // If the instructions do not have the same inbounds restrictions, we do
252 // not consider them the same.
253 if (GEP->isInBounds() != OtherGEP->isInBounds())
254 return false;
255
256 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
257
258 // We increment here since we do not care about the first instruction,
259 // we only care about the following operands since they must be the
260 // exact same to be considered similar.
261 return all_of(drop_begin(ZippedOperands),
262 [](std::tuple<llvm::Use &, llvm::Use &> R) {
263 return std::get<0>(R) == std::get<1>(R);
264 });
265 }
266
267 // If the instructions are functions calls, we make sure that the function
268 // name is the same. We already know that the types are since is
269 // isSameOperationAs is true.
270 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
271 if (A.getCalleeName() != B.getCalleeName())
272 return false;
273 }
274
277 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
278 return false;
279
280 return true;
281}
282
283// TODO: This is the same as the MachineOutliner, and should be consolidated
284// into the same interface.
286 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
287 std::vector<unsigned> &IntegerMapping) {
288 BasicBlock::iterator It = BB.begin();
289
290 std::vector<unsigned> IntegerMappingForBB;
291 std::vector<IRInstructionData *> InstrListForBB;
292
293 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
294 switch (InstClassifier.visit(*It)) {
295 case InstrType::Legal:
296 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
297 break;
299 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
300 break;
302 AddedIllegalLastTime = false;
303 break;
304 }
305 }
306
308 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
309 for (IRInstructionData *ID : InstrListForBB)
310 this->IDL->push_back(*ID);
311 llvm::append_range(InstrList, InstrListForBB);
312 llvm::append_range(IntegerMapping, IntegerMappingForBB);
313}
314
315// TODO: This is the same as the MachineOutliner, and should be consolidated
316// into the same interface.
318 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
319 std::vector<IRInstructionData *> &InstrListForBB) {
320 // We added something legal, so we should unset the AddedLegalLastTime
321 // flag.
322 AddedIllegalLastTime = false;
323
324 // If we have at least two adjacent legal instructions (which may have
325 // invisible instructions in between), remember that.
327 HaveLegalRange = true;
329
330 // Get the integer for this instruction or give it the current
331 // LegalInstrNumber.
333 InstrListForBB.push_back(ID);
334
336 ID->setBranchSuccessors(BasicBlockToInteger);
337
338 if (isa<CallInst>(*It))
339 ID->setCalleeName(EnableMatchCallsByName);
340
341 if (isa<PHINode>(*It))
342 ID->setPHIPredecessors(BasicBlockToInteger);
343
344 // Add to the instruction list
345 bool WasInserted;
347 ResultIt;
348 std::tie(ResultIt, WasInserted) =
349 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
350 unsigned INumber = ResultIt->second;
351
352 // There was an insertion.
353 if (WasInserted)
355
356 IntegerMappingForBB.push_back(INumber);
357
358 // Make sure we don't overflow or use any integers reserved by the DenseMap.
360 "Instruction mapping overflow!");
361
363 "Tried to assign DenseMap tombstone or empty key to instruction.");
365 "Tried to assign DenseMap tombstone or empty key to instruction.");
366
367 return INumber;
368}
369
375
380
385
386// TODO: This is the same as the MachineOutliner, and should be consolidated
387// into the same interface.
389 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
390 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
391 // Can't combine an illegal instruction. Set the flag.
393
394 // Only add one illegal number per range of legal numbers.
396 return IllegalInstrNumber;
397
398 IRInstructionData *ID = nullptr;
399 if (!End)
400 ID = allocateIRInstructionData(*It, false, *IDL);
401 else
403 InstrListForBB.push_back(ID);
404
405 // Remember that we added an illegal number last time.
407 unsigned INumber = IllegalInstrNumber;
408 IntegerMappingForBB.push_back(IllegalInstrNumber--);
409
411 "Instruction mapping overflow!");
412
414 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
415
417 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
418
419 return INumber;
420}
421
422IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
423 IRInstructionData *FirstInstIt,
424 IRInstructionData *LastInstIt)
425 : StartIdx(StartIdx), Len(Len) {
426
427 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
428 assert(LastInstIt != nullptr && "Instruction is nullptr!");
429 assert(StartIdx + Len > StartIdx &&
430 "Overflow for IRSimilarityCandidate range?");
431 assert(Len - 1 == static_cast<unsigned>(std::distance(
432 iterator(FirstInstIt), iterator(LastInstIt))) &&
433 "Length of the first and last IRInstructionData do not match the "
434 "given length");
435
436 // We iterate over the given instructions, and map each unique value
437 // to a unique number in the IRSimilarityCandidate ValueToNumber and
438 // NumberToValue maps. A constant get its own value globally, the individual
439 // uses of the constants are not considered to be unique.
440 //
441 // IR: Mapping Added:
442 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
443 // %add2 = add i32 %a, %1 %add2 -> 4
444 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
445 //
446 // when replace with global values, starting from 1, would be
447 //
448 // 3 = add i32 1, 2
449 // 4 = add i32 1, 3
450 // 6 = add i32 5, 2
451 unsigned LocalValNumber = 1;
453 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
454 // Map the operand values to an unsigned integer if it does not already
455 // have an unsigned integer assigned to it.
456 for (Value *Arg : ID->OperVals)
457 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
458 NumberToValue.try_emplace(LocalValNumber, Arg);
459 LocalValNumber++;
460 }
461
462 // Mapping the instructions to an unsigned integer if it is not already
463 // exist in the mapping.
464 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
465 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
466 LocalValNumber++;
467 }
468 }
469
470 // Setting the first and last instruction data pointers for the candidate. If
471 // we got through the entire for loop without hitting an assert, we know
472 // that both of these instructions are not nullptrs.
473 FirstInst = FirstInstIt;
474 LastInst = LastInstIt;
475
476 // Add the basic blocks contained in the set into the global value numbering.
478 getBasicBlocks(BBSet);
479 for (BasicBlock *BB : BBSet) {
480 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
481 NumberToValue.try_emplace(LocalValNumber, BB);
482 LocalValNumber++;
483 }
484 }
485}
486
488 const IRSimilarityCandidate &B) {
489 if (A.getLength() != B.getLength())
490 return false;
491
492 auto InstrDataForBoth =
493 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
494
495 return all_of(InstrDataForBoth,
496 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
497 IRInstructionData &A = std::get<0>(R);
498 IRInstructionData &B = std::get<1>(R);
499 if (!A.Legal || !B.Legal)
500 return false;
501 return isClose(A, B);
502 });
503}
504
505/// Determine if one or more of the assigned global value numbers for the
506/// operands in \p TargetValueNumbers is in the current mapping set for operand
507/// numbers in \p SourceOperands. The set of possible corresponding global
508/// value numbers are replaced with the most recent version of compatible
509/// values.
510///
511/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
512/// value number for the source IRInstructionCandidate.
513/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
514/// IRSimilarityCandidate global value numbers to a set of possible numbers in
515/// the target.
516/// \param [in] SourceOperands - The operands in the original
517/// IRSimilarityCandidate in the current instruction.
518/// \param [in] TargetValueNumbers - The global value numbers of the operands in
519/// the corresponding Instruction in the other IRSimilarityCandidate.
520/// \returns true if there exists a possible mapping between the source
521/// Instruction operands and the target Instruction operands, and false if not.
523 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
524 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
525 ArrayRef<Value *> &SourceOperands,
526 DenseSet<unsigned> &TargetValueNumbers){
527
529
530 unsigned ArgVal;
531 bool WasInserted;
532
533 // Iterate over the operands in the source IRSimilarityCandidate to determine
534 // whether there exists an operand in the other IRSimilarityCandidate that
535 // creates a valid mapping of Value to Value between the
536 // IRSimilarityCaniddates.
537 for (Value *V : SourceOperands) {
538 ArgVal = SourceValueToNumberMapping.find(V)->second;
539
540 // Instead of finding a current mapping, we attempt to insert a set.
541 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
542 std::make_pair(ArgVal, TargetValueNumbers));
543
544 // We need to iterate over the items in other IRSimilarityCandidate's
545 // Instruction to determine whether there is a valid mapping of
546 // Value to Value.
547 DenseSet<unsigned> NewSet;
548 for (unsigned &Curr : ValueMappingIt->second)
549 // If we can find the value in the mapping, we add it to the new set.
550 if (TargetValueNumbers.contains(Curr))
551 NewSet.insert(Curr);
552
553 // If we could not find a Value, return 0.
554 if (NewSet.empty())
555 return false;
556
557 // Otherwise replace the old mapping with the newly constructed one.
558 if (NewSet.size() != ValueMappingIt->second.size())
559 ValueMappingIt->second.swap(NewSet);
560
561 // We have reached no conclusions about the mapping, and cannot remove
562 // any items from the other operands, so we move to check the next operand.
563 if (ValueMappingIt->second.size() != 1)
564 continue;
565
566 unsigned ValToRemove = *ValueMappingIt->second.begin();
567 // When there is only one item left in the mapping for and operand, remove
568 // the value from the other operands. If it results in there being no
569 // mapping, return false, it means the mapping is wrong
570 for (Value *InnerV : SourceOperands) {
571 if (V == InnerV)
572 continue;
573
574 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
575 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
576 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
577 continue;
578
579 ValueMappingIt->second.erase(ValToRemove);
580 if (ValueMappingIt->second.empty())
581 return false;
582 }
583 }
584
585 return true;
586}
587
588/// Determine if operand number \p TargetArgVal is in the current mapping set
589/// for operand number \p SourceArgVal.
590///
591/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
592/// value numbers from source IRSimilarityCandidate to target
593/// IRSimilarityCandidate.
594/// \param [in] SourceArgVal The global value number for an operand in the
595/// in the original candidate.
596/// \param [in] TargetArgVal The global value number for the corresponding
597/// operand in the other candidate.
598/// \returns True if there exists a mapping and false if not.
600 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
601 unsigned SourceArgVal, unsigned TargetArgVal) {
602 // We are given two unsigned integers representing the global values of
603 // the operands in different IRSimilarityCandidates and a current mapping
604 // between the two.
605 //
606 // Source Operand GVN: 1
607 // Target Operand GVN: 2
608 // CurrentMapping: {1: {1, 2}}
609 //
610 // Since we have mapping, and the target operand is contained in the set, we
611 // update it to:
612 // CurrentMapping: {1: {2}}
613 // and can return true. But, if the mapping was
614 // CurrentMapping: {1: {3}}
615 // we would return false.
616
617 bool WasInserted;
619
620 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
621 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
622
623 // If we created a new mapping, then we are done.
624 if (WasInserted)
625 return true;
626
627 // If there is more than one option in the mapping set, and the target value
628 // is included in the mapping set replace that set with one that only includes
629 // the target value, as it is the only valid mapping via the non commutative
630 // instruction.
631
632 DenseSet<unsigned> &TargetSet = Val->second;
633 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
634 TargetSet.clear();
635 TargetSet.insert(TargetArgVal);
636 return true;
637 }
638
639 // Return true if we can find the value in the set.
640 return TargetSet.contains(TargetArgVal);
641}
642
645 // Iterators to keep track of where we are in the operands for each
646 // Instruction.
647 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
648 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
649 unsigned OperandLength = A.OperVals.size();
650
651 // For each operand, get the value numbering and ensure it is consistent.
652 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
653 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
654 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
655
656 // Attempt to add a set with only the target value. If there is no mapping
657 // we can create it here.
658 //
659 // For an instruction like a subtraction:
660 // IRSimilarityCandidateA: IRSimilarityCandidateB:
661 // %resultA = sub %a, %b %resultB = sub %d, %e
662 //
663 // We map %a -> %d and %b -> %e.
664 //
665 // And check to see whether their mapping is consistent in
666 // checkNumberingAndReplace.
667
668 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
669 return false;
670
671 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
672 return false;
673 }
674 return true;
675}
676
679 DenseSet<unsigned> ValueNumbersA;
680 DenseSet<unsigned> ValueNumbersB;
681
682 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
683 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
684 unsigned OperandLength = A.OperVals.size();
685
686 // Find the value number sets for the operands.
687 for (unsigned Idx = 0; Idx < OperandLength;
688 Idx++, VItA++, VItB++) {
689 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
690 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
691 }
692
693 // Iterate over the operands in the first IRSimilarityCandidate and make sure
694 // there exists a possible mapping with the operands in the second
695 // IRSimilarityCandidate.
696 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
697 A.ValueNumberMapping, A.OperVals,
698 ValueNumbersB))
699 return false;
700
701 // Iterate over the operands in the second IRSimilarityCandidate and make sure
702 // there exists a possible mapping with the operands in the first
703 // IRSimilarityCandidate.
704 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
705 B.ValueNumberMapping, B.OperVals,
706 ValueNumbersA))
707 return false;
708
709 return true;
710}
711
713 const unsigned InstValA, const unsigned &InstValB,
714 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
715 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
717 bool WasInserted;
718 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
719 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
720 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
721 return false;
722 else if (ValueMappingIt->second.size() != 1) {
723 // Snapshot the set before iterating: when InstValA maps to itself the
724 // erase below removes InstValA from the very set being iterated, which
725 // invalidates the range iterator under backward-shift deletion.
726 SmallVector<unsigned> OtherVals(ValueMappingIt->second.begin(),
727 ValueMappingIt->second.end());
728 for (unsigned OtherVal : OtherVals) {
729 if (OtherVal == InstValB)
730 continue;
731 auto OtherValIt = ValueNumberMappingA.find(OtherVal);
732 if (OtherValIt == ValueNumberMappingA.end())
733 continue;
734 OtherValIt->second.erase(InstValA);
735 }
736 ValueNumberMappingA.erase(ValueMappingIt);
737 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
738 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
739 }
740
741 return true;
742}
743
746 // Get the basic blocks the label refers to.
747 BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
748 BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
749
750 // Get the basic blocks contained in each region.
751 DenseSet<BasicBlock *> BasicBlockA;
752 DenseSet<BasicBlock *> BasicBlockB;
753 A.IRSC.getBasicBlocks(BasicBlockA);
754 B.IRSC.getBasicBlocks(BasicBlockB);
755
756 // Determine if the block is contained in the region.
757 bool AContained = BasicBlockA.contains(ABB);
758 bool BContained = BasicBlockB.contains(BBB);
759
760 // Both blocks need to be contained in the region, or both need to be outside
761 // the region.
762 if (AContained != BContained)
763 return false;
764
765 // If both are contained, then we need to make sure that the relative
766 // distance to the target blocks are the same.
767 if (AContained)
768 return A.RelativeLocation == B.RelativeLocation;
769 return true;
770}
771
778
783
786 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
787 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
788 if (A.getLength() != B.getLength())
789 return false;
790
791 if (A.ValueToNumber.size() != B.ValueToNumber.size())
792 return false;
793
794 iterator ItA = A.begin();
795 iterator ItB = B.begin();
796
797 // These ValueNumber Mapping sets create a create a mapping between the values
798 // in one candidate to values in the other candidate. If we create a set with
799 // one element, and that same element maps to the original element in the
800 // candidate we have a good mapping.
801
802 // Iterate over the instructions contained in each candidate
803 unsigned SectionLength = A.getStartIdx() + A.getLength();
804 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
805 ItA++, ItB++, Loc++) {
806 // Make sure the instructions are similar to one another.
807 if (!isClose(*ItA, *ItB))
808 return false;
809
810 Instruction *IA = ItA->Inst;
811 Instruction *IB = ItB->Inst;
812
813 if (!ItA->Legal || !ItB->Legal)
814 return false;
815
816 // Get the operand sets for the instructions.
817 ArrayRef<Value *> OperValsA = ItA->OperVals;
818 ArrayRef<Value *> OperValsB = ItB->OperVals;
819
820 unsigned InstValA = A.ValueToNumber.find(IA)->second;
821 unsigned InstValB = B.ValueToNumber.find(IB)->second;
822
823 // Ensure that the mappings for the instructions exists.
824 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
825 ValueNumberMappingB))
826 return false;
827
828 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
829 ValueNumberMappingA))
830 return false;
831
832 // We have different paths for commutative instructions and non-commutative
833 // instructions since commutative instructions could allow multiple mappings
834 // to certain values.
835 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
836 !isa<IntrinsicInst>(IA)) {
838 {A, OperValsA, ValueNumberMappingA},
839 {B, OperValsB, ValueNumberMappingB}))
840 return false;
841 continue;
842 }
843
844 // Handle the non-commutative cases.
846 {A, OperValsA, ValueNumberMappingA},
847 {B, OperValsB, ValueNumberMappingB}))
848 return false;
849
850 // Here we check that between two corresponding instructions,
851 // when referring to a basic block in the same region, the
852 // relative locations are the same. And, that the instructions refer to
853 // basic blocks outside the region in the same corresponding locations.
854
855 // We are able to make the assumption about blocks outside of the region
856 // since the target block labels are considered values and will follow the
857 // same number matching that we defined for the other instructions in the
858 // region. So, at this point, in each location we target a specific block
859 // outside the region, we are targeting a corresponding block in each
860 // analagous location in the region we are comparing to.
862 IA->getOpcode() != IB->getOpcode())
863 continue;
864
865 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
866 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
867 ArrayRef<Value *> ABL = ItA->getBlockOperVals();
868 ArrayRef<Value *> BBL = ItB->getBlockOperVals();
869
870 // Check to make sure that the number of operands, and branching locations
871 // between BranchInsts is the same.
872 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
873 ABL.size() != BBL.size())
874 return false;
875
876 assert(RelBlockLocsA.size() == ABL.size() &&
877 "Block information vectors not the same size.");
878 assert(RelBlockLocsB.size() == BBL.size() &&
879 "Block information vectors not the same size.");
880
881 ZippedRelativeLocationsT ZippedRelativeLocations =
882 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
883 if (any_of(ZippedRelativeLocations,
884 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
886 {A, std::get<0>(R), std::get<2>(R)},
887 {B, std::get<1>(R), std::get<3>(R)});
888 }))
889 return false;
890 }
891 return true;
892}
893
895 const IRSimilarityCandidate &B) {
896 auto DoesOverlap = [](const IRSimilarityCandidate &X,
897 const IRSimilarityCandidate &Y) {
898 // Check:
899 // XXXXXX X starts before Y ends
900 // YYYYYYY Y starts after X starts
901 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
902 };
903
904 return DoesOverlap(A, B) || DoesOverlap(B, A);
905}
906
907void IRSimilarityIdentifier::populateMapper(
908 Module &M, std::vector<IRInstructionData *> &InstrList,
909 std::vector<unsigned> &IntegerMapping) {
910
911 std::vector<IRInstructionData *> InstrListForModule;
912 std::vector<unsigned> IntegerMappingForModule;
913 // Iterate over the functions in the module to map each Instruction in each
914 // BasicBlock to an unsigned integer.
915 Mapper.initializeForBBs(M);
916
917 for (Function &F : M) {
918
919 if (F.empty())
920 continue;
921
922 for (BasicBlock &BB : F) {
923
924 // BB has potential to have similarity since it has a size greater than 2
925 // and can therefore match other regions greater than 2. Map it to a list
926 // of unsigned integers.
927 Mapper.convertToUnsignedVec(BB, InstrListForModule,
928 IntegerMappingForModule);
929 }
930
931 BasicBlock::iterator It = F.begin()->end();
932 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
933 true);
934 if (InstrListForModule.size() > 0)
935 Mapper.IDL->push_back(*InstrListForModule.back());
936 }
937
938 // Insert the InstrListForModule at the end of the overall InstrList so that
939 // we can have a long InstrList for the entire set of Modules being analyzed.
940 llvm::append_range(InstrList, InstrListForModule);
941 // Do the same as above, but for IntegerMapping.
942 llvm::append_range(IntegerMapping, IntegerMappingForModule);
943}
944
945void IRSimilarityIdentifier::populateMapper(
946 ArrayRef<std::unique_ptr<Module>> &Modules,
947 std::vector<IRInstructionData *> &InstrList,
948 std::vector<unsigned> &IntegerMapping) {
949
950 // Iterate over, and map the instructions in each module.
951 for (const std::unique_ptr<Module> &M : Modules)
952 populateMapper(*M, InstrList, IntegerMapping);
953}
954
955/// From a repeated subsequence, find all the different instances of the
956/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
957/// the IRInstructionData in subsequence.
958///
959/// \param [in] Mapper - The instruction mapper for basic correctness checks.
960/// \param [in] InstrList - The vector that holds the instruction data.
961/// \param [in] IntegerMapping - The vector that holds the mapped integers.
962/// \param [out] CandsForRepSubstring - The vector to store the generated
963/// IRSimilarityCandidates.
965 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
966 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
967 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
968
969 unsigned StringLen = RS.Length;
970 if (StringLen < 2)
971 return;
972
973 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
974 for (const unsigned &StartIdx : RS.StartIndices) {
975 unsigned EndIdx = StartIdx + StringLen - 1;
976
977 // Check that this subsequence does not contain an illegal instruction.
978 bool ContainsIllegal = false;
979 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
980 unsigned Key = IntegerMapping[CurrIdx];
981 if (Key > Mapper.IllegalInstrNumber) {
982 ContainsIllegal = true;
983 break;
984 }
985 }
986
987 // If we have an illegal instruction, we should not create an
988 // IRSimilarityCandidate for this region.
989 if (ContainsIllegal)
990 continue;
991
992 // We are getting iterators to the instructions in this region of code
993 // by advancing the start and end indices from the start of the
994 // InstrList.
995 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
996 std::advance(StartIt, StartIdx);
997 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
998 std::advance(EndIt, EndIdx);
999
1000 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1001 }
1002}
1003
1005 IRSimilarityCandidate &SourceCand,
1006 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1007 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1008 assert(SourceCand.CanonNumToNumber.size() != 0 &&
1009 "Base canonical relationship is empty!");
1010 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1011 "Base canonical relationship is empty!");
1012
1013 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1014 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1015
1016 DenseSet<unsigned> UsedGVNs;
1017 // Iterate over the mappings provided from this candidate to SourceCand. We
1018 // are then able to map the GVN in this candidate to the same canonical number
1019 // given to the corresponding GVN in SourceCand.
1020 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1021 unsigned SourceGVN = GVNMapping.first;
1022
1023 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1024
1025 unsigned ResultGVN;
1026 // We need special handling if we have more than one potential value. This
1027 // means that there are at least two GVNs that could correspond to this GVN.
1028 // This could lead to potential swapping later on, so we make a decision
1029 // here to ensure a one-to-one mapping.
1030 if (GVNMapping.second.size() > 1) {
1031 bool Found = false;
1032 for (unsigned Val : GVNMapping.second) {
1033 // We make sure the target value number hasn't already been reserved.
1034 if (UsedGVNs.contains(Val))
1035 continue;
1036
1037 // We make sure that the opposite mapping is still consistent.
1039 FromSourceMapping.find(Val);
1040
1041 if (!It->second.contains(SourceGVN))
1042 continue;
1043
1044 // We pick the first item that satisfies these conditions.
1045 Found = true;
1046 ResultGVN = Val;
1047 break;
1048 }
1049
1050 assert(Found && "Could not find matching value for source GVN");
1051 (void)Found;
1052
1053 } else
1054 ResultGVN = *GVNMapping.second.begin();
1055
1056 // Whatever GVN is found, we mark it as used.
1057 UsedGVNs.insert(ResultGVN);
1058
1059 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1060 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1061 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1062 }
1063
1065 getBasicBlocks(BBSet);
1066 // Find canonical numbers for the BasicBlocks in the current candidate.
1067 // This is done by finding the corresponding value for the first instruction
1068 // in the block in the current candidate, finding the matching value in the
1069 // source candidate. Then by finding the parent of this value, use the
1070 // canonical number of the block in the source candidate for the canonical
1071 // number in the current candidate.
1072 for (BasicBlock *BB : BBSet) {
1073 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1074
1075 // We can skip the BasicBlock if the canonical numbering has already been
1076 // found in a separate instruction.
1077 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1078 continue;
1079
1080 // If the basic block is the starting block, then the shared instruction may
1081 // not be the first instruction in the block, it will be the first
1082 // instruction in the similarity region.
1083 Value *FirstOutlineInst =
1084 BB == getStartBB() ? frontInstruction() : &*BB->begin();
1085
1086 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1087 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1088 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1089 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1090 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1091 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1092 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1093 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1094 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1095 }
1096}
1097
1099 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1100 IRSimilarityCandidate &TargetCandLarge) {
1101 assert(!SourceCand.CanonNumToNumber.empty() &&
1102 "Canonical Relationship is non-empty");
1103 assert(!SourceCand.NumberToCanonNum.empty() &&
1104 "Canonical Relationship is non-empty");
1105
1106 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1107 "Canonical Relationship is non-empty");
1108 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1109 "Canonical Relationship is non-empty");
1110
1111 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1112 "Canonical Relationship is non-empty");
1113 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1114 "Canonical Relationship is non-empty");
1115
1116 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1117 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1118
1119 // We're going to use the larger candidates as a "bridge" to create the
1120 // canonical number for the target candidate since we have idetified two
1121 // candidates as subsequences of larger sequences, and therefore must be
1122 // structurally similar.
1123 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1124 Value *CurrVal = ValueNumPair.first;
1125 unsigned TargetCandGVN = ValueNumPair.second;
1126
1127 // Find the numbering in the large candidate that surrounds the
1128 // current candidate.
1129 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1130 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1131
1132 // Get the canonical numbering in the large target candidate.
1133 std::optional<unsigned> OTargetCandCanon =
1134 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1135 assert(OTargetCandCanon.has_value() &&
1136 "Canononical Number not found for GVN");
1137
1138 // Get the GVN in the large source candidate from the canonical numbering.
1139 std::optional<unsigned> OLargeSourceGVN =
1140 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1141 assert(OLargeSourceGVN.has_value() &&
1142 "GVN Number not found for Canonical Number");
1143
1144 // Get the Value from the GVN in the large source candidate.
1145 std::optional<Value *> OLargeSourceV =
1146 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1147 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1148
1149 // Get the GVN number for the Value in the source candidate.
1150 std::optional<unsigned> OSourceGVN =
1151 SourceCand.getGVN(OLargeSourceV.value());
1152 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1153
1154 // Get the canonical numbering from the GVN/
1155 std::optional<unsigned> OSourceCanon =
1156 SourceCand.getCanonicalNum(OSourceGVN.value());
1157 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1158
1159 // Insert the canonical numbering and GVN pair into their respective
1160 // mappings.
1161 CanonNumToNumber.insert(
1162 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1163 NumberToCanonNum.insert(
1164 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1165 }
1166}
1167
1169 IRSimilarityCandidate &CurrCand) {
1170 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1171 "Canonical Relationship is non-empty");
1172 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1173 "Canonical Relationship is non-empty");
1174
1175 unsigned CanonNum = 0;
1176 // Iterate over the value numbers found, the order does not matter in this
1177 // case.
1178 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1179 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1180 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1181 CanonNum++;
1182 }
1183}
1184
1185/// Look for larger IRSimilarityCandidates From the previously matched
1186/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1187/// an overlap, return a pair of structurally similar, larger
1188/// IRSimilarityCandidates.
1189///
1190/// \param [in] CandA - The first candidate we are trying to determine the
1191/// structure of.
1192/// \param [in] CandB - The second candidate we are trying to determine the
1193/// structure of.
1194/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1195/// a circuit to the IRSimilarityCandidates that include this instruction.
1196/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1197/// number representing the structural group assigned to it.
1198static std::optional<
1199 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1202 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1204 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1205 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1206 DenseSet<unsigned> IncludedGroupsA;
1207 DenseSet<unsigned> IncludedGroupsB;
1208
1209 // Find the overall similarity group numbers that fully contain the candidate,
1210 // and record the larger candidate for each group.
1211 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1212 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1213 Result;
1214
1215 unsigned CandAStart = CandA.getStartIdx();
1216 unsigned CandAEnd = CandA.getEndIdx();
1217 unsigned CandBStart = CandB.getStartIdx();
1218 unsigned CandBEnd = CandB.getEndIdx();
1219 if (IdxToCandidateIt == IndexToIncludedCand.end())
1220 return Result;
1221 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1222 if (MatchedCand->getStartIdx() > CandAStart ||
1223 (MatchedCand->getEndIdx() < CandAEnd))
1224 continue;
1225 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1226 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1227 IncludedGroupsA.insert(GroupNum);
1228 }
1229
1230 // Find the overall similarity group numbers that fully contain the next
1231 // candidate, and record the larger candidate for each group.
1232 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1233 if (IdxToCandidateIt == IndexToIncludedCand.end())
1234 return Result;
1235 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1236 if (MatchedCand->getStartIdx() > CandBStart ||
1237 MatchedCand->getEndIdx() < CandBEnd)
1238 continue;
1239 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1240 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1241 IncludedGroupsB.insert(GroupNum);
1242 }
1243
1244 // Find the intersection between the two groups, these are the groups where
1245 // the larger candidates exist.
1246 set_intersect(IncludedGroupsA, IncludedGroupsB);
1247
1248 // If there is no intersection between the sets, then we cannot determine
1249 // whether or not there is a match.
1250 if (IncludedGroupsA.empty())
1251 return Result;
1252
1253 // Create a pair that contains the larger candidates.
1254 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1255 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1256 Result = std::make_pair(ItA->second, ItB->second);
1257 return Result;
1258}
1259
1260/// From the list of IRSimilarityCandidates, perform a comparison between each
1261/// IRSimilarityCandidate to determine if there are overlapping
1262/// IRInstructionData, or if they do not have the same structure.
1263///
1264/// \param [in] CandsForRepSubstring - The vector containing the
1265/// IRSimilarityCandidates.
1266/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1267/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1268/// vector are structurally similar to one another.
1269/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1270/// a circuit to the IRSimilarityCandidates that include this instruction.
1271/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1272/// number representing the structural group assigned to it.
1274 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1275 DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1276 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1278 ) {
1279 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1280 InnerCandEndIt;
1281
1282 // IRSimilarityCandidates each have a structure for operand use. It is
1283 // possible that two instances of the same subsequences have different
1284 // structure. Each type of structure found is assigned a number. This
1285 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1286 // discovered it fits within.
1288
1289 // Find the compatibility from each candidate to the others to determine
1290 // which candidates overlap and which have the same structure by mapping
1291 // each structure to a different group.
1292 bool SameStructure;
1293 bool Inserted;
1294 unsigned CurrentGroupNum = 0;
1295 unsigned OuterGroupNum;
1299
1300 // Iterate over the candidates to determine its structural and overlapping
1301 // compatibility with other instructions
1302 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1303 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1304 for (CandIt = CandsForRepSubstring.begin(),
1305 CandEndIt = CandsForRepSubstring.end();
1306 CandIt != CandEndIt; CandIt++) {
1307
1308 // Determine if it has an assigned structural group already.
1309 // If not, we assign it one, and add it to our mapping.
1310 std::tie(CandToGroupIt, Inserted) =
1311 CandToGroup.try_emplace(&*CandIt, CurrentGroupNum);
1312 if (Inserted)
1313 ++CurrentGroupNum;
1314
1315 // Get the structural group number from the iterator.
1316 OuterGroupNum = CandToGroupIt->second;
1317
1318 // Check if we already have a list of IRSimilarityCandidates for the current
1319 // structural group. Create one if one does not exist.
1320 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1321 if (CurrentGroupPair == StructuralGroups.end()) {
1323 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1324 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1325 }
1326
1327 // Iterate over the IRSimilarityCandidates following the current
1328 // IRSimilarityCandidate in the list to determine whether the two
1329 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1330 // of IRSimilarityCandidates.
1331 for (InnerCandIt = std::next(CandIt),
1332 InnerCandEndIt = CandsForRepSubstring.end();
1333 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1334
1335 // We check if the inner item has a group already, if it does, we skip it.
1336 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1337 if (CandToGroupItInner != CandToGroup.end())
1338 continue;
1339
1340 // Check if we have found structural similarity between two candidates
1341 // that fully contains the first and second candidates.
1342 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1343 LargerPair = CheckLargerCands(
1344 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1345
1346 // If a pair was found, it means that we can assume that these smaller
1347 // substrings are also structurally similar. Use the larger candidates to
1348 // determine the canonical mapping between the two sections.
1349 if (LargerPair.has_value()) {
1350 SameStructure = true;
1351 InnerCandIt->createCanonicalRelationFrom(
1352 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1353 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1354 CurrentGroupPair->second.push_back(*InnerCandIt);
1355 continue;
1356 }
1357
1358 // Otherwise we determine if they have the same structure and add it to
1359 // vector if they match.
1360 ValueNumberMappingA.clear();
1361 ValueNumberMappingB.clear();
1363 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1364 if (!SameStructure)
1365 continue;
1366
1367 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1368 ValueNumberMappingB);
1369 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1370 CurrentGroupPair->second.push_back(*InnerCandIt);
1371 }
1372 }
1373}
1374
1375void IRSimilarityIdentifier::findCandidates(
1376 std::vector<IRInstructionData *> &InstrList,
1377 std::vector<unsigned> &IntegerMapping) {
1378 SuffixTree ST(IntegerMapping);
1379
1380 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1381 std::vector<SimilarityGroup> NewCandidateGroups;
1382
1383 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1384 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
1385 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1386
1387 // Iterate over the subsequences found by the Suffix Tree to create
1388 // IRSimilarityCandidates for each repeated subsequence and determine which
1389 // instances are structurally similar to one another.
1390
1391 // Sort the suffix tree from longest substring to shortest.
1392 std::vector<SuffixTree::RepeatedSubstring> RSes;
1393 for (SuffixTree::RepeatedSubstring &RS : ST)
1394 RSes.push_back(RS);
1395
1396 llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
1397 const SuffixTree::RepeatedSubstring &RHS) {
1398 return LHS.Length > RHS.Length;
1399 });
1400 for (SuffixTree::RepeatedSubstring &RS : RSes) {
1401 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1402 CandsForRepSubstring);
1403
1404 if (CandsForRepSubstring.size() < 2)
1405 continue;
1406
1407 findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1408 IndexToIncludedCand, CandToGroup);
1409 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1410 // We only add the group if it contains more than one
1411 // IRSimilarityCandidate. If there is only one, that means there is no
1412 // other repeated subsequence with the same structure.
1413 if (Group.second.size() > 1) {
1414 SimilarityCandidates->push_back(Group.second);
1415 // Iterate over each candidate in the group, and add an entry for each
1416 // instruction included with a mapping to a set of
1417 // IRSimilarityCandidates that include that instruction.
1418 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1419 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1420 Idx <= Edx; ++Idx)
1421 IndexToIncludedCand[Idx].insert(&IRCand);
1422 // Add mapping of candidate to the overall similarity group number.
1423 CandToGroup.insert(
1424 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1425 }
1426 }
1427 }
1428
1429 CandsForRepSubstring.clear();
1430 StructuralGroups.clear();
1431 NewCandidateGroups.clear();
1432 }
1433}
1434
1436 ArrayRef<std::unique_ptr<Module>> Modules) {
1438
1439 std::vector<IRInstructionData *> InstrList;
1440 std::vector<unsigned> IntegerMapping;
1441 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1442 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1443 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1444 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1445 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1446
1447 populateMapper(Modules, InstrList, IntegerMapping);
1448 findCandidates(InstrList, IntegerMapping);
1449
1450 return *SimilarityCandidates;
1451}
1452
1455 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1456 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1457 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1458 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1459 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1460
1461 std::vector<IRInstructionData *> InstrList;
1462 std::vector<unsigned> IntegerMapping;
1463
1464 populateMapper(M, InstrList, IntegerMapping);
1465 findCandidates(InstrList, IntegerMapping);
1466
1467 return *SimilarityCandidates;
1468}
1469
1471 "ir-similarity-identifier", false, true)
1472
1474 : ModulePass(ID) {}
1475
1482
1484 IRSI.reset();
1485 return false;
1486}
1487
1489 IRSI->findSimilarity(M);
1490 return false;
1491}
1492
1493AnalysisKey IRSimilarityAnalysis::Key;
1498 false);
1499 IRSI.findSimilarity(M);
1500 return IRSI;
1501}
1502
1506 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1507 IRSI.getSimilarity();
1508
1509 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1510 OS << CandVec.size() << " candidates of length "
1511 << CandVec.begin()->getLength() << ". Found in: \n";
1512 for (IRSimilarityCandidate &Cand : CandVec) {
1513 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1514 << ", Basic Block: ";
1515 if (Cand.front()->Inst->getParent()->getName().str() == "")
1516 OS << "(unnamed)";
1517 else
1518 OS << Cand.front()->Inst->getParent()->getName().str();
1519 OS << "\n Start Instruction: ";
1520 Cand.frontInstruction()->print(OS);
1521 OS << "\n End Instruction: ";
1522 Cand.backInstruction()->print(OS);
1523 OS << "\n";
1524 }
1525 }
1526
1527 return PreservedAnalyses::all();
1528}
1529
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseMap class.
Hexagon Common GEP
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file defines generic set operations that may be used on set's of different types,...
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
const_pointer iterator
Definition ArrayRef.h:47
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
LLVM_ABI bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:744
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:745
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:752
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:753
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:890
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:254
bool erase(const KeyT &Val)
Definition DenseMap.h:332
DenseMapIterator< KeyT, ValueT, KeyInfoT, BucketT > iterator
Definition DenseMap.h:78
unsigned size() const
Definition DenseMap.h:114
bool empty() const
Definition DenseMap.h:113
iterator begin()
Definition DenseMap.h:82
iterator end()
Definition DenseMap.h:85
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition DenseMap.h:169
void swap(DerivedT &RHS)
Definition DenseMap.h:390
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:239
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
Class to represent function types.
ArrayRef< Type * > params() const
LLVM_ABI PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
LLVM_ABI Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
LLVM_ABI void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static LLVM_ABI bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
static LLVM_ABI bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
static LLVM_ABI bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static LLVM_ABI bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static LLVM_ABI void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static LLVM_ABI bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
LLVM_ABI IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static LLVM_ABI bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static LLVM_ABI bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
LLVM_ABI SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
A wrapper class for inspecting calls to intrinsic functions.
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition Pass.h:255
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::string str() const
Get the contents as an std::string.
Definition StringRef.h:222
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM Value Representation.
Definition Value.h:75
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:212
iterator find(const_arg_type_t< ValueT > V)
Definition DenseSet.h:177
size_type size() const
Definition DenseSet.h:87
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:185
const ParentTy * getParent() const
Definition ilist_node.h:34
ilist_select_iterator_type< OptionsT, false, false > iterator
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
LLVM_ABI bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
LLVM_ABI StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
LLVM_ABI bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:830
void stable_sort(R &&Range)
Definition STLExtras.h:2115
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
ArrayRef(const T &OneElt) -> ArrayRef< T >
static cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Module > ModuleAnalysisManager
Convenience typedef for the Module analysis manager.
Definition MIRParser.h:39
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition Analysis.h:29
An information struct used to provide DenseMap with the various necessary components for a given valu...
This provides the utilities for hashing an Instruction to an unsigned integer.
LLVM_ABI StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
LLVM_ABI void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
LLVM_ABI IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
LLVM_ABI ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static LLVM_ABI CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
LLVM_ABI void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
LLVM_ABI void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
bool Legal
The legality of the wrapped instruction.
LLVM_ABI void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
LLVM_ABI CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
LLVM_ABI IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
LLVM_ABI IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
LLVM_ABI unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
LLVM_ABI void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
LLVM_ABI unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
A repeated substring in the tree.
Definition SuffixTree.h:50