LLVM 19.0.0git
IRSimilarityIdentifier.cpp
Go to the documentation of this file.
1//===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// \file
10// Implementation file for the IRSimilarityIdentifier for identifying
11// similarities in IR including the IRInstructionMapper.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/DenseMap.h"
18#include "llvm/IR/Intrinsics.h"
19#include "llvm/IR/Operator.h"
20#include "llvm/IR/User.h"
23
24using namespace llvm;
25using namespace IRSimilarity;
26
27namespace llvm {
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
33
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
37 cl::desc("disable outlining indirect calls."));
38
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
43
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46 cl::desc("Don't match or outline intrinsics"));
47} // namespace llvm
48
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
53}
54
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
59 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
61 if (Predicate != C->getPredicate())
62 RevisedPredicate = Predicate;
63 }
64
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use &OI : Inst->operands()) {
68 if (isa<CmpInst>(Inst) && RevisedPredicate) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals.insert(OperVals.begin(), OI.get());
72 continue;
73 }
74
75 OperVals.push_back(OI.get());
76 }
77
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
80 if (PHINode *PN = dyn_cast<PHINode>(Inst))
81 for (BasicBlock *BB : PN->blocks())
83}
84
86 : IDL(&IDList) {}
87
89 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
90 assert(isa<BranchInst>(Inst) && "Instruction must be branch");
91
92 BranchInst *BI = cast<BranchInst>(Inst);
94
95 BBNumIt = BasicBlockToInteger.find(BI->getParent());
96 assert(BBNumIt != BasicBlockToInteger.end() &&
97 "Could not find location for BasicBlock!");
98
99 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
100
101 for (Value *V : getBlockOperVals()) {
102 BasicBlock *Successor = cast<BasicBlock>(V);
103 BBNumIt = BasicBlockToInteger.find(Successor);
104 assert(BBNumIt != BasicBlockToInteger.end() &&
105 "Could not find number for BasicBlock!");
106 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
107
108 int Relative = OtherBlockNumber - CurrentBlockNumber;
110 }
111}
112
114 assert((isa<BranchInst>(Inst) ||
115 isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
116
117 if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
118 return ArrayRef<Value *>(
119 std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
120 OperVals.end()
121 );
122
123 if (PHINode *PN = dyn_cast<PHINode>(Inst))
124 return ArrayRef<Value *>(
125 std::next(OperVals.begin(), PN->getNumIncomingValues()),
126 OperVals.end()
127 );
128
129 return ArrayRef<Value *>();
130}
131
132void IRInstructionData::setCalleeName(bool MatchByName) {
133 CallInst *CI = dyn_cast<CallInst>(Inst);
134 assert(CI && "Instruction must be call");
135
136 CalleeName = "";
137 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
138 // To hash intrinsics, we use the opcode, and types like the other
139 // instructions, but also, the Intrinsic ID, and the Name of the
140 // intrinsic.
141 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
142 FunctionType *FT = II->getFunctionType();
143 // If there is an overloaded name, we have to use the complex version
144 // of getName to get the entire string.
145 if (Intrinsic::isOverloaded(IntrinsicID))
146 CalleeName =
147 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
148 // If there is not an overloaded name, we only need to use this version.
149 else
150 CalleeName = Intrinsic::getName(IntrinsicID).str();
151
152 return;
153 }
154
155 if (!CI->isIndirectCall() && MatchByName)
157}
158
160 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
161 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
162
163 PHINode *PN = cast<PHINode>(Inst);
165
166 BBNumIt = BasicBlockToInteger.find(PN->getParent());
167 assert(BBNumIt != BasicBlockToInteger.end() &&
168 "Could not find location for BasicBlock!");
169
170 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
171
172 // Convert the incoming blocks of the PHINode to an integer value, based on
173 // the relative distances between the current block and the incoming block.
174 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
176 BBNumIt = BasicBlockToInteger.find(Incoming);
177 assert(BBNumIt != BasicBlockToInteger.end() &&
178 "Could not find number for BasicBlock!");
179 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
180
181 int Relative = OtherBlockNumber - CurrentBlockNumber;
183 }
184}
185
187 switch (CI->getPredicate()) {
196 return CI->getSwappedPredicate();
197 default:
198 return CI->getPredicate();
199 }
200}
201
203 assert(isa<CmpInst>(Inst) &&
204 "Can only get a predicate from a compare instruction");
205
207 return *RevisedPredicate;
208
209 return cast<CmpInst>(Inst)->getPredicate();
210}
211
213 assert(isa<CallInst>(Inst) &&
214 "Can only get a name from a call instruction");
215
216 assert(CalleeName && "CalleeName has not been set");
217
218 return *CalleeName;
219}
220
222 const IRInstructionData &B) {
223
224 if (!A.Legal || !B.Legal)
225 return false;
226
227 // Check if we are performing the same sort of operation on the same types
228 // but not on the same values.
229 if (!A.Inst->isSameOperationAs(B.Inst)) {
230 // If there is a predicate, this means that either there is a swapped
231 // predicate, or that the types are different, we want to make sure that
232 // the predicates are equivalent via swapping.
233 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
234
235 if (A.getPredicate() != B.getPredicate())
236 return false;
237
238 // If the predicates are the same via swap, make sure that the types are
239 // still the same.
240 auto ZippedTypes = zip(A.OperVals, B.OperVals);
241
242 return all_of(
243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
245 });
246 }
247
248 return false;
249 }
250
251 // Since any GEP Instruction operands after the first operand cannot be
252 // defined by a register, we must make sure that the operands after the first
253 // are the same in the two instructions
254 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
255 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
256
257 // If the instructions do not have the same inbounds restrictions, we do
258 // not consider them the same.
259 if (GEP->isInBounds() != OtherGEP->isInBounds())
260 return false;
261
262 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
263
264 // We increment here since we do not care about the first instruction,
265 // we only care about the following operands since they must be the
266 // exact same to be considered similar.
267 return all_of(drop_begin(ZippedOperands),
268 [](std::tuple<llvm::Use &, llvm::Use &> R) {
269 return std::get<0>(R) == std::get<1>(R);
270 });
271 }
272
273 // If the instructions are functions calls, we make sure that the function
274 // name is the same. We already know that the types are since is
275 // isSameOperationAs is true.
276 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
277 if (A.getCalleeName() != B.getCalleeName())
278 return false;
279 }
280
281 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
282 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
283 return false;
284
285 return true;
286}
287
288// TODO: This is the same as the MachineOutliner, and should be consolidated
289// into the same interface.
291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292 std::vector<unsigned> &IntegerMapping) {
293 BasicBlock::iterator It = BB.begin();
294
295 std::vector<unsigned> IntegerMappingForBB;
296 std::vector<IRInstructionData *> InstrListForBB;
297
298 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
299 switch (InstClassifier.visit(*It)) {
300 case InstrType::Legal:
301 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
302 break;
304 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
305 break;
307 AddedIllegalLastTime = false;
308 break;
309 }
310 }
311
313 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
314 for (IRInstructionData *ID : InstrListForBB)
315 this->IDL->push_back(*ID);
316 llvm::append_range(InstrList, InstrListForBB);
317 llvm::append_range(IntegerMapping, IntegerMappingForBB);
318}
319
320// TODO: This is the same as the MachineOutliner, and should be consolidated
321// into the same interface.
323 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
324 std::vector<IRInstructionData *> &InstrListForBB) {
325 // We added something legal, so we should unset the AddedLegalLastTime
326 // flag.
327 AddedIllegalLastTime = false;
328
329 // If we have at least two adjacent legal instructions (which may have
330 // invisible instructions in between), remember that.
332 HaveLegalRange = true;
334
335 // Get the integer for this instruction or give it the current
336 // LegalInstrNumber.
338 InstrListForBB.push_back(ID);
339
340 if (isa<BranchInst>(*It))
341 ID->setBranchSuccessors(BasicBlockToInteger);
342
343 if (isa<CallInst>(*It))
344 ID->setCalleeName(EnableMatchCallsByName);
345
346 if (isa<PHINode>(*It))
347 ID->setPHIPredecessors(BasicBlockToInteger);
348
349 // Add to the instruction list
350 bool WasInserted;
352 ResultIt;
353 std::tie(ResultIt, WasInserted) =
354 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
355 unsigned INumber = ResultIt->second;
356
357 // There was an insertion.
358 if (WasInserted)
360
361 IntegerMappingForBB.push_back(INumber);
362
363 // Make sure we don't overflow or use any integers reserved by the DenseMap.
365 "Instruction mapping overflow!");
366
368 "Tried to assign DenseMap tombstone or empty key to instruction.");
370 "Tried to assign DenseMap tombstone or empty key to instruction.");
371
372 return INumber;
373}
374
378 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
379}
380
383 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
384}
385
388 return new (IDLAllocator->Allocate()) IRInstructionDataList();
389}
390
391// TODO: This is the same as the MachineOutliner, and should be consolidated
392// into the same interface.
394 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
395 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
396 // Can't combine an illegal instruction. Set the flag.
398
399 // Only add one illegal number per range of legal numbers.
401 return IllegalInstrNumber;
402
403 IRInstructionData *ID = nullptr;
404 if (!End)
405 ID = allocateIRInstructionData(*It, false, *IDL);
406 else
408 InstrListForBB.push_back(ID);
409
410 // Remember that we added an illegal number last time.
412 unsigned INumber = IllegalInstrNumber;
413 IntegerMappingForBB.push_back(IllegalInstrNumber--);
414
416 "Instruction mapping overflow!");
417
419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
420
422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
423
424 return INumber;
425}
426
427IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
428 IRInstructionData *FirstInstIt,
429 IRInstructionData *LastInstIt)
430 : StartIdx(StartIdx), Len(Len) {
431
432 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
433 assert(LastInstIt != nullptr && "Instruction is nullptr!");
434 assert(StartIdx + Len > StartIdx &&
435 "Overflow for IRSimilarityCandidate range?");
436 assert(Len - 1 == static_cast<unsigned>(std::distance(
437 iterator(FirstInstIt), iterator(LastInstIt))) &&
438 "Length of the first and last IRInstructionData do not match the "
439 "given length");
440
441 // We iterate over the given instructions, and map each unique value
442 // to a unique number in the IRSimilarityCandidate ValueToNumber and
443 // NumberToValue maps. A constant get its own value globally, the individual
444 // uses of the constants are not considered to be unique.
445 //
446 // IR: Mapping Added:
447 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
448 // %add2 = add i32 %a, %1 %add2 -> 4
449 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
450 //
451 // when replace with global values, starting from 1, would be
452 //
453 // 3 = add i32 1, 2
454 // 4 = add i32 1, 3
455 // 6 = add i32 5, 2
456 unsigned LocalValNumber = 1;
458 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
459 // Map the operand values to an unsigned integer if it does not already
460 // have an unsigned integer assigned to it.
461 for (Value *Arg : ID->OperVals)
462 if (!ValueToNumber.contains(Arg)) {
463 ValueToNumber.try_emplace(Arg, LocalValNumber);
464 NumberToValue.try_emplace(LocalValNumber, Arg);
465 LocalValNumber++;
466 }
467
468 // Mapping the instructions to an unsigned integer if it is not already
469 // exist in the mapping.
470 if (!ValueToNumber.contains(ID->Inst)) {
471 ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
472 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
473 LocalValNumber++;
474 }
475 }
476
477 // Setting the first and last instruction data pointers for the candidate. If
478 // we got through the entire for loop without hitting an assert, we know
479 // that both of these instructions are not nullptrs.
480 FirstInst = FirstInstIt;
481 LastInst = LastInstIt;
482
483 // Add the basic blocks contained in the set into the global value numbering.
485 getBasicBlocks(BBSet);
486 for (BasicBlock *BB : BBSet) {
487 if (ValueToNumber.contains(BB))
488 continue;
489
490 ValueToNumber.try_emplace(BB, LocalValNumber);
491 NumberToValue.try_emplace(LocalValNumber, BB);
492 LocalValNumber++;
493 }
494}
495
497 const IRSimilarityCandidate &B) {
498 if (A.getLength() != B.getLength())
499 return false;
500
501 auto InstrDataForBoth =
502 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
503
504 return all_of(InstrDataForBoth,
505 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
506 IRInstructionData &A = std::get<0>(R);
507 IRInstructionData &B = std::get<1>(R);
508 if (!A.Legal || !B.Legal)
509 return false;
510 return isClose(A, B);
511 });
512}
513
514/// Determine if one or more of the assigned global value numbers for the
515/// operands in \p TargetValueNumbers is in the current mapping set for operand
516/// numbers in \p SourceOperands. The set of possible corresponding global
517/// value numbers are replaced with the most recent version of compatible
518/// values.
519///
520/// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
521/// value number for the source IRInstructionCandidate.
522/// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
523/// IRSimilarityCandidate global value numbers to a set of possible numbers in
524/// the target.
525/// \param [in] SourceOperands - The operands in the original
526/// IRSimilarityCandidate in the current instruction.
527/// \param [in] TargetValueNumbers - The global value numbers of the operands in
528/// the corresponding Instruction in the other IRSimilarityCandidate.
529/// \returns true if there exists a possible mapping between the source
530/// Instruction operands and the target Instruction operands, and false if not.
532 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
533 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
534 ArrayRef<Value *> &SourceOperands,
535 DenseSet<unsigned> &TargetValueNumbers){
536
537 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
538
539 unsigned ArgVal;
540 bool WasInserted;
541
542 // Iterate over the operands in the source IRSimilarityCandidate to determine
543 // whether there exists an operand in the other IRSimilarityCandidate that
544 // creates a valid mapping of Value to Value between the
545 // IRSimilarityCaniddates.
546 for (Value *V : SourceOperands) {
547 ArgVal = SourceValueToNumberMapping.find(V)->second;
548
549 // Instead of finding a current mapping, we attempt to insert a set.
550 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
551 std::make_pair(ArgVal, TargetValueNumbers));
552
553 // We need to iterate over the items in other IRSimilarityCandidate's
554 // Instruction to determine whether there is a valid mapping of
555 // Value to Value.
556 DenseSet<unsigned> NewSet;
557 for (unsigned &Curr : ValueMappingIt->second)
558 // If we can find the value in the mapping, we add it to the new set.
559 if (TargetValueNumbers.contains(Curr))
560 NewSet.insert(Curr);
561
562 // If we could not find a Value, return 0.
563 if (NewSet.empty())
564 return false;
565
566 // Otherwise replace the old mapping with the newly constructed one.
567 if (NewSet.size() != ValueMappingIt->second.size())
568 ValueMappingIt->second.swap(NewSet);
569
570 // We have reached no conclusions about the mapping, and cannot remove
571 // any items from the other operands, so we move to check the next operand.
572 if (ValueMappingIt->second.size() != 1)
573 continue;
574
575 unsigned ValToRemove = *ValueMappingIt->second.begin();
576 // When there is only one item left in the mapping for and operand, remove
577 // the value from the other operands. If it results in there being no
578 // mapping, return false, it means the mapping is wrong
579 for (Value *InnerV : SourceOperands) {
580 if (V == InnerV)
581 continue;
582
583 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
584 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
585 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
586 continue;
587
588 ValueMappingIt->second.erase(ValToRemove);
589 if (ValueMappingIt->second.empty())
590 return false;
591 }
592 }
593
594 return true;
595}
596
597/// Determine if operand number \p TargetArgVal is in the current mapping set
598/// for operand number \p SourceArgVal.
599///
600/// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
601/// value numbers from source IRSimilarityCandidate to target
602/// IRSimilarityCandidate.
603/// \param [in] SourceArgVal The global value number for an operand in the
604/// in the original candidate.
605/// \param [in] TargetArgVal The global value number for the corresponding
606/// operand in the other candidate.
607/// \returns True if there exists a mapping and false if not.
609 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
610 unsigned SourceArgVal, unsigned TargetArgVal) {
611 // We are given two unsigned integers representing the global values of
612 // the operands in different IRSimilarityCandidates and a current mapping
613 // between the two.
614 //
615 // Source Operand GVN: 1
616 // Target Operand GVN: 2
617 // CurrentMapping: {1: {1, 2}}
618 //
619 // Since we have mapping, and the target operand is contained in the set, we
620 // update it to:
621 // CurrentMapping: {1: {2}}
622 // and can return true. But, if the mapping was
623 // CurrentMapping: {1: {3}}
624 // we would return false.
625
626 bool WasInserted;
628
629 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
630 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
631
632 // If we created a new mapping, then we are done.
633 if (WasInserted)
634 return true;
635
636 // If there is more than one option in the mapping set, and the target value
637 // is included in the mapping set replace that set with one that only includes
638 // the target value, as it is the only valid mapping via the non commutative
639 // instruction.
640
641 DenseSet<unsigned> &TargetSet = Val->second;
642 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
643 TargetSet.clear();
644 TargetSet.insert(TargetArgVal);
645 return true;
646 }
647
648 // Return true if we can find the value in the set.
649 return TargetSet.contains(TargetArgVal);
650}
651
654 // Iterators to keep track of where we are in the operands for each
655 // Instruction.
656 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
657 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
658 unsigned OperandLength = A.OperVals.size();
659
660 // For each operand, get the value numbering and ensure it is consistent.
661 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
662 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
663 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
664
665 // Attempt to add a set with only the target value. If there is no mapping
666 // we can create it here.
667 //
668 // For an instruction like a subtraction:
669 // IRSimilarityCandidateA: IRSimilarityCandidateB:
670 // %resultA = sub %a, %b %resultB = sub %d, %e
671 //
672 // We map %a -> %d and %b -> %e.
673 //
674 // And check to see whether their mapping is consistent in
675 // checkNumberingAndReplace.
676
677 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
678 return false;
679
680 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
681 return false;
682 }
683 return true;
684}
685
688 DenseSet<unsigned> ValueNumbersA;
689 DenseSet<unsigned> ValueNumbersB;
690
691 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
692 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
693 unsigned OperandLength = A.OperVals.size();
694
695 // Find the value number sets for the operands.
696 for (unsigned Idx = 0; Idx < OperandLength;
697 Idx++, VItA++, VItB++) {
698 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
699 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
700 }
701
702 // Iterate over the operands in the first IRSimilarityCandidate and make sure
703 // there exists a possible mapping with the operands in the second
704 // IRSimilarityCandidate.
705 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
706 A.ValueNumberMapping, A.OperVals,
707 ValueNumbersB))
708 return false;
709
710 // Iterate over the operands in the second IRSimilarityCandidate and make sure
711 // there exists a possible mapping with the operands in the first
712 // IRSimilarityCandidate.
713 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
714 B.ValueNumberMapping, B.OperVals,
715 ValueNumbersA))
716 return false;
717
718 return true;
719}
720
722 const unsigned InstValA, const unsigned &InstValB,
723 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
724 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
726 bool WasInserted;
727 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
728 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
729 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
730 return false;
731 else if (ValueMappingIt->second.size() != 1) {
732 for (unsigned OtherVal : ValueMappingIt->second) {
733 if (OtherVal == InstValB)
734 continue;
735 if (!ValueNumberMappingA.contains(OtherVal))
736 continue;
737 if (!ValueNumberMappingA[OtherVal].contains(InstValA))
738 continue;
739 ValueNumberMappingA[OtherVal].erase(InstValA);
740 }
741 ValueNumberMappingA.erase(ValueMappingIt);
742 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
743 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
744 }
745
746 return true;
747}
748
751 // Get the basic blocks the label refers to.
752 BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
753 BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
754
755 // Get the basic blocks contained in each region.
756 DenseSet<BasicBlock *> BasicBlockA;
757 DenseSet<BasicBlock *> BasicBlockB;
758 A.IRSC.getBasicBlocks(BasicBlockA);
759 B.IRSC.getBasicBlocks(BasicBlockB);
760
761 // Determine if the block is contained in the region.
762 bool AContained = BasicBlockA.contains(ABB);
763 bool BContained = BasicBlockB.contains(BBB);
764
765 // Both blocks need to be contained in the region, or both need to be outside
766 // the region.
767 if (AContained != BContained)
768 return false;
769
770 // If both are contained, then we need to make sure that the relative
771 // distance to the target blocks are the same.
772 if (AContained)
773 return A.RelativeLocation == B.RelativeLocation;
774 return true;
775}
776
778 const IRSimilarityCandidate &B) {
781 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
782}
783
788
791 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
792 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
793 if (A.getLength() != B.getLength())
794 return false;
795
796 if (A.ValueToNumber.size() != B.ValueToNumber.size())
797 return false;
798
799 iterator ItA = A.begin();
800 iterator ItB = B.begin();
801
802 // These ValueNumber Mapping sets create a create a mapping between the values
803 // in one candidate to values in the other candidate. If we create a set with
804 // one element, and that same element maps to the original element in the
805 // candidate we have a good mapping.
806
807 // Iterate over the instructions contained in each candidate
808 unsigned SectionLength = A.getStartIdx() + A.getLength();
809 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
810 ItA++, ItB++, Loc++) {
811 // Make sure the instructions are similar to one another.
812 if (!isClose(*ItA, *ItB))
813 return false;
814
815 Instruction *IA = ItA->Inst;
816 Instruction *IB = ItB->Inst;
817
818 if (!ItA->Legal || !ItB->Legal)
819 return false;
820
821 // Get the operand sets for the instructions.
822 ArrayRef<Value *> OperValsA = ItA->OperVals;
823 ArrayRef<Value *> OperValsB = ItB->OperVals;
824
825 unsigned InstValA = A.ValueToNumber.find(IA)->second;
826 unsigned InstValB = B.ValueToNumber.find(IB)->second;
827
828 // Ensure that the mappings for the instructions exists.
829 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
830 ValueNumberMappingB))
831 return false;
832
833 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
834 ValueNumberMappingA))
835 return false;
836
837 // We have different paths for commutative instructions and non-commutative
838 // instructions since commutative instructions could allow multiple mappings
839 // to certain values.
840 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
841 !isa<IntrinsicInst>(IA)) {
843 {A, OperValsA, ValueNumberMappingA},
844 {B, OperValsB, ValueNumberMappingB}))
845 return false;
846 continue;
847 }
848
849 // Handle the non-commutative cases.
851 {A, OperValsA, ValueNumberMappingA},
852 {B, OperValsB, ValueNumberMappingB}))
853 return false;
854
855 // Here we check that between two corresponding instructions,
856 // when referring to a basic block in the same region, the
857 // relative locations are the same. And, that the instructions refer to
858 // basic blocks outside the region in the same corresponding locations.
859
860 // We are able to make the assumption about blocks outside of the region
861 // since the target block labels are considered values and will follow the
862 // same number matching that we defined for the other instructions in the
863 // region. So, at this point, in each location we target a specific block
864 // outside the region, we are targeting a corresponding block in each
865 // analagous location in the region we are comparing to.
866 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
867 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
868 continue;
869
870 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
871 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
872 ArrayRef<Value *> ABL = ItA->getBlockOperVals();
873 ArrayRef<Value *> BBL = ItB->getBlockOperVals();
874
875 // Check to make sure that the number of operands, and branching locations
876 // between BranchInsts is the same.
877 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
878 ABL.size() != BBL.size())
879 return false;
880
881 assert(RelBlockLocsA.size() == ABL.size() &&
882 "Block information vectors not the same size.");
883 assert(RelBlockLocsB.size() == BBL.size() &&
884 "Block information vectors not the same size.");
885
886 ZippedRelativeLocationsT ZippedRelativeLocations =
887 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
888 if (any_of(ZippedRelativeLocations,
889 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
891 {A, std::get<0>(R), std::get<2>(R)},
892 {B, std::get<1>(R), std::get<3>(R)});
893 }))
894 return false;
895 }
896 return true;
897}
898
900 const IRSimilarityCandidate &B) {
901 auto DoesOverlap = [](const IRSimilarityCandidate &X,
902 const IRSimilarityCandidate &Y) {
903 // Check:
904 // XXXXXX X starts before Y ends
905 // YYYYYYY Y starts after X starts
906 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
907 };
908
909 return DoesOverlap(A, B) || DoesOverlap(B, A);
910}
911
912void IRSimilarityIdentifier::populateMapper(
913 Module &M, std::vector<IRInstructionData *> &InstrList,
914 std::vector<unsigned> &IntegerMapping) {
915
916 std::vector<IRInstructionData *> InstrListForModule;
917 std::vector<unsigned> IntegerMappingForModule;
918 // Iterate over the functions in the module to map each Instruction in each
919 // BasicBlock to an unsigned integer.
920 Mapper.initializeForBBs(M);
921
922 for (Function &F : M) {
923
924 if (F.empty())
925 continue;
926
927 for (BasicBlock &BB : F) {
928
929 // BB has potential to have similarity since it has a size greater than 2
930 // and can therefore match other regions greater than 2. Map it to a list
931 // of unsigned integers.
932 Mapper.convertToUnsignedVec(BB, InstrListForModule,
933 IntegerMappingForModule);
934 }
935
936 BasicBlock::iterator It = F.begin()->end();
937 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
938 true);
939 if (InstrListForModule.size() > 0)
940 Mapper.IDL->push_back(*InstrListForModule.back());
941 }
942
943 // Insert the InstrListForModule at the end of the overall InstrList so that
944 // we can have a long InstrList for the entire set of Modules being analyzed.
945 llvm::append_range(InstrList, InstrListForModule);
946 // Do the same as above, but for IntegerMapping.
947 llvm::append_range(IntegerMapping, IntegerMappingForModule);
948}
949
950void IRSimilarityIdentifier::populateMapper(
951 ArrayRef<std::unique_ptr<Module>> &Modules,
952 std::vector<IRInstructionData *> &InstrList,
953 std::vector<unsigned> &IntegerMapping) {
954
955 // Iterate over, and map the instructions in each module.
956 for (const std::unique_ptr<Module> &M : Modules)
957 populateMapper(*M, InstrList, IntegerMapping);
958}
959
960/// From a repeated subsequence, find all the different instances of the
961/// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
962/// the IRInstructionData in subsequence.
963///
964/// \param [in] Mapper - The instruction mapper for basic correctness checks.
965/// \param [in] InstrList - The vector that holds the instruction data.
966/// \param [in] IntegerMapping - The vector that holds the mapped integers.
967/// \param [out] CandsForRepSubstring - The vector to store the generated
968/// IRSimilarityCandidates.
970 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
971 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
972 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
973
974 unsigned StringLen = RS.Length;
975 if (StringLen < 2)
976 return;
977
978 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
979 for (const unsigned &StartIdx : RS.StartIndices) {
980 unsigned EndIdx = StartIdx + StringLen - 1;
981
982 // Check that this subsequence does not contain an illegal instruction.
983 bool ContainsIllegal = false;
984 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
985 unsigned Key = IntegerMapping[CurrIdx];
986 if (Key > Mapper.IllegalInstrNumber) {
987 ContainsIllegal = true;
988 break;
989 }
990 }
991
992 // If we have an illegal instruction, we should not create an
993 // IRSimilarityCandidate for this region.
994 if (ContainsIllegal)
995 continue;
996
997 // We are getting iterators to the instructions in this region of code
998 // by advancing the start and end indices from the start of the
999 // InstrList.
1000 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
1001 std::advance(StartIt, StartIdx);
1002 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
1003 std::advance(EndIt, EndIdx);
1004
1005 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1006 }
1007}
1008
1010 IRSimilarityCandidate &SourceCand,
1011 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1012 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1013 assert(SourceCand.CanonNumToNumber.size() != 0 &&
1014 "Base canonical relationship is empty!");
1015 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1016 "Base canonical relationship is empty!");
1017
1018 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1019 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1020
1021 DenseSet<unsigned> UsedGVNs;
1022 // Iterate over the mappings provided from this candidate to SourceCand. We
1023 // are then able to map the GVN in this candidate to the same canonical number
1024 // given to the corresponding GVN in SourceCand.
1025 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1026 unsigned SourceGVN = GVNMapping.first;
1027
1028 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1029
1030 unsigned ResultGVN;
1031 // We need special handling if we have more than one potential value. This
1032 // means that there are at least two GVNs that could correspond to this GVN.
1033 // This could lead to potential swapping later on, so we make a decision
1034 // here to ensure a one-to-one mapping.
1035 if (GVNMapping.second.size() > 1) {
1036 bool Found = false;
1037 for (unsigned Val : GVNMapping.second) {
1038 // We make sure the target value number hasn't already been reserved.
1039 if (UsedGVNs.contains(Val))
1040 continue;
1041
1042 // We make sure that the opposite mapping is still consistent.
1044 FromSourceMapping.find(Val);
1045
1046 if (!It->second.contains(SourceGVN))
1047 continue;
1048
1049 // We pick the first item that satisfies these conditions.
1050 Found = true;
1051 ResultGVN = Val;
1052 break;
1053 }
1054
1055 assert(Found && "Could not find matching value for source GVN");
1056 (void)Found;
1057
1058 } else
1059 ResultGVN = *GVNMapping.second.begin();
1060
1061 // Whatever GVN is found, we mark it as used.
1062 UsedGVNs.insert(ResultGVN);
1063
1064 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1065 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1066 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1067 }
1068
1070 getBasicBlocks(BBSet);
1071 // Find canonical numbers for the BasicBlocks in the current candidate.
1072 // This is done by finding the corresponding value for the first instruction
1073 // in the block in the current candidate, finding the matching value in the
1074 // source candidate. Then by finding the parent of this value, use the
1075 // canonical number of the block in the source candidate for the canonical
1076 // number in the current candidate.
1077 for (BasicBlock *BB : BBSet) {
1078 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1079
1080 // We can skip the BasicBlock if the canonical numbering has already been
1081 // found in a separate instruction.
1082 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1083 continue;
1084
1085 // If the basic block is the starting block, then the shared instruction may
1086 // not be the first instruction in the block, it will be the first
1087 // instruction in the similarity region.
1088 Value *FirstOutlineInst = BB == getStartBB()
1090 : &*BB->instructionsWithoutDebug().begin();
1091
1092 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1093 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1094 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1095 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1096 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1097 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1098 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1099 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1100 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1101 }
1102}
1103
1105 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1106 IRSimilarityCandidate &TargetCandLarge) {
1107 assert(!SourceCand.CanonNumToNumber.empty() &&
1108 "Canonical Relationship is non-empty");
1109 assert(!SourceCand.NumberToCanonNum.empty() &&
1110 "Canonical Relationship is non-empty");
1111
1112 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1113 "Canonical Relationship is non-empty");
1114 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1115 "Canonical Relationship is non-empty");
1116
1117 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1118 "Canonical Relationship is non-empty");
1119 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1120 "Canonical Relationship is non-empty");
1121
1122 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1123 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1124
1125 // We're going to use the larger candidates as a "bridge" to create the
1126 // canonical number for the target candidate since we have idetified two
1127 // candidates as subsequences of larger sequences, and therefore must be
1128 // structurally similar.
1129 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1130 Value *CurrVal = ValueNumPair.first;
1131 unsigned TargetCandGVN = ValueNumPair.second;
1132
1133 // Find the numbering in the large candidate that surrounds the
1134 // current candidate.
1135 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1136 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1137
1138 // Get the canonical numbering in the large target candidate.
1139 std::optional<unsigned> OTargetCandCanon =
1140 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1141 assert(OTargetCandCanon.has_value() &&
1142 "Canononical Number not found for GVN");
1143
1144 // Get the GVN in the large source candidate from the canonical numbering.
1145 std::optional<unsigned> OLargeSourceGVN =
1146 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1147 assert(OLargeSourceGVN.has_value() &&
1148 "GVN Number not found for Canonical Number");
1149
1150 // Get the Value from the GVN in the large source candidate.
1151 std::optional<Value *> OLargeSourceV =
1152 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1153 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1154
1155 // Get the GVN number for the Value in the source candidate.
1156 std::optional<unsigned> OSourceGVN =
1157 SourceCand.getGVN(OLargeSourceV.value());
1158 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1159
1160 // Get the canonical numbering from the GVN/
1161 std::optional<unsigned> OSourceCanon =
1162 SourceCand.getCanonicalNum(OSourceGVN.value());
1163 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1164
1165 // Insert the canonical numbering and GVN pair into their respective
1166 // mappings.
1167 CanonNumToNumber.insert(
1168 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1169 NumberToCanonNum.insert(
1170 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1171 }
1172}
1173
1175 IRSimilarityCandidate &CurrCand) {
1176 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1177 "Canonical Relationship is non-empty");
1178 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1179 "Canonical Relationship is non-empty");
1180
1181 unsigned CanonNum = 0;
1182 // Iterate over the value numbers found, the order does not matter in this
1183 // case.
1184 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1185 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1186 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1187 CanonNum++;
1188 }
1189}
1190
1191/// Look for larger IRSimilarityCandidates From the previously matched
1192/// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1193/// an overlap, return a pair of structurally similar, larger
1194/// IRSimilarityCandidates.
1195///
1196/// \param [in] CandA - The first candidate we are trying to determine the
1197/// structure of.
1198/// \param [in] CandB - The second candidate we are trying to determine the
1199/// structure of.
1200/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1201/// a circuit to the IRSimilarityCandidates that include this instruction.
1202/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1203/// number representing the structural group assigned to it.
1204static std::optional<
1205 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1208 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1210 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1211 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1212 DenseSet<unsigned> IncludedGroupsA;
1213 DenseSet<unsigned> IncludedGroupsB;
1214
1215 // Find the overall similarity group numbers that fully contain the candidate,
1216 // and record the larger candidate for each group.
1217 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1218 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1219 Result;
1220
1221 unsigned CandAStart = CandA.getStartIdx();
1222 unsigned CandAEnd = CandA.getEndIdx();
1223 unsigned CandBStart = CandB.getStartIdx();
1224 unsigned CandBEnd = CandB.getEndIdx();
1225 if (IdxToCandidateIt == IndexToIncludedCand.end())
1226 return Result;
1227 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1228 if (MatchedCand->getStartIdx() > CandAStart ||
1229 (MatchedCand->getEndIdx() < CandAEnd))
1230 continue;
1231 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1232 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1233 IncludedGroupsA.insert(GroupNum);
1234 }
1235
1236 // Find the overall similarity group numbers that fully contain the next
1237 // candidate, and record the larger candidate for each group.
1238 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1239 if (IdxToCandidateIt == IndexToIncludedCand.end())
1240 return Result;
1241 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1242 if (MatchedCand->getStartIdx() > CandBStart ||
1243 MatchedCand->getEndIdx() < CandBEnd)
1244 continue;
1245 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1246 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1247 IncludedGroupsB.insert(GroupNum);
1248 }
1249
1250 // Find the intersection between the two groups, these are the groups where
1251 // the larger candidates exist.
1252 set_intersect(IncludedGroupsA, IncludedGroupsB);
1253
1254 // If there is no intersection between the sets, then we cannot determine
1255 // whether or not there is a match.
1256 if (IncludedGroupsA.empty())
1257 return Result;
1258
1259 // Create a pair that contains the larger candidates.
1260 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1261 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1262 Result = std::make_pair(ItA->second, ItB->second);
1263 return Result;
1264}
1265
1266/// From the list of IRSimilarityCandidates, perform a comparison between each
1267/// IRSimilarityCandidate to determine if there are overlapping
1268/// IRInstructionData, or if they do not have the same structure.
1269///
1270/// \param [in] CandsForRepSubstring - The vector containing the
1271/// IRSimilarityCandidates.
1272/// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1273/// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1274/// vector are structurally similar to one another.
1275/// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1276/// a circuit to the IRSimilarityCandidates that include this instruction.
1277/// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1278/// number representing the structural group assigned to it.
1280 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1281 DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1282 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1284 ) {
1285 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1286 InnerCandEndIt;
1287
1288 // IRSimilarityCandidates each have a structure for operand use. It is
1289 // possible that two instances of the same subsequences have different
1290 // structure. Each type of structure found is assigned a number. This
1291 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1292 // discovered it fits within.
1294
1295 // Find the compatibility from each candidate to the others to determine
1296 // which candidates overlap and which have the same structure by mapping
1297 // each structure to a different group.
1298 bool SameStructure;
1299 bool Inserted;
1300 unsigned CurrentGroupNum = 0;
1301 unsigned OuterGroupNum;
1305
1306 // Iterate over the candidates to determine its structural and overlapping
1307 // compatibility with other instructions
1308 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1309 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1310 for (CandIt = CandsForRepSubstring.begin(),
1311 CandEndIt = CandsForRepSubstring.end();
1312 CandIt != CandEndIt; CandIt++) {
1313
1314 // Determine if it has an assigned structural group already.
1315 CandToGroupIt = CandToGroup.find(&*CandIt);
1316 if (CandToGroupIt == CandToGroup.end()) {
1317 // If not, we assign it one, and add it to our mapping.
1318 std::tie(CandToGroupIt, Inserted) =
1319 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1320 }
1321
1322 // Get the structural group number from the iterator.
1323 OuterGroupNum = CandToGroupIt->second;
1324
1325 // Check if we already have a list of IRSimilarityCandidates for the current
1326 // structural group. Create one if one does not exist.
1327 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1328 if (CurrentGroupPair == StructuralGroups.end()) {
1330 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1331 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1332 }
1333
1334 // Iterate over the IRSimilarityCandidates following the current
1335 // IRSimilarityCandidate in the list to determine whether the two
1336 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1337 // of IRSimilarityCandidates.
1338 for (InnerCandIt = std::next(CandIt),
1339 InnerCandEndIt = CandsForRepSubstring.end();
1340 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1341
1342 // We check if the inner item has a group already, if it does, we skip it.
1343 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1344 if (CandToGroupItInner != CandToGroup.end())
1345 continue;
1346
1347 // Check if we have found structural similarity between two candidates
1348 // that fully contains the first and second candidates.
1349 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1350 LargerPair = CheckLargerCands(
1351 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1352
1353 // If a pair was found, it means that we can assume that these smaller
1354 // substrings are also structurally similar. Use the larger candidates to
1355 // determine the canonical mapping between the two sections.
1356 if (LargerPair.has_value()) {
1357 SameStructure = true;
1358 InnerCandIt->createCanonicalRelationFrom(
1359 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1360 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1361 CurrentGroupPair->second.push_back(*InnerCandIt);
1362 continue;
1363 }
1364
1365 // Otherwise we determine if they have the same structure and add it to
1366 // vector if they match.
1367 ValueNumberMappingA.clear();
1368 ValueNumberMappingB.clear();
1370 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1371 if (!SameStructure)
1372 continue;
1373
1374 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1375 ValueNumberMappingB);
1376 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1377 CurrentGroupPair->second.push_back(*InnerCandIt);
1378 }
1379 }
1380}
1381
1382void IRSimilarityIdentifier::findCandidates(
1383 std::vector<IRInstructionData *> &InstrList,
1384 std::vector<unsigned> &IntegerMapping) {
1385 SuffixTree ST(IntegerMapping);
1386
1387 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1388 std::vector<SimilarityGroup> NewCandidateGroups;
1389
1390 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1393
1394 // Iterate over the subsequences found by the Suffix Tree to create
1395 // IRSimilarityCandidates for each repeated subsequence and determine which
1396 // instances are structurally similar to one another.
1397
1398 // Sort the suffix tree from longest substring to shortest.
1399 std::vector<SuffixTree::RepeatedSubstring> RSes;
1400 for (SuffixTree::RepeatedSubstring &RS : ST)
1401 RSes.push_back(RS);
1402
1404 const SuffixTree::RepeatedSubstring &RHS) {
1405 return LHS.Length > RHS.Length;
1406 });
1407 for (SuffixTree::RepeatedSubstring &RS : RSes) {
1408 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1409 CandsForRepSubstring);
1410
1411 if (CandsForRepSubstring.size() < 2)
1412 continue;
1413
1414 findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1415 IndexToIncludedCand, CandToGroup);
1416 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1417 // We only add the group if it contains more than one
1418 // IRSimilarityCandidate. If there is only one, that means there is no
1419 // other repeated subsequence with the same structure.
1420 if (Group.second.size() > 1) {
1421 SimilarityCandidates->push_back(Group.second);
1422 // Iterate over each candidate in the group, and add an entry for each
1423 // instruction included with a mapping to a set of
1424 // IRSimilarityCandidates that include that instruction.
1425 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1426 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1427 Idx <= Edx; ++Idx) {
1429 IdIt;
1430 IdIt = IndexToIncludedCand.find(Idx);
1431 bool Inserted = false;
1432 if (IdIt == IndexToIncludedCand.end())
1433 std::tie(IdIt, Inserted) = IndexToIncludedCand.insert(
1434 std::make_pair(Idx, DenseSet<IRSimilarityCandidate *>()));
1435 IdIt->second.insert(&IRCand);
1436 }
1437 // Add mapping of candidate to the overall similarity group number.
1438 CandToGroup.insert(
1439 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1440 }
1441 }
1442 }
1443
1444 CandsForRepSubstring.clear();
1445 StructuralGroups.clear();
1446 NewCandidateGroups.clear();
1447 }
1448}
1449
1451 ArrayRef<std::unique_ptr<Module>> Modules) {
1453
1454 std::vector<IRInstructionData *> InstrList;
1455 std::vector<unsigned> IntegerMapping;
1456 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1457 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1458 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1459 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1460 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1461
1462 populateMapper(Modules, InstrList, IntegerMapping);
1463 findCandidates(InstrList, IntegerMapping);
1464
1465 return *SimilarityCandidates;
1466}
1467
1470 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1471 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1472 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1473 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1474 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1475
1476 std::vector<IRInstructionData *> InstrList;
1477 std::vector<unsigned> IntegerMapping;
1478
1479 populateMapper(M, InstrList, IntegerMapping);
1480 findCandidates(InstrList, IntegerMapping);
1481
1482 return *SimilarityCandidates;
1483}
1484
1486 "ir-similarity-identifier", false, true)
1487
1489 : ModulePass(ID) {
1492}
1493
1497 false));
1498 return false;
1499}
1500
1502 IRSI.reset();
1503 return false;
1504}
1505
1507 IRSI->findSimilarity(M);
1508 return false;
1509}
1510
1511AnalysisKey IRSimilarityAnalysis::Key;
1516 false);
1517 IRSI.findSimilarity(M);
1518 return IRSI;
1519}
1520
1524 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1525 IRSI.getSimilarity();
1526
1527 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1528 OS << CandVec.size() << " candidates of length "
1529 << CandVec.begin()->getLength() << ". Found in: \n";
1530 for (IRSimilarityCandidate &Cand : CandVec) {
1531 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1532 << ", Basic Block: ";
1533 if (Cand.front()->Inst->getParent()->getName().str() == "")
1534 OS << "(unnamed)";
1535 else
1536 OS << Cand.front()->Inst->getParent()->getName().str();
1537 OS << "\n Start Instruction: ";
1538 Cand.frontInstruction()->print(OS);
1539 OS << "\n End Instruction: ";
1540 Cand.backInstruction()->print(OS);
1541 OS << "\n";
1542 }
1543 }
1544
1545 return PreservedAnalyses::all();
1546}
1547
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
bool checkNumberingAndReplace(DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, unsigned SourceArgVal, unsigned TargetArgVal)
Determine if operand number TargetArgVal is in the current mapping set for operand number SourceArgVa...
detail::zippy< detail::zip_shortest, SmallVector< int, 4 > &, SmallVector< int, 4 > &, ArrayRef< Value * > &, ArrayRef< Value * > & > ZippedRelativeLocationsT
static void findCandidateStructures(std::vector< IRSimilarityCandidate > &CandsForRepSubstring, DenseMap< unsigned, SimilarityGroup > &StructuralGroups, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToOverallGroup)
From the list of IRSimilarityCandidates, perform a comparison between each IRSimilarityCandidate to d...
static void createCandidatesFromSuffixTree(const IRInstructionMapper &Mapper, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping, SuffixTree::RepeatedSubstring &RS, std::vector< IRSimilarityCandidate > &CandsForRepSubstring)
From a repeated subsequence, find all the different instances of the subsequence from the InstrList,...
static bool checkNumberingAndReplaceCommutative(const DenseMap< Value *, unsigned > &SourceValueToNumberMapping, DenseMap< unsigned, DenseSet< unsigned > > &CurrentSrcTgtNumberMapping, ArrayRef< Value * > &SourceOperands, DenseSet< unsigned > &TargetValueNumbers)
Determine if one or more of the assigned global value numbers for the operands in TargetValueNumbers ...
static std::optional< std::pair< IRSimilarityCandidate *, IRSimilarityCandidate * > > CheckLargerCands(IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB, DenseMap< unsigned, DenseSet< IRSimilarityCandidate * > > &IndexToIncludedCand, DenseMap< IRSimilarityCandidate *, unsigned > &CandToGroup)
Look for larger IRSimilarityCandidates From the previously matched IRSimilarityCandidates that fully ...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
if(VerifyEach)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines generic set operations that may be used on set's of different types,...
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:348
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:500
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:442
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:429
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:164
Conditional or Unconditional Branch instruction.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1703
bool isIndirectCall() const
Return true if the callsite is an indirect call.
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:955
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:965
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:969
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:970
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:989
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:988
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:992
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition: InstrTypes.h:977
@ ICMP_SGE
signed greater or equal
Definition: InstrTypes.h:993
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition: InstrTypes.h:978
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1128
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1066
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&... Args)
Definition: DenseMap.h:235
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
iterator end()
Definition: DenseMap.h:84
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
void swap(DenseMap &RHS)
Definition: DenseMap.h:785
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Class to represent function types.
Definition: DerivedTypes.h:103
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
An analysis pass that runs and returns the IRSimilarityIdentifier run on the Module.
Result run(Module &M, ModuleAnalysisManager &)
An analysis pass based on legacy pass manager that runs and returns IRSimilarityIdentifier run on the...
bool doInitialization(Module &M) override
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
bool doFinalization(Module &M) override
doFinalization - Virtual method overriden by subclasses to do any necessary clean up after all passes...
bool runOnModule(Module &M) override
runOnModule - Virtual method overriden by subclasses to process the module being operated on.
This is a class that wraps a range of IRInstructionData from one point to another in the vector of IR...
void createCanonicalRelationFrom(IRSimilarityCandidate &SourceCand, DenseMap< unsigned, DenseSet< unsigned > > &ToSourceMapping, DenseMap< unsigned, DenseSet< unsigned > > &FromSourceMapping)
Create a mapping for the value numbering of the calling IRSimilarityCandidate, to a different separat...
static bool isSimilar(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
std::optional< unsigned > getGVN(Value *V)
Finds the positive number associated with V if it has been mapped.
static bool compareAssignmentMapping(const unsigned InstValA, const unsigned &InstValB, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingA, DenseMap< unsigned, DenseSet< unsigned > > &ValueNumberMappingB)
Compare the GVN of the assignment value in corresponding instructions in IRSimilarityCandidates A and...
void getBasicBlocks(DenseSet< BasicBlock * > &BBSet) const
std::optional< Value * > fromGVN(unsigned Num)
Finds the Value associate with Num if it exists.
static bool checkRelativeLocations(RelativeLocMapping A, RelativeLocMapping B)
Compare the relative locations in A and B and check that the distances match if both locations are co...
static bool compareStructure(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
static void createCanonicalMappingFor(IRSimilarityCandidate &CurrCand)
Create a mapping from the value numbering to a different separate set of numbers.
static bool overlap(const IRSimilarityCandidate &A, const IRSimilarityCandidate &B)
Compare the start and end indices of the two IRSimilarityCandidates for whether they overlap.
IRSimilarityCandidate(unsigned StartIdx, unsigned Len, IRInstructionData *FirstInstIt, IRInstructionData *LastInstIt)
std::optional< unsigned > getCanonicalNum(unsigned N)
Find the canonical number from the global value number N stored in the candidate.
std::optional< unsigned > fromCanonicalNum(unsigned N)
Find the global value number from the canonical number N stored in the candidate.
static bool compareNonCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
static bool compareCommutativeOperandMapping(OperandMapping A, OperandMapping B)
Compare the operands in A and B and check that the current mapping of global value numbers from A to ...
This class puts all the pieces of the IRInstructionData, IRInstructionMapper, IRSimilarityCandidate t...
SimilarityGroupList & findSimilarity(ArrayRef< std::unique_ptr< Module > > Modules)
std::optional< SimilarityGroupList > & getSimilarity()
void visit(Iterator Start, Iterator End)
Definition: InstVisitor.h:87
const BasicBlock * getParent() const
Definition: Instruction.h:150
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
ModulePass class - This class is used to implement unstructured interprocedural optimizations and ana...
Definition: Pass.h:251
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
size_t size() const
Definition: SmallVector.h:91
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::string str() const
str - Get the contents as an std::string.
Definition: StringRef.h:222
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
LLVM Value Representation.
Definition: Value.h:74
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
iterator find(const_arg_type_t< ValueT > V)
Definition: DenseSet.h:179
size_type size() const
Definition: DenseSet.h:81
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
typename ilist_select_iterator_type< OptionsT::has_iterator_bits, OptionsT, false, false >::type iterator
Definition: simple_ilist.h:97
void push_back(reference Node)
Insert a node at the back; never copies.
Definition: simple_ilist.h:153
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::vector< SimilarityGroup > SimilarityGroupList
std::vector< IRSimilarityCandidate > SimilarityGroup
bool isClose(const IRInstructionData &A, const IRInstructionData &B)
Compare one IRInstructionData class to another IRInstructionData class for whether they are performin...
StringRef getName(ID id)
Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
Definition: Function.cpp:1013
bool isOverloaded(ID id)
Returns true if the intrinsic can be overloaded.
Definition: Function.cpp:1436
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition: STLExtras.h:862
void stable_sort(R &&Range)
Definition: STLExtras.h:1975
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
Definition: SetOperations.h:40
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2053
cl::opt< bool > DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), cl::ReallyHidden, cl::desc("disable outlining indirect calls."))
Definition: IROutliner.cpp:42
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
cl::opt< bool > DisableBranches("no-ir-sim-branch-matching", cl::init(false), cl::ReallyHidden, cl::desc("disable similarity matching, and outlining, " "across branches for debugging purposes."))
Definition: IROutliner.cpp:38
cl::opt< bool > DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, cl::desc("Don't match or outline intrinsics"))
Definition: IROutliner.cpp:46
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry &)
cl::opt< bool > MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, cl::desc("only allow matching call instructions if the " "name and type signature match."))
A special type used by analysis passes to provide an address that identifies that particular analysis...
Definition: Analysis.h:26
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:50
This provides the utilities for hashing an Instruction to an unsigned integer.
StringRef getCalleeName() const
Get the callee name that the call instruction is using for hashing the instruction.
void initializeInstruction()
Fills data stuctures for IRInstructionData when it is constructed from a.
SmallVector< int, 4 > RelativeBlockLocations
This structure holds the distances of how far "ahead of" or "behind" the target blocks of a branch,...
std::optional< std::string > CalleeName
This is only relevant if we are wrapping a CallInst.
IRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Gather the information that is difficult to gather for an Instruction, or is changed.
ArrayRef< Value * > getBlockOperVals()
Get the BasicBlock based operands for PHINodes and BranchInsts.
Instruction * Inst
The source Instruction that is being wrapped.
static CmpInst::Predicate predicateForConsistency(CmpInst *CI)
A function that swaps the predicates to their less than form if they are in a greater than form.
void setPHIPredecessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a PHINode, finds the relative distances from the incoming basic b...
SmallVector< Value *, 4 > OperVals
The values of the operands in the Instruction.
void setBranchSuccessors(DenseMap< BasicBlock *, unsigned > &BasicBlockToInteger)
For an IRInstructionData containing a branch, finds the relative distances from the source basic bloc...
void setCalleeName(bool MatchByName=true)
For an IRInstructionData containing a CallInst, set the function name appropriately.
CmpInst::Predicate getPredicate() const
Get the predicate that the compare instruction is using for hashing the instruction.
std::optional< CmpInst::Predicate > RevisedPredicate
This is only relevant if we are wrapping a CmpInst where we needed to change the predicate of a compa...
Helper struct for converting the Instructions in a Module into a vector of unsigned integers.
DenseMap< IRInstructionData *, unsigned, IRInstructionDataTraits > InstructionIntegerMap
Correspondence from IRInstructionData to unsigned integers.
SpecificBumpPtrAllocator< IRInstructionDataList > * IDLAllocator
This allocator pointer is in charge of creating the IRInstructionDataList so it is not deallocated un...
SpecificBumpPtrAllocator< IRInstructionData > * InstDataAllocator
This allocator pointer is in charge of holding on to the IRInstructionData so it is not deallocated u...
bool EnableMatchCallsByName
Marks whether we should use exact function names, as well as types to find similarity between calls.
unsigned LegalInstrNumber
The next available integer to assign to a legal Instruction to.
unsigned IllegalInstrNumber
The starting illegal instruction number to map to.
InstructionClassification InstClassifier
Maps an Instruction to a member of InstrType.
void initializeForBBs(Function &F, unsigned &BBNumber)
Assigns values to all the basic blocks in function F starting from integer BBNumber.
bool HaveLegalRange
Marks whether we have found a set of instructions that is long enough to be considered for similarity...
IRInstructionData * allocateIRInstructionData(Instruction &I, bool Legality, IRInstructionDataList &IDL)
Get an allocated IRInstructionData struct using the InstDataAllocator.
bool CanCombineWithPrevInstr
Marks whether we found a illegal instruction in the previous step.
DenseMap< BasicBlock *, unsigned > BasicBlockToInteger
A mapping for a basic block in a module to its assigned number/location in the module.
IRInstructionDataList * allocateIRInstructionDataList()
Get an allocated IRInstructionDataList object using the IDLAllocator.
unsigned mapToLegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB)
Maps an Instruction to a legal integer.
void convertToUnsignedVec(BasicBlock &BB, std::vector< IRInstructionData * > &InstrList, std::vector< unsigned > &IntegerMapping)
Maps the Instructions in a BasicBlock BB to legal or illegal integers determined by InstrType.
bool AddedIllegalLastTime
Set if we added an illegal number in the previous step.
unsigned mapToIllegalUnsigned(BasicBlock::iterator &It, std::vector< unsigned > &IntegerMappingForBB, std::vector< IRInstructionData * > &InstrListForBB, bool End=false)
Maps an Instruction to an illegal integer.
A helper struct to hold the candidate, for a branch instruction, the relative location of a label,...
Incoming for lane maks phi as machine instruction, incoming register Reg and incoming block Block are...
A repeated substring in the tree.
Definition: SuffixTree.h:46
SmallVector< unsigned > StartIndices
The start indices of each occurrence.
Definition: SuffixTree.h:51
unsigned Length
The length of the string.
Definition: SuffixTree.h:48