File: | tools/clang/lib/Analysis/CloneDetection.cpp |
Warning: | line 324, column 30 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===--- CloneDetection.cpp - Finds code clones in an AST -------*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | /// | |||
9 | /// This file implements classes for searching and analyzing source code clones. | |||
10 | /// | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "clang/Analysis/CloneDetection.h" | |||
14 | ||||
15 | #include "clang/AST/DataCollection.h" | |||
16 | #include "clang/AST/DeclTemplate.h" | |||
17 | #include "llvm/Support/MD5.h" | |||
18 | #include "llvm/Support/Path.h" | |||
19 | ||||
20 | using namespace clang; | |||
21 | ||||
22 | StmtSequence::StmtSequence(const CompoundStmt *Stmt, const Decl *D, | |||
23 | unsigned StartIndex, unsigned EndIndex) | |||
24 | : S(Stmt), D(D), StartIndex(StartIndex), EndIndex(EndIndex) { | |||
25 | assert(Stmt && "Stmt must not be a nullptr")((Stmt && "Stmt must not be a nullptr") ? static_cast <void> (0) : __assert_fail ("Stmt && \"Stmt must not be a nullptr\"" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 25, __PRETTY_FUNCTION__)); | |||
26 | assert(StartIndex < EndIndex && "Given array should not be empty")((StartIndex < EndIndex && "Given array should not be empty" ) ? static_cast<void> (0) : __assert_fail ("StartIndex < EndIndex && \"Given array should not be empty\"" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 26, __PRETTY_FUNCTION__)); | |||
27 | assert(EndIndex <= Stmt->size() && "Given array too big for this Stmt")((EndIndex <= Stmt->size() && "Given array too big for this Stmt" ) ? static_cast<void> (0) : __assert_fail ("EndIndex <= Stmt->size() && \"Given array too big for this Stmt\"" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 27, __PRETTY_FUNCTION__)); | |||
28 | } | |||
29 | ||||
30 | StmtSequence::StmtSequence(const Stmt *Stmt, const Decl *D) | |||
31 | : S(Stmt), D(D), StartIndex(0), EndIndex(0) {} | |||
32 | ||||
33 | StmtSequence::StmtSequence() | |||
34 | : S(nullptr), D(nullptr), StartIndex(0), EndIndex(0) {} | |||
35 | ||||
36 | bool StmtSequence::contains(const StmtSequence &Other) const { | |||
37 | // If both sequences reside in different declarations, they can never contain | |||
38 | // each other. | |||
39 | if (D != Other.D) | |||
40 | return false; | |||
41 | ||||
42 | const SourceManager &SM = getASTContext().getSourceManager(); | |||
43 | ||||
44 | // Otherwise check if the start and end locations of the current sequence | |||
45 | // surround the other sequence. | |||
46 | bool StartIsInBounds = | |||
47 | SM.isBeforeInTranslationUnit(getBeginLoc(), Other.getBeginLoc()) || | |||
48 | getBeginLoc() == Other.getBeginLoc(); | |||
49 | if (!StartIsInBounds) | |||
50 | return false; | |||
51 | ||||
52 | bool EndIsInBounds = | |||
53 | SM.isBeforeInTranslationUnit(Other.getEndLoc(), getEndLoc()) || | |||
54 | Other.getEndLoc() == getEndLoc(); | |||
55 | return EndIsInBounds; | |||
56 | } | |||
57 | ||||
58 | StmtSequence::iterator StmtSequence::begin() const { | |||
59 | if (!holdsSequence()) { | |||
60 | return &S; | |||
61 | } | |||
62 | auto CS = cast<CompoundStmt>(S); | |||
63 | return CS->body_begin() + StartIndex; | |||
64 | } | |||
65 | ||||
66 | StmtSequence::iterator StmtSequence::end() const { | |||
67 | if (!holdsSequence()) { | |||
68 | return reinterpret_cast<StmtSequence::iterator>(&S) + 1; | |||
69 | } | |||
70 | auto CS = cast<CompoundStmt>(S); | |||
71 | return CS->body_begin() + EndIndex; | |||
72 | } | |||
73 | ||||
74 | ASTContext &StmtSequence::getASTContext() const { | |||
75 | assert(D)((D) ? static_cast<void> (0) : __assert_fail ("D", "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 75, __PRETTY_FUNCTION__)); | |||
76 | return D->getASTContext(); | |||
77 | } | |||
78 | ||||
79 | SourceLocation StmtSequence::getBeginLoc() const { | |||
80 | return front()->getBeginLoc(); | |||
81 | } | |||
82 | ||||
83 | SourceLocation StmtSequence::getEndLoc() const { return back()->getEndLoc(); } | |||
84 | ||||
85 | SourceRange StmtSequence::getSourceRange() const { | |||
86 | return SourceRange(getBeginLoc(), getEndLoc()); | |||
87 | } | |||
88 | ||||
89 | void CloneDetector::analyzeCodeBody(const Decl *D) { | |||
90 | assert(D)((D) ? static_cast<void> (0) : __assert_fail ("D", "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 90, __PRETTY_FUNCTION__)); | |||
91 | assert(D->hasBody())((D->hasBody()) ? static_cast<void> (0) : __assert_fail ("D->hasBody()", "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 91, __PRETTY_FUNCTION__)); | |||
92 | ||||
93 | Sequences.push_back(StmtSequence(D->getBody(), D)); | |||
94 | } | |||
95 | ||||
96 | /// Returns true if and only if \p Stmt contains at least one other | |||
97 | /// sequence in the \p Group. | |||
98 | static bool containsAnyInGroup(StmtSequence &Seq, | |||
99 | CloneDetector::CloneGroup &Group) { | |||
100 | for (StmtSequence &GroupSeq : Group) { | |||
101 | if (Seq.contains(GroupSeq)) | |||
102 | return true; | |||
103 | } | |||
104 | return false; | |||
105 | } | |||
106 | ||||
107 | /// Returns true if and only if all sequences in \p OtherGroup are | |||
108 | /// contained by a sequence in \p Group. | |||
109 | static bool containsGroup(CloneDetector::CloneGroup &Group, | |||
110 | CloneDetector::CloneGroup &OtherGroup) { | |||
111 | // We have less sequences in the current group than we have in the other, | |||
112 | // so we will never fulfill the requirement for returning true. This is only | |||
113 | // possible because we know that a sequence in Group can contain at most | |||
114 | // one sequence in OtherGroup. | |||
115 | if (Group.size() < OtherGroup.size()) | |||
116 | return false; | |||
117 | ||||
118 | for (StmtSequence &Stmt : Group) { | |||
119 | if (!containsAnyInGroup(Stmt, OtherGroup)) | |||
120 | return false; | |||
121 | } | |||
122 | return true; | |||
123 | } | |||
124 | ||||
125 | void OnlyLargestCloneConstraint::constrain( | |||
126 | std::vector<CloneDetector::CloneGroup> &Result) { | |||
127 | std::vector<unsigned> IndexesToRemove; | |||
128 | ||||
129 | // Compare every group in the result with the rest. If one groups contains | |||
130 | // another group, we only need to return the bigger group. | |||
131 | // Note: This doesn't scale well, so if possible avoid calling any heavy | |||
132 | // function from this loop to minimize the performance impact. | |||
133 | for (unsigned i = 0; i < Result.size(); ++i) { | |||
134 | for (unsigned j = 0; j < Result.size(); ++j) { | |||
135 | // Don't compare a group with itself. | |||
136 | if (i == j) | |||
137 | continue; | |||
138 | ||||
139 | if (containsGroup(Result[j], Result[i])) { | |||
140 | IndexesToRemove.push_back(i); | |||
141 | break; | |||
142 | } | |||
143 | } | |||
144 | } | |||
145 | ||||
146 | // Erasing a list of indexes from the vector should be done with decreasing | |||
147 | // indexes. As IndexesToRemove is constructed with increasing values, we just | |||
148 | // reverse iterate over it to get the desired order. | |||
149 | for (auto I = IndexesToRemove.rbegin(); I != IndexesToRemove.rend(); ++I) { | |||
150 | Result.erase(Result.begin() + *I); | |||
151 | } | |||
152 | } | |||
153 | ||||
154 | bool FilenamePatternConstraint::isAutoGenerated( | |||
155 | const CloneDetector::CloneGroup &Group) { | |||
156 | std::string Error; | |||
157 | if (IgnoredFilesPattern.empty() || Group.empty() || | |||
158 | !IgnoredFilesRegex->isValid(Error)) | |||
159 | return false; | |||
160 | ||||
161 | for (const StmtSequence &S : Group) { | |||
162 | const SourceManager &SM = S.getASTContext().getSourceManager(); | |||
163 | StringRef Filename = llvm::sys::path::filename( | |||
164 | SM.getFilename(S.getContainingDecl()->getLocation())); | |||
165 | if (IgnoredFilesRegex->match(Filename)) | |||
166 | return true; | |||
167 | } | |||
168 | ||||
169 | return false; | |||
170 | } | |||
171 | ||||
172 | /// This class defines what a type II code clone is: If it collects for two | |||
173 | /// statements the same data, then those two statements are considered to be | |||
174 | /// clones of each other. | |||
175 | /// | |||
176 | /// All collected data is forwarded to the given data consumer of the type T. | |||
177 | /// The data consumer class needs to provide a member method with the signature: | |||
178 | /// update(StringRef Str) | |||
179 | namespace { | |||
180 | template <class T> | |||
181 | class CloneTypeIIStmtDataCollector | |||
182 | : public ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>> { | |||
183 | ASTContext &Context; | |||
184 | /// The data sink to which all data is forwarded. | |||
185 | T &DataConsumer; | |||
186 | ||||
187 | template <class Ty> void addData(const Ty &Data) { | |||
188 | data_collection::addDataToConsumer(DataConsumer, Data); | |||
189 | } | |||
190 | ||||
191 | public: | |||
192 | CloneTypeIIStmtDataCollector(const Stmt *S, ASTContext &Context, | |||
193 | T &DataConsumer) | |||
194 | : Context(Context), DataConsumer(DataConsumer) { | |||
195 | this->Visit(S); | |||
196 | } | |||
197 | ||||
198 | // Define a visit method for each class to collect data and subsequently visit | |||
199 | // all parent classes. This uses a template so that custom visit methods by us | |||
200 | // take precedence. | |||
201 | #define DEF_ADD_DATA(CLASS, CODE) \ | |||
202 | template <class = void> void Visit##CLASS(const CLASS *S) { \ | |||
203 | CODE; \ | |||
204 | ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ | |||
205 | } | |||
206 | ||||
207 | #include "clang/AST/StmtDataCollectors.inc" | |||
208 | ||||
209 | // Type II clones ignore variable names and literals, so let's skip them. | |||
210 | #define SKIP(CLASS) \ | |||
211 | void Visit##CLASS(const CLASS *S) { \ | |||
212 | ConstStmtVisitor<CloneTypeIIStmtDataCollector<T>>::Visit##CLASS(S); \ | |||
213 | } | |||
214 | SKIP(DeclRefExpr) | |||
215 | SKIP(MemberExpr) | |||
216 | SKIP(IntegerLiteral) | |||
217 | SKIP(FloatingLiteral) | |||
218 | SKIP(StringLiteral) | |||
219 | SKIP(CXXBoolLiteralExpr) | |||
220 | SKIP(CharacterLiteral) | |||
221 | #undef SKIP | |||
222 | }; | |||
223 | } // end anonymous namespace | |||
224 | ||||
225 | static size_t createHash(llvm::MD5 &Hash) { | |||
226 | size_t HashCode; | |||
227 | ||||
228 | // Create the final hash code for the current Stmt. | |||
229 | llvm::MD5::MD5Result HashResult; | |||
230 | Hash.final(HashResult); | |||
231 | ||||
232 | // Copy as much as possible of the generated hash code to the Stmt's hash | |||
233 | // code. | |||
234 | std::memcpy(&HashCode, &HashResult, | |||
235 | std::min(sizeof(HashCode), sizeof(HashResult))); | |||
236 | ||||
237 | return HashCode; | |||
238 | } | |||
239 | ||||
240 | /// Generates and saves a hash code for the given Stmt. | |||
241 | /// \param S The given Stmt. | |||
242 | /// \param D The Decl containing S. | |||
243 | /// \param StmtsByHash Output parameter that will contain the hash codes for | |||
244 | /// each StmtSequence in the given Stmt. | |||
245 | /// \return The hash code of the given Stmt. | |||
246 | /// | |||
247 | /// If the given Stmt is a CompoundStmt, this method will also generate | |||
248 | /// hashes for all possible StmtSequences in the children of this Stmt. | |||
249 | static size_t | |||
250 | saveHash(const Stmt *S, const Decl *D, | |||
251 | std::vector<std::pair<size_t, StmtSequence>> &StmtsByHash) { | |||
252 | llvm::MD5 Hash; | |||
253 | ASTContext &Context = D->getASTContext(); | |||
254 | ||||
255 | CloneTypeIIStmtDataCollector<llvm::MD5>(S, Context, Hash); | |||
256 | ||||
257 | auto CS = dyn_cast<CompoundStmt>(S); | |||
258 | SmallVector<size_t, 8> ChildHashes; | |||
259 | ||||
260 | for (const Stmt *Child : S->children()) { | |||
261 | if (Child == nullptr) { | |||
262 | ChildHashes.push_back(0); | |||
263 | continue; | |||
264 | } | |||
265 | size_t ChildHash = saveHash(Child, D, StmtsByHash); | |||
266 | Hash.update( | |||
267 | StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash))); | |||
268 | ChildHashes.push_back(ChildHash); | |||
269 | } | |||
270 | ||||
271 | if (CS) { | |||
272 | // If we're in a CompoundStmt, we hash all possible combinations of child | |||
273 | // statements to find clones in those subsequences. | |||
274 | // We first go through every possible starting position of a subsequence. | |||
275 | for (unsigned Pos = 0; Pos < CS->size(); ++Pos) { | |||
276 | // Then we try all possible lengths this subsequence could have and | |||
277 | // reuse the same hash object to make sure we only hash every child | |||
278 | // hash exactly once. | |||
279 | llvm::MD5 Hash; | |||
280 | for (unsigned Length = 1; Length <= CS->size() - Pos; ++Length) { | |||
281 | // Grab the current child hash and put it into our hash. We do | |||
282 | // -1 on the index because we start counting the length at 1. | |||
283 | size_t ChildHash = ChildHashes[Pos + Length - 1]; | |||
284 | Hash.update( | |||
285 | StringRef(reinterpret_cast<char *>(&ChildHash), sizeof(ChildHash))); | |||
286 | // If we have at least two elements in our subsequence, we can start | |||
287 | // saving it. | |||
288 | if (Length > 1) { | |||
289 | llvm::MD5 SubHash = Hash; | |||
290 | StmtsByHash.push_back(std::make_pair( | |||
291 | createHash(SubHash), StmtSequence(CS, D, Pos, Pos + Length))); | |||
292 | } | |||
293 | } | |||
294 | } | |||
295 | } | |||
296 | ||||
297 | size_t HashCode = createHash(Hash); | |||
298 | StmtsByHash.push_back(std::make_pair(HashCode, StmtSequence(S, D))); | |||
299 | return HashCode; | |||
300 | } | |||
301 | ||||
302 | namespace { | |||
303 | /// Wrapper around FoldingSetNodeID that it can be used as the template | |||
304 | /// argument of the StmtDataCollector. | |||
305 | class FoldingSetNodeIDWrapper { | |||
306 | ||||
307 | llvm::FoldingSetNodeID &FS; | |||
308 | ||||
309 | public: | |||
310 | FoldingSetNodeIDWrapper(llvm::FoldingSetNodeID &FS) : FS(FS) {} | |||
311 | ||||
312 | void update(StringRef Str) { FS.AddString(Str); } | |||
313 | }; | |||
314 | } // end anonymous namespace | |||
315 | ||||
316 | /// Writes the relevant data from all statements and child statements | |||
317 | /// in the given StmtSequence into the given FoldingSetNodeID. | |||
318 | static void CollectStmtSequenceData(const StmtSequence &Sequence, | |||
319 | FoldingSetNodeIDWrapper &OutputData) { | |||
320 | for (const Stmt *S : Sequence) { | |||
321 | CloneTypeIIStmtDataCollector<FoldingSetNodeIDWrapper>( | |||
322 | S, Sequence.getASTContext(), OutputData); | |||
323 | ||||
324 | for (const Stmt *Child : S->children()) { | |||
| ||||
325 | if (!Child) | |||
326 | continue; | |||
327 | ||||
328 | CollectStmtSequenceData(StmtSequence(Child, Sequence.getContainingDecl()), | |||
329 | OutputData); | |||
330 | } | |||
331 | } | |||
332 | } | |||
333 | ||||
334 | /// Returns true if both sequences are clones of each other. | |||
335 | static bool areSequencesClones(const StmtSequence &LHS, | |||
336 | const StmtSequence &RHS) { | |||
337 | // We collect the data from all statements in the sequence as we did before | |||
338 | // when generating a hash value for each sequence. But this time we don't | |||
339 | // hash the collected data and compare the whole data set instead. This | |||
340 | // prevents any false-positives due to hash code collisions. | |||
341 | llvm::FoldingSetNodeID DataLHS, DataRHS; | |||
342 | FoldingSetNodeIDWrapper LHSWrapper(DataLHS); | |||
343 | FoldingSetNodeIDWrapper RHSWrapper(DataRHS); | |||
344 | ||||
345 | CollectStmtSequenceData(LHS, LHSWrapper); | |||
| ||||
346 | CollectStmtSequenceData(RHS, RHSWrapper); | |||
347 | ||||
348 | return DataLHS == DataRHS; | |||
349 | } | |||
350 | ||||
351 | void RecursiveCloneTypeIIHashConstraint::constrain( | |||
352 | std::vector<CloneDetector::CloneGroup> &Sequences) { | |||
353 | // FIXME: Maybe we can do this in-place and don't need this additional vector. | |||
354 | std::vector<CloneDetector::CloneGroup> Result; | |||
355 | ||||
356 | for (CloneDetector::CloneGroup &Group : Sequences) { | |||
357 | // We assume in the following code that the Group is non-empty, so we | |||
358 | // skip all empty groups. | |||
359 | if (Group.empty()) | |||
360 | continue; | |||
361 | ||||
362 | std::vector<std::pair<size_t, StmtSequence>> StmtsByHash; | |||
363 | ||||
364 | // Generate hash codes for all children of S and save them in StmtsByHash. | |||
365 | for (const StmtSequence &S : Group) { | |||
366 | saveHash(S.front(), S.getContainingDecl(), StmtsByHash); | |||
367 | } | |||
368 | ||||
369 | // Sort hash_codes in StmtsByHash. | |||
370 | llvm::stable_sort(StmtsByHash, llvm::less_first()); | |||
371 | ||||
372 | // Check for each StmtSequence if its successor has the same hash value. | |||
373 | // We don't check the last StmtSequence as it has no successor. | |||
374 | // Note: The 'size - 1 ' in the condition is safe because we check for an | |||
375 | // empty Group vector at the beginning of this function. | |||
376 | for (unsigned i = 0; i < StmtsByHash.size() - 1; ++i) { | |||
377 | const auto Current = StmtsByHash[i]; | |||
378 | ||||
379 | // It's likely that we just found a sequence of StmtSequences that | |||
380 | // represent a CloneGroup, so we create a new group and start checking and | |||
381 | // adding the StmtSequences in this sequence. | |||
382 | CloneDetector::CloneGroup NewGroup; | |||
383 | ||||
384 | size_t PrototypeHash = Current.first; | |||
385 | ||||
386 | for (; i < StmtsByHash.size(); ++i) { | |||
387 | // A different hash value means we have reached the end of the sequence. | |||
388 | if (PrototypeHash != StmtsByHash[i].first) { | |||
389 | // The current sequence could be the start of a new CloneGroup. So we | |||
390 | // decrement i so that we visit it again in the outer loop. | |||
391 | // Note: i can never be 0 at this point because we are just comparing | |||
392 | // the hash of the Current StmtSequence with itself in the 'if' above. | |||
393 | assert(i != 0)((i != 0) ? static_cast<void> (0) : __assert_fail ("i != 0" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 393, __PRETTY_FUNCTION__)); | |||
394 | --i; | |||
395 | break; | |||
396 | } | |||
397 | // Same hash value means we should add the StmtSequence to the current | |||
398 | // group. | |||
399 | NewGroup.push_back(StmtsByHash[i].second); | |||
400 | } | |||
401 | ||||
402 | // We created a new clone group with matching hash codes and move it to | |||
403 | // the result vector. | |||
404 | Result.push_back(NewGroup); | |||
405 | } | |||
406 | } | |||
407 | // Sequences is the output parameter, so we copy our result into it. | |||
408 | Sequences = Result; | |||
409 | } | |||
410 | ||||
411 | void RecursiveCloneTypeIIVerifyConstraint::constrain( | |||
412 | std::vector<CloneDetector::CloneGroup> &Sequences) { | |||
413 | CloneConstraint::splitCloneGroups( | |||
414 | Sequences, [](const StmtSequence &A, const StmtSequence &B) { | |||
415 | return areSequencesClones(A, B); | |||
416 | }); | |||
417 | } | |||
418 | ||||
419 | size_t MinComplexityConstraint::calculateStmtComplexity( | |||
420 | const StmtSequence &Seq, std::size_t Limit, | |||
421 | const std::string &ParentMacroStack) { | |||
422 | if (Seq.empty()) | |||
423 | return 0; | |||
424 | ||||
425 | size_t Complexity = 1; | |||
426 | ||||
427 | ASTContext &Context = Seq.getASTContext(); | |||
428 | ||||
429 | // Look up what macros expanded into the current statement. | |||
430 | std::string MacroStack = | |||
431 | data_collection::getMacroStack(Seq.getBeginLoc(), Context); | |||
432 | ||||
433 | // First, check if ParentMacroStack is not empty which means we are currently | |||
434 | // dealing with a parent statement which was expanded from a macro. | |||
435 | // If this parent statement was expanded from the same macros as this | |||
436 | // statement, we reduce the initial complexity of this statement to zero. | |||
437 | // This causes that a group of statements that were generated by a single | |||
438 | // macro expansion will only increase the total complexity by one. | |||
439 | // Note: This is not the final complexity of this statement as we still | |||
440 | // add the complexity of the child statements to the complexity value. | |||
441 | if (!ParentMacroStack.empty() && MacroStack == ParentMacroStack) { | |||
442 | Complexity = 0; | |||
443 | } | |||
444 | ||||
445 | // Iterate over the Stmts in the StmtSequence and add their complexity values | |||
446 | // to the current complexity value. | |||
447 | if (Seq.holdsSequence()) { | |||
448 | for (const Stmt *S : Seq) { | |||
449 | Complexity += calculateStmtComplexity( | |||
450 | StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack); | |||
451 | if (Complexity >= Limit) | |||
452 | return Limit; | |||
453 | } | |||
454 | } else { | |||
455 | for (const Stmt *S : Seq.front()->children()) { | |||
456 | Complexity += calculateStmtComplexity( | |||
457 | StmtSequence(S, Seq.getContainingDecl()), Limit, MacroStack); | |||
458 | if (Complexity >= Limit) | |||
459 | return Limit; | |||
460 | } | |||
461 | } | |||
462 | return Complexity; | |||
463 | } | |||
464 | ||||
465 | void MatchingVariablePatternConstraint::constrain( | |||
466 | std::vector<CloneDetector::CloneGroup> &CloneGroups) { | |||
467 | CloneConstraint::splitCloneGroups( | |||
468 | CloneGroups, [](const StmtSequence &A, const StmtSequence &B) { | |||
469 | VariablePattern PatternA(A); | |||
470 | VariablePattern PatternB(B); | |||
471 | return PatternA.countPatternDifferences(PatternB) == 0; | |||
472 | }); | |||
473 | } | |||
474 | ||||
475 | void CloneConstraint::splitCloneGroups( | |||
476 | std::vector<CloneDetector::CloneGroup> &CloneGroups, | |||
477 | llvm::function_ref<bool(const StmtSequence &, const StmtSequence &)> | |||
478 | Compare) { | |||
479 | std::vector<CloneDetector::CloneGroup> Result; | |||
480 | for (auto &HashGroup : CloneGroups) { | |||
481 | // Contains all indexes in HashGroup that were already added to a | |||
482 | // CloneGroup. | |||
483 | std::vector<char> Indexes; | |||
484 | Indexes.resize(HashGroup.size()); | |||
485 | ||||
486 | for (unsigned i = 0; i < HashGroup.size(); ++i) { | |||
487 | // Skip indexes that are already part of a CloneGroup. | |||
488 | if (Indexes[i]) | |||
489 | continue; | |||
490 | ||||
491 | // Pick the first unhandled StmtSequence and consider it as the | |||
492 | // beginning | |||
493 | // of a new CloneGroup for now. | |||
494 | // We don't add i to Indexes because we never iterate back. | |||
495 | StmtSequence Prototype = HashGroup[i]; | |||
496 | CloneDetector::CloneGroup PotentialGroup = {Prototype}; | |||
497 | ++Indexes[i]; | |||
498 | ||||
499 | // Check all following StmtSequences for clones. | |||
500 | for (unsigned j = i + 1; j < HashGroup.size(); ++j) { | |||
501 | // Skip indexes that are already part of a CloneGroup. | |||
502 | if (Indexes[j]) | |||
503 | continue; | |||
504 | ||||
505 | // If a following StmtSequence belongs to our CloneGroup, we add it. | |||
506 | const StmtSequence &Candidate = HashGroup[j]; | |||
507 | ||||
508 | if (!Compare(Prototype, Candidate)) | |||
509 | continue; | |||
510 | ||||
511 | PotentialGroup.push_back(Candidate); | |||
512 | // Make sure we never visit this StmtSequence again. | |||
513 | ++Indexes[j]; | |||
514 | } | |||
515 | ||||
516 | // Otherwise, add it to the result and continue searching for more | |||
517 | // groups. | |||
518 | Result.push_back(PotentialGroup); | |||
519 | } | |||
520 | ||||
521 | assert(llvm::all_of(Indexes, [](char c) { return c == 1; }))((llvm::all_of(Indexes, [](char c) { return c == 1; })) ? static_cast <void> (0) : __assert_fail ("llvm::all_of(Indexes, [](char c) { return c == 1; })" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 521, __PRETTY_FUNCTION__)); | |||
522 | } | |||
523 | CloneGroups = Result; | |||
524 | } | |||
525 | ||||
526 | void VariablePattern::addVariableOccurence(const VarDecl *VarDecl, | |||
527 | const Stmt *Mention) { | |||
528 | // First check if we already reference this variable | |||
529 | for (size_t KindIndex = 0; KindIndex < Variables.size(); ++KindIndex) { | |||
530 | if (Variables[KindIndex] == VarDecl) { | |||
531 | // If yes, add a new occurrence that points to the existing entry in | |||
532 | // the Variables vector. | |||
533 | Occurences.emplace_back(KindIndex, Mention); | |||
534 | return; | |||
535 | } | |||
536 | } | |||
537 | // If this variable wasn't already referenced, add it to the list of | |||
538 | // referenced variables and add a occurrence that points to this new entry. | |||
539 | Occurences.emplace_back(Variables.size(), Mention); | |||
540 | Variables.push_back(VarDecl); | |||
541 | } | |||
542 | ||||
543 | void VariablePattern::addVariables(const Stmt *S) { | |||
544 | // Sometimes we get a nullptr (such as from IfStmts which often have nullptr | |||
545 | // children). We skip such statements as they don't reference any | |||
546 | // variables. | |||
547 | if (!S) | |||
548 | return; | |||
549 | ||||
550 | // Check if S is a reference to a variable. If yes, add it to the pattern. | |||
551 | if (auto D = dyn_cast<DeclRefExpr>(S)) { | |||
552 | if (auto VD = dyn_cast<VarDecl>(D->getDecl()->getCanonicalDecl())) | |||
553 | addVariableOccurence(VD, D); | |||
554 | } | |||
555 | ||||
556 | // Recursively check all children of the given statement. | |||
557 | for (const Stmt *Child : S->children()) { | |||
558 | addVariables(Child); | |||
559 | } | |||
560 | } | |||
561 | ||||
562 | unsigned VariablePattern::countPatternDifferences( | |||
563 | const VariablePattern &Other, | |||
564 | VariablePattern::SuspiciousClonePair *FirstMismatch) { | |||
565 | unsigned NumberOfDifferences = 0; | |||
566 | ||||
567 | assert(Other.Occurences.size() == Occurences.size())((Other.Occurences.size() == Occurences.size()) ? static_cast <void> (0) : __assert_fail ("Other.Occurences.size() == Occurences.size()" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 567, __PRETTY_FUNCTION__)); | |||
568 | for (unsigned i = 0; i < Occurences.size(); ++i) { | |||
569 | auto ThisOccurence = Occurences[i]; | |||
570 | auto OtherOccurence = Other.Occurences[i]; | |||
571 | if (ThisOccurence.KindID == OtherOccurence.KindID) | |||
572 | continue; | |||
573 | ||||
574 | ++NumberOfDifferences; | |||
575 | ||||
576 | // If FirstMismatch is not a nullptr, we need to store information about | |||
577 | // the first difference between the two patterns. | |||
578 | if (FirstMismatch == nullptr) | |||
579 | continue; | |||
580 | ||||
581 | // Only proceed if we just found the first difference as we only store | |||
582 | // information about the first difference. | |||
583 | if (NumberOfDifferences != 1) | |||
584 | continue; | |||
585 | ||||
586 | const VarDecl *FirstSuggestion = nullptr; | |||
587 | // If there is a variable available in the list of referenced variables | |||
588 | // which wouldn't break the pattern if it is used in place of the | |||
589 | // current variable, we provide this variable as the suggested fix. | |||
590 | if (OtherOccurence.KindID < Variables.size()) | |||
591 | FirstSuggestion = Variables[OtherOccurence.KindID]; | |||
592 | ||||
593 | // Store information about the first clone. | |||
594 | FirstMismatch->FirstCloneInfo = | |||
595 | VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo( | |||
596 | Variables[ThisOccurence.KindID], ThisOccurence.Mention, | |||
597 | FirstSuggestion); | |||
598 | ||||
599 | // Same as above but with the other clone. We do this for both clones as | |||
600 | // we don't know which clone is the one containing the unintended | |||
601 | // pattern error. | |||
602 | const VarDecl *SecondSuggestion = nullptr; | |||
603 | if (ThisOccurence.KindID < Other.Variables.size()) | |||
604 | SecondSuggestion = Other.Variables[ThisOccurence.KindID]; | |||
605 | ||||
606 | // Store information about the second clone. | |||
607 | FirstMismatch->SecondCloneInfo = | |||
608 | VariablePattern::SuspiciousClonePair::SuspiciousCloneInfo( | |||
609 | Other.Variables[OtherOccurence.KindID], OtherOccurence.Mention, | |||
610 | SecondSuggestion); | |||
611 | ||||
612 | // SuspiciousClonePair guarantees that the first clone always has a | |||
613 | // suggested variable associated with it. As we know that one of the two | |||
614 | // clones in the pair always has suggestion, we swap the two clones | |||
615 | // in case the first clone has no suggested variable which means that | |||
616 | // the second clone has a suggested variable and should be first. | |||
617 | if (!FirstMismatch->FirstCloneInfo.Suggestion) | |||
618 | std::swap(FirstMismatch->FirstCloneInfo, FirstMismatch->SecondCloneInfo); | |||
619 | ||||
620 | // This ensures that we always have at least one suggestion in a pair. | |||
621 | assert(FirstMismatch->FirstCloneInfo.Suggestion)((FirstMismatch->FirstCloneInfo.Suggestion) ? static_cast< void> (0) : __assert_fail ("FirstMismatch->FirstCloneInfo.Suggestion" , "/build/llvm-toolchain-snapshot-9~svn362543/tools/clang/lib/Analysis/CloneDetection.cpp" , 621, __PRETTY_FUNCTION__)); | |||
622 | } | |||
623 | ||||
624 | return NumberOfDifferences; | |||
625 | } |