LLVM 20.0.0git
Lint.cpp
Go to the documentation of this file.
1//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass statically checks for common and easily-identified constructs
10// which produce undefined or likely unintended behavior in LLVM IR.
11//
12// It is not a guarantee of correctness, in two ways. First, it isn't
13// comprehensive. There are checks which could be done statically which are
14// not yet implemented. Some of these are indicated by TODO comments, but
15// those aren't comprehensive either. Second, many conditions cannot be
16// checked statically. This pass does no dynamic instrumentation, so it
17// can't check for all possible problems.
18//
19// Another limitation is that it assumes all code will be executed. A store
20// through a null pointer in a basic block which is never reached is harmless,
21// but this pass will warn about it anyway. This is the main reason why most
22// of these checks live here instead of in the Verifier pass.
23//
24// Optimization passes may make conditions that this pass checks for more or
25// less obvious. If an optimization pass appears to be introducing a warning,
26// it may be that the optimization pass is merely exposing an existing
27// condition in the code.
28//
29// This code may be run before instcombine. In many cases, instcombine checks
30// for the same kinds of things and turns instructions with undefined behavior
31// into unreachable (or equivalent). Because of this, this pass makes some
32// effort to look through bitcasts and so on.
33//
34//===----------------------------------------------------------------------===//
35
36#include "llvm/Analysis/Lint.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
40#include "llvm/ADT/Twine.h"
46#include "llvm/Analysis/Loads.h"
52#include "llvm/IR/Argument.h"
53#include "llvm/IR/BasicBlock.h"
54#include "llvm/IR/Constant.h"
55#include "llvm/IR/Constants.h"
56#include "llvm/IR/DataLayout.h"
58#include "llvm/IR/Dominators.h"
59#include "llvm/IR/Function.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/InstrTypes.h"
63#include "llvm/IR/Instruction.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/PassManager.h"
68#include "llvm/IR/Type.h"
69#include "llvm/IR/Value.h"
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <string>
77
78using namespace llvm;
79
80static const char LintAbortOnErrorArgName[] = "lint-abort-on-error";
81static cl::opt<bool>
83 cl::desc("In the Lint pass, abort on errors."));
84
85namespace {
86namespace MemRef {
87static const unsigned Read = 1;
88static const unsigned Write = 2;
89static const unsigned Callee = 4;
90static const unsigned Branchee = 8;
91} // end namespace MemRef
92
93class Lint : public InstVisitor<Lint> {
94 friend class InstVisitor<Lint>;
95
97
98 void visitCallBase(CallBase &CB);
99 void visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
100 MaybeAlign Alignment, Type *Ty, unsigned Flags);
101
103 void visitLoadInst(LoadInst &I);
105 void visitXor(BinaryOperator &I);
106 void visitSub(BinaryOperator &I);
107 void visitLShr(BinaryOperator &I);
108 void visitAShr(BinaryOperator &I);
109 void visitShl(BinaryOperator &I);
110 void visitSDiv(BinaryOperator &I);
111 void visitUDiv(BinaryOperator &I);
112 void visitSRem(BinaryOperator &I);
113 void visitURem(BinaryOperator &I);
120
121 Value *findValue(Value *V, bool OffsetOk) const;
122 Value *findValueImpl(Value *V, bool OffsetOk,
123 SmallPtrSetImpl<Value *> &Visited) const;
124
125public:
126 Module *Mod;
127 const DataLayout *DL;
128 AliasAnalysis *AA;
129 AssumptionCache *AC;
130 DominatorTree *DT;
132
133 std::string Messages;
134 raw_string_ostream MessagesStr;
135
136 Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA,
138 : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI),
139 MessagesStr(Messages) {}
140
141 void WriteValues(ArrayRef<const Value *> Vs) {
142 for (const Value *V : Vs) {
143 if (!V)
144 continue;
145 if (isa<Instruction>(V)) {
146 MessagesStr << *V << '\n';
147 } else {
148 V->printAsOperand(MessagesStr, true, Mod);
149 MessagesStr << '\n';
150 }
151 }
152 }
153
154 /// A check failed, so printout out the condition and the message.
155 ///
156 /// This provides a nice place to put a breakpoint if you want to see why
157 /// something is not correct.
158 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
159
160 /// A check failed (with values to print).
161 ///
162 /// This calls the Message-only version so that the above is easier to set
163 /// a breakpoint on.
164 template <typename T1, typename... Ts>
165 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
166 CheckFailed(Message);
167 WriteValues({V1, Vs...});
168 }
169};
170} // end anonymous namespace
171
172// Check - We know that cond should be true, if not print an error message.
173#define Check(C, ...) \
174 do { \
175 if (!(C)) { \
176 CheckFailed(__VA_ARGS__); \
177 return; \
178 } \
179 } while (false)
180
181void Lint::visitFunction(Function &F) {
182 // This isn't undefined behavior, it's just a little unusual, and it's a
183 // fairly common mistake to neglect to name a function.
184 Check(F.hasName() || F.hasLocalLinkage(),
185 "Unusual: Unnamed function with non-local linkage", &F);
186
187 // TODO: Check for irreducible control flow.
188}
189
190void Lint::visitCallBase(CallBase &I) {
191 Value *Callee = I.getCalledOperand();
192
193 visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt,
194 nullptr, MemRef::Callee);
195
196 if (Function *F = dyn_cast<Function>(findValue(Callee,
197 /*OffsetOk=*/false))) {
198 Check(I.getCallingConv() == F->getCallingConv(),
199 "Undefined behavior: Caller and callee calling convention differ",
200 &I);
201
202 FunctionType *FT = F->getFunctionType();
203 unsigned NumActualArgs = I.arg_size();
204
205 Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
206 : FT->getNumParams() == NumActualArgs,
207 "Undefined behavior: Call argument count mismatches callee "
208 "argument count",
209 &I);
210
211 Check(FT->getReturnType() == I.getType(),
212 "Undefined behavior: Call return type mismatches "
213 "callee return type",
214 &I);
215
216 // Check argument types (in case the callee was casted) and attributes.
217 // TODO: Verify that caller and callee attributes are compatible.
218 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
219 auto AI = I.arg_begin(), AE = I.arg_end();
220 for (; AI != AE; ++AI) {
221 Value *Actual = *AI;
222 if (PI != PE) {
223 Argument *Formal = &*PI++;
224 Check(Formal->getType() == Actual->getType(),
225 "Undefined behavior: Call argument type mismatches "
226 "callee parameter type",
227 &I);
228
229 // Check that noalias arguments don't alias other arguments. This is
230 // not fully precise because we don't know the sizes of the dereferenced
231 // memory regions.
232 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
233 AttributeList PAL = I.getAttributes();
234 unsigned ArgNo = 0;
235 for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
236 // Skip ByVal arguments since they will be memcpy'd to the callee's
237 // stack so we're not really passing the pointer anyway.
238 if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
239 continue;
240 // If both arguments are readonly, they have no dependence.
241 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo))
242 continue;
243 // Skip readnone arguments since those are guaranteed not to be
244 // dereferenced anyway.
245 if (I.doesNotAccessMemory(ArgNo))
246 continue;
247 if (AI != BI && (*BI)->getType()->isPointerTy()) {
248 AliasResult Result = AA->alias(*AI, *BI);
249 Check(Result != AliasResult::MustAlias &&
251 "Unusual: noalias argument aliases another argument", &I);
252 }
253 }
254 }
255
256 // Check that an sret argument points to valid memory.
257 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
258 Type *Ty = Formal->getParamStructRetType();
259 MemoryLocation Loc(
260 Actual, LocationSize::precise(DL->getTypeStoreSize(Ty)));
261 visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty,
262 MemRef::Read | MemRef::Write);
263 }
264 }
265 }
266 }
267
268 if (const auto *CI = dyn_cast<CallInst>(&I)) {
269 if (CI->isTailCall()) {
270 const AttributeList &PAL = CI->getAttributes();
271 unsigned ArgNo = 0;
272 for (Value *Arg : I.args()) {
273 // Skip ByVal arguments since they will be memcpy'd to the callee's
274 // stack anyway.
275 if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
276 continue;
277 Value *Obj = findValue(Arg, /*OffsetOk=*/true);
278 Check(!isa<AllocaInst>(Obj),
279 "Undefined behavior: Call with \"tail\" keyword references "
280 "alloca",
281 &I);
282 }
283 }
284 }
285
286 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
287 switch (II->getIntrinsicID()) {
288 default:
289 break;
290
291 // TODO: Check more intrinsics
292
293 case Intrinsic::memcpy:
294 case Intrinsic::memcpy_inline: {
295 MemCpyInst *MCI = cast<MemCpyInst>(&I);
296 visitMemoryReference(I, MemoryLocation::getForDest(MCI),
297 MCI->getDestAlign(), nullptr, MemRef::Write);
298 visitMemoryReference(I, MemoryLocation::getForSource(MCI),
299 MCI->getSourceAlign(), nullptr, MemRef::Read);
300
301 // Check that the memcpy arguments don't overlap. The AliasAnalysis API
302 // isn't expressive enough for what we really want to do. Known partial
303 // overlap is not distinguished from the case where nothing is known.
305 if (const ConstantInt *Len =
306 dyn_cast<ConstantInt>(findValue(MCI->getLength(),
307 /*OffsetOk=*/false)))
308 if (Len->getValue().isIntN(32))
309 Size = LocationSize::precise(Len->getValue().getZExtValue());
310 Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
312 "Undefined behavior: memcpy source and destination overlap", &I);
313 break;
314 }
315 case Intrinsic::memmove: {
316 MemMoveInst *MMI = cast<MemMoveInst>(&I);
317 visitMemoryReference(I, MemoryLocation::getForDest(MMI),
318 MMI->getDestAlign(), nullptr, MemRef::Write);
319 visitMemoryReference(I, MemoryLocation::getForSource(MMI),
320 MMI->getSourceAlign(), nullptr, MemRef::Read);
321 break;
322 }
323 case Intrinsic::memset: {
324 MemSetInst *MSI = cast<MemSetInst>(&I);
325 visitMemoryReference(I, MemoryLocation::getForDest(MSI),
326 MSI->getDestAlign(), nullptr, MemRef::Write);
327 break;
328 }
329 case Intrinsic::memset_inline: {
330 MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
331 visitMemoryReference(I, MemoryLocation::getForDest(MSII),
332 MSII->getDestAlign(), nullptr, MemRef::Write);
333 break;
334 }
335
336 case Intrinsic::vastart:
337 // vastart in non-varargs function is rejected by the verifier
338 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
339 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
340 break;
341 case Intrinsic::vacopy:
342 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
343 std::nullopt, nullptr, MemRef::Write);
344 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI),
345 std::nullopt, nullptr, MemRef::Read);
346 break;
347 case Intrinsic::vaend:
348 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
349 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
350 break;
351
352 case Intrinsic::stackrestore:
353 // Stackrestore doesn't read or write memory, but it sets the
354 // stack pointer, which the compiler may read from or write to
355 // at any time, so check it for both readability and writeability.
356 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
357 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
358 break;
359 case Intrinsic::get_active_lane_mask:
360 if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
361 Check(!TripCount->isZero(),
362 "get_active_lane_mask: operand #2 "
363 "must be greater than 0",
364 &I);
365 break;
366 }
367}
368
369void Lint::visitReturnInst(ReturnInst &I) {
370 Function *F = I.getParent()->getParent();
371 Check(!F->doesNotReturn(),
372 "Unusual: Return statement in function with noreturn attribute", &I);
373
374 if (Value *V = I.getReturnValue()) {
375 Value *Obj = findValue(V, /*OffsetOk=*/true);
376 Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
377 }
378}
379
380// TODO: Check that the reference is in bounds.
381// TODO: Check readnone/readonly function attributes.
382void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
383 MaybeAlign Align, Type *Ty, unsigned Flags) {
384 // If no memory is being referenced, it doesn't matter if the pointer
385 // is valid.
386 if (Loc.Size.isZero())
387 return;
388
389 Value *Ptr = const_cast<Value *>(Loc.Ptr);
390 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
391 Check(!isa<ConstantPointerNull>(UnderlyingObject),
392 "Undefined behavior: Null pointer dereference", &I);
393 Check(!isa<UndefValue>(UnderlyingObject),
394 "Undefined behavior: Undef pointer dereference", &I);
395 Check(!isa<ConstantInt>(UnderlyingObject) ||
396 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
397 "Unusual: All-ones pointer dereference", &I);
398 Check(!isa<ConstantInt>(UnderlyingObject) ||
399 !cast<ConstantInt>(UnderlyingObject)->isOne(),
400 "Unusual: Address one pointer dereference", &I);
401
402 if (Flags & MemRef::Write) {
403 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
404 Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
405 &I);
406 Check(!isa<Function>(UnderlyingObject) &&
407 !isa<BlockAddress>(UnderlyingObject),
408 "Undefined behavior: Write to text section", &I);
409 }
410 if (Flags & MemRef::Read) {
411 Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
412 &I);
413 Check(!isa<BlockAddress>(UnderlyingObject),
414 "Undefined behavior: Load from block address", &I);
415 }
416 if (Flags & MemRef::Callee) {
417 Check(!isa<BlockAddress>(UnderlyingObject),
418 "Undefined behavior: Call to block address", &I);
419 }
420 if (Flags & MemRef::Branchee) {
421 Check(!isa<Constant>(UnderlyingObject) ||
422 isa<BlockAddress>(UnderlyingObject),
423 "Undefined behavior: Branch to non-blockaddress", &I);
424 }
425
426 // Check for buffer overflows and misalignment.
427 // Only handles memory references that read/write something simple like an
428 // alloca instruction or a global variable.
429 int64_t Offset = 0;
431 // OK, so the access is to a constant offset from Ptr. Check that Ptr is
432 // something we can handle and if so extract the size of this base object
433 // along with its alignment.
435 MaybeAlign BaseAlign;
436
437 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
438 Type *ATy = AI->getAllocatedType();
439 if (!AI->isArrayAllocation() && ATy->isSized())
440 BaseSize = DL->getTypeAllocSize(ATy);
441 BaseAlign = AI->getAlign();
442 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
443 // If the global may be defined differently in another compilation unit
444 // then don't warn about funky memory accesses.
445 if (GV->hasDefinitiveInitializer()) {
446 Type *GTy = GV->getValueType();
447 if (GTy->isSized())
448 BaseSize = DL->getTypeAllocSize(GTy);
449 BaseAlign = GV->getAlign();
450 if (!BaseAlign && GTy->isSized())
451 BaseAlign = DL->getABITypeAlign(GTy);
452 }
453 }
454
455 // Accesses from before the start or after the end of the object are not
456 // defined.
457 Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
458 (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
459 "Undefined behavior: Buffer overflow", &I);
460
461 // Accesses that say that the memory is more aligned than it is are not
462 // defined.
463 if (!Align && Ty && Ty->isSized())
464 Align = DL->getABITypeAlign(Ty);
465 if (BaseAlign && Align)
466 Check(*Align <= commonAlignment(*BaseAlign, Offset),
467 "Undefined behavior: Memory reference address is misaligned", &I);
468 }
469}
470
471void Lint::visitLoadInst(LoadInst &I) {
472 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(),
473 MemRef::Read);
474}
475
476void Lint::visitStoreInst(StoreInst &I) {
477 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(),
478 I.getOperand(0)->getType(), MemRef::Write);
479}
480
481void Lint::visitXor(BinaryOperator &I) {
482 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
483 "Undefined result: xor(undef, undef)", &I);
484}
485
486void Lint::visitSub(BinaryOperator &I) {
487 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
488 "Undefined result: sub(undef, undef)", &I);
489}
490
491void Lint::visitLShr(BinaryOperator &I) {
492 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
493 /*OffsetOk=*/false)))
494 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
495 "Undefined result: Shift count out of range", &I);
496}
497
498void Lint::visitAShr(BinaryOperator &I) {
499 if (ConstantInt *CI =
500 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
501 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
502 "Undefined result: Shift count out of range", &I);
503}
504
505void Lint::visitShl(BinaryOperator &I) {
506 if (ConstantInt *CI =
507 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
508 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
509 "Undefined result: Shift count out of range", &I);
510}
511
512static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
513 AssumptionCache *AC) {
514 // Assume undef could be zero.
515 if (isa<UndefValue>(V))
516 return true;
517
518 VectorType *VecTy = dyn_cast<VectorType>(V->getType());
519 if (!VecTy) {
520 KnownBits Known =
521 computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
522 return Known.isZero();
523 }
524
525 // Per-component check doesn't work with zeroinitializer
526 Constant *C = dyn_cast<Constant>(V);
527 if (!C)
528 return false;
529
530 if (C->isZeroValue())
531 return true;
532
533 // For a vector, KnownZero will only be true if all values are zero, so check
534 // this per component
535 for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements();
536 I != N; ++I) {
537 Constant *Elem = C->getAggregateElement(I);
538 if (isa<UndefValue>(Elem))
539 return true;
540
541 KnownBits Known = computeKnownBits(Elem, DL);
542 if (Known.isZero())
543 return true;
544 }
545
546 return false;
547}
548
549void Lint::visitSDiv(BinaryOperator &I) {
550 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
551 "Undefined behavior: Division by zero", &I);
552}
553
554void Lint::visitUDiv(BinaryOperator &I) {
555 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
556 "Undefined behavior: Division by zero", &I);
557}
558
559void Lint::visitSRem(BinaryOperator &I) {
560 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
561 "Undefined behavior: Division by zero", &I);
562}
563
564void Lint::visitURem(BinaryOperator &I) {
565 Check(!isZero(I.getOperand(1), I.getDataLayout(), DT, AC),
566 "Undefined behavior: Division by zero", &I);
567}
568
569void Lint::visitAllocaInst(AllocaInst &I) {
570 if (isa<ConstantInt>(I.getArraySize()))
571 // This isn't undefined behavior, it's just an obvious pessimization.
572 Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
573 "Pessimization: Static alloca outside of entry block", &I);
574
575 // TODO: Check for an unusual size (MSB set?)
576}
577
578void Lint::visitVAArgInst(VAArgInst &I) {
579 visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr,
580 MemRef::Read | MemRef::Write);
581}
582
583void Lint::visitIndirectBrInst(IndirectBrInst &I) {
584 visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()),
585 std::nullopt, nullptr, MemRef::Branchee);
586
587 Check(I.getNumDestinations() != 0,
588 "Undefined behavior: indirectbr with no destinations", &I);
589}
590
591void Lint::visitExtractElementInst(ExtractElementInst &I) {
592 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
593 /*OffsetOk=*/false)))
594 Check(
595 CI->getValue().ult(
596 cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()),
597 "Undefined result: extractelement index out of range", &I);
598}
599
600void Lint::visitInsertElementInst(InsertElementInst &I) {
601 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
602 /*OffsetOk=*/false)))
603 Check(CI->getValue().ult(
604 cast<FixedVectorType>(I.getType())->getNumElements()),
605 "Undefined result: insertelement index out of range", &I);
606}
607
608void Lint::visitUnreachableInst(UnreachableInst &I) {
609 // This isn't undefined behavior, it's merely suspicious.
610 Check(&I == &I.getParent()->front() ||
611 std::prev(I.getIterator())->mayHaveSideEffects(),
612 "Unusual: unreachable immediately preceded by instruction without "
613 "side effects",
614 &I);
615}
616
617/// findValue - Look through bitcasts and simple memory reference patterns
618/// to identify an equivalent, but more informative, value. If OffsetOk
619/// is true, look through getelementptrs with non-zero offsets too.
620///
621/// Most analysis passes don't require this logic, because instcombine
622/// will simplify most of these kinds of things away. But it's a goal of
623/// this Lint pass to be useful even on non-optimized IR.
624Value *Lint::findValue(Value *V, bool OffsetOk) const {
626 return findValueImpl(V, OffsetOk, Visited);
627}
628
629/// findValueImpl - Implementation helper for findValue.
630Value *Lint::findValueImpl(Value *V, bool OffsetOk,
631 SmallPtrSetImpl<Value *> &Visited) const {
632 // Detect self-referential values.
633 if (!Visited.insert(V).second)
634 return PoisonValue::get(V->getType());
635
636 // TODO: Look through sext or zext cast, when the result is known to
637 // be interpreted as signed or unsigned, respectively.
638 // TODO: Look through eliminable cast pairs.
639 // TODO: Look through calls with unique return values.
640 // TODO: Look through vector insert/extract/shuffle.
641 V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts();
642 if (LoadInst *L = dyn_cast<LoadInst>(V)) {
643 BasicBlock::iterator BBI = L->getIterator();
644 BasicBlock *BB = L->getParent();
645 SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
646 BatchAAResults BatchAA(*AA);
647 for (;;) {
648 if (!VisitedBlocks.insert(BB).second)
649 break;
650 if (Value *U =
651 FindAvailableLoadedValue(L, BB, BBI, DefMaxInstsToScan, &BatchAA))
652 return findValueImpl(U, OffsetOk, Visited);
653 if (BBI != BB->begin())
654 break;
655 BB = BB->getUniquePredecessor();
656 if (!BB)
657 break;
658 BBI = BB->end();
659 }
660 } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
661 if (Value *W = PN->hasConstantValue())
662 return findValueImpl(W, OffsetOk, Visited);
663 } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
664 if (CI->isNoopCast(*DL))
665 return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
666 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
667 if (Value *W =
668 FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices()))
669 if (W != V)
670 return findValueImpl(W, OffsetOk, Visited);
671 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
672 // Same as above, but for ConstantExpr instead of Instruction.
673 if (Instruction::isCast(CE->getOpcode())) {
675 CE->getOperand(0)->getType(), CE->getType(),
676 *DL))
677 return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
678 }
679 }
680
681 // As a last resort, try SimplifyInstruction or constant folding.
682 if (Instruction *Inst = dyn_cast<Instruction>(V)) {
683 if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC}))
684 return findValueImpl(W, OffsetOk, Visited);
685 } else if (auto *C = dyn_cast<Constant>(V)) {
686 Value *W = ConstantFoldConstant(C, *DL, TLI);
687 if (W != V)
688 return findValueImpl(W, OffsetOk, Visited);
689 }
690
691 return V;
692}
693
695 auto *Mod = F.getParent();
696 auto *DL = &F.getDataLayout();
697 auto *AA = &AM.getResult<AAManager>(F);
698 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
699 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
700 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
701 Lint L(Mod, DL, AA, AC, DT, TLI);
702 L.visit(F);
703 dbgs() << L.MessagesStr.str();
704 if (LintAbortOnError && !L.MessagesStr.str().empty())
705 report_fatal_error(Twine("Linter found errors, aborting. (enabled by --") +
707 false);
708 return PreservedAnalyses::all();
709}
710
711//===----------------------------------------------------------------------===//
712// Implement the public interfaces to this file...
713//===----------------------------------------------------------------------===//
714
715/// lintFunction - Check a function for errors, printing messages on stderr.
716///
718 Function &F = const_cast<Function &>(f);
719 assert(!F.isDeclaration() && "Cannot lint external functions");
720
722 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
723 FAM.registerPass([&] { return DominatorTreeAnalysis(); });
724 FAM.registerPass([&] { return AssumptionAnalysis(); });
725 FAM.registerPass([&] {
726 AAManager AA;
730 return AA;
731 });
732 LintPass().run(F, FAM);
733}
734
735/// lintModule - Check a module for errors, printing messages on stderr.
736///
737void llvm::lintModule(const Module &M) {
738 for (const Function &F : M) {
739 if (!F.isDeclaration())
741 }
742}
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This is the interface for LLVM's primary stateless and local alias analysis.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Size
#define Check(C,...)
Definition: Lint.cpp:173
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:512
static const char LintAbortOnErrorArgName[]
Definition: Lint.cpp:80
static cl::opt< bool > LintAbortOnError(LintAbortOnErrorArgName, cl::init(false), cl::desc("In the Lint pass, abort on errors."))
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides utility analysis objects describing memory locations.
#define T1
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
Module * Mod
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getNumElements(Type *Ty)
This is the interface for a metadata-based scoped no-alias analysis.
This file defines the SmallPtrSet class.
This is the interface for a metadata-based TBAA.
A manager for alias analyses.
void registerFunctionAnalysis()
Register a specific AA result.
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
The possible results of an alias query.
Definition: AliasAnalysis.h:82
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
an instruction to allocate memory on the stack
Definition: Instructions.h:61
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:467
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:302
bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
Definition: Function.cpp:338
Type * getParamStructRetType() const
If this is an sret argument, return its type.
Definition: Function.cpp:259
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
Definition: Function.cpp:317
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the attribute exists for the given argument.
Definition: Attributes.h:805
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
Analysis pass providing a never-invalidated alias analysis result.
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1236
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
static bool isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DstTy, const DataLayout &DL)
A no-op cast is one that can be effected without changing any bits.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1097
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This is an important base class in LLVM.
Definition: Constant.h:42
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:103
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
Indirect Branch Instruction.
This instruction inserts a single (scalar) element into a VectorType value.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitIndirectBrInst(IndirectBrInst &I)
Definition: InstVisitor.h:235
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
void visitFunction(Function &F)
Definition: InstVisitor.h:142
RetTy visitUnreachableInst(UnreachableInst &I)
Definition: InstVisitor.h:241
RetTy visitReturnInst(ReturnInst &I)
Definition: InstVisitor.h:226
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitAllocaInst(AllocaInst &I)
Definition: InstVisitor.h:168
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
RetTy visitVAArgInst(VAArgInst &I)
Definition: InstVisitor.h:190
bool isCast() const
Definition: Instruction.h:282
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: Lint.cpp:694
An instruction for reading from memory.
Definition: Instructions.h:174
bool hasValue() const
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
bool isZero() const
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset.inline intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Return a value (possibly void), from a function.
Analysis pass providing a never-invalidated alias analysis result.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:347
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
An instruction for storing to memory.
Definition: Instructions.h:290
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
Analysis pass providing a never-invalidated alias analysis result.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:298
This function has undefined behavior.
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
Definition: Lint.cpp:86
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, BatchAAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:479
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
void lintModule(const Module &M)
Lint a module.
Definition: Lint.cpp:737
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
@ Mod
The access may modify the value stored in memory.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Value * FindInsertedValue(Value *V, ArrayRef< unsigned > idx_range, std::optional< BasicBlock::iterator > InsertBefore=std::nullopt)
Given an aggregate and an sequence of indices, see if the scalar value indexed is already around as a...
void lintFunction(const Function &F)
lintFunction - Check a function for errors, printing messages on stderr.
Definition: Lint.cpp:717
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:76
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117