LLVM 17.0.0git
Lint.cpp
Go to the documentation of this file.
1//===-- Lint.cpp - Check for common errors in LLVM IR ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass statically checks for common and easily-identified constructs
10// which produce undefined or likely unintended behavior in LLVM IR.
11//
12// It is not a guarantee of correctness, in two ways. First, it isn't
13// comprehensive. There are checks which could be done statically which are
14// not yet implemented. Some of these are indicated by TODO comments, but
15// those aren't comprehensive either. Second, many conditions cannot be
16// checked statically. This pass does no dynamic instrumentation, so it
17// can't check for all possible problems.
18//
19// Another limitation is that it assumes all code will be executed. A store
20// through a null pointer in a basic block which is never reached is harmless,
21// but this pass will warn about it anyway. This is the main reason why most
22// of these checks live here instead of in the Verifier pass.
23//
24// Optimization passes may make conditions that this pass checks for more or
25// less obvious. If an optimization pass appears to be introducing a warning,
26// it may be that the optimization pass is merely exposing an existing
27// condition in the code.
28//
29// This code may be run before instcombine. In many cases, instcombine checks
30// for the same kinds of things and turns instructions with undefined behavior
31// into unreachable (or equivalent). Because of this, this pass makes some
32// effort to look through bitcasts and so on.
33//
34//===----------------------------------------------------------------------===//
35
36#include "llvm/Analysis/Lint.h"
37#include "llvm/ADT/APInt.h"
38#include "llvm/ADT/ArrayRef.h"
40#include "llvm/ADT/Twine.h"
45#include "llvm/Analysis/Loads.h"
49#include "llvm/IR/Argument.h"
50#include "llvm/IR/BasicBlock.h"
51#include "llvm/IR/Constant.h"
52#include "llvm/IR/Constants.h"
53#include "llvm/IR/DataLayout.h"
55#include "llvm/IR/Dominators.h"
56#include "llvm/IR/Function.h"
58#include "llvm/IR/InstVisitor.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
64#include "llvm/IR/Module.h"
65#include "llvm/IR/PassManager.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Value.h"
69#include "llvm/Pass.h"
73#include <cassert>
74#include <cstdint>
75#include <iterator>
76#include <string>
77
78using namespace llvm;
79
80namespace {
81namespace MemRef {
82static const unsigned Read = 1;
83static const unsigned Write = 2;
84static const unsigned Callee = 4;
85static const unsigned Branchee = 8;
86} // end namespace MemRef
87
88class Lint : public InstVisitor<Lint> {
89 friend class InstVisitor<Lint>;
90
92
93 void visitCallBase(CallBase &CB);
94 void visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
95 MaybeAlign Alignment, Type *Ty, unsigned Flags);
96 void visitEHBeginCatch(IntrinsicInst *II);
97 void visitEHEndCatch(IntrinsicInst *II);
98
100 void visitLoadInst(LoadInst &I);
102 void visitXor(BinaryOperator &I);
103 void visitSub(BinaryOperator &I);
104 void visitLShr(BinaryOperator &I);
105 void visitAShr(BinaryOperator &I);
106 void visitShl(BinaryOperator &I);
107 void visitSDiv(BinaryOperator &I);
108 void visitUDiv(BinaryOperator &I);
109 void visitSRem(BinaryOperator &I);
110 void visitURem(BinaryOperator &I);
117
118 Value *findValue(Value *V, bool OffsetOk) const;
119 Value *findValueImpl(Value *V, bool OffsetOk,
120 SmallPtrSetImpl<Value *> &Visited) const;
121
122public:
123 Module *Mod;
124 const DataLayout *DL;
125 AliasAnalysis *AA;
126 AssumptionCache *AC;
127 DominatorTree *DT;
129
130 std::string Messages;
131 raw_string_ostream MessagesStr;
132
133 Lint(Module *Mod, const DataLayout *DL, AliasAnalysis *AA,
135 : Mod(Mod), DL(DL), AA(AA), AC(AC), DT(DT), TLI(TLI),
136 MessagesStr(Messages) {}
137
138 void WriteValues(ArrayRef<const Value *> Vs) {
139 for (const Value *V : Vs) {
140 if (!V)
141 continue;
142 if (isa<Instruction>(V)) {
143 MessagesStr << *V << '\n';
144 } else {
145 V->printAsOperand(MessagesStr, true, Mod);
146 MessagesStr << '\n';
147 }
148 }
149 }
150
151 /// A check failed, so printout out the condition and the message.
152 ///
153 /// This provides a nice place to put a breakpoint if you want to see why
154 /// something is not correct.
155 void CheckFailed(const Twine &Message) { MessagesStr << Message << '\n'; }
156
157 /// A check failed (with values to print).
158 ///
159 /// This calls the Message-only version so that the above is easier to set
160 /// a breakpoint on.
161 template <typename T1, typename... Ts>
162 void CheckFailed(const Twine &Message, const T1 &V1, const Ts &... Vs) {
163 CheckFailed(Message);
164 WriteValues({V1, Vs...});
165 }
166};
167} // end anonymous namespace
168
169// Check - We know that cond should be true, if not print an error message.
170#define Check(C, ...) \
171 do { \
172 if (!(C)) { \
173 CheckFailed(__VA_ARGS__); \
174 return; \
175 } \
176 } while (false)
177
178void Lint::visitFunction(Function &F) {
179 // This isn't undefined behavior, it's just a little unusual, and it's a
180 // fairly common mistake to neglect to name a function.
181 Check(F.hasName() || F.hasLocalLinkage(),
182 "Unusual: Unnamed function with non-local linkage", &F);
183
184 // TODO: Check for irreducible control flow.
185}
186
187void Lint::visitCallBase(CallBase &I) {
188 Value *Callee = I.getCalledOperand();
189
190 visitMemoryReference(I, MemoryLocation::getAfter(Callee), std::nullopt,
191 nullptr, MemRef::Callee);
192
193 if (Function *F = dyn_cast<Function>(findValue(Callee,
194 /*OffsetOk=*/false))) {
195 Check(I.getCallingConv() == F->getCallingConv(),
196 "Undefined behavior: Caller and callee calling convention differ",
197 &I);
198
199 FunctionType *FT = F->getFunctionType();
200 unsigned NumActualArgs = I.arg_size();
201
202 Check(FT->isVarArg() ? FT->getNumParams() <= NumActualArgs
203 : FT->getNumParams() == NumActualArgs,
204 "Undefined behavior: Call argument count mismatches callee "
205 "argument count",
206 &I);
207
208 Check(FT->getReturnType() == I.getType(),
209 "Undefined behavior: Call return type mismatches "
210 "callee return type",
211 &I);
212
213 // Check argument types (in case the callee was casted) and attributes.
214 // TODO: Verify that caller and callee attributes are compatible.
215 Function::arg_iterator PI = F->arg_begin(), PE = F->arg_end();
216 auto AI = I.arg_begin(), AE = I.arg_end();
217 for (; AI != AE; ++AI) {
218 Value *Actual = *AI;
219 if (PI != PE) {
220 Argument *Formal = &*PI++;
221 Check(Formal->getType() == Actual->getType(),
222 "Undefined behavior: Call argument type mismatches "
223 "callee parameter type",
224 &I);
225
226 // Check that noalias arguments don't alias other arguments. This is
227 // not fully precise because we don't know the sizes of the dereferenced
228 // memory regions.
229 if (Formal->hasNoAliasAttr() && Actual->getType()->isPointerTy()) {
230 AttributeList PAL = I.getAttributes();
231 unsigned ArgNo = 0;
232 for (auto *BI = I.arg_begin(); BI != AE; ++BI, ++ArgNo) {
233 // Skip ByVal arguments since they will be memcpy'd to the callee's
234 // stack so we're not really passing the pointer anyway.
235 if (PAL.hasParamAttr(ArgNo, Attribute::ByVal))
236 continue;
237 // If both arguments are readonly, they have no dependence.
238 if (Formal->onlyReadsMemory() && I.onlyReadsMemory(ArgNo))
239 continue;
240 if (AI != BI && (*BI)->getType()->isPointerTy()) {
241 AliasResult Result = AA->alias(*AI, *BI);
242 Check(Result != AliasResult::MustAlias &&
244 "Unusual: noalias argument aliases another argument", &I);
245 }
246 }
247 }
248
249 // Check that an sret argument points to valid memory.
250 if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {
251 Type *Ty = Formal->getParamStructRetType();
252 MemoryLocation Loc(
253 Actual, LocationSize::precise(DL->getTypeStoreSize(Ty)));
254 visitMemoryReference(I, Loc, DL->getABITypeAlign(Ty), Ty,
255 MemRef::Read | MemRef::Write);
256 }
257 }
258 }
259 }
260
261 if (const auto *CI = dyn_cast<CallInst>(&I)) {
262 if (CI->isTailCall()) {
263 const AttributeList &PAL = CI->getAttributes();
264 unsigned ArgNo = 0;
265 for (Value *Arg : I.args()) {
266 // Skip ByVal arguments since they will be memcpy'd to the callee's
267 // stack anyway.
268 if (PAL.hasParamAttr(ArgNo++, Attribute::ByVal))
269 continue;
270 Value *Obj = findValue(Arg, /*OffsetOk=*/true);
271 Check(!isa<AllocaInst>(Obj),
272 "Undefined behavior: Call with \"tail\" keyword references "
273 "alloca",
274 &I);
275 }
276 }
277 }
278
279 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I))
280 switch (II->getIntrinsicID()) {
281 default:
282 break;
283
284 // TODO: Check more intrinsics
285
286 case Intrinsic::memcpy: {
287 MemCpyInst *MCI = cast<MemCpyInst>(&I);
288 visitMemoryReference(I, MemoryLocation::getForDest(MCI),
289 MCI->getDestAlign(), nullptr, MemRef::Write);
290 visitMemoryReference(I, MemoryLocation::getForSource(MCI),
291 MCI->getSourceAlign(), nullptr, MemRef::Read);
292
293 // Check that the memcpy arguments don't overlap. The AliasAnalysis API
294 // isn't expressive enough for what we really want to do. Known partial
295 // overlap is not distinguished from the case where nothing is known.
297 if (const ConstantInt *Len =
298 dyn_cast<ConstantInt>(findValue(MCI->getLength(),
299 /*OffsetOk=*/false)))
300 if (Len->getValue().isIntN(32))
301 Size = LocationSize::precise(Len->getValue().getZExtValue());
302 Check(AA->alias(MCI->getSource(), Size, MCI->getDest(), Size) !=
304 "Undefined behavior: memcpy source and destination overlap", &I);
305 break;
306 }
307 case Intrinsic::memcpy_inline: {
308 MemCpyInlineInst *MCII = cast<MemCpyInlineInst>(&I);
309 const uint64_t Size = MCII->getLength()->getValue().getLimitedValue();
310 visitMemoryReference(I, MemoryLocation::getForDest(MCII),
311 MCII->getDestAlign(), nullptr, MemRef::Write);
312 visitMemoryReference(I, MemoryLocation::getForSource(MCII),
313 MCII->getSourceAlign(), nullptr, MemRef::Read);
314
315 // Check that the memcpy arguments don't overlap. The AliasAnalysis API
316 // isn't expressive enough for what we really want to do. Known partial
317 // overlap is not distinguished from the case where nothing is known.
319 Check(AA->alias(MCII->getSource(), LS, MCII->getDest(), LS) !=
321 "Undefined behavior: memcpy source and destination overlap", &I);
322 break;
323 }
324 case Intrinsic::memmove: {
325 MemMoveInst *MMI = cast<MemMoveInst>(&I);
326 visitMemoryReference(I, MemoryLocation::getForDest(MMI),
327 MMI->getDestAlign(), nullptr, MemRef::Write);
328 visitMemoryReference(I, MemoryLocation::getForSource(MMI),
329 MMI->getSourceAlign(), nullptr, MemRef::Read);
330 break;
331 }
332 case Intrinsic::memset: {
333 MemSetInst *MSI = cast<MemSetInst>(&I);
334 visitMemoryReference(I, MemoryLocation::getForDest(MSI),
335 MSI->getDestAlign(), nullptr, MemRef::Write);
336 break;
337 }
338 case Intrinsic::memset_inline: {
339 MemSetInlineInst *MSII = cast<MemSetInlineInst>(&I);
340 visitMemoryReference(I, MemoryLocation::getForDest(MSII),
341 MSII->getDestAlign(), nullptr, MemRef::Write);
342 break;
343 }
344
345 case Intrinsic::vastart:
346 Check(I.getParent()->getParent()->isVarArg(),
347 "Undefined behavior: va_start called in a non-varargs function",
348 &I);
349
350 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
351 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
352 break;
353 case Intrinsic::vacopy:
354 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
355 std::nullopt, nullptr, MemRef::Write);
356 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 1, TLI),
357 std::nullopt, nullptr, MemRef::Read);
358 break;
359 case Intrinsic::vaend:
360 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
361 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
362 break;
363
364 case Intrinsic::stackrestore:
365 // Stackrestore doesn't read or write memory, but it sets the
366 // stack pointer, which the compiler may read from or write to
367 // at any time, so check it for both readability and writeability.
368 visitMemoryReference(I, MemoryLocation::getForArgument(&I, 0, TLI),
369 std::nullopt, nullptr, MemRef::Read | MemRef::Write);
370 break;
371 case Intrinsic::get_active_lane_mask:
372 if (auto *TripCount = dyn_cast<ConstantInt>(I.getArgOperand(1)))
373 Check(!TripCount->isZero(),
374 "get_active_lane_mask: operand #2 "
375 "must be greater than 0",
376 &I);
377 break;
378 }
379}
380
381void Lint::visitReturnInst(ReturnInst &I) {
382 Function *F = I.getParent()->getParent();
383 Check(!F->doesNotReturn(),
384 "Unusual: Return statement in function with noreturn attribute", &I);
385
386 if (Value *V = I.getReturnValue()) {
387 Value *Obj = findValue(V, /*OffsetOk=*/true);
388 Check(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);
389 }
390}
391
392// TODO: Check that the reference is in bounds.
393// TODO: Check readnone/readonly function attributes.
394void Lint::visitMemoryReference(Instruction &I, const MemoryLocation &Loc,
395 MaybeAlign Align, Type *Ty, unsigned Flags) {
396 // If no memory is being referenced, it doesn't matter if the pointer
397 // is valid.
398 if (Loc.Size.isZero())
399 return;
400
401 Value *Ptr = const_cast<Value *>(Loc.Ptr);
402 Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);
403 Check(!isa<ConstantPointerNull>(UnderlyingObject),
404 "Undefined behavior: Null pointer dereference", &I);
405 Check(!isa<UndefValue>(UnderlyingObject),
406 "Undefined behavior: Undef pointer dereference", &I);
407 Check(!isa<ConstantInt>(UnderlyingObject) ||
408 !cast<ConstantInt>(UnderlyingObject)->isMinusOne(),
409 "Unusual: All-ones pointer dereference", &I);
410 Check(!isa<ConstantInt>(UnderlyingObject) ||
411 !cast<ConstantInt>(UnderlyingObject)->isOne(),
412 "Unusual: Address one pointer dereference", &I);
413
414 if (Flags & MemRef::Write) {
415 if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(UnderlyingObject))
416 Check(!GV->isConstant(), "Undefined behavior: Write to read-only memory",
417 &I);
418 Check(!isa<Function>(UnderlyingObject) &&
419 !isa<BlockAddress>(UnderlyingObject),
420 "Undefined behavior: Write to text section", &I);
421 }
422 if (Flags & MemRef::Read) {
423 Check(!isa<Function>(UnderlyingObject), "Unusual: Load from function body",
424 &I);
425 Check(!isa<BlockAddress>(UnderlyingObject),
426 "Undefined behavior: Load from block address", &I);
427 }
428 if (Flags & MemRef::Callee) {
429 Check(!isa<BlockAddress>(UnderlyingObject),
430 "Undefined behavior: Call to block address", &I);
431 }
432 if (Flags & MemRef::Branchee) {
433 Check(!isa<Constant>(UnderlyingObject) ||
434 isa<BlockAddress>(UnderlyingObject),
435 "Undefined behavior: Branch to non-blockaddress", &I);
436 }
437
438 // Check for buffer overflows and misalignment.
439 // Only handles memory references that read/write something simple like an
440 // alloca instruction or a global variable.
441 int64_t Offset = 0;
443 // OK, so the access is to a constant offset from Ptr. Check that Ptr is
444 // something we can handle and if so extract the size of this base object
445 // along with its alignment.
447 MaybeAlign BaseAlign;
448
449 if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {
450 Type *ATy = AI->getAllocatedType();
451 if (!AI->isArrayAllocation() && ATy->isSized())
452 BaseSize = DL->getTypeAllocSize(ATy);
453 BaseAlign = AI->getAlign();
454 } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {
455 // If the global may be defined differently in another compilation unit
456 // then don't warn about funky memory accesses.
457 if (GV->hasDefinitiveInitializer()) {
458 Type *GTy = GV->getValueType();
459 if (GTy->isSized())
460 BaseSize = DL->getTypeAllocSize(GTy);
461 BaseAlign = GV->getAlign();
462 if (!BaseAlign && GTy->isSized())
463 BaseAlign = DL->getABITypeAlign(GTy);
464 }
465 }
466
467 // Accesses from before the start or after the end of the object are not
468 // defined.
469 Check(!Loc.Size.hasValue() || BaseSize == MemoryLocation::UnknownSize ||
470 (Offset >= 0 && Offset + Loc.Size.getValue() <= BaseSize),
471 "Undefined behavior: Buffer overflow", &I);
472
473 // Accesses that say that the memory is more aligned than it is are not
474 // defined.
475 if (!Align && Ty && Ty->isSized())
476 Align = DL->getABITypeAlign(Ty);
477 if (BaseAlign && Align)
478 Check(*Align <= commonAlignment(*BaseAlign, Offset),
479 "Undefined behavior: Memory reference address is misaligned", &I);
480 }
481}
482
483void Lint::visitLoadInst(LoadInst &I) {
484 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(), I.getType(),
485 MemRef::Read);
486}
487
488void Lint::visitStoreInst(StoreInst &I) {
489 visitMemoryReference(I, MemoryLocation::get(&I), I.getAlign(),
490 I.getOperand(0)->getType(), MemRef::Write);
491}
492
493void Lint::visitXor(BinaryOperator &I) {
494 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
495 "Undefined result: xor(undef, undef)", &I);
496}
497
498void Lint::visitSub(BinaryOperator &I) {
499 Check(!isa<UndefValue>(I.getOperand(0)) || !isa<UndefValue>(I.getOperand(1)),
500 "Undefined result: sub(undef, undef)", &I);
501}
502
503void Lint::visitLShr(BinaryOperator &I) {
504 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1),
505 /*OffsetOk=*/false)))
506 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
507 "Undefined result: Shift count out of range", &I);
508}
509
510void Lint::visitAShr(BinaryOperator &I) {
511 if (ConstantInt *CI =
512 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
513 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
514 "Undefined result: Shift count out of range", &I);
515}
516
517void Lint::visitShl(BinaryOperator &I) {
518 if (ConstantInt *CI =
519 dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))
520 Check(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),
521 "Undefined result: Shift count out of range", &I);
522}
523
524static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,
525 AssumptionCache *AC) {
526 // Assume undef could be zero.
527 if (isa<UndefValue>(V))
528 return true;
529
530 VectorType *VecTy = dyn_cast<VectorType>(V->getType());
531 if (!VecTy) {
532 KnownBits Known =
533 computeKnownBits(V, DL, 0, AC, dyn_cast<Instruction>(V), DT);
534 return Known.isZero();
535 }
536
537 // Per-component check doesn't work with zeroinitializer
538 Constant *C = dyn_cast<Constant>(V);
539 if (!C)
540 return false;
541
542 if (C->isZeroValue())
543 return true;
544
545 // For a vector, KnownZero will only be true if all values are zero, so check
546 // this per component
547 for (unsigned I = 0, N = cast<FixedVectorType>(VecTy)->getNumElements();
548 I != N; ++I) {
549 Constant *Elem = C->getAggregateElement(I);
550 if (isa<UndefValue>(Elem))
551 return true;
552
553 KnownBits Known = computeKnownBits(Elem, DL);
554 if (Known.isZero())
555 return true;
556 }
557
558 return false;
559}
560
561void Lint::visitSDiv(BinaryOperator &I) {
562 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
563 "Undefined behavior: Division by zero", &I);
564}
565
566void Lint::visitUDiv(BinaryOperator &I) {
567 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
568 "Undefined behavior: Division by zero", &I);
569}
570
571void Lint::visitSRem(BinaryOperator &I) {
572 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
573 "Undefined behavior: Division by zero", &I);
574}
575
576void Lint::visitURem(BinaryOperator &I) {
577 Check(!isZero(I.getOperand(1), I.getModule()->getDataLayout(), DT, AC),
578 "Undefined behavior: Division by zero", &I);
579}
580
581void Lint::visitAllocaInst(AllocaInst &I) {
582 if (isa<ConstantInt>(I.getArraySize()))
583 // This isn't undefined behavior, it's just an obvious pessimization.
584 Check(&I.getParent()->getParent()->getEntryBlock() == I.getParent(),
585 "Pessimization: Static alloca outside of entry block", &I);
586
587 // TODO: Check for an unusual size (MSB set?)
588}
589
590void Lint::visitVAArgInst(VAArgInst &I) {
591 visitMemoryReference(I, MemoryLocation::get(&I), std::nullopt, nullptr,
592 MemRef::Read | MemRef::Write);
593}
594
595void Lint::visitIndirectBrInst(IndirectBrInst &I) {
596 visitMemoryReference(I, MemoryLocation::getAfter(I.getAddress()),
597 std::nullopt, nullptr, MemRef::Branchee);
598
599 Check(I.getNumDestinations() != 0,
600 "Undefined behavior: indirectbr with no destinations", &I);
601}
602
603void Lint::visitExtractElementInst(ExtractElementInst &I) {
604 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(),
605 /*OffsetOk=*/false)))
606 Check(
607 CI->getValue().ult(
608 cast<FixedVectorType>(I.getVectorOperandType())->getNumElements()),
609 "Undefined result: extractelement index out of range", &I);
610}
611
612void Lint::visitInsertElementInst(InsertElementInst &I) {
613 if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2),
614 /*OffsetOk=*/false)))
615 Check(CI->getValue().ult(
616 cast<FixedVectorType>(I.getType())->getNumElements()),
617 "Undefined result: insertelement index out of range", &I);
618}
619
620void Lint::visitUnreachableInst(UnreachableInst &I) {
621 // This isn't undefined behavior, it's merely suspicious.
622 Check(&I == &I.getParent()->front() ||
623 std::prev(I.getIterator())->mayHaveSideEffects(),
624 "Unusual: unreachable immediately preceded by instruction without "
625 "side effects",
626 &I);
627}
628
629/// findValue - Look through bitcasts and simple memory reference patterns
630/// to identify an equivalent, but more informative, value. If OffsetOk
631/// is true, look through getelementptrs with non-zero offsets too.
632///
633/// Most analysis passes don't require this logic, because instcombine
634/// will simplify most of these kinds of things away. But it's a goal of
635/// this Lint pass to be useful even on non-optimized IR.
636Value *Lint::findValue(Value *V, bool OffsetOk) const {
638 return findValueImpl(V, OffsetOk, Visited);
639}
640
641/// findValueImpl - Implementation helper for findValue.
642Value *Lint::findValueImpl(Value *V, bool OffsetOk,
643 SmallPtrSetImpl<Value *> &Visited) const {
644 // Detect self-referential values.
645 if (!Visited.insert(V).second)
646 return UndefValue::get(V->getType());
647
648 // TODO: Look through sext or zext cast, when the result is known to
649 // be interpreted as signed or unsigned, respectively.
650 // TODO: Look through eliminable cast pairs.
651 // TODO: Look through calls with unique return values.
652 // TODO: Look through vector insert/extract/shuffle.
653 V = OffsetOk ? getUnderlyingObject(V) : V->stripPointerCasts();
654 if (LoadInst *L = dyn_cast<LoadInst>(V)) {
655 BasicBlock::iterator BBI = L->getIterator();
656 BasicBlock *BB = L->getParent();
657 SmallPtrSet<BasicBlock *, 4> VisitedBlocks;
658 for (;;) {
659 if (!VisitedBlocks.insert(BB).second)
660 break;
661 if (Value *U =
663 return findValueImpl(U, OffsetOk, Visited);
664 if (BBI != BB->begin())
665 break;
666 BB = BB->getUniquePredecessor();
667 if (!BB)
668 break;
669 BBI = BB->end();
670 }
671 } else if (PHINode *PN = dyn_cast<PHINode>(V)) {
672 if (Value *W = PN->hasConstantValue())
673 return findValueImpl(W, OffsetOk, Visited);
674 } else if (CastInst *CI = dyn_cast<CastInst>(V)) {
675 if (CI->isNoopCast(*DL))
676 return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
677 } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
678 if (Value *W =
679 FindInsertedValue(Ex->getAggregateOperand(), Ex->getIndices()))
680 if (W != V)
681 return findValueImpl(W, OffsetOk, Visited);
682 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
683 // Same as above, but for ConstantExpr instead of Instruction.
684 if (Instruction::isCast(CE->getOpcode())) {
686 CE->getOperand(0)->getType(), CE->getType(),
687 *DL))
688 return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
689 }
690 }
691
692 // As a last resort, try SimplifyInstruction or constant folding.
693 if (Instruction *Inst = dyn_cast<Instruction>(V)) {
694 if (Value *W = simplifyInstruction(Inst, {*DL, TLI, DT, AC}))
695 return findValueImpl(W, OffsetOk, Visited);
696 } else if (auto *C = dyn_cast<Constant>(V)) {
697 Value *W = ConstantFoldConstant(C, *DL, TLI);
698 if (W != V)
699 return findValueImpl(W, OffsetOk, Visited);
700 }
701
702 return V;
703}
704
706 auto *Mod = F.getParent();
707 auto *DL = &F.getParent()->getDataLayout();
708 auto *AA = &AM.getResult<AAManager>(F);
709 auto *AC = &AM.getResult<AssumptionAnalysis>(F);
710 auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
711 auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
712 Lint L(Mod, DL, AA, AC, DT, TLI);
713 L.visit(F);
714 dbgs() << L.MessagesStr.str();
715 return PreservedAnalyses::all();
716}
717
718namespace {
719class LintLegacyPass : public FunctionPass {
720public:
721 static char ID; // Pass identification, replacement for typeid
722 LintLegacyPass() : FunctionPass(ID) {
724 }
725
726 bool runOnFunction(Function &F) override;
727
728 void getAnalysisUsage(AnalysisUsage &AU) const override {
729 AU.setPreservesAll();
734 }
735 void print(raw_ostream &O, const Module *M) const override {}
736};
737} // namespace
738
739char LintLegacyPass::ID = 0;
740INITIALIZE_PASS_BEGIN(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
741 false, true)
746INITIALIZE_PASS_END(LintLegacyPass, "lint", "Statically lint-checks LLVM IR",
748
749bool LintLegacyPass::runOnFunction(Function &F) {
750 auto *Mod = F.getParent();
751 auto *DL = &F.getParent()->getDataLayout();
752 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
753 auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
754 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
755 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
756 Lint L(Mod, DL, AA, AC, DT, TLI);
757 L.visit(F);
758 dbgs() << L.MessagesStr.str();
759 return false;
760}
761
762//===----------------------------------------------------------------------===//
763// Implement the public interfaces to this file...
764//===----------------------------------------------------------------------===//
765
766FunctionPass *llvm::createLintLegacyPassPass() { return new LintLegacyPass(); }
767
768/// lintFunction - Check a function for errors, printing messages on stderr.
769///
771 Function &F = const_cast<Function &>(f);
772 assert(!F.isDeclaration() && "Cannot lint external functions");
773
774 legacy::FunctionPassManager FPM(F.getParent());
775 auto *V = new LintLegacyPass();
776 FPM.add(V);
777 FPM.run(F);
778}
779
780/// lintModule - Check a module for errors, printing messages on stderr.
781///
782void llvm::lintModule(const Module &M) {
784 auto *V = new LintLegacyPass();
785 PM.add(V);
786 PM.run(const_cast<Module &>(M));
787}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file implements a class to represent arbitrary precision integral constant values and operations...
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
basic Basic Alias true
This file contains the declarations for the subclasses of Constant, which represent the different fla...
uint64_t Size
static bool runOnFunction(Function &F, bool PostInlining)
Implicit null checks
#define Check(C,...)
Definition: Lint.cpp:170
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:524
lint
Definition: Lint.cpp:746
Statically lint checks LLVM IR
Definition: Lint.cpp:746
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides utility analysis objects describing memory locations.
#define T1
Module.h This file contains the declarations for the Module class.
Module * Mod
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallPtrSet class.
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB)
The main low level interface to the alias analysis implementation.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:463
The possible results of an alias query.
Definition: AliasAnalysis.h:83
@ PartialAlias
The two locations alias, but only due to a partial overlap.
@ MustAlias
The two locations precisely alias each other.
an instruction to allocate memory on the stack
Definition: Instructions.h:58
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
bool hasNoAliasAttr() const
Return true if this argument has the noalias attribute.
Definition: Function.cpp:237
bool onlyReadsMemory() const
Return true if this argument has the readonly or readnone attribute.
Definition: Function.cpp:273
Type * getParamStructRetType() const
If this is an sret argument, return its type.
Definition: Function.cpp:205
bool hasStructRetAttr() const
Return true if this argument has the sret attribute.
Definition: Function.cpp:252
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Return true if the attribute exists for the given argument.
Definition: Attributes.h:764
AttributeSet getAttributes(unsigned Index) const
The attributes for the specified index are returned.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:292
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1184
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:428
static bool isNoopCast(Instruction::CastOps Opcode, Type *SrcTy, Type *DstTy, const DataLayout &DL)
A no-op cast is one that can be effected without changing any bits.
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:998
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:132
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:114
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
This instruction extracts a single (scalar) element from a VectorType value.
This instruction extracts a struct member or array element value from an aggregate value.
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
Indirect Branch Instruction.
This instruction inserts a single (scalar) element into a VectorType value.
Base class for instruction visitors.
Definition: InstVisitor.h:78
RetTy visitIndirectBrInst(IndirectBrInst &I)
Definition: InstVisitor.h:235
RetTy visitExtractElementInst(ExtractElementInst &I)
Definition: InstVisitor.h:191
RetTy visitCallBase(CallBase &I)
Definition: InstVisitor.h:267
void visitFunction(Function &F)
Definition: InstVisitor.h:142
RetTy visitUnreachableInst(UnreachableInst &I)
Definition: InstVisitor.h:241
RetTy visitReturnInst(ReturnInst &I)
Definition: InstVisitor.h:226
RetTy visitStoreInst(StoreInst &I)
Definition: InstVisitor.h:170
RetTy visitInsertElementInst(InsertElementInst &I)
Definition: InstVisitor.h:192
RetTy visitAllocaInst(AllocaInst &I)
Definition: InstVisitor.h:168
RetTy visitLoadInst(LoadInst &I)
Definition: InstVisitor.h:169
RetTy visitVAArgInst(VAArgInst &I)
Definition: InstVisitor.h:190
bool isCast() const
Definition: Instruction.h:176
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: Lint.cpp:705
An instruction for reading from memory.
Definition: Instructions.h:177
bool hasValue() const
static LocationSize precise(uint64_t Value)
uint64_t getValue() const
bool isZero() const
static constexpr LocationSize afterPointer()
Any location after the base pointer (but still within the underlying object).
This class wraps the llvm.memcpy.inline intrinsic.
ConstantInt * getLength() const
This class wraps the llvm.memcpy intrinsic.
Value * getLength() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset.inline intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location after Ptr, while remaining within the underlying objec...
const Value * Ptr
The address of the start of the location.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Return a value (possibly void), from a function.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:344
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
An instruction for storing to memory.
Definition: Instructions.h:301
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:249
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:295
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1740
This function has undefined behavior.
This class represents the va_arg llvm instruction, which returns an argument of the specified type gi...
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
FunctionPassManager manages FunctionPasses.
bool run(Function &F)
run - Execute all of the passes scheduled for execution.
void add(Pass *P) override
Add a pass to the queue of passes to run.
PassManager manages ModulePassManagers.
void add(Pass *P) override
Add a pass to the queue of passes to run.
bool run(Module &M)
run - Execute all of the passes scheduled for execution.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
Definition: Lint.cpp:81
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CE
Windows NT (Windows on ARM)
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
Value * FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB, BasicBlock::iterator &ScanFrom, unsigned MaxInstsToScan=DefMaxInstsToScan, AAResults *AA=nullptr, bool *IsLoadCSE=nullptr, unsigned *NumScanedInst=nullptr)
Scan backwards to see if we have the value of the given load available locally within a small number ...
Definition: Loads.cpp:428
Constant * ConstantFoldConstant(const Constant *C, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr)
ConstantFoldConstant - Fold the constant using the specified DataLayout.
Value * FindInsertedValue(Value *V, ArrayRef< unsigned > idx_range, Instruction *InsertBefore=nullptr)
Given an aggregate and an sequence of indices, see if the scalar value indexed is already around as a...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void lintModule(const Module &M)
Lint a module.
Definition: Lint.cpp:782
cl::opt< unsigned > DefMaxInstsToScan
The default number of maximum instructions to scan in the block, used by FindAvailableLoadedValue().
@ Mod
The access may modify the value stored in memory.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
FunctionPass * createLintLegacyPassPass()
Definition: Lint.cpp:766
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
void initializeLintLegacyPassPass(PassRegistry &)
void lintFunction(const Function &F)
lintFunction - Check a function for errors, printing messages on stderr.
Definition: Lint.cpp:770
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
bool isZero() const
Returns true if value is all zero.
Definition: KnownBits.h:72
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117