LLVM 17.0.0git
BoundsChecking.cpp
Go to the documentation of this file.
1//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Statistic.h"
11#include "llvm/ADT/Twine.h"
16#include "llvm/IR/BasicBlock.h"
17#include "llvm/IR/Constants.h"
18#include "llvm/IR/DataLayout.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/Value.h"
27#include "llvm/Pass.h"
30#include "llvm/Support/Debug.h"
32#include <cstdint>
33#include <utility>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "bounds-checking"
38
39static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
40 cl::desc("Use one trap block per function"));
41
42STATISTIC(ChecksAdded, "Bounds checks added");
43STATISTIC(ChecksSkipped, "Bounds checks skipped");
44STATISTIC(ChecksUnable, "Bounds checks unable to add");
45
47
48/// Gets the conditions under which memory accessing instructions will overflow.
49///
50/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
51/// the result from the load or the value being stored. It is used to determine
52/// the size of memory block that is touched.
53///
54/// Returns the condition under which the access will overflow.
56 const DataLayout &DL, TargetLibraryInfo &TLI,
57 ObjectSizeOffsetEvaluator &ObjSizeEval,
58 BuilderTy &IRB, ScalarEvolution &SE) {
59 TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
60 LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
61 << " bytes\n");
62
63 SizeOffsetEvalType SizeOffset = ObjSizeEval.compute(Ptr);
64
65 if (!ObjSizeEval.bothKnown(SizeOffset)) {
66 ++ChecksUnable;
67 return nullptr;
68 }
69
70 Value *Size = SizeOffset.first;
71 Value *Offset = SizeOffset.second;
72 ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
73
74 Type *IndexTy = DL.getIndexType(Ptr->getType());
75 Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
76
77 auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
78 auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
79 auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
80
81 // three checks are required to ensure safety:
82 // . Offset >= 0 (since the offset is given from the base ptr)
83 // . Size >= Offset (unsigned)
84 // . Size - Offset >= NeededSize (unsigned)
85 //
86 // optimization: if Size >= 0 (signed), skip 1st check
87 // FIXME: add NSW/NUW here? -- we dont care if the subtraction overflows
88 Value *ObjSize = IRB.CreateSub(Size, Offset);
89 Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
90 ? ConstantInt::getFalse(Ptr->getContext())
92 Value *Cmp3 = SizeRange.sub(OffsetRange)
93 .getUnsignedMin()
94 .uge(NeededSizeRange.getUnsignedMax())
95 ? ConstantInt::getFalse(Ptr->getContext())
96 : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
97 Value *Or = IRB.CreateOr(Cmp2, Cmp3);
98 if ((!SizeCI || SizeCI->getValue().slt(0)) &&
99 !SizeRange.getSignedMin().isNonNegative()) {
100 Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
101 Or = IRB.CreateOr(Cmp1, Or);
102 }
103
104 return Or;
105}
106
107/// Adds run-time bounds checks to memory accessing instructions.
108///
109/// \p Or is the condition that should guard the trap.
110///
111/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
112template <typename GetTrapBBT>
113static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
114 // check if the comparison is always false
115 ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
116 if (C) {
117 ++ChecksSkipped;
118 // If non-zero, nothing to do.
119 if (!C->getZExtValue())
120 return;
121 }
122 ++ChecksAdded;
123
125 BasicBlock *OldBB = SplitI->getParent();
126 BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
127 OldBB->getTerminator()->eraseFromParent();
128
129 if (C) {
130 // If we have a constant zero, unconditionally branch.
131 // FIXME: We should really handle this differently to bypass the splitting
132 // the block.
133 BranchInst::Create(GetTrapBB(IRB), OldBB);
134 return;
135 }
136
137 // Create the conditional branch.
138 BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);
139}
140
142 ScalarEvolution &SE) {
143 if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
144 return false;
145
146 const DataLayout &DL = F.getParent()->getDataLayout();
147 ObjectSizeOpts EvalOpts;
148 EvalOpts.RoundToAlign = true;
149 EvalOpts.EvalMode = ObjectSizeOpts::Mode::ExactUnderlyingSizeAndOffset;
150 ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
151
152 // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
153 // touching instructions
155 for (Instruction &I : instructions(F)) {
156 Value *Or = nullptr;
157 BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
158 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
159 if (!LI->isVolatile())
160 Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
161 ObjSizeEval, IRB, SE);
162 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
163 if (!SI->isVolatile())
164 Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
165 DL, TLI, ObjSizeEval, IRB, SE);
166 } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
167 if (!AI->isVolatile())
168 Or =
169 getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
170 DL, TLI, ObjSizeEval, IRB, SE);
171 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
172 if (!AI->isVolatile())
173 Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
174 DL, TLI, ObjSizeEval, IRB, SE);
175 }
176 if (Or)
177 TrapInfo.push_back(std::make_pair(&I, Or));
178 }
179
180 // Create a trapping basic block on demand using a callback. Depending on
181 // flags, this will either create a single block for the entire function or
182 // will create a fresh block every time it is called.
183 BasicBlock *TrapBB = nullptr;
184 auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
185 if (TrapBB && SingleTrapBB)
186 return TrapBB;
187
188 Function *Fn = IRB.GetInsertBlock()->getParent();
189 // FIXME: This debug location doesn't make a lot of sense in the
190 // `SingleTrapBB` case.
191 auto DebugLoc = IRB.getCurrentDebugLocation();
193 TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
194 IRB.SetInsertPoint(TrapBB);
195
196 auto *F = Intrinsic::getDeclaration(Fn->getParent(), Intrinsic::trap);
197 CallInst *TrapCall = IRB.CreateCall(F, {});
198 TrapCall->setDoesNotReturn();
199 TrapCall->setDoesNotThrow();
200 TrapCall->setDebugLoc(DebugLoc);
201 IRB.CreateUnreachable();
202
203 return TrapBB;
204 };
205
206 // Add the checks.
207 for (const auto &Entry : TrapInfo) {
208 Instruction *Inst = Entry.first;
210 insertBoundsCheck(Entry.second, IRB, GetTrapBB);
211 }
212
213 return !TrapInfo.empty();
214}
215
217 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
218 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
219
220 if (!addBoundsChecking(F, TLI, SE))
221 return PreservedAnalyses::all();
222
224}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB)
Adds run-time bounds checks to memory accessing instructions.
static Value * getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE)
Gets the conditions under which memory accessing instructions will overflow.
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE)
static cl::opt< bool > SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
print must be executed print the must be executed context for all instructions
@ SI
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:620
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:513
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:718
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:410
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
static BranchInst * Create(BasicBlock *IfTrue, Instruction *InsertBefore=nullptr)
void setDoesNotReturn()
Definition: InstrTypes.h:1912
void setDoesNotThrow()
Definition: InstrTypes.h:1919
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:888
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:840
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:136
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2152
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateTypeSize(Type *DstType, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Definition: IRBuilder.cpp:113
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1273
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1426
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2168
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2564
const BasicBlock * getParent() const
Definition: Instruction.h:90
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:355
An instruction for reading from memory.
Definition: Instructions.h:177
Evaluate the size and offset of an object pointed to by a Value*.
SizeOffsetEvalType compute(Value *V)
bool bothKnown(SizeOffsetEvalType SizeOffset)
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1506
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
std::pair< Value *, Value * > SizeOffsetEvalType
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Or
Bitwise or logical OR of integers.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Various options to control the behavior of getObjectSize.
Mode EvalMode
How we want to evaluate this object's size.
bool RoundToAlign
Whether to round the result up to the alignment of allocas, byval arguments, and global variables.