LLVM 19.0.0git
BoundsChecking.cpp
Go to the documentation of this file.
1//===- BoundsChecking.cpp - Instrumentation for run-time bounds checking --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/Statistic.h"
11#include "llvm/ADT/Twine.h"
16#include "llvm/IR/BasicBlock.h"
17#include "llvm/IR/Constants.h"
18#include "llvm/IR/DataLayout.h"
19#include "llvm/IR/Function.h"
20#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Instruction.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/Value.h"
28#include "llvm/Support/Debug.h"
30#include <cstdint>
31#include <utility>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "bounds-checking"
36
37static cl::opt<bool> SingleTrapBB("bounds-checking-single-trap",
38 cl::desc("Use one trap block per function"));
39
40static cl::opt<bool> DebugTrapBB("bounds-checking-unique-traps",
41 cl::desc("Always use one trap per check"));
42
43STATISTIC(ChecksAdded, "Bounds checks added");
44STATISTIC(ChecksSkipped, "Bounds checks skipped");
45STATISTIC(ChecksUnable, "Bounds checks unable to add");
46
48
49/// Gets the conditions under which memory accessing instructions will overflow.
50///
51/// \p Ptr is the pointer that will be read/written, and \p InstVal is either
52/// the result from the load or the value being stored. It is used to determine
53/// the size of memory block that is touched.
54///
55/// Returns the condition under which the access will overflow.
57 const DataLayout &DL, TargetLibraryInfo &TLI,
58 ObjectSizeOffsetEvaluator &ObjSizeEval,
59 BuilderTy &IRB, ScalarEvolution &SE) {
60 TypeSize NeededSize = DL.getTypeStoreSize(InstVal->getType());
61 LLVM_DEBUG(dbgs() << "Instrument " << *Ptr << " for " << Twine(NeededSize)
62 << " bytes\n");
63
64 SizeOffsetValue SizeOffset = ObjSizeEval.compute(Ptr);
65
66 if (!SizeOffset.bothKnown()) {
67 ++ChecksUnable;
68 return nullptr;
69 }
70
71 Value *Size = SizeOffset.Size;
72 Value *Offset = SizeOffset.Offset;
73 ConstantInt *SizeCI = dyn_cast<ConstantInt>(Size);
74
75 Type *IndexTy = DL.getIndexType(Ptr->getType());
76 Value *NeededSizeVal = IRB.CreateTypeSize(IndexTy, NeededSize);
77
78 auto SizeRange = SE.getUnsignedRange(SE.getSCEV(Size));
79 auto OffsetRange = SE.getUnsignedRange(SE.getSCEV(Offset));
80 auto NeededSizeRange = SE.getUnsignedRange(SE.getSCEV(NeededSizeVal));
81
82 // three checks are required to ensure safety:
83 // . Offset >= 0 (since the offset is given from the base ptr)
84 // . Size >= Offset (unsigned)
85 // . Size - Offset >= NeededSize (unsigned)
86 //
87 // optimization: if Size >= 0 (signed), skip 1st check
88 // FIXME: add NSW/NUW here? -- we dont care if the subtraction overflows
89 Value *ObjSize = IRB.CreateSub(Size, Offset);
90 Value *Cmp2 = SizeRange.getUnsignedMin().uge(OffsetRange.getUnsignedMax())
91 ? ConstantInt::getFalse(Ptr->getContext())
93 Value *Cmp3 = SizeRange.sub(OffsetRange)
94 .getUnsignedMin()
95 .uge(NeededSizeRange.getUnsignedMax())
96 ? ConstantInt::getFalse(Ptr->getContext())
97 : IRB.CreateICmpULT(ObjSize, NeededSizeVal);
98 Value *Or = IRB.CreateOr(Cmp2, Cmp3);
99 if ((!SizeCI || SizeCI->getValue().slt(0)) &&
100 !SizeRange.getSignedMin().isNonNegative()) {
101 Value *Cmp1 = IRB.CreateICmpSLT(Offset, ConstantInt::get(IndexTy, 0));
102 Or = IRB.CreateOr(Cmp1, Or);
103 }
104
105 return Or;
106}
107
108/// Adds run-time bounds checks to memory accessing instructions.
109///
110/// \p Or is the condition that should guard the trap.
111///
112/// \p GetTrapBB is a callable that returns the trap BB to use on failure.
113template <typename GetTrapBBT>
114static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB) {
115 // check if the comparison is always false
116 ConstantInt *C = dyn_cast_or_null<ConstantInt>(Or);
117 if (C) {
118 ++ChecksSkipped;
119 // If non-zero, nothing to do.
120 if (!C->getZExtValue())
121 return;
122 }
123 ++ChecksAdded;
124
126 BasicBlock *OldBB = SplitI->getParent();
127 BasicBlock *Cont = OldBB->splitBasicBlock(SplitI);
128 OldBB->getTerminator()->eraseFromParent();
129
130 if (C) {
131 // If we have a constant zero, unconditionally branch.
132 // FIXME: We should really handle this differently to bypass the splitting
133 // the block.
134 BranchInst::Create(GetTrapBB(IRB), OldBB);
135 return;
136 }
137
138 // Create the conditional branch.
139 BranchInst::Create(GetTrapBB(IRB), Cont, Or, OldBB);
140}
141
143 ScalarEvolution &SE) {
144 if (F.hasFnAttribute(Attribute::NoSanitizeBounds))
145 return false;
146
147 const DataLayout &DL = F.getParent()->getDataLayout();
148 ObjectSizeOpts EvalOpts;
149 EvalOpts.RoundToAlign = true;
150 EvalOpts.EvalMode = ObjectSizeOpts::Mode::ExactUnderlyingSizeAndOffset;
151 ObjectSizeOffsetEvaluator ObjSizeEval(DL, &TLI, F.getContext(), EvalOpts);
152
153 // check HANDLE_MEMORY_INST in include/llvm/Instruction.def for memory
154 // touching instructions
156 for (Instruction &I : instructions(F)) {
157 Value *Or = nullptr;
158 BuilderTy IRB(I.getParent(), BasicBlock::iterator(&I), TargetFolder(DL));
159 if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
160 if (!LI->isVolatile())
161 Or = getBoundsCheckCond(LI->getPointerOperand(), LI, DL, TLI,
162 ObjSizeEval, IRB, SE);
163 } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
164 if (!SI->isVolatile())
165 Or = getBoundsCheckCond(SI->getPointerOperand(), SI->getValueOperand(),
166 DL, TLI, ObjSizeEval, IRB, SE);
167 } else if (AtomicCmpXchgInst *AI = dyn_cast<AtomicCmpXchgInst>(&I)) {
168 if (!AI->isVolatile())
169 Or =
170 getBoundsCheckCond(AI->getPointerOperand(), AI->getCompareOperand(),
171 DL, TLI, ObjSizeEval, IRB, SE);
172 } else if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(&I)) {
173 if (!AI->isVolatile())
174 Or = getBoundsCheckCond(AI->getPointerOperand(), AI->getValOperand(),
175 DL, TLI, ObjSizeEval, IRB, SE);
176 }
177 if (Or)
178 TrapInfo.push_back(std::make_pair(&I, Or));
179 }
180
181 // Create a trapping basic block on demand using a callback. Depending on
182 // flags, this will either create a single block for the entire function or
183 // will create a fresh block every time it is called.
184 BasicBlock *TrapBB = nullptr;
185 auto GetTrapBB = [&TrapBB](BuilderTy &IRB) {
186 Function *Fn = IRB.GetInsertBlock()->getParent();
187 auto DebugLoc = IRB.getCurrentDebugLocation();
189
190 if (TrapBB && SingleTrapBB && !DebugTrapBB)
191 return TrapBB;
192
193 TrapBB = BasicBlock::Create(Fn->getContext(), "trap", Fn);
194 IRB.SetInsertPoint(TrapBB);
195
196 Intrinsic::ID IntrID = DebugTrapBB ? Intrinsic::ubsantrap : Intrinsic::trap;
197 auto *F = Intrinsic::getDeclaration(Fn->getParent(), IntrID);
198
199 CallInst *TrapCall;
200 if (DebugTrapBB) {
201 TrapCall =
202 IRB.CreateCall(F, ConstantInt::get(IRB.getInt8Ty(), Fn->size()));
203 } else {
204 TrapCall = IRB.CreateCall(F, {});
205 }
206
207 TrapCall->setDoesNotReturn();
208 TrapCall->setDoesNotThrow();
209 TrapCall->setDebugLoc(DebugLoc);
210 IRB.CreateUnreachable();
211
212 return TrapBB;
213 };
214
215 // Add the checks.
216 for (const auto &Entry : TrapInfo) {
217 Instruction *Inst = Entry.first;
219 insertBoundsCheck(Entry.second, IRB, GetTrapBB);
220 }
221
222 return !TrapInfo.empty();
223}
224
226 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
227 auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
228
229 if (!addBoundsChecking(F, TLI, SE))
230 return PreservedAnalyses::all();
231
233}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static void insertBoundsCheck(Value *Or, BuilderTy &IRB, GetTrapBBT GetTrapBB)
Adds run-time bounds checks to memory accessing instructions.
static cl::opt< bool > DebugTrapBB("bounds-checking-unique-traps", cl::desc("Always use one trap per check"))
static Value * getBoundsCheckCond(Value *Ptr, Value *InstVal, const DataLayout &DL, TargetLibraryInfo &TLI, ObjectSizeOffsetEvaluator &ObjSizeEval, BuilderTy &IRB, ScalarEvolution &SE)
Gets the conditions under which memory accessing instructions will overflow.
static bool addBoundsChecking(Function &F, TargetLibraryInfo &TLI, ScalarEvolution &SE)
static cl::opt< bool > SingleTrapBB("bounds-checking-single-trap", cl::desc("Use one trap block per function"))
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1108
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:570
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
void setDoesNotReturn()
Definition: InstrTypes.h:2266
void setDoesNotThrow()
Definition: InstrTypes.h:2273
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:856
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
size_t size() const
Definition: Function.h:804
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2257
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:175
Value * CreateTypeSize(Type *DstType, TypeSize Size)
Create an expression which evaluates to the number of units in Size at runtime.
Definition: IRBuilder.cpp:104
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1344
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
Value * CreateICmpSLT(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
An instruction for reading from memory.
Definition: Instructions.h:184
Evaluate the size and offset of an object pointed to by a Value*.
SizeOffsetValue compute(Value *V)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getUnsignedRange(const SCEV *S)
Determine the unsigned range for a particular SCEV.
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
TargetFolder - Create constants with target dependent folding.
Definition: TargetFolder.h:34
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1465
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
@ Or
Bitwise or logical OR of integers.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Various options to control the behavior of getObjectSize.
Mode EvalMode
How we want to evaluate this object's size.
bool RoundToAlign
Whether to round the result up to the alignment of allocas, byval arguments, and global variables.
bool bothKnown() const