LLVM  14.0.0git
PoisonChecking.cpp
Go to the documentation of this file.
1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit. That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language. If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 // state of each input to whether the dynamic output of the instruction
23 // produces poison.
24 // 3) A creation rule which validates any poison producing flags on the
25 // instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 // execute undefined behavior given the poison state of it's inputs.
28 //
29 // This is a must analysis based transform; that is, the resulting code may
30 // produce a false negative result (not report UB when actually exists
31 // according to the LangRef spec), but should never produce a false positive
32 // (report UB where it doesn't exist).
33 //
34 // Use cases for this pass include:
35 // - Understanding (and testing!) the implications of the definition of poison
36 // from the LangRef.
37 // - Validating the output of a IR fuzzer to ensure that all programs produced
38 // are well defined on the specific input used.
39 // - Finding/confirming poison specific miscompiles by checking the poison
40 // status of an input/IR pair is the same before and after an optimization
41 // transform.
42 // - Checking that a bugpoint reduction does not introduce UB which didn't
43 // exist in the original program being reduced.
44 //
45 // The major sources of inaccuracy are currently:
46 // - Most validation rules not yet implemented for instructions with poison
47 // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
48 // - UB which is control dependent on a branch on poison is not yet
49 // reported. Currently, only data flow dependence is modeled.
50 // - Poison which is propagated through memory is not modeled. As such,
51 // storing poison to memory and then reloading it will cause a false negative
52 // as we consider the reloaded value to not be poisoned.
53 // - Poison propagation across function boundaries is not modeled. At the
54 // moment, all arguments and return values are assumed not to be poison.
55 // - Undef is not modeled. In particular, the optimizer's freedom to pick
56 // concrete values for undef bits so as to maximize potential for producing
57 // poison is not modeled.
58 //
59 //===----------------------------------------------------------------------===//
60 
62 #include "llvm/ADT/DenseMap.h"
63 #include "llvm/ADT/Statistic.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/InstVisitor.h"
68 #include "llvm/IR/IntrinsicInst.h"
69 #include "llvm/IR/PatternMatch.h"
71 #include "llvm/Support/Debug.h"
72 
73 using namespace llvm;
74 
75 #define DEBUG_TYPE "poison-checking"
76 
77 static cl::opt<bool>
78 LocalCheck("poison-checking-function-local",
79  cl::init(false),
80  cl::desc("Check that returns are non-poison (for testing)"));
81 
82 
83 static bool isConstantFalse(Value* V) {
84  assert(V->getType()->isIntegerTy(1));
85  if (auto *CI = dyn_cast<ConstantInt>(V))
86  return CI->isZero();
87  return false;
88 }
89 
91  if (Ops.size() == 0)
92  return B.getFalse();
93  unsigned i = 0;
94  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
95  if (i == Ops.size())
96  return B.getFalse();
97  Value *Accum = Ops[i++];
98  for (; i < Ops.size(); i++)
99  if (!isConstantFalse(Ops[i]))
100  Accum = B.CreateOr(Accum, Ops[i]);
101  return Accum;
102 }
103 
105  SmallVectorImpl<Value*> &Checks) {
106  assert(isa<BinaryOperator>(I));
107 
108  IRBuilder<> B(&I);
109  Value *LHS = I.getOperand(0);
110  Value *RHS = I.getOperand(1);
111  switch (I.getOpcode()) {
112  default:
113  return;
114  case Instruction::Add: {
115  if (I.hasNoSignedWrap()) {
116  auto *OverflowOp =
117  B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
118  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
119  }
120  if (I.hasNoUnsignedWrap()) {
121  auto *OverflowOp =
122  B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
123  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
124  }
125  break;
126  }
127  case Instruction::Sub: {
128  if (I.hasNoSignedWrap()) {
129  auto *OverflowOp =
130  B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
131  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
132  }
133  if (I.hasNoUnsignedWrap()) {
134  auto *OverflowOp =
135  B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
136  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
137  }
138  break;
139  }
140  case Instruction::Mul: {
141  if (I.hasNoSignedWrap()) {
142  auto *OverflowOp =
143  B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
144  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
145  }
146  if (I.hasNoUnsignedWrap()) {
147  auto *OverflowOp =
148  B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
149  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
150  }
151  break;
152  }
153  case Instruction::UDiv: {
154  if (I.isExact()) {
155  auto *Check =
156  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
157  ConstantInt::get(LHS->getType(), 0));
158  Checks.push_back(Check);
159  }
160  break;
161  }
162  case Instruction::SDiv: {
163  if (I.isExact()) {
164  auto *Check =
165  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
166  ConstantInt::get(LHS->getType(), 0));
167  Checks.push_back(Check);
168  }
169  break;
170  }
171  case Instruction::AShr:
172  case Instruction::LShr:
173  case Instruction::Shl: {
174  Value *ShiftCheck =
175  B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
176  ConstantInt::get(RHS->getType(),
177  LHS->getType()->getScalarSizeInBits()));
178  Checks.push_back(ShiftCheck);
179  break;
180  }
181  };
182 }
183 
184 /// Given an instruction which can produce poison on non-poison inputs
185 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
186 /// boolean indicators of when poison would result.
188  SmallVectorImpl<Value*> &Checks) {
189  IRBuilder<> B(&I);
190  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
192 
193  // Handle non-binops separately
194  switch (I.getOpcode()) {
195  default:
196  // Note there are a couple of missing cases here, once implemented, this
197  // should become an llvm_unreachable.
198  break;
199  case Instruction::ExtractElement: {
200  Value *Vec = I.getOperand(0);
201  auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
202  if (!VecVTy)
203  break;
204  Value *Idx = I.getOperand(1);
205  unsigned NumElts = VecVTy->getNumElements();
206  Value *Check =
207  B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
208  ConstantInt::get(Idx->getType(), NumElts));
209  Checks.push_back(Check);
210  break;
211  }
212  case Instruction::InsertElement: {
213  Value *Vec = I.getOperand(0);
214  auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
215  if (!VecVTy)
216  break;
217  Value *Idx = I.getOperand(2);
218  unsigned NumElts = VecVTy->getNumElements();
219  Value *Check =
220  B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
221  ConstantInt::get(Idx->getType(), NumElts));
222  Checks.push_back(Check);
223  break;
224  }
225  };
226 }
227 
229  auto Itr = ValToPoison.find(V);
230  if (Itr != ValToPoison.end())
231  return Itr->second;
232  if (isa<Constant>(V)) {
233  return ConstantInt::getFalse(V->getContext());
234  }
235  // Return false for unknwon values - this implements a non-strict mode where
236  // unhandled IR constructs are simply considered to never produce poison. At
237  // some point in the future, we probably want a "strict mode" for testing if
238  // nothing else.
239  return ConstantInt::getFalse(V->getContext());
240 }
241 
243  assert(Cond->getType()->isIntegerTy(1));
244  if (auto *CI = dyn_cast<ConstantInt>(Cond))
245  if (CI->isAllOnesValue())
246  return;
247 
248  Module *M = B.GetInsertBlock()->getModule();
249  M->getOrInsertFunction("__poison_checker_assert",
250  Type::getVoidTy(M->getContext()),
251  Type::getInt1Ty(M->getContext()));
252  Function *TrapFunc = M->getFunction("__poison_checker_assert");
253  B.CreateCall(TrapFunc, Cond);
254 }
255 
257  assert(Cond->getType()->isIntegerTy(1));
258  CreateAssert(B, B.CreateNot(Cond));
259 }
260 
261 static bool rewrite(Function &F) {
262  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
263 
264  DenseMap<Value *, Value *> ValToPoison;
265 
266  for (BasicBlock &BB : F)
267  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
268  auto *OldPHI = cast<PHINode>(&*I);
269  auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
270  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
271  NewPHI->addIncoming(UndefValue::get(Int1Ty),
272  OldPHI->getIncomingBlock(i));
273  NewPHI->insertBefore(OldPHI);
274  ValToPoison[OldPHI] = NewPHI;
275  }
276 
277  for (BasicBlock &BB : F)
278  for (Instruction &I : BB) {
279  if (isa<PHINode>(I)) continue;
280 
281  IRBuilder<> B(cast<Instruction>(&I));
282 
283  // Note: There are many more sources of documented UB, but this pass only
284  // attempts to find UB triggered by propagation of poison.
285  SmallPtrSet<const Value *, 4> NonPoisonOps;
286  getGuaranteedNonPoisonOps(&I, NonPoisonOps);
287  for (const Value *Op : NonPoisonOps)
288  CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
289 
290  if (LocalCheck)
291  if (auto *RI = dyn_cast<ReturnInst>(&I))
292  if (RI->getNumOperands() != 0) {
293  Value *Op = RI->getOperand(0);
294  CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
295  }
296 
297  SmallVector<Value*, 4> Checks;
298  if (propagatesPoison(cast<Operator>(&I)))
299  for (Value *V : I.operands())
300  Checks.push_back(getPoisonFor(ValToPoison, V));
301 
302  if (canCreatePoison(cast<Operator>(&I)))
303  generateCreationChecks(I, Checks);
304  ValToPoison[&I] = buildOrChain(B, Checks);
305  }
306 
307  for (BasicBlock &BB : F)
308  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
309  auto *OldPHI = cast<PHINode>(&*I);
310  if (!ValToPoison.count(OldPHI))
311  continue; // skip the newly inserted phis
312  auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
313  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
314  auto *OldVal = OldPHI->getIncomingValue(i);
315  NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
316  }
317  }
318  return true;
319 }
320 
321 
323  ModuleAnalysisManager &AM) {
324  bool Changed = false;
325  for (auto &F : M)
326  Changed |= rewrite(F);
327 
328  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
329 }
330 
334 }
335 
336 /* Major TODO Items:
337  - Control dependent poison UB
338  - Strict mode - (i.e. must analyze every operand)
339  - Poison through memory
340  - Function ABIs
341  - Full coverage of intrinsics, etc.. (ouch)
342 
343  Instructions w/Unclear Semantics:
344  - shufflevector - It would seem reasonable for an out of bounds mask element
345  to produce poison, but the LangRef does not state.
346  - all binary ops w/vector operands - The likely interpretation would be that
347  any element overflowing should produce poison for the entire result, but
348  the LangRef does not state.
349  - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
350  strange that only certian flags should be documented as producing poison.
351 
352  Cases of clear poison semantics not yet implemented:
353  - Exact flags on ashr/lshr produce poison
354  - NSW/NUW flags on shl produce poison
355  - Inbounds flag on getelementptr produce poison
356  - fptosi/fptoui (out of bounds input) produce poison
357  - Scalable vector types for insertelement/extractelement
358  - Floating point binary ops w/fmf nnan/noinfs flags produce poison
359  */
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::getGuaranteedNonPoisonOps
void getGuaranteedNonPoisonOps(const Instruction *I, SmallPtrSetImpl< const Value * > &Ops)
Insert operands of I into Ops such that I will trigger undefined behavior if I is executed and that o...
Definition: ValueTracking.cpp:5471
llvm
This file implements support for optimizing divisions by a constant.
Definition: AllocatorList.h:23
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:238
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
IntrinsicInst.h
llvm::Function
Definition: Function.h:62
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::IRBuilder<>
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:742
ValueTracking.h
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:158
DenseMap.h
MemoryBuiltins.h
getPoisonFor
static Value * getPoisonFor(DenseMap< Value *, Value * > &ValToPoison, Value *V)
Definition: PoisonChecking.cpp:228
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
generateCreationChecks
static void generateCreationChecks(Instruction &I, SmallVectorImpl< Value * > &Checks)
Given an instruction which can produce poison on non-poison inputs (i.e.
Definition: PoisonChecking.cpp:187
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::PoisonCheckingPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PoisonChecking.cpp:322
CommandLine.h
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:191
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1796
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:925
PatternMatch.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
Check
static bool Check(DecodeStatus &Out, DecodeStatus In)
Definition: AArch64Disassembler.cpp:242
llvm::canCreatePoison
bool canCreatePoison(const Operator *Op, bool ConsiderFlags=true)
Definition: ValueTracking.cpp:5090
llvm::cl::opt< bool >
LocalCheck
static cl::opt< bool > LocalCheck("poison-checking-function-local", cl::init(false), cl::desc("Check that returns are non-poison (for testing)"))
buildOrChain
static Value * buildOrChain(IRBuilder<> &B, ArrayRef< Value * > Ops)
Definition: PoisonChecking.cpp:90
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
isConstantFalse
static bool isConstantFalse(Value *V)
Definition: PoisonChecking.cpp:83
llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:744
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:67
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
InstVisitor.h
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:993
CreateAssertNot
static void CreateAssertNot(IRBuilder<> &B, Value *Cond)
Definition: PoisonChecking.cpp:256
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:880
rewrite
static bool rewrite(Function &F)
Definition: PoisonChecking.cpp:261
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:324
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2675
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
CreateAssert
static void CreateAssert(IRBuilder<> &B, Value *Cond)
Definition: PoisonChecking.cpp:242
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:224
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::SmallVectorImpl< Value * >
generateCreationChecksForBinOp
static void generateCreationChecksForBinOp(Instruction &I, SmallVectorImpl< Value * > &Checks)
Definition: PoisonChecking.cpp:104
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::propagatesPoison
bool propagatesPoison(const Operator *I)
Return true if I yields poison or raises UB if any of its operands is poison.
Definition: ValueTracking.cpp:5390
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::cl::desc
Definition: CommandLine.h:412
PoisonChecking.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
Debug.h