LLVM  16.0.0git
PoisonChecking.cpp
Go to the documentation of this file.
1 //===- PoisonChecking.cpp - -----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Implements a transform pass which instruments IR such that poison semantics
10 // are made explicit. That is, it provides a (possibly partial) executable
11 // semantics for every instruction w.r.t. poison as specified in the LLVM
12 // LangRef. There are obvious parallels to the sanitizer tools, but this pass
13 // is focused purely on the semantics of LLVM IR, not any particular source
14 // language. If you're looking for something to see if your C/C++ contains
15 // UB, this is not it.
16 //
17 // The rewritten semantics of each instruction will include the following
18 // components:
19 //
20 // 1) The original instruction, unmodified.
21 // 2) A propagation rule which translates dynamic information about the poison
22 // state of each input to whether the dynamic output of the instruction
23 // produces poison.
24 // 3) A creation rule which validates any poison producing flags on the
25 // instruction itself (e.g. checks for overflow on nsw).
26 // 4) A check rule which traps (to a handler function) if this instruction must
27 // execute undefined behavior given the poison state of it's inputs.
28 //
29 // This is a must analysis based transform; that is, the resulting code may
30 // produce a false negative result (not report UB when actually exists
31 // according to the LangRef spec), but should never produce a false positive
32 // (report UB where it doesn't exist).
33 //
34 // Use cases for this pass include:
35 // - Understanding (and testing!) the implications of the definition of poison
36 // from the LangRef.
37 // - Validating the output of a IR fuzzer to ensure that all programs produced
38 // are well defined on the specific input used.
39 // - Finding/confirming poison specific miscompiles by checking the poison
40 // status of an input/IR pair is the same before and after an optimization
41 // transform.
42 // - Checking that a bugpoint reduction does not introduce UB which didn't
43 // exist in the original program being reduced.
44 //
45 // The major sources of inaccuracy are currently:
46 // - Most validation rules not yet implemented for instructions with poison
47 // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
48 // - UB which is control dependent on a branch on poison is not yet
49 // reported. Currently, only data flow dependence is modeled.
50 // - Poison which is propagated through memory is not modeled. As such,
51 // storing poison to memory and then reloading it will cause a false negative
52 // as we consider the reloaded value to not be poisoned.
53 // - Poison propagation across function boundaries is not modeled. At the
54 // moment, all arguments and return values are assumed not to be poison.
55 // - Undef is not modeled. In particular, the optimizer's freedom to pick
56 // concrete values for undef bits so as to maximize potential for producing
57 // poison is not modeled.
58 //
59 //===----------------------------------------------------------------------===//
60 
62 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/IR/IRBuilder.h"
66 
67 using namespace llvm;
68 
69 #define DEBUG_TYPE "poison-checking"
70 
71 static cl::opt<bool>
72 LocalCheck("poison-checking-function-local",
73  cl::init(false),
74  cl::desc("Check that returns are non-poison (for testing)"));
75 
76 
77 static bool isConstantFalse(Value* V) {
78  assert(V->getType()->isIntegerTy(1));
79  if (auto *CI = dyn_cast<ConstantInt>(V))
80  return CI->isZero();
81  return false;
82 }
83 
85  if (Ops.size() == 0)
86  return B.getFalse();
87  unsigned i = 0;
88  for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
89  if (i == Ops.size())
90  return B.getFalse();
91  Value *Accum = Ops[i++];
92  for (Value *Op : llvm::drop_begin(Ops, i))
93  if (!isConstantFalse(Op))
94  Accum = B.CreateOr(Accum, Op);
95  return Accum;
96 }
97 
99  SmallVectorImpl<Value*> &Checks) {
100  assert(isa<BinaryOperator>(I));
101 
102  IRBuilder<> B(&I);
103  Value *LHS = I.getOperand(0);
104  Value *RHS = I.getOperand(1);
105  switch (I.getOpcode()) {
106  default:
107  return;
108  case Instruction::Add: {
109  if (I.hasNoSignedWrap()) {
110  auto *OverflowOp =
111  B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
112  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
113  }
114  if (I.hasNoUnsignedWrap()) {
115  auto *OverflowOp =
116  B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
117  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
118  }
119  break;
120  }
121  case Instruction::Sub: {
122  if (I.hasNoSignedWrap()) {
123  auto *OverflowOp =
124  B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
125  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
126  }
127  if (I.hasNoUnsignedWrap()) {
128  auto *OverflowOp =
129  B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
130  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
131  }
132  break;
133  }
134  case Instruction::Mul: {
135  if (I.hasNoSignedWrap()) {
136  auto *OverflowOp =
137  B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
138  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
139  }
140  if (I.hasNoUnsignedWrap()) {
141  auto *OverflowOp =
142  B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
143  Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
144  }
145  break;
146  }
147  case Instruction::UDiv: {
148  if (I.isExact()) {
149  auto *Check =
150  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
151  ConstantInt::get(LHS->getType(), 0));
152  Checks.push_back(Check);
153  }
154  break;
155  }
156  case Instruction::SDiv: {
157  if (I.isExact()) {
158  auto *Check =
159  B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
160  ConstantInt::get(LHS->getType(), 0));
161  Checks.push_back(Check);
162  }
163  break;
164  }
165  case Instruction::AShr:
166  case Instruction::LShr:
167  case Instruction::Shl: {
168  Value *ShiftCheck =
169  B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
172  Checks.push_back(ShiftCheck);
173  break;
174  }
175  };
176 }
177 
178 /// Given an instruction which can produce poison on non-poison inputs
179 /// (i.e. canCreatePoison returns true), generate runtime checks to produce
180 /// boolean indicators of when poison would result.
182  SmallVectorImpl<Value*> &Checks) {
183  IRBuilder<> B(&I);
184  if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
186 
187  // Handle non-binops separately
188  switch (I.getOpcode()) {
189  default:
190  // Note there are a couple of missing cases here, once implemented, this
191  // should become an llvm_unreachable.
192  break;
193  case Instruction::ExtractElement: {
194  Value *Vec = I.getOperand(0);
195  auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
196  if (!VecVTy)
197  break;
198  Value *Idx = I.getOperand(1);
199  unsigned NumElts = VecVTy->getNumElements();
200  Value *Check =
201  B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
202  ConstantInt::get(Idx->getType(), NumElts));
203  Checks.push_back(Check);
204  break;
205  }
206  case Instruction::InsertElement: {
207  Value *Vec = I.getOperand(0);
208  auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
209  if (!VecVTy)
210  break;
211  Value *Idx = I.getOperand(2);
212  unsigned NumElts = VecVTy->getNumElements();
213  Value *Check =
214  B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
215  ConstantInt::get(Idx->getType(), NumElts));
216  Checks.push_back(Check);
217  break;
218  }
219  };
220 }
221 
223  auto Itr = ValToPoison.find(V);
224  if (Itr != ValToPoison.end())
225  return Itr->second;
226  if (isa<Constant>(V)) {
227  return ConstantInt::getFalse(V->getContext());
228  }
229  // Return false for unknwon values - this implements a non-strict mode where
230  // unhandled IR constructs are simply considered to never produce poison. At
231  // some point in the future, we probably want a "strict mode" for testing if
232  // nothing else.
233  return ConstantInt::getFalse(V->getContext());
234 }
235 
237  assert(Cond->getType()->isIntegerTy(1));
238  if (auto *CI = dyn_cast<ConstantInt>(Cond))
239  if (CI->isAllOnesValue())
240  return;
241 
242  Module *M = B.GetInsertBlock()->getModule();
243  M->getOrInsertFunction("__poison_checker_assert",
244  Type::getVoidTy(M->getContext()),
245  Type::getInt1Ty(M->getContext()));
246  Function *TrapFunc = M->getFunction("__poison_checker_assert");
247  B.CreateCall(TrapFunc, Cond);
248 }
249 
251  assert(Cond->getType()->isIntegerTy(1));
252  CreateAssert(B, B.CreateNot(Cond));
253 }
254 
255 static bool rewrite(Function &F) {
256  auto * const Int1Ty = Type::getInt1Ty(F.getContext());
257 
258  DenseMap<Value *, Value *> ValToPoison;
259 
260  for (BasicBlock &BB : F)
261  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
262  auto *OldPHI = cast<PHINode>(&*I);
263  auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
264  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
265  NewPHI->addIncoming(UndefValue::get(Int1Ty),
266  OldPHI->getIncomingBlock(i));
267  NewPHI->insertBefore(OldPHI);
268  ValToPoison[OldPHI] = NewPHI;
269  }
270 
271  for (BasicBlock &BB : F)
272  for (Instruction &I : BB) {
273  if (isa<PHINode>(I)) continue;
274 
275  IRBuilder<> B(cast<Instruction>(&I));
276 
277  // Note: There are many more sources of documented UB, but this pass only
278  // attempts to find UB triggered by propagation of poison.
279  SmallPtrSet<const Value *, 4> NonPoisonOps;
280  getGuaranteedNonPoisonOps(&I, NonPoisonOps);
281  for (const Value *Op : NonPoisonOps)
282  CreateAssertNot(B, getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
283 
284  if (LocalCheck)
285  if (auto *RI = dyn_cast<ReturnInst>(&I))
286  if (RI->getNumOperands() != 0) {
287  Value *Op = RI->getOperand(0);
288  CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
289  }
290 
291  SmallVector<Value*, 4> Checks;
292  if (propagatesPoison(cast<Operator>(&I)))
293  for (Value *V : I.operands())
294  Checks.push_back(getPoisonFor(ValToPoison, V));
295 
296  if (canCreatePoison(cast<Operator>(&I)))
297  generateCreationChecks(I, Checks);
298  ValToPoison[&I] = buildOrChain(B, Checks);
299  }
300 
301  for (BasicBlock &BB : F)
302  for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
303  auto *OldPHI = cast<PHINode>(&*I);
304  if (!ValToPoison.count(OldPHI))
305  continue; // skip the newly inserted phis
306  auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
307  for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
308  auto *OldVal = OldPHI->getIncomingValue(i);
309  NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
310  }
311  }
312  return true;
313 }
314 
315 
317  ModuleAnalysisManager &AM) {
318  bool Changed = false;
319  for (auto &F : M)
320  Changed |= rewrite(F);
321 
322  return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
323 }
324 
328 }
329 
330 /* Major TODO Items:
331  - Control dependent poison UB
332  - Strict mode - (i.e. must analyze every operand)
333  - Poison through memory
334  - Function ABIs
335  - Full coverage of intrinsics, etc.. (ouch)
336 
337  Instructions w/Unclear Semantics:
338  - shufflevector - It would seem reasonable for an out of bounds mask element
339  to produce poison, but the LangRef does not state.
340  - all binary ops w/vector operands - The likely interpretation would be that
341  any element overflowing should produce poison for the entire result, but
342  the LangRef does not state.
343  - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
344  strange that only certian flags should be documented as producing poison.
345 
346  Cases of clear poison semantics not yet implemented:
347  - Exact flags on ashr/lshr produce poison
348  - NSW/NUW flags on shl produce poison
349  - Inbounds flag on getelementptr produce poison
350  - fptosi/fptoui (out of bounds input) produce poison
351  - Scalable vector types for insertelement/extractelement
352  - Floating point binary ops w/fmf nnan/noinfs flags produce poison
353  */
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm::getGuaranteedNonPoisonOps
void getGuaranteedNonPoisonOps(const Instruction *I, SmallPtrSetImpl< const Value * > &Ops)
Insert operands of I into Ops such that I will trigger undefined behavior if I is executed and that o...
Definition: ValueTracking.cpp:5654
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::Type::getInt1Ty
static IntegerType * getInt1Ty(LLVMContext &C)
Definition: Type.cpp:236
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:387
llvm::Function
Definition: Function.h:60
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
llvm::IRBuilder<>
llvm::CmpInst::ICMP_NE
@ ICMP_NE
not equal
Definition: InstrTypes.h:741
ValueTracking.h
llvm::PreservedAnalyses::none
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: PassManager.h:155
DenseMap.h
getPoisonFor
static Value * getPoisonFor(DenseMap< Value *, Value * > &ValToPoison, Value *V)
Definition: PoisonChecking.cpp:222
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
RHS
Value * RHS
Definition: X86PartialReduction.cpp:76
generateCreationChecks
static void generateCreationChecks(Instruction &I, SmallVectorImpl< Value * > &Checks)
Given an instruction which can produce poison on non-poison inputs (i.e.
Definition: PoisonChecking.cpp:181
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::PoisonCheckingPass::run
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM)
Definition: PoisonChecking.cpp:316
CommandLine.h
LHS
Value * LHS
Definition: X86PartialReduction.cpp:75
Check
#define Check(C,...)
Definition: Lint.cpp:170
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1713
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:210
llvm::canCreatePoison
bool canCreatePoison(const Operator *Op, bool ConsiderFlags=true)
Definition: ValueTracking.cpp:5271
llvm::cl::opt< bool >
LocalCheck
static cl::opt< bool > LocalCheck("poison-checking-function-local", cl::init(false), cl::desc("Check that returns are non-poison (for testing)"))
buildOrChain
static Value * buildOrChain(IRBuilder<> &B, ArrayRef< Value * > Ops)
Definition: PoisonChecking.cpp:84
llvm::DenseMap
Definition: DenseMap.h:714
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:446
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
isConstantFalse
static bool isConstantFalse(Value *V)
Definition: PoisonChecking.cpp:77
llvm::CmpInst::ICMP_UGE
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:743
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:138
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
CreateAssertNot
static void CreateAssertNot(IRBuilder<> &B, Value *Cond)
Definition: PoisonChecking.cpp:250
llvm::ConstantInt::getFalse
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:834
rewrite
static bool rewrite(Function &F)
Definition: PoisonChecking.cpp:255
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:351
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PHINode::Create
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", Instruction *InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Definition: Instructions.h:2739
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:185
CreateAssert
static void CreateAssert(IRBuilder<> &B, Value *Cond)
Definition: PoisonChecking.cpp:236
llvm::Type::getVoidTy
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:222
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::SmallVectorImpl< Value * >
generateCreationChecksForBinOp
static void generateCreationChecksForBinOp(Instruction &I, SmallVectorImpl< Value * > &Checks)
Definition: PoisonChecking.cpp:98
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::propagatesPoison
bool propagatesPoison(const Operator *I)
Return true if I yields poison or raises UB if any of its operands is poison.
Definition: ValueTracking.cpp:5573
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::cl::desc
Definition: CommandLine.h:412
PoisonChecking.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74