LLVM 19.0.0git
IRMutator.cpp
Go to the documentation of this file.
1//===-- IRMutator.cpp -----------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/STLExtras.h"
11#include "llvm/ADT/SmallSet.h"
18#include "llvm/IR/BasicBlock.h"
19#include "llvm/IR/FMF.h"
20#include "llvm/IR/Function.h"
23#include "llvm/IR/Module.h"
24#include "llvm/IR/Operator.h"
25#include "llvm/IR/Verifier.h"
30#include <map>
31#include <optional>
32
33using namespace llvm;
34
36 auto RS = makeSampler<Function *>(IB.Rand);
37 for (Function &F : M)
38 if (!F.isDeclaration())
39 RS.sample(&F, /*Weight=*/1);
40
41 while (RS.totalWeight() < IB.MinFunctionNum) {
42 Function *F = IB.createFunctionDefinition(M);
43 RS.sample(F, /*Weight=*/1);
44 }
45 mutate(*RS.getSelection(), IB);
46}
47
50 [](BasicBlock *BB) { return !BB->isEHPad(); });
52 mutate(*makeSampler(IB.Rand, Range).getSelection(), IB);
54
56 mutate(*makeSampler(IB.Rand, make_pointer_range(BB)).getSelection(), IB);
57}
58
60 return M.getInstructionCount() + M.size() + M.global_size() + M.alias_size();
61}
62
63void IRMutator::mutateModule(Module &M, int Seed, size_t MaxSize) {
64 std::vector<Type *> Types;
65 for (const auto &Getter : AllowedTypes)
66 Types.push_back(Getter(M.getContext()));
67 RandomIRBuilder IB(Seed, Types);
68
69 size_t CurSize = IRMutator::getModuleSize(M);
70 auto RS = makeSampler<IRMutationStrategy *>(IB.Rand);
71 for (const auto &Strategy : Strategies)
72 RS.sample(Strategy.get(),
73 Strategy->getWeight(CurSize, MaxSize, RS.totalWeight()));
74 if (RS.totalWeight() == 0)
75 return;
76 auto Strategy = RS.getSelection();
77
78 Strategy->mutate(M, IB);
79}
80
83 FPM.addPass(DCEPass());
85 FAM.registerPass([&] { return TargetLibraryAnalysis(); });
87 FPM.run(F, FAM);
88}
89
93}
94
95std::vector<fuzzerop::OpDescriptor> InjectorIRStrategy::getDefaultOps() {
96 std::vector<fuzzerop::OpDescriptor> Ops;
103 return Ops;
104}
105
106std::optional<fuzzerop::OpDescriptor>
107InjectorIRStrategy::chooseOperation(Value *Src, RandomIRBuilder &IB) {
108 auto OpMatchesPred = [&Src](fuzzerop::OpDescriptor &Op) {
109 return Op.SourcePreds[0].matches({}, Src);
110 };
111 auto RS = makeSampler(IB.Rand, make_filter_range(Operations, OpMatchesPred));
112 if (RS.isEmpty())
113 return std::nullopt;
114 return *RS;
115}
116
119 auto End = BB.getTerminatingMustTailCall() ? std::prev(BB.end()) : BB.end();
120 return make_range(BB.getFirstInsertionPt(), End);
121}
122
125 for (Instruction &I : getInsertionRange(BB))
126 Insts.push_back(&I);
127 if (Insts.size() < 1)
128 return;
129
130 // Choose an insertion point for our new instruction.
131 size_t IP = uniform<size_t>(IB.Rand, 0, Insts.size() - 1);
132
133 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
134 auto InstsAfter = ArrayRef(Insts).slice(IP);
135
136 // Choose a source, which will be used to constrain the operation selection.
138 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore));
139
140 // Choose an operation that's constrained to be valid for the type of the
141 // source, collect any other sources it needs, and then build it.
142 auto OpDesc = chooseOperation(Srcs[0], IB);
143 // Bail if no operation was found
144 if (!OpDesc)
145 return;
146
147 for (const auto &Pred : ArrayRef(OpDesc->SourcePreds).slice(1))
148 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
149
150 if (Value *Op = OpDesc->BuilderFunc(Srcs, Insts[IP])) {
151 // Find a sink and wire up the results of the operation.
152 IB.connectToSink(BB, InstsAfter, Op);
153 }
154}
155
156uint64_t InstDeleterIRStrategy::getWeight(size_t CurrentSize, size_t MaxSize,
157 uint64_t CurrentWeight) {
158 // If we have less than 200 bytes, panic and try to always delete.
159 if (CurrentSize > MaxSize - 200)
160 return CurrentWeight ? CurrentWeight * 100 : 1;
161 // Draw a line starting from when we only have 1k left and increasing linearly
162 // to double the current weight.
163 int64_t Line = (-2 * static_cast<int64_t>(CurrentWeight)) *
164 (static_cast<int64_t>(MaxSize) -
165 static_cast<int64_t>(CurrentSize) - 1000) /
166 1000;
167 // Clamp negative weights to zero.
168 if (Line < 0)
169 return 0;
170 return Line;
171}
172
174 auto RS = makeSampler<Instruction *>(IB.Rand);
175 for (Instruction &Inst : instructions(F)) {
176 // TODO: We can't handle these instructions.
177 if (Inst.isTerminator() || Inst.isEHPad() || Inst.isSwiftError() ||
178 isa<PHINode>(Inst))
179 continue;
180
181 RS.sample(&Inst, /*Weight=*/1);
182 }
183 if (RS.isEmpty())
184 return;
185
186 // Delete the instruction.
187 mutate(*RS.getSelection(), IB);
188 // Clean up any dead code that's left over after removing the instruction.
190}
191
193 assert(!Inst.isTerminator() && "Deleting terminators invalidates CFG");
194
195 if (Inst.getType()->isVoidTy()) {
196 // Instructions with void type (ie, store) have no uses to worry about. Just
197 // erase it and move on.
198 Inst.eraseFromParent();
199 return;
200 }
201
202 // Otherwise we need to find some other value with the right type to keep the
203 // users happy.
204 auto Pred = fuzzerop::onlyType(Inst.getType());
205 auto RS = makeSampler<Value *>(IB.Rand);
207 BasicBlock *BB = Inst.getParent();
208 for (auto I = BB->getFirstInsertionPt(), E = Inst.getIterator(); I != E;
209 ++I) {
210 if (Pred.matches({}, &*I))
211 RS.sample(&*I, /*Weight=*/1);
212 InstsBefore.push_back(&*I);
213 }
214 if (!RS)
215 RS.sample(IB.newSource(*BB, InstsBefore, {}, Pred), /*Weight=*/1);
216
217 Inst.replaceAllUsesWith(RS.getSelection());
218 Inst.eraseFromParent();
219}
220
222 RandomIRBuilder &IB) {
223 SmallVector<std::function<void()>, 8> Modifications;
224 CmpInst *CI = nullptr;
225 GetElementPtrInst *GEP = nullptr;
226 switch (Inst.getOpcode()) {
227 default:
228 break;
229 // Add nsw, nuw flag
230 case Instruction::Add:
231 case Instruction::Mul:
232 case Instruction::Sub:
233 case Instruction::Shl:
234 Modifications.push_back(
235 [&Inst]() { Inst.setHasNoSignedWrap(!Inst.hasNoSignedWrap()); });
236 Modifications.push_back(
237 [&Inst]() { Inst.setHasNoUnsignedWrap(!Inst.hasNoUnsignedWrap()); });
238 break;
239 case Instruction::ICmp:
240 CI = cast<ICmpInst>(&Inst);
241 for (unsigned p = CmpInst::FIRST_ICMP_PREDICATE;
243 Modifications.push_back(
244 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
245 }
246 break;
247 // Add inbound flag.
248 case Instruction::GetElementPtr:
249 GEP = cast<GetElementPtrInst>(&Inst);
250 Modifications.push_back(
251 [GEP]() { GEP->setIsInBounds(!GEP->isInBounds()); });
252 break;
253 // Add exact flag.
254 case Instruction::UDiv:
255 case Instruction::SDiv:
256 case Instruction::LShr:
257 case Instruction::AShr:
258 Modifications.push_back([&Inst] { Inst.setIsExact(!Inst.isExact()); });
259 break;
260
261 case Instruction::FCmp:
262 CI = cast<FCmpInst>(&Inst);
263 for (unsigned p = CmpInst::FIRST_FCMP_PREDICATE;
265 Modifications.push_back(
266 [CI, p]() { CI->setPredicate(static_cast<CmpInst::Predicate>(p)); });
267 }
268 break;
269 }
270
271 // Add fast math flag if possible.
272 if (isa<FPMathOperator>(&Inst)) {
273 // Try setting everything unless they are already on.
274 Modifications.push_back(
275 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().all()); });
276 // Try unsetting everything unless they are already off.
277 Modifications.push_back(
278 [&Inst] { Inst.setFast(!Inst.getFastMathFlags().none()); });
279 // Individual setting by flipping the bit
280 Modifications.push_back(
281 [&Inst] { Inst.setHasAllowReassoc(!Inst.hasAllowReassoc()); });
282 Modifications.push_back([&Inst] { Inst.setHasNoNaNs(!Inst.hasNoNaNs()); });
283 Modifications.push_back([&Inst] { Inst.setHasNoInfs(!Inst.hasNoInfs()); });
284 Modifications.push_back(
285 [&Inst] { Inst.setHasNoSignedZeros(!Inst.hasNoSignedZeros()); });
286 Modifications.push_back(
287 [&Inst] { Inst.setHasAllowReciprocal(!Inst.hasAllowReciprocal()); });
288 Modifications.push_back(
289 [&Inst] { Inst.setHasAllowContract(!Inst.hasAllowContract()); });
290 Modifications.push_back(
291 [&Inst] { Inst.setHasApproxFunc(!Inst.hasApproxFunc()); });
292 }
293
294 // Randomly switch operands of instructions
295 std::pair<int, int> NoneItem({-1, -1}), ShuffleItems(NoneItem);
296 switch (Inst.getOpcode()) {
297 case Instruction::SDiv:
298 case Instruction::UDiv:
299 case Instruction::SRem:
300 case Instruction::URem:
301 case Instruction::FDiv:
302 case Instruction::FRem: {
303 // Verify that the after shuffle the second operand is not
304 // constant 0.
305 Value *Operand = Inst.getOperand(0);
306 if (Constant *C = dyn_cast<Constant>(Operand)) {
307 if (!C->isZeroValue()) {
308 ShuffleItems = {0, 1};
309 }
310 }
311 break;
312 }
313 case Instruction::Select:
314 ShuffleItems = {1, 2};
315 break;
316 case Instruction::Add:
317 case Instruction::Sub:
318 case Instruction::Mul:
319 case Instruction::Shl:
320 case Instruction::LShr:
321 case Instruction::AShr:
322 case Instruction::And:
323 case Instruction::Or:
324 case Instruction::Xor:
325 case Instruction::FAdd:
326 case Instruction::FSub:
327 case Instruction::FMul:
328 case Instruction::ICmp:
329 case Instruction::FCmp:
330 case Instruction::ShuffleVector:
331 ShuffleItems = {0, 1};
332 break;
333 }
334 if (ShuffleItems != NoneItem) {
335 Modifications.push_back([&Inst, &ShuffleItems]() {
336 Value *Op0 = Inst.getOperand(ShuffleItems.first);
337 Inst.setOperand(ShuffleItems.first, Inst.getOperand(ShuffleItems.second));
338 Inst.setOperand(ShuffleItems.second, Op0);
339 });
340 }
341
342 auto RS = makeSampler(IB.Rand, Modifications);
343 if (RS)
344 RS.getSelection()();
345}
346
347/// Return a case value that is not already taken to make sure we don't have two
348/// cases with same value.
350 uint64_t MaxValue, RandomIRBuilder &IB) {
351 uint64_t tmp;
352 do {
353 tmp = uniform<uint64_t>(IB.Rand, 0, MaxValue);
354 } while (CasesTaken.count(tmp) != 0);
355 CasesTaken.insert(tmp);
356 return tmp;
357}
358
360 Module *M = BB.getParent()->getParent();
361 // If nullptr is selected, we will create a new function declaration.
362 SmallVector<Function *, 32> Functions({nullptr});
363 for (Function &F : M->functions()) {
364 Functions.push_back(&F);
365 }
366
367 auto RS = makeSampler(IB.Rand, Functions);
368 Function *F = RS.getSelection();
369 // Some functions accept metadata type or token type as arguments.
370 // We don't call those functions for now.
371 // For example, `@llvm.dbg.declare(metadata, metadata, metadata)`
372 // https://llvm.org/docs/SourceLevelDebugging.html#llvm-dbg-declare
373 auto IsUnsupportedTy = [](Type *T) {
374 return T->isMetadataTy() || T->isTokenTy();
375 };
376 if (!F || IsUnsupportedTy(F->getReturnType()) ||
377 any_of(F->getFunctionType()->params(), IsUnsupportedTy)) {
378 F = IB.createFunctionDeclaration(*M);
379 }
380
381 FunctionType *FTy = F->getFunctionType();
383 if (!F->arg_empty()) {
384 for (Type *ArgTy : FTy->params()) {
385 SourcePreds.push_back(fuzzerop::onlyType(ArgTy));
386 }
387 }
388 bool isRetVoid = (F->getReturnType() == Type::getVoidTy(M->getContext()));
389 auto BuilderFunc = [FTy, F, isRetVoid](ArrayRef<Value *> Srcs,
390 Instruction *Inst) {
391 StringRef Name = isRetVoid ? nullptr : "C";
392 CallInst *Call = CallInst::Create(FTy, F, Srcs, Name, Inst);
393 // Don't return this call inst if it return void as it can't be sinked.
394 return isRetVoid ? nullptr : Call;
395 };
396
398 for (Instruction &I : getInsertionRange(BB))
399 Insts.push_back(&I);
400 if (Insts.size() < 1)
401 return;
402
403 // Choose an insertion point for our new call instruction.
404 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
405
406 auto InstsBefore = ArrayRef(Insts).slice(0, IP);
407 auto InstsAfter = ArrayRef(Insts).slice(IP);
408
409 // Choose a source, which will be used to constrain the operation selection.
411
412 for (const auto &Pred : ArrayRef(SourcePreds)) {
413 Srcs.push_back(IB.findOrCreateSource(BB, InstsBefore, Srcs, Pred));
414 }
415
416 if (Value *Op = BuilderFunc(Srcs, Insts[IP])) {
417 // Find a sink and wire up the results of the operation.
418 IB.connectToSink(BB, InstsAfter, Op);
419 }
420}
421
424 for (Instruction &I : getInsertionRange(BB))
425 Insts.push_back(&I);
426 if (Insts.size() < 1)
427 return;
428
429 // Choose a point where we split the block.
430 uint64_t IP = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
431 auto InstsBeforeSplit = ArrayRef(Insts).slice(0, IP);
432
433 // `Sink` inherits Blocks' terminator, `Source` will have a BranchInst
434 // directly jumps to `Sink`. Here, we have to create a new terminator for
435 // `Source`.
436 BasicBlock *Block = Insts[IP]->getParent();
437 BasicBlock *Source = Block;
438 BasicBlock *Sink = Block->splitBasicBlock(Insts[IP], "BB");
439
440 Function *F = BB.getParent();
441 LLVMContext &C = F->getParent()->getContext();
442 // A coin decides if it is branch or switch
443 if (uniform<uint64_t>(IB.Rand, 0, 1)) {
444 // Branch
445 BasicBlock *IfTrue = BasicBlock::Create(C, "T", F);
446 BasicBlock *IfFalse = BasicBlock::Create(C, "F", F);
447 Value *Cond =
448 IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
450 BranchInst *Branch = BranchInst::Create(IfTrue, IfFalse, Cond);
451 // Remove the old terminator.
452 ReplaceInstWithInst(Source->getTerminator(), Branch);
453 // Connect these blocks to `Sink`
454 connectBlocksToSink({IfTrue, IfFalse}, Sink, IB);
455 } else {
456 // Switch
457 // Determine Integer type, it IS possible we use a boolean to switch.
458 auto RS =
459 makeSampler(IB.Rand, make_filter_range(IB.KnownTypes, [](Type *Ty) {
460 return Ty->isIntegerTy();
461 }));
462 assert(RS && "There is no integer type in all allowed types, is the "
463 "setting correct?");
464 Type *Ty = RS.getSelection();
465 IntegerType *IntTy = cast<IntegerType>(Ty);
466
467 uint64_t BitSize = IntTy->getBitWidth();
468 uint64_t MaxCaseVal =
469 (BitSize >= 64) ? (uint64_t)-1 : ((uint64_t)1 << BitSize) - 1;
470 // Create Switch inst in Block
471 Value *Cond = IB.findOrCreateSource(*Source, InstsBeforeSplit, {},
472 fuzzerop::onlyType(IntTy), false);
473 BasicBlock *DefaultBlock = BasicBlock::Create(C, "SW_D", F);
474 uint64_t NumCases = uniform<uint64_t>(IB.Rand, 1, MaxNumCases);
475 NumCases = (NumCases > MaxCaseVal) ? MaxCaseVal + 1 : NumCases;
476 SwitchInst *Switch = SwitchInst::Create(Cond, DefaultBlock, NumCases);
477 // Remove the old terminator.
478 ReplaceInstWithInst(Source->getTerminator(), Switch);
479
480 // Create blocks, for each block assign a case value.
481 SmallVector<BasicBlock *, 4> Blocks({DefaultBlock});
482 SmallSet<uint64_t, 4> CasesTaken;
483 for (uint64_t i = 0; i < NumCases; i++) {
484 uint64_t CaseVal = getUniqueCaseValue(CasesTaken, MaxCaseVal, IB);
485 BasicBlock *CaseBlock = BasicBlock::Create(C, "SW_C", F);
486 ConstantInt *OnValue = ConstantInt::get(IntTy, CaseVal);
487 Switch->addCase(OnValue, CaseBlock);
488 Blocks.push_back(CaseBlock);
489 }
490
491 // Connect these blocks to `Sink`
492 connectBlocksToSink(Blocks, Sink, IB);
493 }
494}
495
496/// The caller has to guarantee that these blocks are "empty", i.e. it doesn't
497/// even have terminator.
498void InsertCFGStrategy::connectBlocksToSink(ArrayRef<BasicBlock *> Blocks,
499 BasicBlock *Sink,
500 RandomIRBuilder &IB) {
501 uint64_t DirectSinkIdx = uniform<uint64_t>(IB.Rand, 0, Blocks.size() - 1);
502 for (uint64_t i = 0; i < Blocks.size(); i++) {
503 // We have at least one block that directly goes to sink.
504 CFGToSink ToSink = (i == DirectSinkIdx)
505 ? CFGToSink::DirectSink
506 : static_cast<CFGToSink>(uniform<uint64_t>(
507 IB.Rand, 0, CFGToSink::EndOfCFGToLink - 1));
508 BasicBlock *BB = Blocks[i];
509 Function *F = BB->getParent();
510 LLVMContext &C = F->getParent()->getContext();
511 switch (ToSink) {
512 case CFGToSink::Return: {
513 Type *RetTy = F->getReturnType();
514 Value *RetValue = nullptr;
515 if (!RetTy->isVoidTy())
516 RetValue =
517 IB.findOrCreateSource(*BB, {}, {}, fuzzerop::onlyType(RetTy));
518 ReturnInst::Create(C, RetValue, BB);
519 break;
520 }
521 case CFGToSink::DirectSink: {
522 BranchInst::Create(Sink, BB);
523 break;
524 }
525 case CFGToSink::SinkOrSelfLoop: {
526 SmallVector<BasicBlock *, 2> Branches({Sink, BB});
527 // A coin decides which block is true branch.
528 uint64_t coin = uniform<uint64_t>(IB.Rand, 0, 1);
529 Value *Cond = IB.findOrCreateSource(
530 *BB, {}, {}, fuzzerop::onlyType(Type::getInt1Ty(C)), false);
531 BranchInst::Create(Branches[coin], Branches[1 - coin], Cond, BB);
532 break;
533 }
534 case CFGToSink::EndOfCFGToLink:
535 llvm_unreachable("EndOfCFGToLink executed, something's wrong.");
536 }
537 }
538}
539
541 // Can't insert PHI node to entry node.
542 if (&BB == &BB.getParent()->getEntryBlock())
543 return;
544 Type *Ty = IB.randomType();
545 PHINode *PHI = PHINode::Create(Ty, llvm::pred_size(&BB), "", &BB.front());
546
547 // Use a map to make sure the same incoming basic block has the same value.
548 DenseMap<BasicBlock *, Value *> IncomingValues;
549 for (BasicBlock *Pred : predecessors(&BB)) {
550 Value *Src = IncomingValues[Pred];
551 // If `Pred` is not in the map yet, we'll get a nullptr.
552 if (!Src) {
554 for (auto I = Pred->begin(); I != Pred->end(); ++I)
555 Insts.push_back(&*I);
556 // There is no need to inform IB what previously used values are if we are
557 // using `onlyType`
558 Src = IB.findOrCreateSource(*Pred, Insts, {}, fuzzerop::onlyType(Ty));
559 IncomingValues[Pred] = Src;
560 }
561 PHI->addIncoming(Src, Pred);
562 }
564 for (Instruction &I : getInsertionRange(BB))
565 InstsAfter.push_back(&I);
566 IB.connectToSink(BB, InstsAfter, PHI);
567}
568
570 for (BasicBlock &BB : F) {
571 this->mutate(BB, IB);
572 }
573}
576 for (Instruction &I : getInsertionRange(BB))
577 Insts.push_back(&I);
578 if (Insts.size() < 1)
579 return;
580 // Choose an Instruction to mutate.
581 uint64_t Idx = uniform<uint64_t>(IB.Rand, 0, Insts.size() - 1);
582 Instruction *Inst = Insts[Idx];
583 // `Idx + 1` so we don't sink to ourselves.
584 auto InstsAfter = ArrayRef(Insts).slice(Idx + 1);
585 Type *Ty = Inst->getType();
586 // Don't sink terminators, void function calls, token, etc.
587 if (!Ty->isVoidTy() && !Ty->isTokenTy())
588 // Find a new sink and wire up the results of the operation.
589 IB.connectToSink(BB, InstsAfter, Inst);
590}
591
593 // A deterministic alternative to SmallPtrSet with the same lookup
594 // performance.
595 std::map<size_t, Instruction *> AliveInsts;
596 std::map<Instruction *, size_t> AliveInstsLookup;
597 size_t InsertIdx = 0;
598 for (auto &I : make_early_inc_range(make_range(
600 // First gather all instructions that can be shuffled. Don't take
601 // terminator.
602 AliveInsts.insert({InsertIdx, &I});
603 AliveInstsLookup.insert({&I, InsertIdx++});
604 // Then remove these instructions from the block
605 I.removeFromParent();
606 }
607
608 // Shuffle these instructions using topological sort.
609 // Returns false if all current instruction's dependencies in this block have
610 // been shuffled. If so, this instruction can be shuffled too.
611 auto hasAliveParent = [&AliveInsts, &AliveInstsLookup](size_t Index) {
612 for (Value *O : AliveInsts[Index]->operands()) {
613 Instruction *P = dyn_cast<Instruction>(O);
614 if (P && AliveInstsLookup.count(P))
615 return true;
616 }
617 return false;
618 };
619 // Get all alive instructions that depend on the current instruction.
620 // Takes Instruction* instead of index because the instruction is already
621 // shuffled.
622 auto getAliveChildren = [&AliveInstsLookup](Instruction *I) {
624 for (Value *U : I->users()) {
625 Instruction *P = dyn_cast<Instruction>(U);
626 if (P && AliveInstsLookup.count(P))
627 Children.insert(AliveInstsLookup[P]);
628 }
629 return Children;
630 };
631 SmallSet<size_t, 8> RootIndices;
633 for (const auto &[Index, Inst] : AliveInsts) {
634 if (!hasAliveParent(Index))
635 RootIndices.insert(Index);
636 }
637 // Topological sort by randomly selecting a node without a parent, or root.
638 while (!RootIndices.empty()) {
639 auto RS = makeSampler<size_t>(IB.Rand);
640 for (size_t RootIdx : RootIndices)
641 RS.sample(RootIdx, 1);
642 size_t RootIdx = RS.getSelection();
643
644 RootIndices.erase(RootIdx);
645 Instruction *Root = AliveInsts[RootIdx];
646 AliveInsts.erase(RootIdx);
647 AliveInstsLookup.erase(Root);
648 Insts.push_back(Root);
649
650 for (size_t Child : getAliveChildren(Root)) {
651 if (!hasAliveParent(Child)) {
652 RootIndices.insert(Child);
653 }
654 }
655 }
656
657 Instruction *Terminator = BB.getTerminator();
658 // Then put instructions back.
659 for (Instruction *I : Insts) {
660 I->insertBefore(Terminator);
661 }
662}
663
664std::unique_ptr<Module> llvm::parseModule(const uint8_t *Data, size_t Size,
665 LLVMContext &Context) {
666
667 if (Size <= 1)
668 // We get bogus data given an empty corpus - just create a new module.
669 return std::make_unique<Module>("M", Context);
670
671 auto Buffer = MemoryBuffer::getMemBuffer(
672 StringRef(reinterpret_cast<const char *>(Data), Size), "Fuzzer input",
673 /*RequiresNullTerminator=*/false);
674
675 SMDiagnostic Err;
676 auto M = parseBitcodeFile(Buffer->getMemBufferRef(), Context);
677 if (Error E = M.takeError()) {
678 errs() << toString(std::move(E)) << "\n";
679 return nullptr;
680 }
681 return std::move(M.get());
682}
683
684size_t llvm::writeModule(const Module &M, uint8_t *Dest, size_t MaxSize) {
685 std::string Buf;
686 {
689 }
690 if (Buf.size() > MaxSize)
691 return 0;
692 memcpy(Dest, Buf.data(), Buf.size());
693 return Buf.size();
694}
695
696std::unique_ptr<Module> llvm::parseAndVerify(const uint8_t *Data, size_t Size,
697 LLVMContext &Context) {
698 auto M = parseModule(Data, Size, Context);
699 if (!M || verifyModule(*M, &errs()))
700 return nullptr;
701
702 return M;
703}
Rewrite undef for PHI
Expand Atomic instructions
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
Hexagon Common GEP
static void eliminateDeadCode(Function &F)
Definition: IRMutator.cpp:81
static iterator_range< BasicBlock::iterator > getInsertionRange(BasicBlock &BB)
Definition: IRMutator.cpp:118
static uint64_t getUniqueCaseValue(SmallSet< uint64_t, 4 > &CasesTaken, uint64_t MaxValue, RandomIRBuilder &IB)
Return a case value that is not already taken to make sure we don't have two cases with same value.
Definition: IRMutator.cpp:349
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
FunctionAnalysisManager FAM
const SmallVectorImpl< MachineOperand > & Cond
static ManagedStatic< cl::opt< uint64_t >, CreateSeed > Seed
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
This file defines the SmallSet class.
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
Definition: PassManager.h:535
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:445
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:410
const Instruction & front() const
Definition: BasicBlock.h:455
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:201
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:208
bool isEHPad() const
Return true if this basic block is an exception handling block.
Definition: BasicBlock.h:659
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:223
const CallInst * getTerminatingMustTailCall() const
Returns the call instruction marked 'musttail' prior to the terminating return instruction of this ba...
Definition: BasicBlock.cpp:294
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, BasicBlock::iterator InsertBefore)
This class represents a function call, abstracting a target machine's calling convention.
static CallInst * Create(FunctionType *Ty, Value *F, const Twine &NameStr, BasicBlock::iterator InsertBefore)
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:1108
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
This is an important base class in LLVM.
Definition: Constant.h:41
Basic Dead Code Elimination pass.
Definition: DCE.h:23
This class represents an Operation in the Expression.
Lightweight error class with error context and mandatory checking.
Definition: Error.h:160
bool none() const
Definition: FMF.h:58
bool all() const
Definition: FMF.h:59
Class to represent function types.
Definition: DerivedTypes.h:103
ArrayRef< Type * > params() const
Definition: DerivedTypes.h:130
const BasicBlock & getEntryBlock() const
Definition: Function.h:790
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:974
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
virtual void mutate(Module &M, RandomIRBuilder &IB)
Definition: IRMutator.cpp:35
void mutateModule(Module &M, int Seed, size_t MaxSize)
Mutate given module.
Definition: IRMutator.cpp:63
static size_t getModuleSize(const Module &M)
Calculate the size of module as the number of objects in it, i.e.
Definition: IRMutator.cpp:59
static std::vector< fuzzerop::OpDescriptor > getDefaultOps()
Definition: IRMutator.cpp:95
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:90
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:422
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:359
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:540
uint64_t getWeight(size_t CurrentSize, size_t MaxSize, uint64_t CurrentWeight) override
Provide a weight to bias towards choosing this strategy for a mutation.
Definition: IRMutator.cpp:156
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:173
void mutate(Instruction &Inst, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:221
void setHasNoUnsignedWrap(bool b=true)
Set or clear the nuw flag on this instruction, which must be an operator which supports this flag.
bool hasNoNaNs() const LLVM_READONLY
Determine whether the no-NaNs flag is set.
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoInfs() const LLVM_READONLY
Determine whether the no-infs flag is set.
void setHasNoSignedZeros(bool B)
Set or clear the no-signed-zeros flag on this instruction, which must be an operator which supports t...
bool hasNoSignedZeros() const LLVM_READONLY
Determine whether the no-signed-zeros flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
void setHasAllowContract(bool B)
Set or clear the allow-contract flag on this instruction, which must be an operator which supports th...
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void setHasNoNaNs(bool B)
Set or clear the no-nans flag on this instruction, which must be an operator which supports this flag...
void setHasApproxFunc(bool B)
Set or clear the approximate-math-functions flag on this instruction, which must be an operator which...
void setHasAllowReassoc(bool B)
Set or clear the reassociation flag on this instruction, which must be an operator which supports thi...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:99
bool isExact() const LLVM_READONLY
Determine whether the exact flag is set.
bool isTerminator() const
Definition: Instruction.h:254
bool hasAllowReciprocal() const LLVM_READONLY
Determine whether the allow-reciprocal flag is set.
void setHasNoInfs(bool B)
Set or clear the no-infs flag on this instruction, which must be an operator which supports this flag...
FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
bool hasApproxFunc() const LLVM_READONLY
Determine whether the approximate-math-functions flag is set.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
void setIsExact(bool b=true)
Set or clear the exact flag on this instruction, which must be an operator which supports this flag.
void setHasAllowReciprocal(bool B)
Set or clear the allow-reciprocal flag on this instruction, which must be an operator which supports ...
bool hasAllowContract() const LLVM_READONLY
Determine whether the allow-contract flag is set.
void setFast(bool B)
Set or clear all fast-math-flags on this instruction, which must be an operator which supports this f...
bool hasAllowReassoc() const LLVM_READONLY
Determine whether the allow-reassociation flag is set.
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
static std::unique_ptr< MemoryBuffer > getMemBuffer(StringRef InputData, StringRef BufferName="", bool RequiresNullTerminator=true)
Open the specified memory range as a MemoryBuffer.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Definition: PassManager.h:296
LLVM_ATTRIBUTE_MINSIZE void addPass(PassT &&Pass)
Definition: PassManager.h:249
PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM, ExtraArgTs... ExtraArgs)
Run all of the passes in this manager over the given unit of IR.
Definition: PassManager.h:201
static ReturnInst * Create(LLVMContext &C, Value *retVal, BasicBlock::iterator InsertBefore)
Instances of this class encapsulate one diagnostic report, allowing printing to a raw_ostream as a ca...
Definition: SourceMgr.h:281
void mutate(BasicBlock &BB, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:592
void mutate(Function &F, RandomIRBuilder &IB) override
Definition: IRMutator.cpp:569
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool empty() const
Definition: SmallSet.h:159
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
size_t size() const
Definition: SmallVector.h:91
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Multiway switch.
static SwitchInst * Create(Value *Value, BasicBlock *Default, unsigned NumCases, BasicBlock::iterator InsertBefore)
Analysis pass providing the TargetLibraryInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static IntegerType * getInt1Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
bool isTokenTy() const
Return true if this is 'token'.
Definition: Type.h:225
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:140
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
const ParentPtrTy getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
A range adaptor for a pair of iterators.
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
std::optional< const char * > toString(const std::optional< DWARFFormValue > &V)
Take an optional DWARFFormValue and try to extract a string value from it.
static SourcePred onlyType(Type *Only)
Definition: OpDescriptor.h:95
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void ReplaceInstWithInst(BasicBlock *BB, BasicBlock::iterator &BI, Instruction *I)
Replace the instruction specified by BI with the instruction specified by I.
void describeFuzzerIntOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Getters for the default sets of operations, per general category.
Definition: Operations.cpp:18
Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
void WriteBitcodeToFile(const Module &M, raw_ostream &Out, bool ShouldPreserveUseListOrder=false, const ModuleSummaryIndex *Index=nullptr, bool GenerateHash=false, ModuleHash *ModHash=nullptr)
Write the specified module to the specified raw output stream.
std::unique_ptr< Module > parseAndVerify(const uint8_t *Data, size_t Size, LLVMContext &Context)
Try to parse module and verify it.
Definition: IRMutator.cpp:696
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
std::unique_ptr< Module > parseModule(const uint8_t *Data, size_t Size, LLVMContext &Context)
Fuzzer friendly interface for the llvm bitcode parser.
Definition: IRMutator.cpp:664
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void describeFuzzerAggregateOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:88
size_t writeModule(const Module &M, uint8_t *Dest, size_t MaxSize)
Fuzzer friendly interface for the llvm bitcode printer.
Definition: IRMutator.cpp:684
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
ReservoirSampler< ElT, GenT > makeSampler(GenT &RandGen, RangeT &&Items)
Definition: Random.h:75
void describeFuzzerVectorOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:94
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition: STLExtras.h:572
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
DWARFExpression::Operation Op
void describeFuzzerFloatOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:45
void describeFuzzerControlFlowOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:75
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition: iterator.h:363
void describeFuzzerPointerOps(std::vector< fuzzerop::OpDescriptor > &Ops)
Definition: Operations.cpp:84
unsigned pred_size(const MachineBasicBlock *BB)
bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
Definition: Verifier.cpp:7097
A description of some operation we can build while fuzzing IR.
Definition: OpDescriptor.h:89