1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Provide a pass which mitigates speculative execution attacks which operate
11/// by speculating incorrectly past some predicate (a type check, bounds check,
12/// or other condition) to reach a load with invalid inputs and leak the data
13/// accessed by that load using a side channel out of the speculative domain.
14///
15/// For details on the attacks, see the first variant in both the Project Zero
16/// writeup and the Spectre paper:
17/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18/// https://spectreattack.com/spectre.pdf
19///
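/// At a high level (an informal sketch, not a full specification of the
/// pass): a "predicate state" value is kept in a register, all zeros on
/// correctly predicted paths and all ones once execution has misspeculated
/// past a conditional branch. Conditional edges are instrumented with CMOVs
/// that copy a poison (all-ones) value into the state when the branch
/// condition contradicts the path actually taken, and vulnerable loads are
/// then hardened by OR-ing the state into their address registers or into the
/// loaded value, so a misspeculated load produces a useless all-ones result.
///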
20//===----------------------------------------------------------------------===//
21
22#include "X86.h"
23#include "X86InstrBuilder.h"
24#include "X86InstrInfo.h"
25#include "X86Subtarget.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/ScopeExit.h"
31#include "llvm/ADT/SmallSet.h"
34#include "llvm/ADT/Statistic.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/MC/MCSchedule.h"
51#include "llvm/Pass.h"
53#include "llvm/Support/Debug.h"
56#include <algorithm>
57#include <cassert>
58#include <iterator>
59#include <optional>
60#include <utility>
61
62using namespace llvm;
63
64#define PASS_KEY "x86-slh"
65#define DEBUG_TYPE PASS_KEY
66
67STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
68STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
STATISTIC(NumAddrRegsHardened,
          "Number of address-mode registers hardened");
71STATISTIC(NumPostLoadRegsHardened,
72 "Number of post-load register values hardened");
73STATISTIC(NumCallsOrJumpsHardened,
74 "Number of calls or jumps requiring extra hardening");
75STATISTIC(NumInstsInserted, "Number of instructions inserted");
76STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
77
79 "x86-speculative-load-hardening",
80 cl::desc("Force enable speculative load hardening"), cl::init(false),
82
84 PASS_KEY "-lfence",
86 "Use LFENCE along each conditional edge to harden against speculative "
87 "loads rather than conditional movs and poisoned pointers."),
88 cl::init(false), cl::Hidden);
89
91 PASS_KEY "-post-load",
92 cl::desc("Harden the value loaded *after* it is loaded by "
93 "flushing the loaded bits to 1. This is hard to do "
94 "in general but can be done easily for GPRs."),
95 cl::init(true), cl::Hidden);
96
98 PASS_KEY "-fence-call-and-ret",
99 cl::desc("Use a full speculation fence to harden both call and ret edges "
100 "rather than a lighter weight mitigation."),
101 cl::init(false), cl::Hidden);
102
104 PASS_KEY "-ip",
105 cl::desc("Harden interprocedurally by passing our state in and out of "
106 "functions in the high bits of the stack pointer."),
107 cl::init(true), cl::Hidden);
108
109static cl::opt<bool>
111 cl::desc("Sanitize loads from memory. When disable, no "
112 "significant security is provided."),
113 cl::init(true), cl::Hidden);
114
116 PASS_KEY "-indirect",
117 cl::desc("Harden indirect calls and jumps against using speculatively "
118 "stored attacker controlled addresses. This is designed to "
119 "mitigate Spectre v1.2 style attacks."),
120 cl::init(true), cl::Hidden);
121
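
// Note: in normal use this pass is enabled per function via the
// `speculative_load_hardening` IR attribute (for example, set by
// `clang -mspeculative-load-hardening`); the `x86-speculative-load-hardening`
// flag above force-enables it for testing, and the other PASS_KEY-prefixed
// flags select among the individual mitigation strategies.
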
122namespace {
123
124class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
125public:
126 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
127
128 StringRef getPassName() const override {
129 return "X86 speculative load hardening";
130 }
131 bool runOnMachineFunction(MachineFunction &MF) override;
132 void getAnalysisUsage(AnalysisUsage &AU) const override;
133
134 /// Pass identification, replacement for typeid.
135 static char ID;
136
137private:
138 /// The information about a block's conditional terminators needed to trace
139 /// our predicate state through the exiting edges.
struct BlockCondInfo {
  MachineBasicBlock *MBB;

  // We mostly have one conditional branch, and in extremely rare cases have
  // two. Three and more are so rare as to be unimportant for compile time.
  SmallVector<MachineInstr *, 2> CondBrs;

  MachineInstr *UncondBr;
};
149
150 /// Manages the predicate state traced through the program.
151 struct PredState {
152 unsigned InitialReg = 0;
153 unsigned PoisonReg = 0;
154
const TargetRegisterClass *RC;
MachineSSAUpdater SSA;

158 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
159 : RC(RC), SSA(MF) {}
160 };
161
162 const X86Subtarget *Subtarget = nullptr;
163 MachineRegisterInfo *MRI = nullptr;
164 const X86InstrInfo *TII = nullptr;
165 const TargetRegisterInfo *TRI = nullptr;
166
167 std::optional<PredState> PS;
168
169 void hardenEdgesWithLFENCE(MachineFunction &MF);
170
171 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
172
174 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
175
176 void unfoldCallAndJumpLoads(MachineFunction &MF);
177
179 tracePredStateThroughIndirectBranches(MachineFunction &MF);
180
181 void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
182
183 unsigned saveEFLAGS(MachineBasicBlock &MBB,
185 const DebugLoc &Loc);
186 void restoreEFLAGS(MachineBasicBlock &MBB,
187 MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
188 Register Reg);
189
190 void mergePredStateIntoSP(MachineBasicBlock &MBB,
192 const DebugLoc &Loc, unsigned PredStateReg);
193 unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
195 const DebugLoc &Loc);
196
197 void
198 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
199 MachineOperand &IndexMO,
200 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
202 sinkPostLoadHardenedInst(MachineInstr &MI,
203 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
204 bool canHardenRegister(Register Reg);
205 unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
207 const DebugLoc &Loc);
208 unsigned hardenPostLoad(MachineInstr &MI);
209 void hardenReturnInstr(MachineInstr &MI);
210 void tracePredStateThroughCall(MachineInstr &MI);
211 void hardenIndirectCallOrJumpInstr(
213 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
214};
215
216} // end anonymous namespace
217
218char X86SpeculativeLoadHardeningPass::ID = 0;
219
220void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
}

static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
                                    MachineBasicBlock &Succ, int SuccCount,
                                    MachineInstr *Br, MachineInstr *&UncondBr,
                                    const X86InstrInfo &TII) {
229 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
230
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

235 // We have to insert the new block immediately after the current one as we
236 // don't know what layout-successor relationships the successor has and we
237 // may not be able to (and generally don't want to) try to fix those up.
238 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
239
240 // Update the branch instruction if necessary.
241 if (Br) {
242 assert(Br->getOperand(0).getMBB() == &Succ &&
243 "Didn't start with the right target!");
244 Br->getOperand(0).setMBB(&NewMBB);
245
246 // If this successor was reached through a branch rather than fallthrough,
247 // we might have *broken* fallthrough and so need to inject a new
248 // unconditional branch.
249 if (!UncondBr) {
250 MachineBasicBlock &OldLayoutSucc =
251 *std::next(MachineFunction::iterator(&NewMBB));
252 assert(MBB.isSuccessor(&OldLayoutSucc) &&
253 "Without an unconditional branch, the old layout successor should "
254 "be an actual successor!");
255 auto BrBuilder =
256 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
257 // Update the unconditional branch now that we've added one.
258 UncondBr = &*BrBuilder;
259 }
260
261 // Insert unconditional "jump Succ" instruction in the new block if
262 // necessary.
    if (!NewMBB.isLayoutSuccessor(&Succ)) {
      SmallVector<MachineOperand, 4> Cond;
      TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
266 }
267 } else {
268 assert(!UncondBr &&
269 "Cannot have a branchless successor and an unconditional branch!");
270 assert(NewMBB.isLayoutSuccessor(&Succ) &&
271 "A non-branch successor must have been a layout successor before "
272 "and now is a layout successor of the new block.");
273 }
274
275 // If this is the only edge to the successor, we can just replace it in the
276 // CFG. Otherwise we need to add a new entry in the CFG for the new
277 // successor.
278 if (SuccCount == 1) {
279 MBB.replaceSuccessor(&Succ, &NewMBB);
280 } else {
281 MBB.splitSuccessor(&Succ, &NewMBB);
282 }
283
284 // Hook up the edge from the new basic block to the old successor in the CFG.
285 NewMBB.addSuccessor(&Succ);
286
287 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
288 for (MachineInstr &MI : Succ) {
289 if (!MI.isPHI())
290 break;
291 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
292 OpIdx += 2) {
293 MachineOperand &OpV = MI.getOperand(OpIdx);
294 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
295 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
296 if (OpMBB.getMBB() != &MBB)
297 continue;
298
      // If this is the last edge to the successor, just replace MBB in the PHI.
300 if (SuccCount == 1) {
301 OpMBB.setMBB(&NewMBB);
302 break;
303 }
304
305 // Otherwise, append a new pair of operands for the new incoming edge.
306 MI.addOperand(MF, OpV);
307 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
308 break;
309 }
310 }
311
312 // Inherit live-ins from the successor
313 for (auto &LI : Succ.liveins())
314 NewMBB.addLiveIn(LI);
315
316 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
317 << Succ.getName() << "'.\n");
318 return NewMBB;
319}
320
/// Remove duplicate PHI operands to leave the PHI in a canonical and
322/// predictable form.
323///
324/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
325/// isn't what you might expect. We may have multiple entries in PHI nodes for
326/// a single predecessor. This makes CFG-updating extremely complex, so here we
327/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
328/// one entry per predecessor, regardless of how many edges there are.
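///
/// For example (purely illustrative), a PHI such as
///   %v = PHI %a, %bb.1, %a, %bb.1, %b, %bb.2
/// is rewritten here to carry a single entry for %bb.1:
///   %v = PHI %a, %bb.1, %b, %bb.2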
static void canonicalizePHIOperands(MachineFunction &MF) {
  SmallPtrSet<MachineBasicBlock *, 4> Preds;
  SmallVector<int, 4> DupIndices;
332 for (auto &MBB : MF)
333 for (auto &MI : MBB) {
334 if (!MI.isPHI())
335 break;
336
      // First we scan the operands of the PHI looking for duplicate entries
      // for a particular predecessor. We retain the operand index of each
      // duplicate entry found.
340 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
341 OpIdx += 2)
342 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
343 DupIndices.push_back(OpIdx);
344
      // Now walk the duplicate indices, removing both the block and value.
      // Note that these are stored as a vector, making this element-wise
      // removal potentially quadratic.
349 //
350 // FIXME: It is really frustrating that we have to use a quadratic
351 // removal algorithm here. There should be a better way, but the use-def
352 // updates required make that impossible using the public API.
353 //
354 // Note that we have to process these backwards so that we don't
355 // invalidate other indices with each removal.
356 while (!DupIndices.empty()) {
357 int OpIdx = DupIndices.pop_back_val();
358 // Remove both the block and value operand, again in reverse order to
359 // preserve indices.
360 MI.removeOperand(OpIdx + 1);
361 MI.removeOperand(OpIdx);
362 }
363
364 Preds.clear();
365 }
366}
367
368/// Helper to scan a function for loads vulnerable to misspeculation that we
369/// want to harden.
370///
371/// We use this to avoid making changes to functions where there is nothing we
372/// need to do to harden against misspeculation.
static bool hasVulnerableLoad(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
375 for (MachineInstr &MI : MBB) {
376 // Loads within this basic block after an LFENCE are not at risk of
377 // speculatively executing with invalid predicates from prior control
378 // flow. So break out of this block but continue scanning the function.
379 if (MI.getOpcode() == X86::LFENCE)
380 break;
381
382 // Looking for loads only.
383 if (!MI.mayLoad())
384 continue;
385
386 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
387 if (MI.getOpcode() == X86::MFENCE)
388 continue;
389
390 // We found a load.
391 return true;
392 }
393 }
394
395 // No loads found.
396 return false;
397}
398
399bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
400 MachineFunction &MF) {
401 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
402 << " **********\n");
403
  // Only run if this pass is force enabled or we detect the relevant function
  // attribute requesting SLH.
  if (!EnableSpeculativeLoadHardening &&
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
    return false;
409
410 Subtarget = &MF.getSubtarget<X86Subtarget>();
411 MRI = &MF.getRegInfo();
412 TII = Subtarget->getInstrInfo();
413 TRI = Subtarget->getRegisterInfo();
414
415 // FIXME: Support for 32-bit.
416 PS.emplace(MF, &X86::GR64_NOSPRegClass);
417
418 if (MF.begin() == MF.end())
419 // Nothing to do for a degenerate empty function...
420 return false;
421
  // We support an alternative hardening technique based on a debug flag.
  if (HardenEdgesWithLFENCE) {
    hardenEdgesWithLFENCE(MF);
    return true;
  }
427
428 // Create a dummy debug loc to use for all the generated code here.
429 DebugLoc Loc;
430
431 MachineBasicBlock &Entry = *MF.begin();
432 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
433
434 // Do a quick scan to see if we have any checkable loads.
435 bool HasVulnerableLoad = hasVulnerableLoad(MF);
436
437 // See if we have any conditional branching blocks that we will need to trace
438 // predicate state through.
439 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
440
441 // If we have no interesting conditions or loads, nothing to do here.
442 if (!HasVulnerableLoad && Infos.empty())
443 return true;
444
445 // The poison value is required to be an all-ones value for many aspects of
446 // this mitigation.
447 const int PoisonVal = -1;
448 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
449 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
450 .addImm(PoisonVal);
451 ++NumInstsInserted;
452
453 // If we have loads being hardened and we've asked for call and ret edges to
454 // get a full fence-based mitigation, inject that fence.
455 if (HasVulnerableLoad && FenceCallAndRet) {
456 // We need to insert an LFENCE at the start of the function to suspend any
457 // incoming misspeculation from the caller. This helps two-fold: the caller
458 // may not have been protected as this code has been, and this code gets to
459 // not take any specific action to protect across calls.
460 // FIXME: We could skip this for functions which unconditionally return
461 // a constant.
462 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
463 ++NumInstsInserted;
464 ++NumLFENCEsInserted;
465 }
466
467 // If we guarded the entry with an LFENCE and have no conditionals to protect
468 // in blocks, then we're done.
469 if (FenceCallAndRet && Infos.empty())
470 // We may have changed the function's code at this point to insert fences.
471 return true;
472
  // Establish the initial predicate state for the entry block.
  if (HardenInterprocedurally && !FenceCallAndRet) {
    // Set up the predicate state by extracting it from the incoming stack
    // pointer so we pick up any misspeculation in our caller.
    PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
  } else {
479 // Otherwise, just build the predicate state itself by zeroing a register
480 // as we don't need any initial state.
481 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
482 Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
483 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
484 PredStateSubReg);
485 ++NumInstsInserted;
486 MachineOperand *ZeroEFLAGSDefOp =
487 ZeroI->findRegisterDefOperand(X86::EFLAGS);
488 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
489 "Must have an implicit def of EFLAGS!");
490 ZeroEFLAGSDefOp->setIsDead(true);
491 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
492 PS->InitialReg)
493 .addImm(0)
494 .addReg(PredStateSubReg)
495 .addImm(X86::sub_32bit);
496 }
497
498 // We're going to need to trace predicate state throughout the function's
499 // CFG. Prepare for this by setting up our initial state of PHIs with unique
  // predecessor entries and all the initial predicate state.
  canonicalizePHIOperands(MF);

503 // Track the updated values in an SSA updater to rewrite into SSA form at the
504 // end.
505 PS->SSA.Initialize(PS->InitialReg);
506 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
507
508 // Trace through the CFG.
509 auto CMovs = tracePredStateThroughCFG(MF, Infos);
510
511 // We may also enter basic blocks in this function via exception handling
512 // control flow. Here, if we are hardening interprocedurally, we need to
513 // re-capture the predicate state from the throwing code. In the Itanium ABI,
514 // the throw will always look like a call to __cxa_throw and will have the
515 // predicate state in the stack pointer, so extract fresh predicate state from
516 // the stack pointer and make it available in SSA.
  // FIXME: Handle non-Itanium ABI EH models.
  if (HardenInterprocedurally) {
    for (MachineBasicBlock &MBB : MF) {
520 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
521 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
522 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
523 if (!MBB.isEHPad())
524 continue;
525 PS->SSA.AddAvailableValue(
526 &MBB,
527 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
528 }
529 }
530
  if (HardenIndirectCallsAndJumps) {
    // If we are going to harden calls and jumps we need to unfold their memory
    // operands.
534 unfoldCallAndJumpLoads(MF);
535
536 // Then we trace predicate state through the indirect branches.
537 auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
538 CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
539 }
540
541 // Now that we have the predicate state available at the start of each block
542 // in the CFG, trace it through each block, hardening vulnerable instructions
543 // as we go.
544 tracePredStateThroughBlocksAndHarden(MF);
545
546 // Now rewrite all the uses of the pred state using the SSA updater to insert
547 // PHIs connecting the state between blocks along the CFG edges.
548 for (MachineInstr *CMovI : CMovs)
549 for (MachineOperand &Op : CMovI->operands()) {
550 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
551 continue;
552
553 PS->SSA.RewriteUse(Op);
554 }
555
556 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
557 dbgs() << "\n"; MF.verify(this));
558 return true;
559}
560
561/// Implements the naive hardening approach of putting an LFENCE after every
562/// potentially mis-predicted control flow construct.
563///
564/// We include this as an alternative mostly for the purpose of comparison. The
565/// performance impact of this is expected to be extremely severe and not
566/// practical for any real-world users.
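///
/// Concretely, every non-EH-pad successor of a block ending in a conditional
/// branch gets an `lfence` inserted at its start (after any PHIs and labels),
/// so no load in that block can begin executing until the branch condition
/// has resolved.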
567void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
568 MachineFunction &MF) {
569 // First, we scan the function looking for blocks that are reached along edges
570 // that we might want to harden.
572 for (MachineBasicBlock &MBB : MF) {
573 // If there are no or only one successor, nothing to do here.
574 if (MBB.succ_size() <= 1)
575 continue;
576
577 // Skip blocks unless their terminators start with a branch. Other
578 // terminators don't seem interesting for guarding against misspeculation.
579 auto TermIt = MBB.getFirstTerminator();
580 if (TermIt == MBB.end() || !TermIt->isBranch())
581 continue;
582
    // Add all the non-EH-pad successors to the blocks we want to harden. We
584 // skip EH pads because there isn't really a condition of interest on
585 // entering.
586 for (MachineBasicBlock *SuccMBB : MBB.successors())
587 if (!SuccMBB->isEHPad())
588 Blocks.insert(SuccMBB);
589 }
590
591 for (MachineBasicBlock *MBB : Blocks) {
592 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
593 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
594 ++NumInstsInserted;
595 ++NumLFENCEsInserted;
596 }
597}

SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
  SmallVector<BlockCondInfo, 16> Infos;

603 // Walk the function and build up a summary for each block's conditions that
604 // we need to trace through.
605 for (MachineBasicBlock &MBB : MF) {
606 // If there are no or only one successor, nothing to do here.
607 if (MBB.succ_size() <= 1)
608 continue;
609
610 // We want to reliably handle any conditional branch terminators in the
611 // MBB, so we manually analyze the branch. We can handle all of the
    // permutations here, including ones that analyzeBranch cannot.
613 //
614 // The approach is to walk backwards across the terminators, resetting at
615 // any unconditional non-indirect branch, and track all conditional edges
616 // to basic blocks as well as the fallthrough or unconditional successor
617 // edge. For each conditional edge, we track the target and the opposite
618 // condition code in order to inject a "no-op" cmov into that successor
619 // that will harden the predicate. For the fallthrough/unconditional
620 // edge, we inject a separate cmov for each conditional branch with
621 // matching condition codes. This effectively implements an "and" of the
622 // condition flags, even if there isn't a single condition flag that would
623 // directly implement that. We don't bother trying to optimize either of
624 // these cases because if such an optimization is possible, LLVM should
625 // have optimized the conditional *branches* in that way already to reduce
626 // instruction count. This late, we simply assume the minimal number of
627 // branch instructions is being emitted and use that to guide our cmov
628 // insertion.
629
630 BlockCondInfo Info = {&MBB, {}, nullptr};
631
632 // Now walk backwards through the terminators and build up successors they
633 // reach and the conditions.
634 for (MachineInstr &MI : llvm::reverse(MBB)) {
635 // Once we've handled all the terminators, we're done.
636 if (!MI.isTerminator())
637 break;
638
639 // If we see a non-branch terminator, we can't handle anything so bail.
640 if (!MI.isBranch()) {
641 Info.CondBrs.clear();
642 break;
643 }
644
      // If we see an unconditional branch, reset our state, clear any
      // fallthrough, and set this as the "else" successor.
647 if (MI.getOpcode() == X86::JMP_1) {
648 Info.CondBrs.clear();
649 Info.UncondBr = &MI;
650 continue;
651 }
652
653 // If we get an invalid condition, we have an indirect branch or some
654 // other unanalyzable "fallthrough" case. We model this as a nullptr for
655 // the destination so we can still guard any conditional successors.
656 // Consider code sequences like:
657 // ```
658 // jCC L1
659 // jmpq *%rax
660 // ```
      // We still want to harden the edge to `L1`.
      if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
        Info.CondBrs.clear();
        Info.UncondBr = &MI;
        continue;
      }
667
668 // We have a vanilla conditional branch, add it to our list.
669 Info.CondBrs.push_back(&MI);
670 }
671 if (Info.CondBrs.empty()) {
672 ++NumBranchesUntraced;
673 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
674 MBB.dump());
675 continue;
676 }
677
678 Infos.push_back(Info);
679 }
680
681 return Infos;
682}
683
684/// Trace the predicate state through the CFG, instrumenting each conditional
685/// branch such that misspeculation through an edge will poison the predicate
686/// state.
687///
688/// Returns the list of inserted CMov instructions so that they can have their
689/// uses of the predicate state rewritten into proper SSA form once it is
690/// complete.
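///
/// As a rough illustration (with %state and %poison standing for the
/// predicate-state and poison registers), a block ending in:
///   testq %rdi, %rdi
///   je .LBB0_2
/// has `cmovneq %poison, %state` inserted along the taken edge (into
/// .LBB0_2) and `cmoveq %poison, %state` inserted along the fallthrough edge,
/// so misspeculating down either edge flushes the predicate state to all
/// ones.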
692X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
694 // Collect the inserted cmov instructions so we can rewrite their uses of the
695 // predicate state into SSA form.
697
698 // Now walk all of the basic blocks looking for ones that end in conditional
699 // jumps where we need to update this register along each edge.
700 for (const BlockCondInfo &Info : Infos) {
701 MachineBasicBlock &MBB = *Info.MBB;
702 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
703 MachineInstr *UncondBr = Info.UncondBr;
704
705 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
706 << "\n");
707 ++NumCondBranchesTraced;
708
709 // Compute the non-conditional successor as either the target of any
710 // unconditional branch or the layout successor.
711 MachineBasicBlock *UncondSucc =
712 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
713 ? UncondBr->getOperand(0).getMBB()
714 : nullptr)
715 : &*std::next(MachineFunction::iterator(&MBB));
716
    // Count how many edges there are to any given successor.
    SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
    if (UncondSucc)
720 ++SuccCounts[UncondSucc];
721 for (auto *CondBr : CondBrs)
722 ++SuccCounts[CondBr->getOperand(0).getMBB()];
723
724 // A lambda to insert cmov instructions into a block checking all of the
725 // condition codes in a sequence.
726 auto BuildCheckingBlockForSuccAndConds =
727 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
728 MachineInstr *Br, MachineInstr *&UncondBr,
730 // First, we split the edge to insert the checking block into a safe
731 // location.
732 auto &CheckingMBB =
733 (SuccCount == 1 && Succ.pred_size() == 1)
734 ? Succ
735 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
736
737 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
738 if (!LiveEFLAGS)
739 CheckingMBB.addLiveIn(X86::EFLAGS);
740
741 // Now insert the cmovs to implement the checks.
742 auto InsertPt = CheckingMBB.begin();
743 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
744 "Should never have a PHI in the initial checking block as it "
745 "always has a single predecessor!");
746
747 // We will wire each cmov to each other, but need to start with the
748 // incoming pred state.
749 unsigned CurStateReg = PS->InitialReg;
750
751 for (X86::CondCode Cond : Conds) {
752 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
753 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
754
755 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
756 // Note that we intentionally use an empty debug location so that
757 // this picks up the preceding location.
758 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
759 TII->get(CMovOp), UpdatedStateReg)
760 .addReg(CurStateReg)
761 .addReg(PS->PoisonReg)
762 .addImm(Cond);
763 // If this is the last cmov and the EFLAGS weren't originally
764 // live-in, mark them as killed.
765 if (!LiveEFLAGS && Cond == Conds.back())
766 CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
767
768 ++NumInstsInserted;
769 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
770 dbgs() << "\n");
771
772 // The first one of the cmovs will be using the top level
773 // `PredStateReg` and need to get rewritten into SSA form.
774 if (CurStateReg == PS->InitialReg)
775 CMovs.push_back(&*CMovI);
776
777 // The next cmov should start from this one's def.
778 CurStateReg = UpdatedStateReg;
779 }
780
781 // And put the last one into the available values for SSA form of our
782 // predicate state.
783 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
784 };
785
786 std::vector<X86::CondCode> UncondCodeSeq;
787 for (auto *CondBr : CondBrs) {
788 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
789 int &SuccCount = SuccCounts[&Succ];
790
793 UncondCodeSeq.push_back(Cond);
794
795 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
796 {InvCond});
797
798 // Decrement the successor count now that we've split one of the edges.
799 // We need to keep the count of edges to the successor accurate in order
800 // to know above when to *replace* the successor in the CFG vs. just
801 // adding the new successor.
802 --SuccCount;
803 }
804
805 // Since we may have split edges and changed the number of successors,
806 // normalize the probabilities. This avoids doing it each time we split an
807 // edge.
809
810 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
811 // need to intersect the other condition codes. We can do this by just
812 // doing a cmov for each one.
813 if (!UncondSucc)
814 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
815 // just skip this and continue.
816 continue;
817
818 assert(SuccCounts[UncondSucc] == 1 &&
819 "We should never have more than one edge to the unconditional "
820 "successor at this point because every other edge must have been "
821 "split above!");
822
823 // Sort and unique the codes to minimize them.
824 llvm::sort(UncondCodeSeq);
825 UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
826 UncondCodeSeq.end());
827
828 // Build a checking version of the successor.
829 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
830 UncondBr, UncondBr, UncondCodeSeq);
831 }
832
833 return CMovs;
834}
835
836/// Compute the register class for the unfolded load.
837///
838/// FIXME: This should probably live in X86InstrInfo, potentially by adding
839/// a way to unfold into a newly created vreg rather than requiring a register
840/// input.
841static const TargetRegisterClass *
843 unsigned Opcode) {
844 unsigned Index;
845 unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
846 Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
847 const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
848 return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
849}
850
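/// Unfold loads that have been folded into call and jump instructions so the
/// loaded target value can be hardened before it is used. As a rough
/// illustration, a memory-operand call such as `callq *8(%rdi)` is rewritten
/// into a load of the target into a fresh virtual register followed by a
/// register-indirect call of that register.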
851void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
852 MachineFunction &MF) {
853 for (MachineBasicBlock &MBB : MF)
854 // We use make_early_inc_range here so we can remove instructions if needed
855 // without disturbing the iteration.
857 // Must either be a call or a branch.
858 if (!MI.isCall() && !MI.isBranch())
859 continue;
860 // We only care about loading variants of these instructions.
861 if (!MI.mayLoad())
862 continue;
863
864 switch (MI.getOpcode()) {
865 default: {
867 dbgs() << "ERROR: Found an unexpected loading branch or call "
868 "instruction:\n";
869 MI.dump(); dbgs() << "\n");
870 report_fatal_error("Unexpected loading branch or call!");
871 }
872
873 case X86::FARCALL16m:
874 case X86::FARCALL32m:
875 case X86::FARCALL64m:
876 case X86::FARJMP16m:
877 case X86::FARJMP32m:
878 case X86::FARJMP64m:
879 // We cannot mitigate far jumps or calls, but we also don't expect them
880 // to be vulnerable to Spectre v1.2 style attacks.
881 continue;
882
883 case X86::CALL16m:
884 case X86::CALL16m_NT:
885 case X86::CALL32m:
886 case X86::CALL32m_NT:
887 case X86::CALL64m:
888 case X86::CALL64m_NT:
889 case X86::JMP16m:
890 case X86::JMP16m_NT:
891 case X86::JMP32m:
892 case X86::JMP32m_NT:
893 case X86::JMP64m:
894 case X86::JMP64m_NT:
895 case X86::TAILJMPm64:
896 case X86::TAILJMPm64_REX:
897 case X86::TAILJMPm:
898 case X86::TCRETURNmi64:
899 case X86::TCRETURNmi: {
900 // Use the generic unfold logic now that we know we're dealing with
901 // expected instructions.
902 // FIXME: We don't have test coverage for all of these!
903 auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
904 if (!UnfoldedRC) {
906 << "ERROR: Unable to unfold load from instruction:\n";
907 MI.dump(); dbgs() << "\n");
908 report_fatal_error("Unable to unfold load!");
909 }
910 Register Reg = MRI->createVirtualRegister(UnfoldedRC);
912 // If we were able to compute an unfolded reg class, any failure here
913 // is just a programming error so just assert.
914 bool Unfolded =
915 TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
916 /*UnfoldStore*/ false, NewMIs);
917 (void)Unfolded;
918 assert(Unfolded &&
919 "Computed unfolded register class but failed to unfold");
920 // Now stitch the new instructions into place and erase the old one.
921 for (auto *NewMI : NewMIs)
922 MBB.insert(MI.getIterator(), NewMI);
923
924 // Update the call site info.
925 if (MI.isCandidateForCallSiteEntry())
926 MF.eraseCallSiteInfo(&MI);
927
928 MI.eraseFromParent();
929 LLVM_DEBUG({
930 dbgs() << "Unfolded load successfully into:\n";
931 for (auto *NewMI : NewMIs) {
932 NewMI->dump();
933 dbgs() << "\n";
934 }
935 });
936 continue;
937 }
938 }
939 llvm_unreachable("Escaped switch with default!");
940 }
941}
942
943/// Trace the predicate state through indirect branches, instrumenting them to
944/// poison the state if a target is reached that does not match the expected
945/// target.
946///
947/// This is designed to mitigate Spectre variant 1 attacks where an indirect
948/// branch is trained to predict a particular target and then mispredicts that
949/// target in a way that can leak data. Despite using an indirect branch, this
950/// is really a variant 1 style attack: it does not steer execution to an
951/// arbitrary or attacker controlled address, and it does not require any
952/// special code executing next to the victim. This attack can also be mitigated
953/// through retpolines, but those require either replacing indirect branches
954/// with conditional direct branches or lowering them through a device that
955/// blocks speculation. This mitigation can replace these retpoline-style
956/// mitigations for jump tables and other indirect branches within a function
957/// when variant 2 isn't a risk while allowing limited speculation. Indirect
958/// calls, however, cannot be mitigated through this technique without changing
959/// the ABI in a fundamental way.
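///
/// Rough sketch of the inserted check (exact instructions depend on the code
/// model; %target, %tmp, %poison and %state are placeholders): every block
/// reachable from an indirect branch compares the expected target address,
/// threaded to it in SSA form, against its own address and poisons the
/// predicate state on a mismatch:
///   leaq .LBB_target(%rip), %tmp
///   cmpq %tmp, %target
///   cmovneq %poison, %state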
961X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
962 MachineFunction &MF) {
963 // We use the SSAUpdater to insert PHI nodes for the target addresses of
964 // indirect branches. We don't actually need the full power of the SSA updater
965 // in this particular case as we always have immediately available values, but
966 // this avoids us having to re-implement the PHI construction logic.
967 MachineSSAUpdater TargetAddrSSA(MF);
968 TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
969
970 // Track which blocks were terminated with an indirect branch.
971 SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
972
973 // We need to know what blocks end up reached via indirect branches. We
974 // expect this to be a subset of those whose address is taken and so track it
975 // directly via the CFG.
976 SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
977
978 // Walk all the blocks which end in an indirect branch and make the
979 // target address available.
980 for (MachineBasicBlock &MBB : MF) {
981 // Find the last terminator.
982 auto MII = MBB.instr_rbegin();
983 while (MII != MBB.instr_rend() && MII->isDebugInstr())
984 ++MII;
985 if (MII == MBB.instr_rend())
986 continue;
987 MachineInstr &TI = *MII;
988 if (!TI.isTerminator() || !TI.isBranch())
989 // No terminator or non-branch terminator.
990 continue;
991
992 unsigned TargetReg;
993
994 switch (TI.getOpcode()) {
995 default:
996 // Direct branch or conditional branch (leading to fallthrough).
997 continue;
998
999 case X86::FARJMP16m:
1000 case X86::FARJMP32m:
1001 case X86::FARJMP64m:
1002 // We cannot mitigate far jumps or calls, but we also don't expect them
1003 // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1004 continue;
1005
1006 case X86::JMP16m:
1007 case X86::JMP16m_NT:
1008 case X86::JMP32m:
1009 case X86::JMP32m_NT:
1010 case X86::JMP64m:
1011 case X86::JMP64m_NT:
1012 // Mostly as documentation.
1013 report_fatal_error("Memory operand jumps should have been unfolded!");
1014
1015 case X86::JMP16r:
1017 "Support for 16-bit indirect branches is not implemented.");
1018 case X86::JMP32r:
1020 "Support for 32-bit indirect branches is not implemented.");
1021
1022 case X86::JMP64r:
1023 TargetReg = TI.getOperand(0).getReg();
1024 }
1025
1026 // We have definitely found an indirect branch. Verify that there are no
1027 // preceding conditional branches as we don't yet support that.
1028 if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1029 return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1030 })) {
1031 LLVM_DEBUG({
1032 dbgs() << "ERROR: Found other terminators in a block with an indirect "
1033 "branch! This is not yet supported! Terminator sequence:\n";
1034 for (MachineInstr &MI : MBB.terminators()) {
1035 MI.dump();
1036 dbgs() << '\n';
1037 }
1038 });
1039 report_fatal_error("Unimplemented terminator sequence!");
1040 }
1041
1042 // Make the target register an available value for this block.
1043 TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1044 IndirectTerminatedMBBs.insert(&MBB);
1045
1046 // Add all the successors to our target candidates.
1047 for (MachineBasicBlock *Succ : MBB.successors())
1048 IndirectTargetMBBs.insert(Succ);
1049 }
1050
1051 // Keep track of the cmov instructions we insert so we can return them.
1053
1054 // If we didn't find any indirect branches with targets, nothing to do here.
1055 if (IndirectTargetMBBs.empty())
1056 return CMovs;
1057
1058 // We found indirect branches and targets that need to be instrumented to
1059 // harden loads within them. Walk the blocks of the function (to get a stable
1060 // ordering) and instrument each target of an indirect branch.
1061 for (MachineBasicBlock &MBB : MF) {
1062 // Skip the blocks that aren't candidate targets.
1063 if (!IndirectTargetMBBs.count(&MBB))
1064 continue;
1065
1066 // We don't expect EH pads to ever be reached via an indirect branch. If
1067 // this is desired for some reason, we could simply skip them here rather
1068 // than asserting.
1069 assert(!MBB.isEHPad() &&
1070 "Unexpected EH pad as target of an indirect branch!");
1071
1072 // We should never end up threading EFLAGS into a block to harden
1073 // conditional jumps as there would be an additional successor via the
1074 // indirect branch. As a consequence, all such edges would be split before
1075 // reaching here, and the inserted block will handle the EFLAGS-based
1076 // hardening.
1077 assert(!MBB.isLiveIn(X86::EFLAGS) &&
1078 "Cannot check within a block that already has live-in EFLAGS!");
1079
1080 // We can't handle having non-indirect edges into this block unless this is
1081 // the only successor and we can synthesize the necessary target address.
1082 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1083 // If we've already handled this by extracting the target directly,
1084 // nothing to do.
1085 if (IndirectTerminatedMBBs.count(Pred))
1086 continue;
1087
1088 // Otherwise, we have to be the only successor. We generally expect this
1089 // to be true as conditional branches should have had a critical edge
1090 // split already. We don't however need to worry about EH pad successors
1091 // as they'll happily ignore the target and their hardening strategy is
1092 // resilient to all ways in which they could be reached speculatively.
1093 if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1094 return Succ->isEHPad() || Succ == &MBB;
1095 })) {
1096 LLVM_DEBUG({
1097 dbgs() << "ERROR: Found conditional entry to target of indirect "
1098 "branch!\n";
1099 Pred->dump();
1100 MBB.dump();
1101 });
1102 report_fatal_error("Cannot harden a conditional entry to a target of "
1103 "an indirect branch!");
1104 }
1105
1106 // Now we need to compute the address of this block and install it as a
1107 // synthetic target in the predecessor. We do this at the bottom of the
1108 // predecessor.
1109 auto InsertPt = Pred->getFirstTerminator();
1110 Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1111 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1112 !Subtarget->isPositionIndependent()) {
1113 // Directly materialize it into an immediate.
1114 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1115 TII->get(X86::MOV64ri32), TargetReg)
1116 .addMBB(&MBB);
1117 ++NumInstsInserted;
1118 (void)AddrI;
1119 LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1120 dbgs() << "\n");
1121 } else {
1122 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1123 TargetReg)
1124 .addReg(/*Base*/ X86::RIP)
1125 .addImm(/*Scale*/ 1)
1126 .addReg(/*Index*/ 0)
1127 .addMBB(&MBB)
1128 .addReg(/*Segment*/ 0);
1129 ++NumInstsInserted;
1130 (void)AddrI;
1131 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1132 dbgs() << "\n");
1133 }
1134 // And make this available.
1135 TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1136 }
1137
1138 // Materialize the needed SSA value of the target. Note that we need the
1139 // middle of the block as this block might at the bottom have an indirect
1140 // branch back to itself. We can do this here because at this point, every
1141 // predecessor of this block has an available value. This is basically just
1142 // automating the construction of a PHI node for this target.
1143 Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1144
1145 // Insert a comparison of the incoming target register with this block's
1146 // address. This also requires us to mark the block as having its address
1147 // taken explicitly.
1149 auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1150 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1151 !Subtarget->isPositionIndependent()) {
1152 // Check directly against a relocated immediate when we can.
1153 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1154 .addReg(TargetReg, RegState::Kill)
1155 .addMBB(&MBB);
1156 ++NumInstsInserted;
1157 (void)CheckI;
1158 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1159 } else {
1160 // Otherwise compute the address into a register first.
1161 Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1162 auto AddrI =
1163 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1164 .addReg(/*Base*/ X86::RIP)
1165 .addImm(/*Scale*/ 1)
1166 .addReg(/*Index*/ 0)
1167 .addMBB(&MBB)
1168 .addReg(/*Segment*/ 0);
1169 ++NumInstsInserted;
1170 (void)AddrI;
1171 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1172 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1173 .addReg(TargetReg, RegState::Kill)
1174 .addReg(AddrReg, RegState::Kill);
1175 ++NumInstsInserted;
1176 (void)CheckI;
1177 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1178 }
1179
1180 // Now cmov over the predicate if the comparison wasn't equal.
1181 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1182 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1183 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1184 auto CMovI =
1185 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1186 .addReg(PS->InitialReg)
1187 .addReg(PS->PoisonReg)
1189 CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
1190 ++NumInstsInserted;
1191 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1192 CMovs.push_back(&*CMovI);
1193
1194 // And put the new value into the available values for SSA form of our
1195 // predicate state.
1196 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1197 }
1198
1199 // Return all the newly inserted cmov instructions of the predicate state.
1200 return CMovs;
1201}
1202
1203// Returns true if the MI has EFLAGS as a register def operand and it's live,
1204// otherwise it returns false
1205static bool isEFLAGSDefLive(const MachineInstr &MI) {
1206 if (const MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1207 return !DefOp->isDead();
1208 }
1209 return false;
1210}
1211
static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                         const TargetRegisterInfo &TRI) {
  // Check if EFLAGS are alive by seeing if there is a def of them or they
  // live-in, and then seeing if that def is in turn used.
  for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
    if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1218 // If the def is dead, then EFLAGS is not live.
1219 if (DefOp->isDead())
1220 return false;
1221
1222 // Otherwise we've def'ed it, and it is live.
1223 return true;
1224 }
1225 // While at this instruction, also check if we use and kill EFLAGS
1226 // which means it isn't live.
1227 if (MI.killsRegister(X86::EFLAGS, &TRI))
1228 return false;
1229 }
1230
1231 // If we didn't find anything conclusive (neither definitely alive or
1232 // definitely dead) return whether it lives into the block.
1233 return MBB.isLiveIn(X86::EFLAGS);
1234}
1235
1236/// Trace the predicate state through each of the blocks in the function,
1237/// hardening everything necessary along the way.
1238///
1239/// We call this routine once the initial predicate state has been established
1240/// for each basic block in the function in the SSA updater. This routine traces
1241/// it through the instructions within each basic block, and for non-returning
1242/// blocks informs the SSA updater about the final state that lives out of the
1243/// block. Along the way, it hardens any vulnerable instruction using the
1244/// currently valid predicate state. We have to do these two things together
1245/// because the SSA updater only works across blocks. Within a block, we track
1246/// the current predicate state directly and update it as it changes.
1247///
1248/// This operates in two passes over each block. First, we analyze the loads in
1249/// the block to determine which strategy will be used to harden them: hardening
1250/// the address or hardening the loaded value when loaded into a register
1251/// amenable to hardening. We have to process these first because the two
1252/// strategies may interact -- later hardening may change what strategy we wish
1253/// to use. We also will analyze data dependencies between loads and avoid
1254/// hardening those loads that are data dependent on a load with a hardened
1255/// address. We also skip hardening loads already behind an LFENCE as that is
1256/// sufficient to harden them against misspeculation.
1257///
1258/// Second, we actively trace the predicate state through the block, applying
1259/// the hardening steps we determined necessary in the first pass as we go.
1260///
1261/// These two passes are applied to each basic block. We operate one block at a
1262/// time to simplify reasoning about reachability and sequencing.
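///
/// As a rough sketch of the two strategies for a load such as
/// `movq (%rsi), %rax`: address hardening emits `orq %state, %rsi` before the
/// load so that under misspeculation the access targets a harmless all-ones
/// address rather than an attacker-chosen one, while post-load hardening
/// instead emits `orq %state, %rax` afterwards so the loaded value is flushed
/// to all ones before it can leak through a side channel.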
1263void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1264 MachineFunction &MF) {
1265 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1266 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1267
1268 SmallSet<unsigned, 16> HardenedAddrRegs;
1269
1270 SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1271
1272 // Track the set of load-dependent registers through the basic block. Because
1273 // the values of these registers have an existing data dependency on a loaded
1274 // value which we would have checked, we can omit any checks on them.
1275 SparseBitVector<> LoadDepRegs;
1276
1277 for (MachineBasicBlock &MBB : MF) {
1278 // The first pass over the block: collect all the loads which can have their
1279 // loaded value hardened and all the loads that instead need their address
1280 // hardened. During this walk we propagate load dependence for address
1281 // hardened loads and also look for LFENCE to stop hardening wherever
    // possible. When deciding whether or not to harden the loaded value,
1283 // we check to see if any registers used in the address will have been
1284 // hardened at this point and if so, harden any remaining address registers
1285 // as that often successfully re-uses hardened addresses and minimizes
1286 // instructions.
1287 //
1288 // FIXME: We should consider an aggressive mode where we continue to keep as
1289 // many loads value hardened even when some address register hardening would
1290 // be free (due to reuse).
1291 //
1292 // Note that we only need this pass if we are actually hardening loads.
1293 if (HardenLoads)
1294 for (MachineInstr &MI : MBB) {
1295 // We naively assume that all def'ed registers of an instruction have
1296 // a data dependency on all of their operands.
1297 // FIXME: Do a more careful analysis of x86 to build a conservative
1298 // model here.
1299 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1300 return Op.isReg() && LoadDepRegs.test(Op.getReg());
1301 }))
1302 for (MachineOperand &Def : MI.defs())
1303 if (Def.isReg())
1304 LoadDepRegs.set(Def.getReg());
1305
1306 // Both Intel and AMD are guiding that they will change the semantics of
1307 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1308 // no more need to guard things in this block.
1309 if (MI.getOpcode() == X86::LFENCE)
1310 break;
1311
1312 // If this instruction cannot load, nothing to do.
1313 if (!MI.mayLoad())
1314 continue;
1315
1316 // Some instructions which "load" are trivially safe or unimportant.
1317 if (MI.getOpcode() == X86::MFENCE)
1318 continue;
1319
1320 // Extract the memory operand information about this instruction.
1321 // FIXME: This doesn't handle loading pseudo instructions which we often
1322 // could handle with similarly generic logic. We probably need to add an
1323 // MI-layer routine similar to the MC-layer one we use here which maps
1324 // pseudos much like this maps real instructions.
1325 const MCInstrDesc &Desc = MI.getDesc();
1326 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1327 if (MemRefBeginIdx < 0) {
1329 << "WARNING: unable to harden loading instruction: ";
1330 MI.dump());
1331 continue;
1332 }
1333
1334 MemRefBeginIdx += X86II::getOperandBias(Desc);
1335
1336 MachineOperand &BaseMO =
1337 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1338 MachineOperand &IndexMO =
1339 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1340
1341 // If we have at least one (non-frame-index, non-RIP) register operand,
1342 // and neither operand is load-dependent, we need to check the load.
1343 unsigned BaseReg = 0, IndexReg = 0;
1344 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1345 BaseMO.getReg() != X86::NoRegister)
1346 BaseReg = BaseMO.getReg();
1347 if (IndexMO.getReg() != X86::NoRegister)
1348 IndexReg = IndexMO.getReg();
1349
1350 if (!BaseReg && !IndexReg)
1351 // No register operands!
1352 continue;
1353
1354 // If any register operand is dependent, this load is dependent and we
1355 // needn't check it.
1356 // FIXME: Is this true in the case where we are hardening loads after
1357 // they complete? Unclear, need to investigate.
1358 if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1359 (IndexReg && LoadDepRegs.test(IndexReg)))
1360 continue;
1361
1362 // If post-load hardening is enabled, this load is compatible with
1363 // post-load hardening, and we aren't already going to harden one of the
1364 // address registers, queue it up to be hardened post-load. Notably,
1365 // even once hardened this won't introduce a useful dependency that
        // could prune out subsequent loads.
        if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
            !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1369 MI.getOperand(0).isReg() &&
1370 canHardenRegister(MI.getOperand(0).getReg()) &&
1371 !HardenedAddrRegs.count(BaseReg) &&
1372 !HardenedAddrRegs.count(IndexReg)) {
1373 HardenPostLoad.insert(&MI);
1374 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1375 continue;
1376 }
1377
1378 // Record this instruction for address hardening and record its register
1379 // operands as being address-hardened.
1380 HardenLoadAddr.insert(&MI);
1381 if (BaseReg)
1382 HardenedAddrRegs.insert(BaseReg);
1383 if (IndexReg)
1384 HardenedAddrRegs.insert(IndexReg);
1385
1386 for (MachineOperand &Def : MI.defs())
1387 if (Def.isReg())
1388 LoadDepRegs.set(Def.getReg());
1389 }
1390
1391 // Now re-walk the instructions in the basic block, and apply whichever
1392 // hardening strategy we have elected. Note that we do this in a second
1393 // pass specifically so that we have the complete set of instructions for
1394 // which we will do post-load hardening and can defer it in certain
1395 // circumstances.
1396 for (MachineInstr &MI : MBB) {
1397 if (HardenLoads) {
1398 // We cannot both require hardening the def of a load and its address.
1399 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1400 "Requested to harden both the address and def of a load!");
1401
1402 // Check if this is a load whose address needs to be hardened.
1403 if (HardenLoadAddr.erase(&MI)) {
1404 const MCInstrDesc &Desc = MI.getDesc();
1405 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1406 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1407
1408 MemRefBeginIdx += X86II::getOperandBias(Desc);
1409
1410 MachineOperand &BaseMO =
1411 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1412 MachineOperand &IndexMO =
1413 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1414 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1415 continue;
1416 }
1417
1418 // Test if this instruction is one of our post load instructions (and
1419 // remove it from the set if so).
1420 if (HardenPostLoad.erase(&MI)) {
1421 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1422
1423 // If this is a data-invariant load and there is no EFLAGS
1424 // interference, we want to try and sink any hardening as far as
        // possible.
        if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
          // Sink the instruction we'll need to harden as far as we can down
1428 // the graph.
1429 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1430
1431 // If we managed to sink this instruction, update everything so we
1432 // harden that instruction when we reach it in the instruction
1433 // sequence.
1434 if (SunkMI != &MI) {
1435 // If in sinking there was no instruction needing to be hardened,
1436 // we're done.
1437 if (!SunkMI)
1438 continue;
1439
1440 // Otherwise, add this to the set of defs we harden.
1441 HardenPostLoad.insert(SunkMI);
1442 continue;
1443 }
1444 }
1445
1446 unsigned HardenedReg = hardenPostLoad(MI);
1447
1448 // Mark the resulting hardened register as such so we don't re-harden.
1449 AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1450
1451 continue;
1452 }
1453
1454 // Check for an indirect call or branch that may need its input hardened
1455 // even if we couldn't find the specific load used, or were able to
1456 // avoid hardening it for some reason. Note that here we cannot break
1457 // out afterward as we may still need to handle any call aspect of this
1458 // instruction.
1459 if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1460 hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1461 }
1462
1463 // After we finish hardening loads we handle interprocedural hardening if
      // enabled and relevant for this instruction.
      if (!HardenInterprocedurally)
        continue;
1467 if (!MI.isCall() && !MI.isReturn())
1468 continue;
1469
1470 // If this is a direct return (IE, not a tail call) just directly harden
1471 // it.
1472 if (MI.isReturn() && !MI.isCall()) {
1473 hardenReturnInstr(MI);
1474 continue;
1475 }
1476
1477 // Otherwise we have a call. We need to handle transferring the predicate
1478 // state into a call and recovering it after the call returns (unless this
1479 // is a tail call).
1480 assert(MI.isCall() && "Should only reach here for calls!");
1481 tracePredStateThroughCall(MI);
1482 }
1483
1484 HardenPostLoad.clear();
1485 HardenLoadAddr.clear();
1486 HardenedAddrRegs.clear();
1487 AddrRegToHardenedReg.clear();
1488
1489 // Currently, we only track data-dependent loads within a basic block.
1490 // FIXME: We should see if this is necessary or if we could be more
1491 // aggressive here without opening up attack avenues.
1492 LoadDepRegs.clear();
1493 }
1494}
1495
1496/// Save EFLAGS into the returned GPR. This can in turn be restored with
1497/// `restoreEFLAGS`.
1498///
1499/// Note that LLVM can only lower very simple patterns of saved and restored
1500/// EFLAGS registers. The restore should always be within the same basic block
1501/// as the save so that no PHI nodes are inserted.
1502unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1504 const DebugLoc &Loc) {
1505 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1506 // what instruction selection does.
1507 Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1508 // We directly copy the FLAGS register and rely on later lowering to clean
1509 // this up into the appropriate setCC instructions.
1510 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1511 ++NumInstsInserted;
1512 return Reg;
1513}
1514
1515/// Restore EFLAGS from the provided GPR. This should be produced by
1516/// `saveEFLAGS`.
1517///
1518/// This must be done within the same basic block as the save in order to
1519/// reliably lower.
1520void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1521 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1522 const DebugLoc &Loc, Register Reg) {
1523 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1524 ++NumInstsInserted;
1525}
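// Typical usage of the pair above (see hardenLoadAddr and hardenValueInRegister
// below): save the flags only when they are live, emit the flag-clobbering
// hardening instructions, then restore them:
//
//   unsigned FlagsReg = 0;
//   if (isEFLAGSLive(MBB, InsertPt, *TRI))
//     FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
//   // ... emit hardening instructions that clobber EFLAGS ...
//   if (FlagsReg)
//     restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);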
1526
1527/// Takes the current predicate state (in a register) and merges it into the
1528/// stack pointer. The state is essentially a single bit, but we merge this in
1529/// a way that won't form non-canonical pointers and also will be preserved
1530/// across normal stack adjustments.
1531void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1532 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1533 const DebugLoc &Loc, unsigned PredStateReg) {
1534 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1535 // FIXME: This hard codes a shift distance based on the number of bits needed
1536 // to stay canonical on 64-bit. We should compute this somehow and support
1537 // 32-bit as part of that.
1538 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1539 .addReg(PredStateReg, RegState::Kill)
1540 .addImm(47);
1541 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1542 ++NumInstsInserted;
1543 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1544 .addReg(X86::RSP)
1545 .addReg(TmpReg, RegState::Kill);
1546 OrI->addRegisterDead(X86::EFLAGS, TRI);
1547 ++NumInstsInserted;
1548}
1549
1550/// Extracts the predicate state stored in the high bits of the stack pointer.
1551unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1552 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1553 const DebugLoc &Loc) {
1554 Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1555 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1556
1557 // We know that the stack pointer will have any preserved predicate state in
1558 // its high bit. We just want to smear this across the other bits. Turns out,
1559 // this is exactly what an arithmetic right shift does.
1560 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1561 .addReg(X86::RSP);
1562 auto ShiftI =
1563 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1564 .addReg(TmpReg, RegState::Kill)
1565 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1566 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1567 ++NumInstsInserted;
1568
1569 return PredStateReg;
1570}
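// An illustrative sketch (not part of the pass): the merge/extract arithmetic
// above on plain 64-bit integers, assuming the predicate state is all-ones
// when misspeculating and all-zeros on the correct path. The helper names are
// hypothetical.
namespace slh_sketch {
inline unsigned long long mergeStateIntoSPValue(unsigned long long SP,
                                                unsigned long long State) {
  // Shifting the single-bit state up by 47 places it in the high bits of the
  // stack pointer while keeping the pointer canonical (bits 47..63 end up all
  // equal), matching the SHL64ri/OR64rr pair emitted above.
  return SP | (State << 47);
}
inline unsigned long long extractStateFromSPValue(unsigned long long SP) {
  // An arithmetic right shift by 63 smears the top bit across the register,
  // recovering all-ones or all-zeros, matching the SAR64ri emitted above.
  return (unsigned long long)((long long)SP >> 63);
}
} // namespace slh_sketch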
1571
1572void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1573 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1574 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1575 MachineBasicBlock &MBB = *MI.getParent();
1576 const DebugLoc &Loc = MI.getDebugLoc();
1577
1578 // Check if EFLAGS are alive by seeing if there is a def of them or they are
1579 // live-in, and then seeing if that def is in turn used.
1580 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1581
1582 SmallVector<MachineOperand *, 2> HardenOpRegs;
1583
1584 if (BaseMO.isFI()) {
1585 // A frame index is never a dynamically controllable load, so only
1586 // harden it if we're covering fixed address loads as well.
1587 LLVM_DEBUG(
1588 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1589 MI.dump(); dbgs() << "\n");
1590 } else if (BaseMO.getReg() == X86::RSP) {
1591 // Some idempotent atomic operations are lowered directly to a locked
1592 // OR with 0 to the top of the stack (or slightly offset from the top), which uses an
1593 // explicit RSP register as the base.
1594 assert(IndexMO.getReg() == X86::NoRegister &&
1595 "Explicit RSP access with dynamic index!");
1596 LLVM_DEBUG(
1597 dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1598 } else if (BaseMO.getReg() == X86::RIP ||
1599 BaseMO.getReg() == X86::NoRegister) {
1600 // For both RIP-relative addressed loads or absolute loads, we cannot
1601 // meaningfully harden them because the address being loaded has no
1602 // dynamic component.
1603 //
1604 // FIXME: When using a segment base (like TLS does) we end up with the
1605 // dynamic address being the base plus -1 because we can't mutate the
1606 // segment register here. This allows the signed 32-bit offset to point at
1607 // valid segment-relative addresses and load them successfully.
1608 LLVM_DEBUG(
1609 dbgs() << " Cannot harden base of "
1610 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1611 << " address in a load!");
1612 } else {
1613 assert(BaseMO.isReg() &&
1614 "Only allowed to have a frame index or register base.");
1615 HardenOpRegs.push_back(&BaseMO);
1616 }
1617
1618 if (IndexMO.getReg() != X86::NoRegister &&
1619 (HardenOpRegs.empty() ||
1620 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1621 HardenOpRegs.push_back(&IndexMO);
1622
1623 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1624 "Should have exactly one or two registers to harden!");
1625 assert((HardenOpRegs.size() == 1 ||
1626 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1627 "Should not have two of the same registers!");
1628
1629 // Remove any registers that have already been checked.
1630 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1631 // See if this operand's register has already been checked.
1632 auto It = AddrRegToHardenedReg.find(Op->getReg());
1633 if (It == AddrRegToHardenedReg.end())
1634 // Not checked, so retain this one.
1635 return false;
1636
1637 // Otherwise, we can directly update this operand and remove it.
1638 Op->setReg(It->second);
1639 return true;
1640 });
1641 // If there are none left, we're done.
1642 if (HardenOpRegs.empty())
1643 return;
1644
1645 // Compute the current predicate state.
1646 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1647
1648 auto InsertPt = MI.getIterator();
1649
1650 // If EFLAGS are live and we don't have access to instructions that avoid
1651 // clobbering EFLAGS we need to save and restore them. This in turn makes
1652 // the EFLAGS no longer live.
1653 unsigned FlagsReg = 0;
1654 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1655 EFLAGSLive = false;
1656 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1657 }
1658
1659 for (MachineOperand *Op : HardenOpRegs) {
1660 Register OpReg = Op->getReg();
1661 auto *OpRC = MRI->getRegClass(OpReg);
1662 Register TmpReg = MRI->createVirtualRegister(OpRC);
1663
1664 // If this is a vector register, we'll need somewhat custom logic to handle
1665 // hardening it.
1666 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1667 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1668 assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1669 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1670
1671 // Move our state into a vector register.
1672 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1673 // but that doesn't seem likely to be worth it.
1674 Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1675 auto MovI =
1676 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1677 .addReg(StateReg);
1678 (void)MovI;
1679 ++NumInstsInserted;
1680 LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1681
1682 // Broadcast it across the vector register.
1683 Register VBStateReg = MRI->createVirtualRegister(OpRC);
1684 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1685 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1686 : X86::VPBROADCASTQYrr),
1687 VBStateReg)
1688 .addReg(VStateReg);
1689 (void)BroadcastI;
1690 ++NumInstsInserted;
1691 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1692 dbgs() << "\n");
1693
1694 // Merge our potential poison state into the value with a vector or.
1695 auto OrI =
1696 BuildMI(MBB, InsertPt, Loc,
1697 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1698 .addReg(VBStateReg)
1699 .addReg(OpReg);
1700 (void)OrI;
1701 ++NumInstsInserted;
1702 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1703 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1704 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1705 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1706 assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1707 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1708 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1709 if (Is128Bit || Is256Bit)
1710 assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1711
1712 // Broadcast our state into a vector register.
1713 Register VStateReg = MRI->createVirtualRegister(OpRC);
1714 unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1715 : Is256Bit ? X86::VPBROADCASTQrZ256rr
1716 : X86::VPBROADCASTQrZrr;
1717 auto BroadcastI =
1718 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1719 .addReg(StateReg);
1720 (void)BroadcastI;
1721 ++NumInstsInserted;
1722 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1723 dbgs() << "\n");
1724
1725 // Merge our potential poison state into the value with a vector or.
1726 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1727 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1728 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1729 .addReg(VStateReg)
1730 .addReg(OpReg);
1731 (void)OrI;
1732 ++NumInstsInserted;
1733 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1734 } else {
1735 // FIXME: Need to support GR32 here for 32-bit code.
1736 assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1737 "Not a supported register class for address hardening!");
1738
1739 if (!EFLAGSLive) {
1740 // Merge our potential poison state into the value with an or.
1741 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1742 .addReg(StateReg)
1743 .addReg(OpReg);
1744 OrI->addRegisterDead(X86::EFLAGS, TRI);
1745 ++NumInstsInserted;
1746 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1747 } else {
1748 // We need to avoid touching EFLAGS so shift out all but the least
1749 // significant bit using the instruction that doesn't update flags.
1750 auto ShiftI =
1751 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1752 .addReg(OpReg)
1753 .addReg(StateReg);
1754 (void)ShiftI;
1755 ++NumInstsInserted;
1756 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1757 dbgs() << "\n");
1758 }
1759 }
1760
1761 // Record this register as checked and update the operand.
1762 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1763 "Should not have checked this register yet!");
1764 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1765 Op->setReg(TmpReg);
1766 ++NumAddrRegsHardened;
1767 }
1768
1769 // And restore the flags if needed.
1770 if (FlagsReg)
1771 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1772}
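// An illustrative sketch (not part of the pass): the two scalar hardening
// strategies above on plain 64-bit values, assuming an all-ones predicate
// state under misspeculation and all-zeros otherwise. The helper names are
// hypothetical.
namespace slh_sketch {
inline unsigned long long hardenAddrWithOr(unsigned long long Addr,
                                           unsigned long long State) {
  // OR64rr path: a poisoned state turns the address into all-ones, so the
  // speculative load cannot reach attacker-selected memory.
  return Addr | State;
}
inline unsigned long long hardenAddrWithShrx(unsigned long long Addr,
                                             unsigned long long State) {
  // SHRX64rr path (used when EFLAGS must stay live and BMI2 is available):
  // the shift count is the state modulo 64, so a poisoned state shifts by 63
  // and destroys all but one bit of the address without touching the flags.
  return Addr >> (State & 63);
}
} // namespace slh_sketch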
1773
1774MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1775 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1777 "Cannot get here with a non-invariant load!");
1778 assert(!isEFLAGSDefLive(InitialMI) &&
1779 "Cannot get here with a data invariant load "
1780 "that interferes with EFLAGS!");
1781
1782 // See if we can sink hardening the loaded value.
1783 auto SinkCheckToSingleUse =
1784 [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
1785 Register DefReg = MI.getOperand(0).getReg();
1786
1787 // We need to find a single use to which we can sink the check. We can
1788 // primarily do this because many uses may already end up checked on their
1789 // own.
1790 MachineInstr *SingleUseMI = nullptr;
1791 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1792 // If we're already going to harden this use, it is data invariant, it
1793 // does not interfere with EFLAGS, and within our block.
1794 if (HardenedInstrs.count(&UseMI)) {
1795 if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1796 // If we've already decided to harden a non-load, we must have sunk
1797 // some other post-load hardened instruction to it and it must itself
1798 // be data-invariant.
1799 assert(X86InstrInfo::isDataInvariant(UseMI) &&
1800 "Data variant instruction being hardened!");
1801 continue;
1802 }
1803
1804 // Otherwise, this is a load and the load component can't be data
1805 // invariant so check how this register is being used.
1806 const MCInstrDesc &Desc = UseMI.getDesc();
1807 int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1808 assert(MemRefBeginIdx >= 0 &&
1809 "Should always have mem references here!");
1810 MemRefBeginIdx += X86II::getOperandBias(Desc);
1811
1812 MachineOperand &BaseMO =
1813 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1814 MachineOperand &IndexMO =
1815 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1816 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1817 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1818 // The load uses the register as part of its address making it not
1819 // invariant.
1820 return {};
1821
1822 continue;
1823 }
1824
1825 if (SingleUseMI)
1826 // We already have a single use, this would make two. Bail.
1827 return {};
1828
1829 // If this single use isn't data invariant, isn't in this block, or has
1830 // interfering EFLAGS, we can't sink the hardening to it.
1831 if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1832 isEFLAGSDefLive(UseMI))
1833 return {};
1834
1835 // If this instruction defines multiple registers bail as we won't harden
1836 // all of them.
1837 if (UseMI.getDesc().getNumDefs() > 1)
1838 return {};
1839
1840 // If this register isn't a virtual register we can't sanely walk its uses,
1841 // just bail. Also check that its register class is one of the ones we
1842 // can harden.
1843 Register UseDefReg = UseMI.getOperand(0).getReg();
1844 if (!UseDefReg.isVirtual() || !canHardenRegister(UseDefReg))
1845 return {};
1846
1847 SingleUseMI = &UseMI;
1848 }
1849
1850 // If SingleUseMI is still null, there is no use that needs its own
1851 // checking. Otherwise, it is the single use that needs checking.
1852 return {SingleUseMI};
1853 };
1854
1855 MachineInstr *MI = &InitialMI;
1856 while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1857 // Update which MI we're checking now.
1858 MI = *SingleUse;
1859 if (!MI)
1860 break;
1861 }
1862
1863 return MI;
1864}
1865
1866bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1867 auto *RC = MRI->getRegClass(Reg);
1868 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1869 if (RegBytes > 8)
1870 // We don't support post-load hardening of vectors.
1871 return false;
1872
1873 unsigned RegIdx = Log2_32(RegBytes);
1874 assert(RegIdx < 4 && "Unsupported register size");
1875
1876 // If this register class is explicitly constrained to a class that doesn't
1877 // require REX prefix, we may not be able to satisfy that constraint when
1878 // emitting the hardening instructions, so bail out here.
1879 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1880 // end up with both a NOREX and a REX-only register as operands to the hardening
1881 // instructions. It would be better to fix that code to handle this situation
1882 // rather than hack around it in this way.
1883 const TargetRegisterClass *NOREXRegClasses[] = {
1884 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1885 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1886 if (RC == NOREXRegClasses[RegIdx])
1887 return false;
1888
1889 const TargetRegisterClass *GPRRegClasses[] = {
1890 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1891 &X86::GR64RegClass};
1892 return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1893}
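// Note: with RegBytes in {1, 2, 4, 8}, Log2_32(RegBytes) yields 0, 1, 2, and 3,
// indexing the NOREX and GR8/GR16/GR32/GR64 arrays above.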
1894
1895/// Harden a value in a register.
1896///
1897/// This is the low-level logic to fully harden a value sitting in a register
1898/// against leaking during speculative execution.
1899///
1900/// Unlike hardening an address that is used by a load, this routine is required
1901/// to hide *all* incoming bits in the register.
1902///
1903/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1904/// larger than the predicate state register. FIXME: We should support vector
1905/// registers here by broadcasting the predicate state.
1906///
1907/// The new, hardened virtual register is returned. It will have the same
1908/// register class as `Reg`.
1909unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1910 Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1911 const DebugLoc &Loc) {
1912 assert(canHardenRegister(Reg) && "Cannot harden this register!");
1913 assert(Reg.isVirtual() && "Cannot harden a physical register!");
1914
1915 auto *RC = MRI->getRegClass(Reg);
1916 int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1917 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1918 assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1919 "Unknown register size");
1920
1921 // FIXME: Need to teach this about 32-bit mode.
1922 if (Bytes != 8) {
1923 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1924 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1925 Register NarrowStateReg = MRI->createVirtualRegister(RC);
1926 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1927 .addReg(StateReg, 0, SubRegImm);
1928 StateReg = NarrowStateReg;
1929 }
1930
1931 unsigned FlagsReg = 0;
1932 if (isEFLAGSLive(MBB, InsertPt, *TRI))
1933 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1934
1935 Register NewReg = MRI->createVirtualRegister(RC);
1936 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1937 unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1938 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1939 .addReg(StateReg)
1940 .addReg(Reg);
1941 OrI->addRegisterDead(X86::EFLAGS, TRI);
1942 ++NumInstsInserted;
1943 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1944
1945 if (FlagsReg)
1946 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1947
1948 return NewReg;
1949}
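// For example, hardening a 32-bit virtual register %val lowers (before
// register allocation) to roughly:
//   %state32:gr32 = COPY %state.sub_32bit
//   %hardened:gr32 = OR32rr %state32, %val, implicit-def dead $eflags
// with a saveEFLAGS/restoreEFLAGS pair wrapped around the OR when EFLAGS are
// live at the insertion point.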
1950
1951/// Harden a load by hardening the loaded value in the defined register.
1952///
1953/// We can harden a non-leaking load into a register without touching the
1954/// address by just hiding all of the loaded bits during misspeculation. We use
1955/// an `or` instruction to do this because we set up our poison value as all
1956 /// ones. The goal is simply that the loaded bits not be exposed to speculative
1957 /// execution, and coercing them to one is sufficient for that.
1958///
1959/// Returns the newly hardened register.
1960unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1961 MachineBasicBlock &MBB = *MI.getParent();
1962 const DebugLoc &Loc = MI.getDebugLoc();
1963
1964 auto &DefOp = MI.getOperand(0);
1965 Register OldDefReg = DefOp.getReg();
1966 auto *DefRC = MRI->getRegClass(OldDefReg);
1967
1968 // Because we want to completely replace the uses of this def'ed value with
1969 // the hardened value, create a dedicated new register that will only be used
1970 // to communicate the unhardened value to the hardening.
1971 Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1972 DefOp.setReg(UnhardenedReg);
1973
1974 // Now harden this register's value, getting a hardened reg that is safe to
1975 // use. Note that we insert the instructions to compute this *after* the
1976 // defining instruction, not before it.
1977 unsigned HardenedReg = hardenValueInRegister(
1978 UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1979
1980 // Finally, replace the old register (which now only has the uses of the
1981 // original def) with the hardened register.
1982 MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1983
1984 ++NumPostLoadRegsHardened;
1985 return HardenedReg;
1986}
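// An illustrative sketch (not part of the pass): the value-hardening OR used
// by hardenValueInRegister and hardenPostLoad above, on a plain 64-bit value,
// assuming an all-ones poison state under misspeculation. The helper name is
// hypothetical.
namespace slh_sketch {
inline unsigned long long hardenLoadedValue(unsigned long long Loaded,
                                            unsigned long long State) {
  // The OR leaves the value untouched on the correct path and forces every
  // loaded bit to one when misspeculating, so no secret bits can feed a later
  // side channel.
  return Loaded | State;
}
} // namespace slh_sketch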
1987
1988/// Harden a return instruction.
1989///
1990/// Returns implicitly perform a load which we need to harden. Without hardening
1991 /// this load, an attacker may speculatively write over the return address to
1992/// steer speculation of the return to an attacker controlled address. This is
1993/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1994/// this paper:
1995/// https://people.csail.mit.edu/vlk/spectre11.pdf
1996///
1997/// We can harden this by introducing an LFENCE that will delay any load of the
1998/// return address until prior instructions have retired (and thus are not being
1999/// speculated), or we can harden the address used by the implicit load: the
2000/// stack pointer.
2001///
2002/// If we are not using an LFENCE, hardening the stack pointer has an additional
2003/// benefit: it allows us to pass the predicate state accumulated in this
2004/// function back to the caller. In the absence of a BCBS attack on the return,
2005/// the caller will typically be resumed and speculatively executed due to the
2006/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2007/// priority. It is possible that some code from the caller will be executed
2008/// speculatively even during a BCBS-attacked return until the steering takes
2009/// effect. Whenever this happens, the caller can recover the (poisoned)
2010/// predicate state from the stack pointer and continue to harden loads.
2011void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2012 MachineBasicBlock &MBB = *MI.getParent();
2013 const DebugLoc &Loc = MI.getDebugLoc();
2014 auto InsertPt = MI.getIterator();
2015
2016 if (FenceCallAndRet)
2017 // No need to fence here as we'll fence at the return site itself. That
2018 // handles more cases than we can handle here.
2019 return;
2020
2021 // Take our predicate state, shift it to the high 17 bits (so that we keep
2022 // pointers canonical) and merge it into RSP. This will allow the caller to
2023 // extract it when we return (speculatively).
2024 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2025}
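// Note: with a 64-bit predicate state, the SHL by 47 in mergePredStateIntoSP
// sets bits 47 through 63 when the state is all-ones, which is the "high 17
// bits" referred to above (47 + 17 = 64).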
2026
2027/// Trace the predicate state through a call.
2028///
2029/// There are several layers of this needed to handle the full complexity of
2030/// calls.
2031///
2032/// First, we need to send the predicate state into the called function. We do
2033/// this by merging it into the high bits of the stack pointer.
2034///
2035/// For tail calls, this is all we need to do.
2036///
2037/// For calls where we might return and resume the control flow, we need to
2038/// extract the predicate state from the high bits of the stack pointer after
2039/// control returns from the called function.
2040///
2041/// We also need to verify that we intended to return to this location in the
2042/// code. An attacker might arrange for the processor to mispredict the return
2043/// to this valid but incorrect return address in the program rather than the
2044/// correct one. See the paper on this attack, called "ret2spec" by the
2045/// researchers, here:
2046/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2047///
2048/// The way we verify that we returned to the correct location is by preserving
2049/// the expected return address across the call. One technique involves taking
2050 /// advantage of the red-zone to load the return address from `-8(%rsp)` where it
2051/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2052/// directly save the address into a register that will be preserved across the
2053/// call. We compare this intended return address against the address
2054/// immediately following the call (the observed return address). If these
2055/// mismatch, we have detected misspeculation and can poison our predicate
2056/// state.
2057void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2058 MachineInstr &MI) {
2059 MachineBasicBlock &MBB = *MI.getParent();
2060 MachineFunction &MF = *MBB.getParent();
2061 auto InsertPt = MI.getIterator();
2062 const DebugLoc &Loc = MI.getDebugLoc();
2063
2064 if (FenceCallAndRet) {
2065 if (MI.isReturn())
2066 // Tail call, we don't return to this function.
2067 // FIXME: We should also handle noreturn calls.
2068 return;
2069
2070 // We don't need to fence before the call because the function should fence
2071 // in its entry. However, we do need to fence after the call returns.
2072 // Fencing before the return doesn't correctly handle cases where the return
2073 // itself is mispredicted.
2074 BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2075 ++NumInstsInserted;
2076 ++NumLFENCEsInserted;
2077 return;
2078 }
2079
2080 // First, we transfer the predicate state into the called function by merging
2081 // it into the stack pointer. This will kill the current def of the state.
2082 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2083 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2084
2085 // If this call is also a return, it is a tail call and we don't need anything
2086 // else to handle it so just return. Also, if there are no further
2087 // instructions and no successors, this call does not return so we can also
2088 // bail.
2089 if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2090 return;
2091
2092 // Create a symbol to track the return address and attach it to the call
2093 // machine instruction. We will lower extra symbols attached to call
2094 // instructions as a label immediately following the call.
2095 MCSymbol *RetSymbol =
2096 MF.getContext().createTempSymbol("slh_ret_addr",
2097 /*AlwaysAddSuffix*/ true);
2098 MI.setPostInstrSymbol(MF, RetSymbol);
2099
2100 const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2101 unsigned ExpectedRetAddrReg = 0;
2102
2103 // If we have no red zones or if the function returns twice (possibly without
2104 // using the `ret` instruction) like setjmp, we need to save the expected
2105 // return address prior to the call.
2106 if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2107 MF.exposesReturnsTwice()) {
2108 // If we don't have red zones, we need to compute the expected return
2109 // address prior to the call and store it in a register that lives across
2110 // the call.
2111 //
2112 // In some ways, this is doubly satisfying as a mitigation because it will
2113 // also successfully detect stack smashing bugs in some cases (typically,
2114 // when a callee-saved register is used and the callee doesn't push it onto
2115 // the stack). But that isn't our primary goal, so we only use it as
2116 // a fallback.
2117 //
2118 // FIXME: It isn't clear that this is reliable in the face of
2119 // rematerialization in the register allocator. We somehow need to force
2120 // that to not occur for this particular instruction, and instead to spill
2121 // or otherwise preserve the value computed *prior* to the call.
2122 //
2123 // FIXME: It is even less clear why MachineCSE can't just fold this when we
2124 // end up having to use identical instructions both before and after the
2125 // call to feed the comparison.
2126 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2127 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2128 !Subtarget->isPositionIndependent()) {
2129 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2130 .addSym(RetSymbol);
2131 } else {
2132 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2133 .addReg(/*Base*/ X86::RIP)
2134 .addImm(/*Scale*/ 1)
2135 .addReg(/*Index*/ 0)
2136 .addSym(RetSymbol)
2137 .addReg(/*Segment*/ 0);
2138 }
2139 }
2140
2141 // Step past the call to handle when it returns.
2142 ++InsertPt;
2143
2144 // If we didn't pre-compute the expected return address into a register, then
2145 // red zones are enabled and the return address is still available on the
2146 // stack immediately after the call. As the very first instruction, we load it
2147 // into a register.
2148 if (!ExpectedRetAddrReg) {
2149 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2150 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2151 .addReg(/*Base*/ X86::RSP)
2152 .addImm(/*Scale*/ 1)
2153 .addReg(/*Index*/ 0)
2154 .addImm(/*Displacement*/ -8) // The return address has been popped, so
2155 // it now sits 8 bytes below the stack pointer.
2156 .addReg(/*Segment*/ 0);
2157 }
2158
2159 // Now we extract the callee's predicate state from the stack pointer.
2160 unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2161
2162 // Test the expected return address against our actual address. If we can
2163 // form this basic block's address as an immediate, this is easy. Otherwise
2164 // we compute it.
2165 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2166 !Subtarget->isPositionIndependent()) {
2167 // FIXME: Could we fold this with the load? It would require careful EFLAGS
2168 // management.
2169 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2170 .addReg(ExpectedRetAddrReg, RegState::Kill)
2171 .addSym(RetSymbol);
2172 } else {
2173 Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2174 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2175 .addReg(/*Base*/ X86::RIP)
2176 .addImm(/*Scale*/ 1)
2177 .addReg(/*Index*/ 0)
2178 .addSym(RetSymbol)
2179 .addReg(/*Segment*/ 0);
2180 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2181 .addReg(ExpectedRetAddrReg, RegState::Kill)
2182 .addReg(ActualRetAddrReg, RegState::Kill);
2183 }
2184
2185 // Now conditionally update the predicate state we just extracted if we ended
2186 // up at a different return address than expected.
2187 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2188 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2189
2190 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2191 auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2192 .addReg(NewStateReg, RegState::Kill)
2193 .addReg(PS->PoisonReg)
2194 .addImm(X86::COND_NE);
2195 CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
2196 ++NumInstsInserted;
2197 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2198
2199 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2200}
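// An illustrative sketch (not part of the pass): the return-address check
// above expressed on plain values, mirroring the CMP + CMOV sequence. The
// helper name is hypothetical and the poison state is assumed to be all-ones.
namespace slh_sketch {
inline unsigned long long updateStateAfterReturn(unsigned long long State,
                                                 unsigned long long Expected,
                                                 unsigned long long Observed) {
  // If we resumed at an address other than the one recorded around the call,
  // the return was mis-steered, so select the poison state instead of the
  // state extracted from the stack pointer.
  const unsigned long long Poison = ~0ULL;
  return Expected == Observed ? State : Poison;
}
} // namespace slh_sketch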
2201
2202/// An attacker may speculatively store over a value that is then speculatively
2203/// loaded and used as the target of an indirect call or jump instruction. This
2204/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2205/// in this paper:
2206/// https://people.csail.mit.edu/vlk/spectre11.pdf
2207///
2208/// When this happens, the speculative execution of the call or jump will end up
2209/// being steered to this attacker controlled address. While most such loads
2210/// will be adequately hardened already, we want to ensure that they are
2211/// definitively treated as needing post-load hardening. While address hardening
2212/// is sufficient to prevent secret data from leaking to the attacker, it may
2213/// not be sufficient to prevent an attacker from steering speculative
2214/// execution. We forcibly unfolded all relevant loads above and so will always
2215 /// have an opportunity to post-load harden here; we just need to scan for cases
2216/// not already flagged and add them.
2217void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2218 MachineInstr &MI,
2219 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
2220 switch (MI.getOpcode()) {
2221 case X86::FARCALL16m:
2222 case X86::FARCALL32m:
2223 case X86::FARCALL64m:
2224 case X86::FARJMP16m:
2225 case X86::FARJMP32m:
2226 case X86::FARJMP64m:
2227 // We don't need to harden either far calls or far jumps as they are
2228 // safe from Spectre.
2229 return;
2230
2231 default:
2232 break;
2233 }
2234
2235 // We should never see a loading instruction at this point, as those should
2236 // have been unfolded.
2237 assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2238
2239 // If the first operand isn't a register, this is a branch or call
2240 // instruction with an immediate operand which doesn't need to be hardened.
2241 if (!MI.getOperand(0).isReg())
2242 return;
2243
2244 // For all of these, the target register is the first operand of the
2245 // instruction.
2246 auto &TargetOp = MI.getOperand(0);
2247 Register OldTargetReg = TargetOp.getReg();
2248
2249 // Try to lookup a hardened version of this register. We retain a reference
2250 // here as we want to update the map to track any newly computed hardened
2251 // register.
2252 unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2253
2254 // If we don't have a hardened register yet, compute one. Otherwise, just use
2255 // the already hardened register.
2256 //
2257 // FIXME: It is a little suspect that we use partially hardened registers that
2258 // only feed addresses. The complexity of partial hardening with SHRX
2259 // continues to pile up. Should definitively measure its value and consider
2260 // eliminating it.
2261 if (!HardenedTargetReg)
2262 HardenedTargetReg = hardenValueInRegister(
2263 OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2264
2265 // Set the target operand to the hardened register.
2266 TargetOp.setReg(HardenedTargetReg);
2267
2268 ++NumCallsOrJumpsHardened;
2269}
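// Note: the hardening applied here is the same OR with the predicate state
// sketched after hardenPostLoad above; under misspeculation the branch target
// becomes all-ones, an address the attacker does not control, so speculation
// cannot be steered to attacker-chosen code.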
2270
2271INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2272 "X86 speculative load hardener", false, false)
2273INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2274 "X86 speculative load hardener", false, false)
2275
2276 FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2277 return new X86SpeculativeLoadHardeningPass();
2278}