1 //====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// Provide a pass which mitigates speculative execution attacks which operate
11 /// by speculating incorrectly past some predicate (a type check, bounds check,
12 /// or other condition) to reach a load with invalid inputs and leak the data
13 /// accessed by that load using a side channel out of the speculative domain.
14 ///
15 /// For details on the attacks, see the first variant in both the Project Zero
16 /// writeup and the Spectre paper:
17 /// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18 /// https://spectreattack.com/spectre.pdf
19 ///
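///
/// As a simplified illustration (not code from this pass), for source like:
///
///   if (Idx < Array.size())   // a bounds check the CPU may mispredict
///     Value = Array[Idx];     // load reached speculatively with a bad Idx
///
/// the pass threads an all-zeros/all-ones "predicate state" value along each
/// conditional edge and uses it to mask either the load's address or the
/// loaded value, so a misspeculated load cannot leak data it should not have
/// reached.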
20 //===----------------------------------------------------------------------===//
21 
22 #include "X86.h"
23 #include "X86InstrBuilder.h"
24 #include "X86InstrInfo.h"
25 #include "X86Subtarget.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/Optional.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/ScopeExit.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/SparseBitVector.h"
35 #include "llvm/ADT/Statistic.h"
36 #include "llvm/CodeGen/MachineBasicBlock.h"
37 #include "llvm/CodeGen/MachineConstantPool.h"
38 #include "llvm/CodeGen/MachineFunction.h"
39 #include "llvm/CodeGen/MachineFunctionPass.h"
40 #include "llvm/CodeGen/MachineInstr.h"
41 #include "llvm/CodeGen/MachineInstrBuilder.h"
42 #include "llvm/CodeGen/MachineModuleInfo.h"
43 #include "llvm/CodeGen/MachineOperand.h"
44 #include "llvm/CodeGen/MachineRegisterInfo.h"
45 #include "llvm/CodeGen/MachineSSAUpdater.h"
46 #include "llvm/CodeGen/TargetInstrInfo.h"
47 #include "llvm/CodeGen/TargetRegisterInfo.h"
48 #include "llvm/CodeGen/TargetSchedule.h"
49 #include "llvm/CodeGen/TargetSubtargetInfo.h"
50 #include "llvm/IR/DebugLoc.h"
51 #include "llvm/MC/MCSchedule.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/CommandLine.h"
54 #include "llvm/Support/Debug.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Target/TargetMachine.h"
57 #include <algorithm>
58 #include <cassert>
59 #include <iterator>
60 #include <utility>
61 
62 using namespace llvm;
63 
64 #define PASS_KEY "x86-slh"
65 #define DEBUG_TYPE PASS_KEY
66 
67 STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
68 STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
69 STATISTIC(NumAddrRegsHardened,
70  "Number of address mode used registers hardened");
71 STATISTIC(NumPostLoadRegsHardened,
72  "Number of post-load register values hardened");
73 STATISTIC(NumCallsOrJumpsHardened,
74  "Number of calls or jumps requiring extra hardening");
75 STATISTIC(NumInstsInserted, "Number of instructions inserted");
76 STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
77 
78 static cl::opt<bool> EnableSpeculativeLoadHardening(
79  "x86-speculative-load-hardening",
80  cl::desc("Force enable speculative load hardening"), cl::init(false),
81  cl::Hidden);
82 
83 static cl::opt<bool> HardenEdgesWithLFENCE(
84  PASS_KEY "-lfence",
85  cl::desc(
86  "Use LFENCE along each conditional edge to harden against speculative "
87  "loads rather than conditional movs and poisoned pointers."),
88  cl::init(false), cl::Hidden);
89 
90 static cl::opt<bool> EnablePostLoadHardening(
91  PASS_KEY "-post-load",
92  cl::desc("Harden the value loaded *after* it is loaded by "
93  "flushing the loaded bits to 1. This is hard to do "
94  "in general but can be done easily for GPRs."),
95  cl::init(true), cl::Hidden);
96 
97 static cl::opt<bool> FenceCallAndRet(
98  PASS_KEY "-fence-call-and-ret",
99  cl::desc("Use a full speculation fence to harden both call and ret edges "
100  "rather than a lighter weight mitigation."),
101  cl::init(false), cl::Hidden);
102 
103 static cl::opt<bool> HardenInterprocedurally(
104  PASS_KEY "-ip",
105  cl::desc("Harden interprocedurally by passing our state in and out of "
106  "functions in the high bits of the stack pointer."),
107  cl::init(true), cl::Hidden);
108 
109 static cl::opt<bool>
110  HardenLoads(PASS_KEY "-loads",
111  cl::desc("Sanitize loads from memory. When disabled, no "
112  "significant security is provided."),
113  cl::init(true), cl::Hidden);
114 
115 static cl::opt<bool> HardenIndirectCallsAndJumps(
116  PASS_KEY "-indirect",
117  cl::desc("Harden indirect calls and jumps against using speculatively "
118  "stored attacker controlled addresses. This is designed to "
119  "mitigate Spectre v1.2 style attacks."),
120  cl::init(true), cl::Hidden);
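
// For reference: this pass is normally requested per-function via the
// `speculative_load_hardening` IR attribute (checked in runOnMachineFunction
// below), which frontends set for speculative-load-hardening builds; the
// -x86-speculative-load-hardening flag above force-enables it, and the other
// PASS_KEY-prefixed flags choose among the hardening strategies.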
121 
122 namespace {
123 
124 class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
125 public:
126  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
127 
128  StringRef getPassName() const override {
129  return "X86 speculative load hardening";
130  }
131  bool runOnMachineFunction(MachineFunction &MF) override;
132  void getAnalysisUsage(AnalysisUsage &AU) const override;
133 
134  /// Pass identification, replacement for typeid.
135  static char ID;
136 
137 private:
138  /// The information about a block's conditional terminators needed to trace
139  /// our predicate state through the exiting edges.
140  struct BlockCondInfo {
141  MachineBasicBlock *MBB;
142 
143  // We mostly have one conditional branch, and in extremely rare cases have
144  // two. Three and more are so rare as to be unimportant for compile time.
145  SmallVector<MachineInstr *, 2> CondBrs;
146 
147  MachineInstr *UncondBr;
148  };
149 
150  /// Manages the predicate state traced through the program.
151  struct PredState {
152  unsigned InitialReg = 0;
153  unsigned PoisonReg = 0;
154 
155  const TargetRegisterClass *RC;
156  MachineSSAUpdater SSA;
157 
158  PredState(MachineFunction &MF, const TargetRegisterClass *RC)
159  : RC(RC), SSA(MF) {}
160  };
161 
162  const X86Subtarget *Subtarget = nullptr;
163  MachineRegisterInfo *MRI = nullptr;
164  const X86InstrInfo *TII = nullptr;
165  const TargetRegisterInfo *TRI = nullptr;
166 
167  Optional<PredState> PS;
168 
169  void hardenEdgesWithLFENCE(MachineFunction &MF);
170 
171  SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
172 
173  SmallVector<MachineInstr *, 16>
174  tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
175 
176  void unfoldCallAndJumpLoads(MachineFunction &MF);
177 
178  SmallVector<MachineInstr *, 16>
179  tracePredStateThroughIndirectBranches(MachineFunction &MF);
180 
181  void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
182 
183  unsigned saveEFLAGS(MachineBasicBlock &MBB,
184  MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
185  void restoreEFLAGS(MachineBasicBlock &MBB,
186  MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
187  Register Reg);
188 
189  void mergePredStateIntoSP(MachineBasicBlock &MBB,
190  MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
191  unsigned PredStateReg);
192  unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
193  MachineBasicBlock::iterator InsertPt,
194  DebugLoc Loc);
195 
196  void
197  hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
198  MachineOperand &IndexMO,
199  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
200  MachineInstr *
201  sinkPostLoadHardenedInst(MachineInstr &MI,
202  SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
203  bool canHardenRegister(Register Reg);
204  unsigned hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
205  MachineBasicBlock::iterator InsertPt,
206  DebugLoc Loc);
207  unsigned hardenPostLoad(MachineInstr &MI);
208  void hardenReturnInstr(MachineInstr &MI);
209  void tracePredStateThroughCall(MachineInstr &MI);
210  void hardenIndirectCallOrJumpInstr(
211  MachineInstr &MI,
212  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
213 };
214 
215 } // end anonymous namespace
216 
217 char X86SpeculativeLoadHardeningPass::ID = 0;
218 
219 void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
220  AnalysisUsage &AU) const {
221  MachineFunctionPass::getAnalysisUsage(AU);
222 }
223 
224 static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
225  MachineBasicBlock &Succ, int SuccCount,
226  MachineInstr *Br, MachineInstr *&UncondBr,
227  const X86InstrInfo &TII) {
228  assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
229 
230  MachineFunction &MF = *MBB.getParent();
231 
232  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
233 
234  // We have to insert the new block immediately after the current one as we
235  // don't know what layout-successor relationships the successor has and we
236  // may not be able to (and generally don't want to) try to fix those up.
237  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
238 
239  // Update the branch instruction if necessary.
240  if (Br) {
241  assert(Br->getOperand(0).getMBB() == &Succ &&
242  "Didn't start with the right target!");
243  Br->getOperand(0).setMBB(&NewMBB);
244 
245  // If this successor was reached through a branch rather than fallthrough,
246  // we might have *broken* fallthrough and so need to inject a new
247  // unconditional branch.
248  if (!UncondBr) {
249  MachineBasicBlock &OldLayoutSucc =
250  *std::next(MachineFunction::iterator(&NewMBB));
251  assert(MBB.isSuccessor(&OldLayoutSucc) &&
252  "Without an unconditional branch, the old layout successor should "
253  "be an actual successor!");
254  auto BrBuilder =
255  BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
256  // Update the unconditional branch now that we've added one.
257  UncondBr = &*BrBuilder;
258  }
259 
260  // Insert unconditional "jump Succ" instruction in the new block if
261  // necessary.
262  if (!NewMBB.isLayoutSuccessor(&Succ)) {
263  SmallVector<MachineOperand, 4> Cond;
264  TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
265  }
266  } else {
267  assert(!UncondBr &&
268  "Cannot have a branchless successor and an unconditional branch!");
269  assert(NewMBB.isLayoutSuccessor(&Succ) &&
270  "A non-branch successor must have been a layout successor before "
271  "and now is a layout successor of the new block.");
272  }
273 
274  // If this is the only edge to the successor, we can just replace it in the
275  // CFG. Otherwise we need to add a new entry in the CFG for the new
276  // successor.
277  if (SuccCount == 1) {
278  MBB.replaceSuccessor(&Succ, &NewMBB);
279  } else {
280  MBB.splitSuccessor(&Succ, &NewMBB);
281  }
282 
283  // Hook up the edge from the new basic block to the old successor in the CFG.
284  NewMBB.addSuccessor(&Succ);
285 
286  // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
287  for (MachineInstr &MI : Succ) {
288  if (!MI.isPHI())
289  break;
290  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
291  OpIdx += 2) {
292  MachineOperand &OpV = MI.getOperand(OpIdx);
293  MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
294  assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
295  if (OpMBB.getMBB() != &MBB)
296  continue;
297 
298  // If this is the last edge to the successor, just replace MBB in the PHI.
299  if (SuccCount == 1) {
300  OpMBB.setMBB(&NewMBB);
301  break;
302  }
303 
304  // Otherwise, append a new pair of operands for the new incoming edge.
305  MI.addOperand(MF, OpV);
306  MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
307  break;
308  }
309  }
310 
311  // Inherit live-ins from the successor
312  for (auto &LI : Succ.liveins())
313  NewMBB.addLiveIn(LI);
314 
315  LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
316  << Succ.getName() << "'.\n");
317  return NewMBB;
318 }
319 
320 /// Remove duplicate PHI operands to leave the PHI in a canonical and
321 /// predictable form.
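///
/// For illustration, a MIR PHI such as
///   %p:gr64 = PHI %a, %bb.1, %a, %bb.1, %b, %bb.2
/// carries two entries for predecessor %bb.1 and is rewritten here to
///   %p:gr64 = PHI %a, %bb.1, %b, %bb.2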
322 ///
323 /// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
324 /// isn't what you might expect. We may have multiple entries in PHI nodes for
325 /// a single predecessor. This makes CFG-updating extremely complex, so here we
326 /// simplify all PHI nodes to a model even simpler than the IR's model: exactly
327 /// one entry per predecessor, regardless of how many edges there are.
328 static void canonicalizePHIOperands(MachineFunction &MF) {
329  SmallPtrSet<MachineBasicBlock *, 4> Preds;
330  SmallVector<int, 4> DupIndices;
331  for (auto &MBB : MF)
332  for (auto &MI : MBB) {
333  if (!MI.isPHI())
334  break;
335 
336  // First we scan the operands of the PHI looking for duplicate entries
337  // for a particular predecessor. We retain the operand index of each duplicate
338  // entry found.
339  for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
340  OpIdx += 2)
341  if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
342  DupIndices.push_back(OpIdx);
343 
344  // Now walk the duplicate indices, removing both the block and value. Note
345  // that these are stored as a vector making this element-wise removal
347  // potentially quadratic.
348  //
349  // FIXME: It is really frustrating that we have to use a quadratic
350  // removal algorithm here. There should be a better way, but the use-def
351  // updates required make that impossible using the public API.
352  //
353  // Note that we have to process these backwards so that we don't
354  // invalidate other indices with each removal.
355  while (!DupIndices.empty()) {
356  int OpIdx = DupIndices.pop_back_val();
357  // Remove both the block and value operand, again in reverse order to
358  // preserve indices.
359  MI.RemoveOperand(OpIdx + 1);
360  MI.RemoveOperand(OpIdx);
361  }
362 
363  Preds.clear();
364  }
365 }
366 
367 /// Helper to scan a function for loads vulnerable to misspeculation that we
368 /// want to harden.
369 ///
370 /// We use this to avoid making changes to functions where there is nothing we
371 /// need to do to harden against misspeculation.
372 static bool hasVulnerableLoad(MachineFunction &MF) {
373  for (MachineBasicBlock &MBB : MF) {
374  for (MachineInstr &MI : MBB) {
375  // Loads within this basic block after an LFENCE are not at risk of
376  // speculatively executing with invalid predicates from prior control
377  // flow. So break out of this block but continue scanning the function.
378  if (MI.getOpcode() == X86::LFENCE)
379  break;
380 
381  // Looking for loads only.
382  if (!MI.mayLoad())
383  continue;
384 
385  // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
386  if (MI.getOpcode() == X86::MFENCE)
387  continue;
388 
389  // We found a load.
390  return true;
391  }
392  }
393 
394  // No loads found.
395  return false;
396 }
397 
398 bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
399  MachineFunction &MF) {
400  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
401  << " **********\n");
402 
403  // Only run if this pass is force-enabled or we detect the relevant function
404  // attribute requesting SLH.
405  if (!EnableSpeculativeLoadHardening &&
406  !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
407  return false;
408 
409  Subtarget = &MF.getSubtarget<X86Subtarget>();
410  MRI = &MF.getRegInfo();
411  TII = Subtarget->getInstrInfo();
412  TRI = Subtarget->getRegisterInfo();
413 
414  // FIXME: Support for 32-bit.
415  PS.emplace(MF, &X86::GR64_NOSPRegClass);
416 
417  if (MF.begin() == MF.end())
418  // Nothing to do for a degenerate empty function...
419  return false;
420 
421  // We support an alternative hardening technique based on a debug flag.
422  if (HardenEdgesWithLFENCE) {
423  hardenEdgesWithLFENCE(MF);
424  return true;
425  }
426 
427  // Create a dummy debug loc to use for all the generated code here.
428  DebugLoc Loc;
429 
430  MachineBasicBlock &Entry = *MF.begin();
431  auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
432 
433  // Do a quick scan to see if we have any checkable loads.
434  bool HasVulnerableLoad = hasVulnerableLoad(MF);
435 
436  // See if we have any conditional branching blocks that we will need to trace
437  // predicate state through.
438  SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
439 
440  // If we have no interesting conditions or loads, nothing to do here.
441  if (!HasVulnerableLoad && Infos.empty())
442  return true;
443 
444  // The poison value is required to be an all-ones value for many aspects of
445  // this mitigation.
446  const int PoisonVal = -1;
447  PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
448  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
449  .addImm(PoisonVal);
450  ++NumInstsInserted;
451 
452  // If we have loads being hardened and we've asked for call and ret edges to
453  // get a full fence-based mitigation, inject that fence.
454  if (HasVulnerableLoad && FenceCallAndRet) {
455  // We need to insert an LFENCE at the start of the function to suspend any
456  // incoming misspeculation from the caller. This helps two-fold: the caller
457  // may not have been protected as this code has been, and this code gets to
458  // not take any specific action to protect across calls.
459  // FIXME: We could skip this for functions which unconditionally return
460  // a constant.
461  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
462  ++NumInstsInserted;
463  ++NumLFENCEsInserted;
464  }
465 
466  // If we guarded the entry with an LFENCE and have no conditionals to protect
467  // in blocks, then we're done.
468  if (FenceCallAndRet && Infos.empty())
469  // We may have changed the function's code at this point to insert fences.
470  return true;
471 
473  if (HardenInterprocedurally && !FenceCallAndRet) {
474  // Set up the predicate state by extracting it from the incoming stack
475  // pointer so we pick up any misspeculation in our caller.
476  PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
477  } else {
478  // Otherwise, just build the predicate state itself by zeroing a register
479  // as we don't need any initial state.
480  PS->InitialReg = MRI->createVirtualRegister(PS->RC);
481  Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
482  auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
483  PredStateSubReg);
484  ++NumInstsInserted;
485  MachineOperand *ZeroEFLAGSDefOp =
486  ZeroI->findRegisterDefOperand(X86::EFLAGS);
487  assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
488  "Must have an implicit def of EFLAGS!");
489  ZeroEFLAGSDefOp->setIsDead(true);
490  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
491  PS->InitialReg)
492  .addImm(0)
493  .addReg(PredStateSubReg)
494  .addImm(X86::sub_32bit);
495  }
496 
497  // We're going to need to trace predicate state throughout the function's
498  // CFG. Prepare for this by setting up our initial state of PHIs with unique
499  // predecessor entries and all the initial predicate state.
500  canonicalizePHIOperands(MF);
501 
502  // Track the updated values in an SSA updater to rewrite into SSA form at the
503  // end.
504  PS->SSA.Initialize(PS->InitialReg);
505  PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
506 
507  // Trace through the CFG.
508  auto CMovs = tracePredStateThroughCFG(MF, Infos);
509 
510  // We may also enter basic blocks in this function via exception handling
511  // control flow. Here, if we are hardening interprocedurally, we need to
512  // re-capture the predicate state from the throwing code. In the Itanium ABI,
513  // the throw will always look like a call to __cxa_throw and will have the
514  // predicate state in the stack pointer, so extract fresh predicate state from
515  // the stack pointer and make it available in SSA.
516  // FIXME: Handle non-itanium ABI EH models.
517  if (HardenInterprocedurally) {
518  for (MachineBasicBlock &MBB : MF) {
519  assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
520  assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
521  assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
522  if (!MBB.isEHPad())
523  continue;
524  PS->SSA.AddAvailableValue(
525  &MBB,
526  extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
527  }
528  }
529 
530  if (HardenIndirectCallsAndJumps) {
531  // If we are going to harden calls and jumps we need to unfold their memory
532  // operands.
533  unfoldCallAndJumpLoads(MF);
534 
535  // Then we trace predicate state through the indirect branches.
536  auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
537  CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
538  }
539 
540  // Now that we have the predicate state available at the start of each block
541  // in the CFG, trace it through each block, hardening vulnerable instructions
542  // as we go.
543  tracePredStateThroughBlocksAndHarden(MF);
544 
545  // Now rewrite all the uses of the pred state using the SSA updater to insert
546  // PHIs connecting the state between blocks along the CFG edges.
547  for (MachineInstr *CMovI : CMovs)
548  for (MachineOperand &Op : CMovI->operands()) {
549  if (!Op.isReg() || Op.getReg() != PS->InitialReg)
550  continue;
551 
552  PS->SSA.RewriteUse(Op);
553  }
554 
555  LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
556  dbgs() << "\n"; MF.verify(this));
557  return true;
558 }
559 
560 /// Implements the naive hardening approach of putting an LFENCE after every
561 /// potentially mis-predicted control flow construct.
562 ///
563 /// We include this as an alternative mostly for the purpose of comparison. The
564 /// performance impact of this is expected to be extremely severe and not
565 /// practical for any real-world users.
566 void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
567  MachineFunction &MF) {
568  // First, we scan the function looking for blocks that are reached along edges
569  // that we might want to harden.
570  SmallSetVector<MachineBasicBlock *, 8> Blocks;
571  for (MachineBasicBlock &MBB : MF) {
572  // If there are no or only one successor, nothing to do here.
573  if (MBB.succ_size() <= 1)
574  continue;
575 
576  // Skip blocks unless their terminators start with a branch. Other
577  // terminators don't seem interesting for guarding against misspeculation.
578  auto TermIt = MBB.getFirstTerminator();
579  if (TermIt == MBB.end() || !TermIt->isBranch())
580  continue;
581 
582  // Add all the non-EH-pad successors to the blocks we want to harden. We
583  // skip EH pads because there isn't really a condition of interest on
584  // entering.
585  for (MachineBasicBlock *SuccMBB : MBB.successors())
586  if (!SuccMBB->isEHPad())
587  Blocks.insert(SuccMBB);
588  }
589 
590  for (MachineBasicBlock *MBB : Blocks) {
591  auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
592  BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
593  ++NumInstsInserted;
594  ++NumLFENCEsInserted;
595  }
596 }
597 
598 SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
599 X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
600  SmallVector<BlockCondInfo, 16> Infos;
601 
602  // Walk the function and build up a summary for each block's conditions that
603  // we need to trace through.
604  for (MachineBasicBlock &MBB : MF) {
605  // If there are no or only one successor, nothing to do here.
606  if (MBB.succ_size() <= 1)
607  continue;
608 
609  // We want to reliably handle any conditional branch terminators in the
610  // MBB, so we manually analyze the branch. We can handle all of the
611  // permutations here, including ones that analyze branch cannot.
612  //
613  // The approach is to walk backwards across the terminators, resetting at
614  // any unconditional non-indirect branch, and track all conditional edges
615  // to basic blocks as well as the fallthrough or unconditional successor
616  // edge. For each conditional edge, we track the target and the opposite
617  // condition code in order to inject a "no-op" cmov into that successor
618  // that will harden the predicate. For the fallthrough/unconditional
619  // edge, we inject a separate cmov for each conditional branch with
620  // matching condition codes. This effectively implements an "and" of the
621  // condition flags, even if there isn't a single condition flag that would
622  // directly implement that. We don't bother trying to optimize either of
623  // these cases because if such an optimization is possible, LLVM should
624  // have optimized the conditional *branches* in that way already to reduce
625  // instruction count. This late, we simply assume the minimal number of
626  // branch instructions is being emitted and use that to guide our cmov
627  // insertion.
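 //
 // For illustration, a block ending in:
 //   jne %bb.then
 //   jmp %bb.else
 // yields a BlockCondInfo with CondBrs = {the jne} and UncondBr = the jmp;
 // the checking block inserted on the edge to %bb.then later gets a cmov on
 // COND_E and the one on the edge to %bb.else a cmov on COND_NE, each
 // selecting the poison value when EFLAGS contradict the edge actually taken.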
628 
629  BlockCondInfo Info = {&MBB, {}, nullptr};
630 
631  // Now walk backwards through the terminators and build up successors they
632  // reach and the conditions.
633  for (MachineInstr &MI : llvm::reverse(MBB)) {
634  // Once we've handled all the terminators, we're done.
635  if (!MI.isTerminator())
636  break;
637 
638  // If we see a non-branch terminator, we can't handle anything so bail.
639  if (!MI.isBranch()) {
640  Info.CondBrs.clear();
641  break;
642  }
643 
644  // If we see an unconditional branch, reset our state, clear any
645  // fallthrough, and set this is the "else" successor.
646  if (MI.getOpcode() == X86::JMP_1) {
647  Info.CondBrs.clear();
648  Info.UncondBr = &MI;
649  continue;
650  }
651 
652  // If we get an invalid condition, we have an indirect branch or some
653  // other unanalyzable "fallthrough" case. We model this as a nullptr for
654  // the destination so we can still guard any conditional successors.
655  // Consider code sequences like:
656  // ```
657  // jCC L1
658  // jmpq *%rax
659  // ```
660  // We still want to harden the edge to `L1`.
661  if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
662  Info.CondBrs.clear();
663  Info.UncondBr = &MI;
664  continue;
665  }
666 
667  // We have a vanilla conditional branch, add it to our list.
668  Info.CondBrs.push_back(&MI);
669  }
670  if (Info.CondBrs.empty()) {
671  ++NumBranchesUntraced;
672  LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
673  MBB.dump());
674  continue;
675  }
676 
677  Infos.push_back(Info);
678  }
679 
680  return Infos;
681 }
682 
683 /// Trace the predicate state through the CFG, instrumenting each conditional
684 /// branch such that misspeculation through an edge will poison the predicate
685 /// state.
686 ///
687 /// Returns the list of inserted CMov instructions so that they can have their
688 /// uses of the predicate state rewritten into proper SSA form once it is
689 /// complete.
690 SmallVector<MachineInstr *, 16>
691 X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
692  MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
693  // Collect the inserted cmov instructions so we can rewrite their uses of the
694  // predicate state into SSA form.
695  SmallVector<MachineInstr *, 16> CMovs;
696 
697  // Now walk all of the basic blocks looking for ones that end in conditional
698  // jumps where we need to update this register along each edge.
699  for (const BlockCondInfo &Info : Infos) {
700  MachineBasicBlock &MBB = *Info.MBB;
701  const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
702  MachineInstr *UncondBr = Info.UncondBr;
703 
704  LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
705  << "\n");
706  ++NumCondBranchesTraced;
707 
708  // Compute the non-conditional successor as either the target of any
709  // unconditional branch or the layout successor.
710  MachineBasicBlock *UncondSucc =
711  UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
712  ? UncondBr->getOperand(0).getMBB()
713  : nullptr)
714  : &*std::next(MachineFunction::iterator(&MBB));
715 
716  // Count how many edges there are to any given successor.
717  SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
718  if (UncondSucc)
719  ++SuccCounts[UncondSucc];
720  for (auto *CondBr : CondBrs)
721  ++SuccCounts[CondBr->getOperand(0).getMBB()];
722 
723  // A lambda to insert cmov instructions into a block checking all of the
724  // condition codes in a sequence.
725  auto BuildCheckingBlockForSuccAndConds =
726  [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
727  MachineInstr *Br, MachineInstr *&UncondBr,
728  ArrayRef<X86::CondCode> Conds) {
729  // First, we split the edge to insert the checking block into a safe
730  // location.
731  auto &CheckingMBB =
732  (SuccCount == 1 && Succ.pred_size() == 1)
733  ? Succ
734  : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
735 
736  bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
737  if (!LiveEFLAGS)
738  CheckingMBB.addLiveIn(X86::EFLAGS);
739 
740  // Now insert the cmovs to implement the checks.
741  auto InsertPt = CheckingMBB.begin();
742  assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
743  "Should never have a PHI in the initial checking block as it "
744  "always has a single predecessor!");
745 
746  // We will wire each cmov to each other, but need to start with the
747  // incoming pred state.
748  unsigned CurStateReg = PS->InitialReg;
749 
750  for (X86::CondCode Cond : Conds) {
751  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
752  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
753 
754  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
755  // Note that we intentionally use an empty debug location so that
756  // this picks up the preceding location.
757  auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
758  TII->get(CMovOp), UpdatedStateReg)
759  .addReg(CurStateReg)
760  .addReg(PS->PoisonReg)
761  .addImm(Cond);
762  // If this is the last cmov and the EFLAGS weren't originally
763  // live-in, mark them as killed.
764  if (!LiveEFLAGS && Cond == Conds.back())
765  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
766 
767  ++NumInstsInserted;
768  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
769  dbgs() << "\n");
770 
771  // The first one of the cmovs will be using the top level
772  // `PredStateReg` and need to get rewritten into SSA form.
773  if (CurStateReg == PS->InitialReg)
774  CMovs.push_back(&*CMovI);
775 
776  // The next cmov should start from this one's def.
777  CurStateReg = UpdatedStateReg;
778  }
779 
780  // And put the last one into the available values for SSA form of our
781  // predicate state.
782  PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
783  };
784 
785  std::vector<X86::CondCode> UncondCodeSeq;
786  for (auto *CondBr : CondBrs) {
787  MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
788  int &SuccCount = SuccCounts[&Succ];
789 
790  X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
791  X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
792  UncondCodeSeq.push_back(Cond);
793 
794  BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
795  {InvCond});
796 
797  // Decrement the successor count now that we've split one of the edges.
798  // We need to keep the count of edges to the successor accurate in order
799  // to know above when to *replace* the successor in the CFG vs. just
800  // adding the new successor.
801  --SuccCount;
802  }
803 
804  // Since we may have split edges and changed the number of successors,
805  // normalize the probabilities. This avoids doing it each time we split an
806  // edge.
807  MBB.normalizeSuccProbs();
808 
809  // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
810  // need to intersect the other condition codes. We can do this by just
811  // doing a cmov for each one.
812  if (!UncondSucc)
813  // If we have no fallthrough to protect (perhaps it is an indirect jump?)
814  // just skip this and continue.
815  continue;
816 
817  assert(SuccCounts[UncondSucc] == 1 &&
818  "We should never have more than one edge to the unconditional "
819  "successor at this point because every other edge must have been "
820  "split above!");
821 
822  // Sort and unique the codes to minimize them.
823  llvm::sort(UncondCodeSeq);
824  UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
825  UncondCodeSeq.end());
826 
827  // Build a checking version of the successor.
828  BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
829  UncondBr, UncondBr, UncondCodeSeq);
830  }
831 
832  return CMovs;
833 }
834 
835 /// Compute the register class for the unfolded load.
836 ///
837 /// FIXME: This should probably live in X86InstrInfo, potentially by adding
838 /// a way to unfold into a newly created vreg rather than requiring a register
839 /// input.
840 static const TargetRegisterClass *
841 getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
842  unsigned Opcode) {
843  unsigned Index;
844  unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
845  Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
846  const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
847  return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
848 }
849 
850 void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
851  MachineFunction &MF) {
852  for (MachineBasicBlock &MBB : MF)
853  for (auto MII = MBB.instr_begin(), MIE = MBB.instr_end(); MII != MIE;) {
854  // Grab a reference and increment the iterator so we can remove this
855  // instruction if needed without disturbing the iteration.
856  MachineInstr &MI = *MII++;
857 
858  // Must either be a call or a branch.
859  if (!MI.isCall() && !MI.isBranch())
860  continue;
861  // We only care about loading variants of these instructions.
862  if (!MI.mayLoad())
863  continue;
864 
865  switch (MI.getOpcode()) {
866  default: {
867  LLVM_DEBUG(
868  dbgs() << "ERROR: Found an unexpected loading branch or call "
869  "instruction:\n";
870  MI.dump(); dbgs() << "\n");
871  report_fatal_error("Unexpected loading branch or call!");
872  }
873 
874  case X86::FARCALL16m:
875  case X86::FARCALL32m:
876  case X86::FARCALL64m:
877  case X86::FARJMP16m:
878  case X86::FARJMP32m:
879  case X86::FARJMP64m:
880  // We cannot mitigate far jumps or calls, but we also don't expect them
881  // to be vulnerable to Spectre v1.2 style attacks.
882  continue;
883 
884  case X86::CALL16m:
885  case X86::CALL16m_NT:
886  case X86::CALL32m:
887  case X86::CALL32m_NT:
888  case X86::CALL64m:
889  case X86::CALL64m_NT:
890  case X86::JMP16m:
891  case X86::JMP16m_NT:
892  case X86::JMP32m:
893  case X86::JMP32m_NT:
894  case X86::JMP64m:
895  case X86::JMP64m_NT:
896  case X86::TAILJMPm64:
897  case X86::TAILJMPm64_REX:
898  case X86::TAILJMPm:
899  case X86::TCRETURNmi64:
900  case X86::TCRETURNmi: {
901  // Use the generic unfold logic now that we know we're dealing with
902  // expected instructions.
903  // FIXME: We don't have test coverage for all of these!
904  auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
905  if (!UnfoldedRC) {
906  LLVM_DEBUG(dbgs()
907  << "ERROR: Unable to unfold load from instruction:\n";
908  MI.dump(); dbgs() << "\n");
909  report_fatal_error("Unable to unfold load!");
910  }
911  Register Reg = MRI->createVirtualRegister(UnfoldedRC);
912  SmallVector<MachineInstr *, 2> NewMIs;
913  // If we were able to compute an unfolded reg class, any failure here
914  // is just a programming error so just assert.
915  bool Unfolded =
916  TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
917  /*UnfoldStore*/ false, NewMIs);
918  (void)Unfolded;
919  assert(Unfolded &&
920  "Computed unfolded register class but failed to unfold");
921  // Now stitch the new instructions into place and erase the old one.
922  for (auto *NewMI : NewMIs)
923  MBB.insert(MI.getIterator(), NewMI);
924 
925  // Update the call site info.
926  if (MI.isCandidateForCallSiteEntry())
927  MF.eraseCallSiteInfo(&MI);
928 
929  MI.eraseFromParent();
930  LLVM_DEBUG({
931  dbgs() << "Unfolded load successfully into:\n";
932  for (auto *NewMI : NewMIs) {
933  NewMI->dump();
934  dbgs() << "\n";
935  }
936  });
937  continue;
938  }
939  }
940  llvm_unreachable("Escaped switch with default!");
941  }
942 }
943 
944 /// Trace the predicate state through indirect branches, instrumenting them to
945 /// poison the state if a target is reached that does not match the expected
946 /// target.
947 ///
948 /// This is designed to mitigate Spectre variant 1 attacks where an indirect
949 /// branch is trained to predict a particular target and then mispredicts that
950 /// target in a way that can leak data. Despite using an indirect branch, this
951 /// is really a variant 1 style attack: it does not steer execution to an
952 /// arbitrary or attacker controlled address, and it does not require any
953 /// special code executing next to the victim. This attack can also be mitigated
954 /// through retpolines, but those require either replacing indirect branches
955 /// with conditional direct branches or lowering them through a device that
956 /// blocks speculation. This mitigation can replace these retpoline-style
957 /// mitigations for jump tables and other indirect branches within a function
958 /// when variant 2 isn't a risk while allowing limited speculation. Indirect
959 /// calls, however, cannot be mitigated through this technique without changing
960 /// the ABI in a fundamental way.
961 SmallVector<MachineInstr *, 16>
962 X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
963  MachineFunction &MF) {
964  // We use the SSAUpdater to insert PHI nodes for the target addresses of
965  // indirect branches. We don't actually need the full power of the SSA updater
966  // in this particular case as we always have immediately available values, but
967  // this avoids us having to re-implement the PHI construction logic.
968  MachineSSAUpdater TargetAddrSSA(MF);
969  TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
970 
971  // Track which blocks were terminated with an indirect branch.
972  SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
973 
974  // We need to know what blocks end up reached via indirect branches. We
975  // expect this to be a subset of those whose address is taken and so track it
976  // directly via the CFG.
977  SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
978 
979  // Walk all the blocks which end in an indirect branch and make the
980  // target address available.
981  for (MachineBasicBlock &MBB : MF) {
982  // Find the last terminator.
983  auto MII = MBB.instr_rbegin();
984  while (MII != MBB.instr_rend() && MII->isDebugInstr())
985  ++MII;
986  if (MII == MBB.instr_rend())
987  continue;
988  MachineInstr &TI = *MII;
989  if (!TI.isTerminator() || !TI.isBranch())
990  // No terminator or non-branch terminator.
991  continue;
992 
993  unsigned TargetReg;
994 
995  switch (TI.getOpcode()) {
996  default:
997  // Direct branch or conditional branch (leading to fallthrough).
998  continue;
999 
1000  case X86::FARJMP16m:
1001  case X86::FARJMP32m:
1002  case X86::FARJMP64m:
1003  // We cannot mitigate far jumps or calls, but we also don't expect them
1004  // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1005  continue;
1006 
1007  case X86::JMP16m:
1008  case X86::JMP16m_NT:
1009  case X86::JMP32m:
1010  case X86::JMP32m_NT:
1011  case X86::JMP64m:
1012  case X86::JMP64m_NT:
1013  // Mostly as documentation.
1014  report_fatal_error("Memory operand jumps should have been unfolded!");
1015 
1016  case X86::JMP16r:
1017  report_fatal_error(
1018  "Support for 16-bit indirect branches is not implemented.");
1019  case X86::JMP32r:
1020  report_fatal_error(
1021  "Support for 32-bit indirect branches is not implemented.");
1022 
1023  case X86::JMP64r:
1024  TargetReg = TI.getOperand(0).getReg();
1025  }
1026 
1027  // We have definitely found an indirect branch. Verify that there are no
1028  // preceding conditional branches as we don't yet support that.
1029  if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1030  return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1031  })) {
1032  LLVM_DEBUG({
1033  dbgs() << "ERROR: Found other terminators in a block with an indirect "
1034  "branch! This is not yet supported! Terminator sequence:\n";
1035  for (MachineInstr &MI : MBB.terminators()) {
1036  MI.dump();
1037  dbgs() << '\n';
1038  }
1039  });
1040  report_fatal_error("Unimplemented terminator sequence!");
1041  }
1042 
1043  // Make the target register an available value for this block.
1044  TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1045  IndirectTerminatedMBBs.insert(&MBB);
1046 
1047  // Add all the successors to our target candidates.
1048  for (MachineBasicBlock *Succ : MBB.successors())
1049  IndirectTargetMBBs.insert(Succ);
1050  }
1051 
1052  // Keep track of the cmov instructions we insert so we can return them.
1053  SmallVector<MachineInstr *, 16> CMovs;
1054 
1055  // If we didn't find any indirect branches with targets, nothing to do here.
1056  if (IndirectTargetMBBs.empty())
1057  return CMovs;
1058 
1059  // We found indirect branches and targets that need to be instrumented to
1060  // harden loads within them. Walk the blocks of the function (to get a stable
1061  // ordering) and instrument each target of an indirect branch.
1062  for (MachineBasicBlock &MBB : MF) {
1063  // Skip the blocks that aren't candidate targets.
1064  if (!IndirectTargetMBBs.count(&MBB))
1065  continue;
1066 
1067  // We don't expect EH pads to ever be reached via an indirect branch. If
1068  // this is desired for some reason, we could simply skip them here rather
1069  // than asserting.
1070  assert(!MBB.isEHPad() &&
1071  "Unexpected EH pad as target of an indirect branch!");
1072 
1073  // We should never end up threading EFLAGS into a block to harden
1074  // conditional jumps as there would be an additional successor via the
1075  // indirect branch. As a consequence, all such edges would be split before
1076  // reaching here, and the inserted block will handle the EFLAGS-based
1077  // hardening.
1078  assert(!MBB.isLiveIn(X86::EFLAGS) &&
1079  "Cannot check within a block that already has live-in EFLAGS!");
1080 
1081  // We can't handle having non-indirect edges into this block unless this is
1082  // the only successor and we can synthesize the necessary target address.
1083  for (MachineBasicBlock *Pred : MBB.predecessors()) {
1084  // If we've already handled this by extracting the target directly,
1085  // nothing to do.
1086  if (IndirectTerminatedMBBs.count(Pred))
1087  continue;
1088 
1089  // Otherwise, we have to be the only successor. We generally expect this
1090  // to be true as conditional branches should have had a critical edge
1091  // split already. We don't however need to worry about EH pad successors
1092  // as they'll happily ignore the target and their hardening strategy is
1093  // resilient to all ways in which they could be reached speculatively.
1094  if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1095  return Succ->isEHPad() || Succ == &MBB;
1096  })) {
1097  LLVM_DEBUG({
1098  dbgs() << "ERROR: Found conditional entry to target of indirect "
1099  "branch!\n";
1100  Pred->dump();
1101  MBB.dump();
1102  });
1103  report_fatal_error("Cannot harden a conditional entry to a target of "
1104  "an indirect branch!");
1105  }
1106 
1107  // Now we need to compute the address of this block and install it as a
1108  // synthetic target in the predecessor. We do this at the bottom of the
1109  // predecessor.
1110  auto InsertPt = Pred->getFirstTerminator();
1111  Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1112  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1113  !Subtarget->isPositionIndependent()) {
1114  // Directly materialize it into an immediate.
1115  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1116  TII->get(X86::MOV64ri32), TargetReg)
1117  .addMBB(&MBB);
1118  ++NumInstsInserted;
1119  (void)AddrI;
1120  LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1121  dbgs() << "\n");
1122  } else {
1123  auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1124  TargetReg)
1125  .addReg(/*Base*/ X86::RIP)
1126  .addImm(/*Scale*/ 1)
1127  .addReg(/*Index*/ 0)
1128  .addMBB(&MBB)
1129  .addReg(/*Segment*/ 0);
1130  ++NumInstsInserted;
1131  (void)AddrI;
1132  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1133  dbgs() << "\n");
1134  }
1135  // And make this available.
1136  TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1137  }
1138 
1139  // Materialize the needed SSA value of the target. Note that we need the
1140  // middle of the block as this block might at the bottom have an indirect
1141  // branch back to itself. We can do this here because at this point, every
1142  // predecessor of this block has an available value. This is basically just
1143  // automating the construction of a PHI node for this target.
1144  unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1145 
1146  // Insert a comparison of the incoming target register with this block's
1147  // address. This also requires us to mark the block as having its address
1148  // taken explicitly.
1149  MBB.setHasAddressTaken();
1150  auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1151  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1152  !Subtarget->isPositionIndependent()) {
1153  // Check directly against a relocated immediate when we can.
1154  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1155  .addReg(TargetReg, RegState::Kill)
1156  .addMBB(&MBB);
1157  ++NumInstsInserted;
1158  (void)CheckI;
1159  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1160  } else {
1161  // Otherwise compute the address into a register first.
1162  Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1163  auto AddrI =
1164  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1165  .addReg(/*Base*/ X86::RIP)
1166  .addImm(/*Scale*/ 1)
1167  .addReg(/*Index*/ 0)
1168  .addMBB(&MBB)
1169  .addReg(/*Segment*/ 0);
1170  ++NumInstsInserted;
1171  (void)AddrI;
1172  LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1173  auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1174  .addReg(TargetReg, RegState::Kill)
1175  .addReg(AddrReg, RegState::Kill);
1176  ++NumInstsInserted;
1177  (void)CheckI;
1178  LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1179  }
1180 
1181  // Now cmov over the predicate if the comparison wasn't equal.
1182  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1183  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1184  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1185  auto CMovI =
1186  BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1187  .addReg(PS->InitialReg)
1188  .addReg(PS->PoisonReg)
1189  .addImm(X86::COND_NE);
1190  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
1191  ++NumInstsInserted;
1192  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1193  CMovs.push_back(&*CMovI);
1194 
1195  // And put the new value into the available values for SSA form of our
1196  // predicate state.
1197  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1198  }
1199 
1200  // Return all the newly inserted cmov instructions of the predicate state.
1201  return CMovs;
1202 }
1203 
1204 // Returns true if the MI has EFLAGS as a register def operand and it's live,
1205 // otherwise it returns false
1206 static bool isEFLAGSDefLive(const MachineInstr &MI) {
1207  if (const MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1208  return !DefOp->isDead();
1209  }
1210  return false;
1211 }
1212 
1213 static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
1214  const TargetRegisterInfo &TRI) {
1215  // Check if EFLAGS are alive by seeing if there is a def of them or they
1216  // live-in, and then seeing if that def is in turn used.
1217  for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1218  if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) {
1219  // If the def is dead, then EFLAGS is not live.
1220  if (DefOp->isDead())
1221  return false;
1222 
1223  // Otherwise we've def'ed it, and it is live.
1224  return true;
1225  }
1226  // While at this instruction, also check if we use and kill EFLAGS
1227  // which means it isn't live.
1228  if (MI.killsRegister(X86::EFLAGS, &TRI))
1229  return false;
1230  }
1231 
1232  // If we didn't find anything conclusive (neither definitely alive or
1233  // definitely dead) return whether it lives into the block.
1234  return MBB.isLiveIn(X86::EFLAGS);
1235 }
1236 
1237 /// Trace the predicate state through each of the blocks in the function,
1238 /// hardening everything necessary along the way.
1239 ///
1240 /// We call this routine once the initial predicate state has been established
1241 /// for each basic block in the function in the SSA updater. This routine traces
1242 /// it through the instructions within each basic block, and for non-returning
1243 /// blocks informs the SSA updater about the final state that lives out of the
1244 /// block. Along the way, it hardens any vulnerable instruction using the
1245 /// currently valid predicate state. We have to do these two things together
1246 /// because the SSA updater only works across blocks. Within a block, we track
1247 /// the current predicate state directly and update it as it changes.
1248 ///
1249 /// This operates in two passes over each block. First, we analyze the loads in
1250 /// the block to determine which strategy will be used to harden them: hardening
1251 /// the address or hardening the loaded value when loaded into a register
1252 /// amenable to hardening. We have to process these first because the two
1253 /// strategies may interact -- later hardening may change what strategy we wish
1254 /// to use. We also will analyze data dependencies between loads and avoid
1255 /// hardening those loads that are data dependent on a load with a hardened
1256 /// address. We also skip hardening loads already behind an LFENCE as that is
1257 /// sufficient to harden them against misspeculation.
1258 ///
1259 /// Second, we actively trace the predicate state through the block, applying
1260 /// the hardening steps we determined necessary in the first pass as we go.
1261 ///
1262 /// These two passes are applied to each basic block. We operate one block at a
1263 /// time to simplify reasoning about reachability and sequencing.
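///
/// As a rough sketch (simplified, not literal output), for a load such as
///   %v:gr64 = MOV64rm %base, 1, %index, 0, $noreg
/// address hardening ORs the all-ones misspeculation state into %base (and
/// %index) before the load executes, while post-load hardening instead ORs
/// the state into %v afterwards, flushing the loaded bits to ones on a
/// misspeculated path.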
1264 void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1265  MachineFunction &MF) {
1266  SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1267  SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1268 
1269  SmallSet<unsigned, 16> HardenedAddrRegs;
1270 
1271  SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg;
1272 
1273  // Track the set of load-dependent registers through the basic block. Because
1274  // the values of these registers have an existing data dependency on a loaded
1275  // value which we would have checked, we can omit any checks on them.
1276  SparseBitVector<> LoadDepRegs;
1277 
1278  for (MachineBasicBlock &MBB : MF) {
1279  // The first pass over the block: collect all the loads which can have their
1280  // loaded value hardened and all the loads that instead need their address
1281  // hardened. During this walk we propagate load dependence for address
1282  // hardened loads and also look for LFENCE to stop hardening wherever
1283  // possible. When deciding whether or not to harden the loaded value or not,
1284  // we check to see if any registers used in the address will have been
1285  // hardened at this point and if so, harden any remaining address registers
1286  // as that often successfully re-uses hardened addresses and minimizes
1287  // instructions.
1288  //
1289  // FIXME: We should consider an aggressive mode where we continue to keep as
1290  // many loads value hardened even when some address register hardening would
1291  // be free (due to reuse).
1292  //
1293  // Note that we only need this pass if we are actually hardening loads.
1294  if (HardenLoads)
1295  for (MachineInstr &MI : MBB) {
1296  // We naively assume that all def'ed registers of an instruction have
1297  // a data dependency on all of their operands.
1298  // FIXME: Do a more careful analysis of x86 to build a conservative
1299  // model here.
1300  if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1301  return Op.isReg() && LoadDepRegs.test(Op.getReg());
1302  }))
1303  for (MachineOperand &Def : MI.defs())
1304  if (Def.isReg())
1305  LoadDepRegs.set(Def.getReg());
1306 
1307  // Both Intel and AMD are guiding that they will change the semantics of
1308  // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1309  // no more need to guard things in this block.
1310  if (MI.getOpcode() == X86::LFENCE)
1311  break;
1312 
1313  // If this instruction cannot load, nothing to do.
1314  if (!MI.mayLoad())
1315  continue;
1316 
1317  // Some instructions which "load" are trivially safe or unimportant.
1318  if (MI.getOpcode() == X86::MFENCE)
1319  continue;
1320 
1321  // Extract the memory operand information about this instruction.
1322  // FIXME: This doesn't handle loading pseudo instructions which we often
1323  // could handle with similarly generic logic. We probably need to add an
1324  // MI-layer routine similar to the MC-layer one we use here which maps
1325  // pseudos much like this maps real instructions.
1326  const MCInstrDesc &Desc = MI.getDesc();
1327  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1328  if (MemRefBeginIdx < 0) {
1329  LLVM_DEBUG(dbgs()
1330  << "WARNING: unable to harden loading instruction: ";
1331  MI.dump());
1332  continue;
1333  }
1334 
1335  MemRefBeginIdx += X86II::getOperandBias(Desc);
1336 
1337  MachineOperand &BaseMO =
1338  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1339  MachineOperand &IndexMO =
1340  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1341 
1342  // If we have at least one (non-frame-index, non-RIP) register operand,
1343  // and neither operand is load-dependent, we need to check the load.
1344  unsigned BaseReg = 0, IndexReg = 0;
1345  if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1346  BaseMO.getReg() != X86::NoRegister)
1347  BaseReg = BaseMO.getReg();
1348  if (IndexMO.getReg() != X86::NoRegister)
1349  IndexReg = IndexMO.getReg();
1350 
1351  if (!BaseReg && !IndexReg)
1352  // No register operands!
1353  continue;
1354 
1355  // If any register operand is dependent, this load is dependent and we
1356  // needn't check it.
1357  // FIXME: Is this true in the case where we are hardening loads after
1358  // they complete? Unclear, need to investigate.
1359  if ((BaseReg && LoadDepRegs.test(BaseReg)) ||
1360  (IndexReg && LoadDepRegs.test(IndexReg)))
1361  continue;
1362 
1363  // If post-load hardening is enabled, this load is compatible with
1364  // post-load hardening, and we aren't already going to harden one of the
1365  // address registers, queue it up to be hardened post-load. Notably,
1366  // even once hardened this won't introduce a useful dependency that
1367  // could prune out subsequent loads.
1368  if (EnablePostLoadHardening && X86InstrInfo::isDataInvariantLoad(MI) &&
1369  !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1370  MI.getOperand(0).isReg() &&
1371  canHardenRegister(MI.getOperand(0).getReg()) &&
1372  !HardenedAddrRegs.count(BaseReg) &&
1373  !HardenedAddrRegs.count(IndexReg)) {
1374  HardenPostLoad.insert(&MI);
1375  HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1376  continue;
1377  }
1378 
1379  // Record this instruction for address hardening and record its register
1380  // operands as being address-hardened.
1381  HardenLoadAddr.insert(&MI);
1382  if (BaseReg)
1383  HardenedAddrRegs.insert(BaseReg);
1384  if (IndexReg)
1385  HardenedAddrRegs.insert(IndexReg);
1386 
1387  for (MachineOperand &Def : MI.defs())
1388  if (Def.isReg())
1389  LoadDepRegs.set(Def.getReg());
1390  }
1391 
1392  // Now re-walk the instructions in the basic block, and apply whichever
1393  // hardening strategy we have elected. Note that we do this in a second
1394  // pass specifically so that we have the complete set of instructions for
1395  // which we will do post-load hardening and can defer it in certain
1396  // circumstances.
1397  for (MachineInstr &MI : MBB) {
1398  if (HardenLoads) {
1399  // We cannot both require hardening the def of a load and its address.
1400  assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1401  "Requested to harden both the address and def of a load!");
1402 
1403  // Check if this is a load whose address needs to be hardened.
1404  if (HardenLoadAddr.erase(&MI)) {
1405  const MCInstrDesc &Desc = MI.getDesc();
1406  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1407  assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1408 
1409  MemRefBeginIdx += X86II::getOperandBias(Desc);
1410 
1411  MachineOperand &BaseMO =
1412  MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1413  MachineOperand &IndexMO =
1414  MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1415  hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1416  continue;
1417  }
1418 
1419  // Test if this instruction is one of our post load instructions (and
1420  // remove it from the set if so).
1421  if (HardenPostLoad.erase(&MI)) {
1422  assert(!MI.isCall() && "Must not try to post-load harden a call!");
1423 
1424  // If this is a data-invariant load and there is no EFLAGS
1425  // interference, we want to try and sink any hardening as far as
1426  // possible.
1427  if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1428  // Sink the instruction we'll need to harden as far as we can down
1429  // the graph.
1430  MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1431 
1432  // If we managed to sink this instruction, update everything so we
1433  // harden that instruction when we reach it in the instruction
1434  // sequence.
1435  if (SunkMI != &MI) {
1436  // If in sinking there was no instruction needing to be hardened,
1437  // we're done.
1438  if (!SunkMI)
1439  continue;
1440 
1441  // Otherwise, add this to the set of defs we harden.
1442  HardenPostLoad.insert(SunkMI);
1443  continue;
1444  }
1445  }
1446 
1447  unsigned HardenedReg = hardenPostLoad(MI);
1448 
1449  // Mark the resulting hardened register as such so we don't re-harden.
1450  AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1451 
1452  continue;
1453  }
1454 
1455  // Check for an indirect call or branch that may need its input hardened
1456  // even if we couldn't find the specific load used, or were able to
1457  // avoid hardening it for some reason. Note that here we cannot break
1458  // out afterward as we may still need to handle any call aspect of this
1459  // instruction.
1460  if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1461  hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1462  }
1463 
1464  // After we finish hardening loads we handle interprocedural hardening if
1465  // enabled and relevant for this instruction.
1466  if (!HardenInterprocedurally)
1467  continue;
1468  if (!MI.isCall() && !MI.isReturn())
1469  continue;
1470 
1471  // If this is a direct return (i.e., not a tail call) just directly harden
1472  // it.
1473  if (MI.isReturn() && !MI.isCall()) {
1474  hardenReturnInstr(MI);
1475  continue;
1476  }
1477 
1478  // Otherwise we have a call. We need to handle transferring the predicate
1479  // state into a call and recovering it after the call returns (unless this
1480  // is a tail call).
1481  assert(MI.isCall() && "Should only reach here for calls!");
1482  tracePredStateThroughCall(MI);
1483  }
1484 
1485  HardenPostLoad.clear();
1486  HardenLoadAddr.clear();
1487  HardenedAddrRegs.clear();
1488  AddrRegToHardenedReg.clear();
1489 
1490  // Currently, we only track data-dependent loads within a basic block.
1491  // FIXME: We should see if this is necessary or if we could be more
1492  // aggressive here without opening up attack avenues.
1493  LoadDepRegs.clear();
1494  }
1495 }
1496 
1497 /// Save EFLAGS into the returned GPR. This can in turn be restored with
1498 /// `restoreEFLAGS`.
1499 ///
1500 /// Note that LLVM can only lower very simple patterns of saved and restored
1501 /// EFLAGS registers. The restore should always be within the same basic block
1502 /// as the save so that no PHI nodes are inserted.
1503 unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
1504  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1505  DebugLoc Loc) {
1506  // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1507  // what instruction selection does.
1508  Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1509  // We directly copy the FLAGS register and rely on later lowering to clean
1510  // this up into the appropriate setCC instructions.
1511  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1512  ++NumInstsInserted;
1513  return Reg;
1514 }
1515 
1516 /// Restore EFLAGS from the provided GPR. This should be produced by
1517 /// `saveEFLAGS`.
1518 ///
1519 /// This must be done within the same basic block as the save in order to
1520 /// reliably lower.
1521 void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1522  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1523  Register Reg) {
1524  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1525  ++NumInstsInserted;
1526 }
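// A rough usage sketch: any hardening sequence that clobbers EFLAGS while they
// are live is bracketed by these two helpers (this is the pattern used below in
// hardenLoadAddr and hardenValueInRegister).
//
//   unsigned FlagsReg = 0;
//   if (isEFLAGSLive(MBB, InsertPt, *TRI))
//     FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
//   // ... emit flag-clobbering hardening instructions ...
//   if (FlagsReg)
//     restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);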
1527 
1528 /// Takes the current predicate state (in a register) and merges it into the
1529 /// stack pointer. The state is essentially a single bit, but we merge this in
1530 /// a way that won't form non-canonical pointers and also will be preserved
1531 /// across normal stack adjustments.
1532 void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1533  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
1534  unsigned PredStateReg) {
1535  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1536  // FIXME: This hard codes a shift distance based on the number of bits needed
1537  // to stay canonical on 64-bit. We should compute this somehow and support
1538  // 32-bit as part of that.
1539  auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1540  .addReg(PredStateReg, RegState::Kill)
1541  .addImm(47);
1542  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1543  ++NumInstsInserted;
1544  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1545  .addReg(X86::RSP)
1546  .addReg(TmpReg, RegState::Kill);
1547  OrI->addRegisterDead(X86::EFLAGS, TRI);
1548  ++NumInstsInserted;
1549 }
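// A rough sketch of what the two instructions above lower to, assuming the
// predicate state was allocated to %rax (register names are illustrative only):
//
//   shlq $47, %rax    # an all-zeros state stays zero; an all-ones state becomes
//                     # the high 17 bits, which still forms a canonical pointer
//   orq  %rax, %rsp   # a no-op on the good path, poisons %rsp when misspeculating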
1550 
1551 /// Extracts the predicate state stored in the high bits of the stack pointer.
1552 unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1553  MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1554  DebugLoc Loc) {
1555  Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1556  Register TmpReg = MRI->createVirtualRegister(PS->RC);
1557 
1558  // We know that the stack pointer will have any preserved predicate state in
1559  // its high bit. We just want to smear this across the other bits. Turns out,
1560  // this is exactly what an arithmetic right shift does.
1561  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1562  .addReg(X86::RSP);
1563  auto ShiftI =
1564  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1565  .addReg(TmpReg, RegState::Kill)
1566  .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1567  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1568  ++NumInstsInserted;
1569 
1570  return PredStateReg;
1571 }
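// A rough sketch, with illustrative register names:
//
//   movq %rsp, %rcx
//   sarq $63, %rcx    # smear the sign bit: %rcx becomes zero on the good path
//                     # and all-ones if a poisoned state was merged into %rsp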
1572 
1573 void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1574  MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1575  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
1576  MachineBasicBlock &MBB = *MI.getParent();
1577  DebugLoc Loc = MI.getDebugLoc();
1578 
1579  // Check if EFLAGS are alive by seeing if there is a def of them or they
1580  // are live-in, and then seeing if that def is in turn used.
1581  bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1582 
1583  SmallVector<MachineOperand *, 2> HardenOpRegs;
1584 
1585  if (BaseMO.isFI()) {
1586  // A frame index is never a dynamically controllable load, so only
1587  // harden it if we're covering fixed address loads as well.
1588  LLVM_DEBUG(
1589  dbgs() << " Skipping hardening base of explicit stack frame load: ";
1590  MI.dump(); dbgs() << "\n");
1591  } else if (BaseMO.getReg() == X86::RSP) {
1592  // Some idempotent atomic operations are lowered directly to a locked
1593  // OR with 0 to the top of stack (or slightly offset from top) which uses an
1594  // explicit RSP register as the base.
1595  assert(IndexMO.getReg() == X86::NoRegister &&
1596  "Explicit RSP access with dynamic index!");
1597  LLVM_DEBUG(
1598  dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1599  } else if (BaseMO.getReg() == X86::RIP ||
1600  BaseMO.getReg() == X86::NoRegister) {
1601  // For both RIP-relative addressed loads or absolute loads, we cannot
1602  // meaningfully harden them because the address being loaded has no
1603  // dynamic component.
1604  //
1605  // FIXME: When using a segment base (like TLS does) we end up with the
1606  // dynamic address being the base plus -1 because we can't mutate the
1607  // segment register here. This allows the signed 32-bit offset to point at
1608  // valid segment-relative addresses and load them successfully.
1609  LLVM_DEBUG(
1610  dbgs() << " Cannot harden base of "
1611  << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1612  << " address in a load!");
1613  } else {
1614  assert(BaseMO.isReg() &&
1615  "Only allowed to have a frame index or register base.");
1616  HardenOpRegs.push_back(&BaseMO);
1617  }
1618 
1619  if (IndexMO.getReg() != X86::NoRegister &&
1620  (HardenOpRegs.empty() ||
1621  HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1622  HardenOpRegs.push_back(&IndexMO);
1623 
1624  assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1625  "Should have exactly one or two registers to harden!");
1626  assert((HardenOpRegs.size() == 1 ||
1627  HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1628  "Should not have two of the same registers!");
1629 
1630  // Remove any registers that have already been checked.
1631  llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1632  // See if this operand's register has already been checked.
1633  auto It = AddrRegToHardenedReg.find(Op->getReg());
1634  if (It == AddrRegToHardenedReg.end())
1635  // Not checked, so retain this one.
1636  return false;
1637 
1638  // Otherwise, we can directly update this operand and remove it.
1639  Op->setReg(It->second);
1640  return true;
1641  });
1642  // If there are none left, we're done.
1643  if (HardenOpRegs.empty())
1644  return;
1645 
1646  // Compute the current predicate state.
1647  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1648 
1649  auto InsertPt = MI.getIterator();
1650 
1651  // If EFLAGS are live and we don't have access to instructions that avoid
1652  // clobbering EFLAGS we need to save and restore them. This in turn makes
1653  // the EFLAGS no longer live.
1654  unsigned FlagsReg = 0;
1655  if (EFLAGSLive && !Subtarget->hasBMI2()) {
1656  EFLAGSLive = false;
1657  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1658  }
1659 
1660  for (MachineOperand *Op : HardenOpRegs) {
1661  Register OpReg = Op->getReg();
1662  auto *OpRC = MRI->getRegClass(OpReg);
1663  Register TmpReg = MRI->createVirtualRegister(OpRC);
1664 
1665  // If this is a vector register, we'll need somewhat custom logic to handle
1666  // hardening it.
1667  if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1668  OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1669  assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1670  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1671 
1672  // Move our state into a vector register.
1673  // FIXME: We could skip this at the cost of longer encodings with AVX-512
1674  // but that doesn't seem likely worth it.
1675  Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1676  auto MovI =
1677  BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1678  .addReg(StateReg);
1679  (void)MovI;
1680  ++NumInstsInserted;
1681  LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1682 
1683  // Broadcast it across the vector register.
1684  Register VBStateReg = MRI->createVirtualRegister(OpRC);
1685  auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1686  TII->get(Is128Bit ? X86::VPBROADCASTQrr
1687  : X86::VPBROADCASTQYrr),
1688  VBStateReg)
1689  .addReg(VStateReg);
1690  (void)BroadcastI;
1691  ++NumInstsInserted;
1692  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1693  dbgs() << "\n");
1694 
1695  // Merge our potential poison state into the value with a vector or.
1696  auto OrI =
1697  BuildMI(MBB, InsertPt, Loc,
1698  TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1699  .addReg(VBStateReg)
1700  .addReg(OpReg);
1701  (void)OrI;
1702  ++NumInstsInserted;
1703  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1704  } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1705  OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1706  OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1707  assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1708  bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1709  bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1710  if (Is128Bit || Is256Bit)
1711  assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1712 
1713  // Broadcast our state into a vector register.
1714  Register VStateReg = MRI->createVirtualRegister(OpRC);
1715  unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1716  : Is256Bit ? X86::VPBROADCASTQrZ256rr
1717  : X86::VPBROADCASTQrZrr;
1718  auto BroadcastI =
1719  BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1720  .addReg(StateReg);
1721  (void)BroadcastI;
1722  ++NumInstsInserted;
1723  LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1724  dbgs() << "\n");
1725 
1726  // Merge our potential poison state into the value with a vector or.
1727  unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1728  : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1729  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1730  .addReg(VStateReg)
1731  .addReg(OpReg);
1732  (void)OrI;
1733  ++NumInstsInserted;
1734  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1735  } else {
1736  // FIXME: Need to support GR32 here for 32-bit code.
1737  assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1738  "Not a supported register class for address hardening!");
1739 
1740  if (!EFLAGSLive) {
1741  // Merge our potential poison state into the value with an or.
1742  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1743  .addReg(StateReg)
1744  .addReg(OpReg);
1745  OrI->addRegisterDead(X86::EFLAGS, TRI);
1746  ++NumInstsInserted;
1747  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1748  } else {
1749  // We need to avoid touching EFLAGS so shift out all but the most
1750  // significant bit using the instruction that doesn't update flags.
1751  auto ShiftI =
1752  BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1753  .addReg(OpReg)
1754  .addReg(StateReg);
1755  (void)ShiftI;
1756  ++NumInstsInserted;
1757  LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1758  dbgs() << "\n");
1759  }
1760  }
1761 
1762  // Record this register as checked and update the operand.
1763  assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1764  "Should not have checked this register yet!");
1765  AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1766  Op->setReg(TmpReg);
1767  ++NumAddrRegsHardened;
1768  }
1769 
1770  // And restore the flags if needed.
1771  if (FlagsReg)
1772  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1773 }
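// A rough sketch of the common GPR case, using illustrative register names with
// the predicate state in %rcx: a load such as `movq (%rdi,%rsi,8), %rax` has its
// base and index ORed with the state first, e.g.
//
//   orq  %rcx, %rdi
//   orq  %rcx, %rsi
//   movq (%rdi,%rsi,8), %rax   # under misspeculation %rdi/%rsi are all-ones, so
//                              # the load no longer sees an attacker-chosen address
//
// When EFLAGS are live and BMI2 is available, the SHRX form above is used instead
// so that the flags are not clobbered.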
1774 
1775 MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1776  MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1777  assert(X86InstrInfo::isDataInvariantLoad(InitialMI) &&
1778  "Cannot get here with a non-invariant load!");
1779  assert(!isEFLAGSDefLive(InitialMI) &&
1780  "Cannot get here with a data invariant load "
1781  "that interferes with EFLAGS!");
1782 
1783  // See if we can sink hardening the loaded value.
1784  auto SinkCheckToSingleUse =
1785  [&](MachineInstr &MI) -> Optional<MachineInstr *> {
1786  Register DefReg = MI.getOperand(0).getReg();
1787 
1788  // We need to find a single use which we can sink the check. We can
1789  // primarily do this because many uses may already end up checked on their
1790  // own.
1791  MachineInstr *SingleUseMI = nullptr;
1792  for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1793  // If we're already going to harden this use, it is data invariant, it
1794  // does not interfere with EFLAGS, and within our block.
1795  if (HardenedInstrs.count(&UseMI)) {
1796  if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1797  // If we've already decided to harden a non-load, we must have sunk
1798  // some other post-load hardened instruction to it and it must itself
1799  // be data-invariant.
1800  assert(X86InstrInfo::isDataInvariant(UseMI) &&
1801  "Data variant instruction being hardened!");
1802  continue;
1803  }
1804 
1805  // Otherwise, this is a load and the load component can't be data
1806  // invariant so check how this register is being used.
1807  const MCInstrDesc &Desc = UseMI.getDesc();
1808  int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
1809  assert(MemRefBeginIdx >= 0 &&
1810  "Should always have mem references here!");
1811  MemRefBeginIdx += X86II::getOperandBias(Desc);
1812 
1813  MachineOperand &BaseMO =
1814  UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1815  MachineOperand &IndexMO =
1816  UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1817  if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1818  (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1819  // The load uses the register as part of its address making it not
1820  // invariant.
1821  return {};
1822 
1823  continue;
1824  }
1825 
1826  if (SingleUseMI)
1827  // We already have a single use, this would make two. Bail.
1828  return {};
1829 
1830  // If this single use isn't data invariant, isn't in this block, or has
1831  // interfering EFLAGS, we can't sink the hardening to it.
1832  if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1833  isEFLAGSDefLive(UseMI))
1834  return {};
1835 
1836  // If this instruction defines multiple registers bail as we won't harden
1837  // all of them.
1838  if (UseMI.getDesc().getNumDefs() > 1)
1839  return {};
1840 
1841  // If this register isn't a virtual register we can't sanely walk its uses,
1842  // so just bail. Also check that its register class is one of the ones we
1843  // can harden.
1844  Register UseDefReg = UseMI.getOperand(0).getReg();
1845  if (!UseDefReg.isVirtual() || !canHardenRegister(UseDefReg))
1846  return {};
1847 
1848  SingleUseMI = &UseMI;
1849  }
1850 
1851  // If SingleUseMI is still null, there is no use that needs its own
1852  // checking. Otherwise, it is the single use that needs checking.
1853  return {SingleUseMI};
1854  };
1855 
1856  MachineInstr *MI = &InitialMI;
1857  while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1858  // Update which MI we're checking now.
1859  MI = *SingleUse;
1860  if (!MI)
1861  break;
1862  }
1863 
1864  return MI;
1865 }
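// A rough sketch of the effect, in illustrative MIR-style pseudo code: if the
// only use of a hardened load is a data-invariant instruction in the same block
// whose EFLAGS def (if any) is dead, e.g.
//
//   %v = MOV64rm ...        ; the load we were asked to post-load harden
//   %w = ADD64ri8 %v, 8     ; sole use, data invariant, EFLAGS def dead
//
// then the OR with the predicate state is emitted for %w rather than %v, so
// intermediate values that only feed invariant or already-hardened instructions
// never need their own hardening.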
1866 
1867 bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1868  auto *RC = MRI->getRegClass(Reg);
1869  int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1870  if (RegBytes > 8)
1871  // We don't support post-load hardening of vectors.
1872  return false;
1873 
1874  unsigned RegIdx = Log2_32(RegBytes);
1875  assert(RegIdx < 4 && "Unsupported register size");
1876 
1877  // If this register class is explicitly constrained to a class that doesn't
1878  // require REX prefix, we may not be able to satisfy that constraint when
1879  // emitting the hardening instructions, so bail out here.
1880  // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1881  // end up with both a NOREX and a REX-only register as operands to the hardening
1882  // instructions. It would be better to fix that code to handle this situation
1883  // rather than hack around it in this way.
1884  const TargetRegisterClass *NOREXRegClasses[] = {
1885  &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1886  &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1887  if (RC == NOREXRegClasses[RegIdx])
1888  return false;
1889 
1890  const TargetRegisterClass *GPRRegClasses[] = {
1891  &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1892  &X86::GR64RegClass};
1893  return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1894 }
1895 
1896 /// Harden a value in a register.
1897 ///
1898 /// This is the low-level logic to fully harden a value sitting in a register
1899 /// against leaking during speculative execution.
1900 ///
1901 /// Unlike hardening an address that is used by a load, this routine is required
1902 /// to hide *all* incoming bits in the register.
1903 ///
1904 /// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1905 /// larger than the predicate state register. FIXME: We should support vector
1906 /// registers here by broadcasting the predicate state.
1907 ///
1908 /// The new, hardened virtual register is returned. It will have the same
1909 /// register class as `Reg`.
1910 unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1911  Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1912  DebugLoc Loc) {
1913  assert(canHardenRegister(Reg) && "Cannot harden this register!");
1914  assert(Reg.isVirtual() && "Cannot harden a physical register!");
1915 
1916  auto *RC = MRI->getRegClass(Reg);
1917  int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1918 
1919  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1920 
1921  // FIXME: Need to teach this about 32-bit mode.
1922  if (Bytes != 8) {
1923  unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1924  unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1925  Register NarrowStateReg = MRI->createVirtualRegister(RC);
1926  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1927  .addReg(StateReg, 0, SubRegImm);
1928  StateReg = NarrowStateReg;
1929  }
1930 
1931  unsigned FlagsReg = 0;
1932  if (isEFLAGSLive(MBB, InsertPt, *TRI))
1933  FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1934 
1935  Register NewReg = MRI->createVirtualRegister(RC);
1936  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1937  unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1938  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1939  .addReg(StateReg)
1940  .addReg(Reg);
1941  OrI->addRegisterDead(X86::EFLAGS, TRI);
1942  ++NumInstsInserted;
1943  LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1944 
1945  if (FlagsReg)
1946  restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1947 
1948  return NewReg;
1949 }
1950 
1951 /// Harden a load by hardening the loaded value in the defined register.
1952 ///
1953 /// We can harden a non-leaking load into a register without touching the
1954 /// address by just hiding all of the loaded bits during misspeculation. We use
1955 /// an `or` instruction to do this because we set up our poison value as all
1956 /// ones. And the goal is just for the loaded bits to not be exposed to
1957  /// ones. The goal is just for the loaded bits not to be exposed to speculative
1958  /// execution, and coercing them all to one is sufficient for that.
1959 /// Returns the newly hardened register.
1960 unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1961  MachineBasicBlock &MBB = *MI.getParent();
1962  DebugLoc Loc = MI.getDebugLoc();
1963 
1964  auto &DefOp = MI.getOperand(0);
1965  Register OldDefReg = DefOp.getReg();
1966  auto *DefRC = MRI->getRegClass(OldDefReg);
1967 
1968  // Because we want to completely replace the uses of this def'ed value with
1969  // the hardened value, create a dedicated new register that will only be used
1970  // to communicate the unhardened value to the hardening.
1971  Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1972  DefOp.setReg(UnhardenedReg);
1973 
1974  // Now harden this register's value, getting a hardened reg that is safe to
1975  // use. Note that we insert the instructions to compute this *after* the
1976  // defining instruction, not before it.
1977  unsigned HardenedReg = hardenValueInRegister(
1978  UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1979 
1980  // Finally, replace the old register (which now only has the uses of the
1981  // original def) with the hardened register.
1982  MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1983 
1984  ++NumPostLoadRegsHardened;
1985  return HardenedReg;
1986 }
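// A rough sketch with illustrative register names and the state in %rcx: a load
// such as
//
//   movq (%rdi), %rax
//
// is followed by
//
//   orq %rcx, %rax    # unchanged on the good path, forced to all-ones under
//                     # misspeculation, so the loaded value cannot be leaked
//                     # through a dependent side channel
//
// and every later use of the loaded value reads the hardened register.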
1987 
1988 /// Harden a return instruction.
1989 ///
1990 /// Returns implicitly perform a load which we need to harden. Without hardening
1991  /// this load, an attacker may speculatively write over the return address to
1992 /// steer speculation of the return to an attacker controlled address. This is
1993 /// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1994 /// this paper:
1995 /// https://people.csail.mit.edu/vlk/spectre11.pdf
1996 ///
1997 /// We can harden this by introducing an LFENCE that will delay any load of the
1998 /// return address until prior instructions have retired (and thus are not being
1999 /// speculated), or we can harden the address used by the implicit load: the
2000 /// stack pointer.
2001 ///
2002 /// If we are not using an LFENCE, hardening the stack pointer has an additional
2003 /// benefit: it allows us to pass the predicate state accumulated in this
2004 /// function back to the caller. In the absence of a BCBS attack on the return,
2005 /// the caller will typically be resumed and speculatively executed due to the
2006 /// Return Stack Buffer (RSB) prediction which is very accurate and has a high
2007 /// priority. It is possible that some code from the caller will be executed
2008 /// speculatively even during a BCBS-attacked return until the steering takes
2009 /// effect. Whenever this happens, the caller can recover the (poisoned)
2010 /// predicate state from the stack pointer and continue to harden loads.
2011 void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2012  MachineBasicBlock &MBB = *MI.getParent();
2013  DebugLoc Loc = MI.getDebugLoc();
2014  auto InsertPt = MI.getIterator();
2015 
2016  if (FenceCallAndRet)
2017  // No need to fence here as we'll fence at the return site itself. That
2018  // handles more cases than we can handle here.
2019  return;
2020 
2021  // Take our predicate state, shift it to the high 17 bits (so that we keep
2022  // pointers canonical) and merge it into RSP. This will allow the caller to
2023  // extract it when we return (speculatively).
2024  mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2025 }
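// A rough sketch of the hardened return, assuming the state lives in %rax:
//
//   shlq $47, %rax
//   orq  %rax, %rsp
//   retq              # a caller built by this pass recovers the (possibly
//                     # poisoned) state from the high bits of %rsp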
2026 
2027 /// Trace the predicate state through a call.
2028 ///
2029 /// There are several layers of this needed to handle the full complexity of
2030 /// calls.
2031 ///
2032 /// First, we need to send the predicate state into the called function. We do
2033 /// this by merging it into the high bits of the stack pointer.
2034 ///
2035 /// For tail calls, this is all we need to do.
2036 ///
2037 /// For calls where we might return and resume the control flow, we need to
2038 /// extract the predicate state from the high bits of the stack pointer after
2039 /// control returns from the called function.
2040 ///
2041 /// We also need to verify that we intended to return to this location in the
2042 /// code. An attacker might arrange for the processor to mispredict the return
2043 /// to this valid but incorrect return address in the program rather than the
2044 /// correct one. See the paper on this attack, called "ret2spec" by the
2045 /// researchers, here:
2046 /// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2047 ///
2048 /// The way we verify that we returned to the correct location is by preserving
2049 /// the expected return address across the call. One technique involves taking
2050  /// advantage of the red-zone to load the return address from `-8(%rsp)`, where
2051  /// it was left after the callee's RET instruction popped it. Alternatively, we can
2052 /// directly save the address into a register that will be preserved across the
2053 /// call. We compare this intended return address against the address
2054 /// immediately following the call (the observed return address). If these
2055 /// mismatch, we have detected misspeculation and can poison our predicate
2056 /// state.
2057 void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2058  MachineInstr &MI) {
2059  MachineBasicBlock &MBB = *MI.getParent();
2060  MachineFunction &MF = *MBB.getParent();
2061  auto InsertPt = MI.getIterator();
2062  DebugLoc Loc = MI.getDebugLoc();
2063 
2064  if (FenceCallAndRet) {
2065  if (MI.isReturn())
2066  // Tail call, we don't return to this function.
2067  // FIXME: We should also handle noreturn calls.
2068  return;
2069 
2070  // We don't need to fence before the call because the function should fence
2071  // in its entry. However, we do need to fence after the call returns.
2072  // Fencing before the return doesn't correctly handle cases where the return
2073  // itself is mispredicted.
2074  BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2075  ++NumInstsInserted;
2076  ++NumLFENCEsInserted;
2077  return;
2078  }
2079 
2080  // First, we transfer the predicate state into the called function by merging
2081  // it into the stack pointer. This will kill the current def of the state.
2082  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2083  mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2084 
2085  // If this call is also a return, it is a tail call and we don't need anything
2086  // else to handle it so just return. Also, if there are no further
2087  // instructions and no successors, this call does not return so we can also
2088  // bail.
2089  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2090  return;
2091 
2092  // Create a symbol to track the return address and attach it to the call
2093  // machine instruction. We will lower extra symbols attached to call
2094  // instructions as labels immediately following the call.
2095  MCSymbol *RetSymbol =
2096  MF.getContext().createTempSymbol("slh_ret_addr",
2097  /*AlwaysAddSuffix*/ true);
2098  MI.setPostInstrSymbol(MF, RetSymbol);
2099 
2100  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2101  unsigned ExpectedRetAddrReg = 0;
2102 
2103  // If we have no red zones or if the function returns twice (possibly without
2104  // using the `ret` instruction) like setjmp, we need to save the expected
2105  // return address prior to the call.
2106  if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2107  MF.exposesReturnsTwice()) {
2108  // If we don't have red zones, we need to compute the expected return
2109  // address prior to the call and store it in a register that lives across
2110  // the call.
2111  //
2112  // In some ways, this is doubly satisfying as a mitigation because it will
2113  // also successfully detect stack smashing bugs in some cases (typically,
2114  // when a callee-saved register is used and the callee doesn't push it onto
2115  // the stack). But that isn't our primary goal, so we only use it as
2116  // a fallback.
2117  //
2118  // FIXME: It isn't clear that this is reliable in the face of
2119  // rematerialization in the register allocator. We somehow need to force
2120  // that to not occur for this particular instruction, and instead to spill
2121  // or otherwise preserve the value computed *prior* to the call.
2122  //
2123  // FIXME: It is even less clear why MachineCSE can't just fold this when we
2124  // end up having to use identical instructions both before and after the
2125  // call to feed the comparison.
2126  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2127  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2128  !Subtarget->isPositionIndependent()) {
2129  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2130  .addSym(RetSymbol);
2131  } else {
2132  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2133  .addReg(/*Base*/ X86::RIP)
2134  .addImm(/*Scale*/ 1)
2135  .addReg(/*Index*/ 0)
2136  .addSym(RetSymbol)
2137  .addReg(/*Segment*/ 0);
2138  }
2139  }
2140 
2141  // Step past the call to handle when it returns.
2142  ++InsertPt;
2143 
2144  // If we didn't pre-compute the expected return address into a register, then
2145  // red zones are enabled and the return address is still available on the
2146  // stack immediately after the call. As the very first instruction, we load it
2147  // into a register.
2148  if (!ExpectedRetAddrReg) {
2149  ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2150  BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2151  .addReg(/*Base*/ X86::RSP)
2152  .addImm(/*Scale*/ 1)
2153  .addReg(/*Index*/ 0)
2154  .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2155  // the return address is 8-bytes past it.
2156  .addReg(/*Segment*/ 0);
2157  }
2158 
2159  // Now we extract the callee's predicate state from the stack pointer.
2160  unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2161 
2162  // Test the expected return address against our actual address. If we can
2163  // form this basic block's address as an immediate, this is easy. Otherwise
2164  // we compute it.
2165  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2166  !Subtarget->isPositionIndependent()) {
2167  // FIXME: Could we fold this with the load? It would require careful EFLAGS
2168  // management.
2169  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2170  .addReg(ExpectedRetAddrReg, RegState::Kill)
2171  .addSym(RetSymbol);
2172  } else {
2173  Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2174  BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2175  .addReg(/*Base*/ X86::RIP)
2176  .addImm(/*Scale*/ 1)
2177  .addReg(/*Index*/ 0)
2178  .addSym(RetSymbol)
2179  .addReg(/*Segment*/ 0);
2180  BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2181  .addReg(ExpectedRetAddrReg, RegState::Kill)
2182  .addReg(ActualRetAddrReg, RegState::Kill);
2183  }
2184 
2185  // Now conditionally update the predicate state we just extracted if we ended
2186  // up at a different return address than expected.
2187  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2188  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2189 
2190  Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2191  auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2192  .addReg(NewStateReg, RegState::Kill)
2193  .addReg(PS->PoisonReg)
2194  .addImm(X86::COND_NE);
2195  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
2196  ++NumInstsInserted;
2197  LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2198 
2199  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2200 }
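// A rough sketch of the common case (red zone available, small code model,
// register names illustrative), with the predicate state in %rax and the
// all-ones poison value in %rdx:
//
//   shlq $47, %rax
//   orq  %rax, %rsp             # pass the state to the callee
//   callq callee
// .Lslh_ret_addr0:              # the post-instruction symbol attached above
//   movq -8(%rsp), %rcx         # the return address actually used by the callee
//   movq %rsp, %rax
//   sarq $63, %rax              # recover the state the callee handed back
//   cmpq $.Lslh_ret_addr0, %rcx
//   cmovneq %rdx, %rax          # poison the state on a mispredicted return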
2201 
2202 /// An attacker may speculatively store over a value that is then speculatively
2203 /// loaded and used as the target of an indirect call or jump instruction. This
2204 /// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2205 /// in this paper:
2206 /// https://people.csail.mit.edu/vlk/spectre11.pdf
2207 ///
2208 /// When this happens, the speculative execution of the call or jump will end up
2209 /// being steered to this attacker controlled address. While most such loads
2210 /// will be adequately hardened already, we want to ensure that they are
2211 /// definitively treated as needing post-load hardening. While address hardening
2212 /// is sufficient to prevent secret data from leaking to the attacker, it may
2213 /// not be sufficient to prevent an attacker from steering speculative
2214 /// execution. We forcibly unfolded all relevant loads above and so will always
2215  /// have an opportunity to post-load harden here; we just need to scan for cases
2216 /// not already flagged and add them.
2217 void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2218  MachineInstr &MI,
2219  SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
2220  switch (MI.getOpcode()) {
2221  case X86::FARCALL16m:
2222  case X86::FARCALL32m:
2223  case X86::FARCALL64m:
2224  case X86::FARJMP16m:
2225  case X86::FARJMP32m:
2226  case X86::FARJMP64m:
2227  // We don't need to harden either far calls or far jumps as they are
2228  // safe from Spectre.
2229  return;
2230 
2231  default:
2232  break;
2233  }
2234 
2235  // We should never see a loading instruction at this point, as those should
2236  // have been unfolded.
2237  assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2238 
2239  // If the first operand isn't a register, this is a branch or call
2240  // instruction with an immediate operand which doesn't need to be hardened.
2241  if (!MI.getOperand(0).isReg())
2242  return;
2243 
2244  // For all of these, the target register is the first operand of the
2245  // instruction.
2246  auto &TargetOp = MI.getOperand(0);
2247  Register OldTargetReg = TargetOp.getReg();
2248 
2249  // Try to lookup a hardened version of this register. We retain a reference
2250  // here as we want to update the map to track any newly computed hardened
2251  // register.
2252  unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2253 
2254  // If we don't have a hardened register yet, compute one. Otherwise, just use
2255  // the already hardened register.
2256  //
2257  // FIXME: It is a little suspect that we use partially hardened registers that
2258  // only feed addresses. The complexity of partial hardening with SHRX
2259  // continues to pile up. Should definitively measure its value and consider
2260  // eliminating it.
2261  if (!HardenedTargetReg)
2262  HardenedTargetReg = hardenValueInRegister(
2263  OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2264 
2265  // Set the target operand to the hardened register.
2266  TargetOp.setReg(HardenedTargetReg);
2267 
2268  ++NumCallsOrJumpsHardened;
2269 }
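// A rough sketch with illustrative registers and the state in %rcx: an indirect
// branch such as `jmpq *%rax` whose target was loaded from memory becomes
//
//   orq  %rcx, %rax   # all-ones under misspeculation, so the attacker no longer
//   jmpq *%rax        # chooses where speculation is steered
//
// (or an already-hardened copy of %rax is reused when one is tracked in
// AddrRegToHardenedReg).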
2270 
2271 INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2272  "X86 speculative load hardener", false, false)
2273 INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2274  "X86 speculative load hardener", false, false)
2275 
2276  FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2277  return new X86SpeculativeLoadHardeningPass();
2278 }
llvm::X86::AddrBaseReg
@ AddrBaseReg
Definition: X86BaseInfo.h:32
llvm::MachineInstr::isBranch
bool isBranch(QueryType Type=AnyInBundle) const
Returns true if this is a conditional, unconditional, or indirect branch.
Definition: MachineInstr.h:844
llvm::MachineBasicBlock::succ_size
unsigned succ_size() const
Definition: MachineBasicBlock.h:344
llvm::X86InstrInfo::isDataInvariant
static bool isDataInvariant(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value o...
Definition: X86InstrInfo.cpp:138
llvm::MachineInstr::addRegisterDead
bool addRegisterDead(Register Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI defined a register without a use.
Definition: MachineInstr.cpp:1957
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:100
MachineInstr.h
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:132
llvm
Definition: AllocatorList.h:23
llvm::X86II::getMemoryOperandNo
int getMemoryOperandNo(uint64_t TSFlags)
The function returns the MCInst operand # for the first field of the memory operand.
Definition: X86BaseInfo.h:1086
llvm::tgtok::Def
@ Def
Definition: TGLexer.h:50
MachineSSAUpdater.h
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::MCSymbol
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
UseMI
MachineInstrBuilder & UseMI
Definition: AArch64ExpandPseudoInsts.cpp:100
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::MachineBasicBlock::isLiveIn
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
Definition: MachineBasicBlock.cpp:572
llvm::SmallPtrSetImpl::erase
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:378
Optional.h
llvm::X86::AddrIndexReg
@ AddrIndexReg
Definition: X86BaseInfo.h:34
llvm::MachineRegisterInfo::createVirtualRegister
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Definition: MachineRegisterInfo.cpp:158
X86Subtarget.h
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
HardenIndirectCallsAndJumps
static cl::opt< bool > HardenIndirectCallsAndJumps(PASS_KEY "-indirect", cl::desc("Harden indirect calls and jumps against using speculatively " "stored attacker controlled addresses. This is designed to " "mitigate Spectre v1.2 style attacks."), cl::init(true), cl::Hidden)
llvm::MachineFunction::getContext
MCContext & getContext() const
Definition: MachineFunction.h:507
llvm::SparseBitVector::clear
void clear()
Definition: SparseBitVector.h:451
Pass.h
X86InstrBuilder.h
llvm::MachineBasicBlock::isEHFuncletEntry
bool isEHFuncletEntry() const
Returns true if this is the entry block of an EH funclet.
Definition: MachineBasicBlock.h:461
llvm::MachineOperand::setIsKill
void setIsKill(bool Val=true)
Definition: MachineOperand.h:497
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
Statistic.h
llvm::MachineFunction::end
iterator end()
Definition: MachineFunction.h:739
llvm::X86Subtarget
Definition: X86Subtarget.h:52
llvm::MachineFunction::exposesReturnsTwice
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
Definition: MachineFunction.h:617
llvm::erase_if
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:1674
llvm::SmallDenseMap
Definition: DenseMap.h:880
llvm::MachineFunctionPass
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Definition: MachineFunctionPass.h:30
MachineBasicBlock.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::TargetRegisterInfo
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Definition: TargetRegisterInfo.h:231
DenseMap.h
llvm::MachineRegisterInfo::use_instructions
iterator_range< use_instr_iterator > use_instructions(Register Reg) const
Definition: MachineRegisterInfo.h:485
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:338
TargetInstrInfo.h
llvm::MachineInstr::findRegisterUseOperand
MachineOperand * findRegisterUseOperand(Register Reg, bool isKill=false, const TargetRegisterInfo *TRI=nullptr)
Wrapper for findRegisterUseOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
Definition: MachineInstr.h:1418
HardenLoads
static cl::opt< bool > HardenLoads(PASS_KEY "-loads", cl::desc("Sanitize loads from memory. When disable, no " "significant security is provided."), cl::init(true), cl::Hidden)
llvm::MachineFunction::insert
void insert(iterator MBBI, MachineBasicBlock *MBB)
Definition: MachineFunction.h:756
llvm::SmallSet< unsigned, 16 >
llvm::Function::hasFnAttribute
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.h:345
llvm::Optional
Definition: APInt.h:34
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::count
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:145
llvm::SmallPtrSet
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:449
llvm::X86InstrInfo::isDataInvariantLoad
static bool isDataInvariantLoad(MachineInstr &MI)
Returns true if the instruction has no behavior (specified or otherwise) that is based on the value l...
Definition: X86InstrInfo.cpp:433
llvm::X86::getCondFromBranch
CondCode getCondFromBranch(const MachineInstr &MI)
Definition: X86InstrInfo.cpp:2673
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::X86::CondCode
CondCode
Definition: X86BaseInfo.h:80
llvm::MachineBasicBlock::terminators
iterator_range< iterator > terminators()
Definition: MachineBasicBlock.h:288
llvm::X86::COND_INVALID
@ COND_INVALID
Definition: X86BaseInfo.h:107
llvm::MachineOperand::isFI
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
Definition: MachineOperand.h:328
TRI
unsigned const TargetRegisterInfo * TRI
Definition: MachineSink.cpp:1567
INITIALIZE_PASS_END
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
Definition: RegBankSelect.cpp:69
llvm::MachineFunctionPass::getAnalysisUsage
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Definition: MachineFunctionPass.cpp:102
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:122
MachineRegisterInfo.h
llvm::MachineBasicBlock::dump
void dump() const
Definition: MachineBasicBlock.cpp:292
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
SparseBitVector.h
llvm::MachineBasicBlock::addSuccessor
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
Definition: MachineBasicBlock.cpp:743
CommandLine.h
llvm::SparseBitVector
Definition: SparseBitVector.h:255
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1505
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:565
llvm::MCInstrDesc::TSFlags
uint64_t TSFlags
Definition: MCInstrDesc.h:204
X86.h
llvm::MachineOperand::isImplicit
bool isImplicit() const
Definition: MachineOperand.h:377
TargetMachine.h
llvm::MachineInstrBuilder::addMBB
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Definition: MachineInstrBuilder.h:147
llvm::MachineInstr::getOperand
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:488
llvm::MachineBasicBlock::isSuccessor
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
Definition: MachineBasicBlock.cpp:931
getRegClassForUnfoldedLoad
static const TargetRegisterClass * getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, unsigned Opcode)
Compute the register class for the unfolded load.
Definition: X86SpeculativeLoadHardening.cpp:841
llvm::TargetRegisterClass
Definition: TargetRegisterInfo.h:46
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::createX86SpeculativeLoadHardeningPass
FunctionPass * createX86SpeculativeLoadHardeningPass()
Definition: X86SpeculativeLoadHardening.cpp:2276
llvm::MachineOperand::isMBB
bool isMBB() const
isMBB - Tests if this is a MO_MachineBasicBlock operand.
Definition: MachineOperand.h:326
false
Definition: StackSlotColoring.cpp:142
llvm::X86ISD::MFENCE
@ MFENCE
Definition: X86ISelLowering.h:644
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
llvm::MCInstrDesc
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:196
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
llvm::CodeModel::Small
@ Small
Definition: CodeGen.h:28
FenceCallAndRet
static cl::opt< bool > FenceCallAndRet(PASS_KEY "-fence-call-and-ret", cl::desc("Use a full speculation fence to harden both call and ret edges " "rather than a lighter weight mitigation."), cl::init(false), cl::Hidden)
llvm::TargetRegisterClass::hasSuperClassEq
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
Definition: TargetRegisterInfo.h:136
llvm::report_fatal_error
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::HexagonInstrInfo::insertBranch
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
Insert branch code into the end of the specified MachineBasicBlock.
Definition: HexagonInstrInfo.cpp:584
llvm::MachineFunction::begin
iterator begin()
Definition: MachineFunction.h:737
DebugLoc.h
SmallPtrSet.h
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:26
llvm::MachineBasicBlock::isCleanupFuncletEntry
bool isCleanupFuncletEntry() const
Returns true if this is the entry block of a cleanup funclet.
Definition: MachineBasicBlock.h:467
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:49
llvm::MachineInstrBuilder::getReg
Register getReg(unsigned Idx) const
Get the register for the operand index.
Definition: MachineInstrBuilder.h:95
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
HardenEdgesWithLFENCE
static cl::opt< bool > HardenEdgesWithLFENCE(PASS_KEY "-lfence", cl::desc("Use LFENCE along each conditional edge to harden against speculative " "loads rather than conditional movs and poisoned pointers."), cl::init(false), cl::Hidden)
llvm::MachineRegisterInfo::getRegClass
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
Definition: MachineRegisterInfo.h:634
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
llvm::MachineFunction::getSubtarget
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Definition: MachineFunction.h:555
llvm::cl::opt< bool >
SSA
Memory SSA
Definition: MemorySSA.cpp:73
llvm::MachineInstr::getDebugLoc
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:418
llvm::SmallSet::count
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:164
HardenInterprocedurally
static cl::opt< bool > HardenInterprocedurally(PASS_KEY "-ip", cl::desc("Harden interprocedurally by passing our state in and out of " "functions in the high bits of the stack pointer."), cl::init(true), cl::Hidden)
TargetSchedule.h
llvm::X86II::getOperandBias
unsigned getOperandBias(const MCInstrDesc &Desc)
Compute whether all of the def operands are repeated in the uses and therefore should be skipped.
Definition: X86BaseInfo.h:1046
MCSchedule.h
llvm::MachineOperand::setIsDead
void setIsDead(bool Val=true)
Definition: MachineOperand.h:503
llvm::SparseBitVector::set
void set(unsigned Idx)
Definition: SparseBitVector.h:507
llvm::MachineBasicBlock::instr_rend
reverse_instr_iterator instr_rend()
Definition: MachineBasicBlock.h:258
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::clear
void clear()
Definition: DenseMap.h:111
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineOperand::isReg
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Definition: MachineOperand.h:318
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
splitEdge
static MachineBasicBlock & splitEdge(MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount, MachineInstr *Br, MachineInstr *&UncondBr, const X86InstrInfo &TII)
Definition: X86SpeculativeLoadHardening.cpp:224
canonicalizePHIOperands
static void canonicalizePHIOperands(MachineFunction &MF)
Removing duplicate PHI operands to leave the PHI in a canonical and predictable form.
Definition: X86SpeculativeLoadHardening.cpp:328
llvm::MachineOperand::CreateMBB
static MachineOperand CreateMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0)
Definition: MachineOperand.h:813
llvm::MachineBasicBlock::SkipPHIsAndLabels
iterator SkipPHIsAndLabels(iterator I)
Return the first instruction in MBB after I that is not a PHI or a label.
Definition: MachineBasicBlock.cpp:209
llvm::MachineBasicBlock::instr_rbegin
reverse_instr_iterator instr_rbegin()
Definition: MachineBasicBlock.h:256
llvm::SparseBitVector::test
bool test(unsigned Idx) const
Definition: SparseBitVector.h:471
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
MachineConstantPool.h
llvm::MachineFunction::CreateMachineBasicBlock
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
Definition: MachineFunction.cpp:414
ArrayRef.h
MachineFunctionPass.h
llvm::MachineBasicBlock::SkipPHIsLabelsAndDebug
iterator SkipPHIsLabelsAndDebug(iterator I)
Return the first instruction in MBB after I that is not a PHI, label or debug.
Definition: MachineBasicBlock.cpp:224
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineFunction::getName
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Definition: MachineFunction.cpp:522
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::MachineBasicBlock::setHasAddressTaken
void setHasAddressTaken()
Set this block to reflect that it potentially is the target of an indirect branch.
Definition: MachineBasicBlock.h:215
load
LLVM currently emits rax rax movq rax rax ret It could narrow the loads and stores to emit rax rax movq rax rax ret The trouble is that there is a TokenFactor between the store and the load
Definition: README.txt:1531
llvm::MachineBasicBlock::getParent
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Definition: MachineBasicBlock.h:225
MachineModuleInfo.h
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:98
llvm::MachineOperand::getReg
Register getReg() const
getReg - Returns the register number.
Definition: MachineOperand.h:357
llvm::MachineBasicBlock::predecessors
iterator_range< pred_iterator > predecessors()
Definition: MachineBasicBlock.h:349
llvm::X86::GetOppositeBranchCondition
CondCode GetOppositeBranchCondition(CondCode CC)
GetOppositeBranchCondition - Return the inverse of the specified cond, e.g.
Definition: X86InstrInfo.cpp:2705
llvm::MachineBasicBlock::instr_begin
instr_iterator instr_begin()
Definition: MachineBasicBlock.h:252
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::MachineBasicBlock::instr_end
instr_iterator instr_end()
Definition: MachineBasicBlock.h:254
llvm::MachineFunction
Definition: MachineFunction.h:227
llvm::SetVector< T, SmallVector< T, N >, SmallDenseSet< T, N > >::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::MachineBasicBlock::succ_empty
bool succ_empty() const
Definition: MachineBasicBlock.h:347
llvm::X86InstrInfo
Definition: X86InstrInfo.h:130
hardener
X86 speculative load hardener
Definition: X86SpeculativeLoadHardening.cpp:2274
llvm::MachineBasicBlock::getFirstTerminator
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
Definition: MachineBasicBlock.cpp:239
llvm::SmallPtrSetImplBase::clear
void clear()
Definition: SmallPtrSet.h:94
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:33
llvm::MachineOperand::getMBB
MachineBasicBlock * getMBB() const
Definition: MachineOperand.h:549
llvm::MachineBasicBlock::isEHScopeEntry
bool isEHScopeEntry() const
Returns true if this is the entry block of an EH scope, i.e., the block that used to have a catchpad ...
Definition: MachineBasicBlock.h:448
llvm::SystemZII::Is128Bit
@ Is128Bit
Definition: SystemZInstrInfo.h:40
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1512
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:167
llvm::MachineBasicBlock::successors
iterator_range< succ_iterator > successors()
Definition: MachineBasicBlock.h:355
llvm::MachineBasicBlock::isEHPad
bool isEHPad() const
Returns true if the block is a landing pad.
Definition: MachineBasicBlock.h:435
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::MachineInstr::getOpcode
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:478
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
if
if(llvm_vc STREQUAL "") set(fake_version_inc "$
Definition: CMakeLists.txt:14
TargetSubtargetInfo.h
llvm::MachineInstr::isTerminator
bool isTerminator(QueryType Type=AnyInBundle) const
Returns true if this instruction part of the terminator for a basic block.
Definition: MachineInstr.h:836
llvm::MachineBasicBlock::splitSuccessor
void splitSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New, bool NormalizeSuccProbs=false)
Split the old successor into old plus new and updates the probability info.
Definition: MachineBasicBlock.cpp:762
llvm::SmallSet::insert
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:180
llvm::Pass::dump
void dump() const
Definition: Pass.cpp:131
llvm::MCContext::createTempSymbol
MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Definition: MCContext.cpp:268
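A hedged sketch of creating a temporary label (MF is assumed to be the current MachineFunction):
  // Create an assembler-local label with a unique, auto-generated name.
  llvm::MCSymbol *TmpSym = MF.getContext().createTempSymbol();
  // It can then be attached to an instruction under construction,
  // e.g. via MachineInstrBuilder::addSym(TmpSym).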
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::MachineBasicBlock::addLiveIn
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
Definition: MachineBasicBlock.h:367
llvm::MachineRegisterInfo::replaceRegWith
void replaceRegWith(Register FromReg, Register ToReg)
replaceRegWith - Replace all instances of FromReg with ToReg in the machine function.
Definition: MachineRegisterInfo.cpp:380
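A brief sketch (OldReg and NewReg are illustrative virtual registers with compatible register classes):
  // Rewrite every def and use of OldReg in the function to NewReg.
  MRI.replaceRegWith(OldReg, NewReg);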
MBB
MachineBasicBlock & MBB
Definition: AArch64SLSHardening.cpp:74
llvm::MachineBasicBlock::replaceSuccessor
void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New)
Replace successor OLD with NEW and update probability info.
Definition: MachineBasicBlock.cpp:804
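A sketch of retargeting a CFG edge (OldSucc and NewSucc are illustrative):
  // Update MBB's successor list and branch probabilities.
  MBB.replaceSuccessor(OldSucc, NewSucc);
  // Note: the branch instructions themselves still reference OldSucc;
  // their block operands must be retargeted separately, e.g. with
  // MachineOperand::setMBB(NewSucc) on each terminator that jumps there.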
llvm::MachineFunction::getFunction
Function & getFunction()
Return the LLVM function that this machine code represents.
Definition: MachineFunction.h:521
llvm::TargetRegisterInfo::getRegSizeInBits
unsigned getRegSizeInBits(const TargetRegisterClass &RC) const
Return the size in bits of a register from class RC.
Definition: TargetRegisterInfo.h:274
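An illustrative query, assuming X86-target code with TRI available:
  // GR64 registers are 64 bits wide on x86-64.
  unsigned Bits = TRI->getRegSizeInBits(X86::GR64RegClass);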
llvm::MachineFunction::getTarget
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Definition: MachineFunction.h:551
llvm::DenseMapBase< SmallDenseMap< KeyT, ValueT, 4, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::MachineBasicBlock::insert
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
Definition: MachineBasicBlock.cpp:1335
llvm::ilist_iterator
Iterator for intrusive lists based on ilist_node.
Definition: ilist_iterator.h:57
llvm::sort
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1446
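A minimal example (the vector contents are arbitrary):
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  llvm::SmallVector<unsigned, 8> Regs = {5, 1, 3};
  llvm::sort(Regs.begin(), Regs.end()); // thin wrapper over std::sort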
PASS_KEY
#define PASS_KEY
Definition: X86SpeculativeLoadHardening.cpp:64
llvm::TargetMachine::getCodeModel
CodeModel::Model getCodeModel() const
Returns the code model.
Definition: TargetMachine.cpp:74
llvm::MachineBasicBlock::isLayoutSuccessor
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
Definition: MachineBasicBlock.cpp:935
llvm::X86::getCMovOpcode
unsigned getCMovOpcode(unsigned RegBytes, bool HasMemoryOperand=false)
Return a cmov opcode for the given register size in bytes, and operand type.
Definition: X86InstrInfo.cpp:2791
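An illustrative lookup (the surrounding builder code is assumed):
  // Register-register CMOV for an 8-byte (64-bit) GPR, i.e. CMOV64rr.
  unsigned CMovOp = llvm::X86::getCMovOpcode(/*RegBytes=*/8,
                                             /*HasMemoryOperand=*/false);
  // The opcode can then be passed to TII->get(CMovOp) when building the cmov.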
llvm::MachineBasicBlock::normalizeSuccProbs
void normalizeSuccProbs()
Normalize probabilities of all successors so that the sum of them becomes one.
Definition: MachineBasicBlock.h:563
llvm::MachineSSAUpdater
MachineSSAUpdater - This class updates SSA form for a set of virtual registers defined in multiple bl...
Definition: MachineSSAUpdater.h:34
llvm::MachineInstrBuilder::addSym
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
Definition: MachineInstrBuilder.h:268
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY, "X86 speculative load hardener", false, false) INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass
SmallVector.h
llvm::MachineBasicBlock::begin
iterator begin()
Definition: MachineBasicBlock.h:268
MachineInstrBuilder.h
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:329
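A hedged sketch of the block-insertion overload of BuildMI, assuming MBB, InsertPt and TII from a MachineFunction pass:
  // Emit a bare LFENCE before InsertPt; this is the kind of heavyweight
  // fence the pass falls back to when the -lfence style of hardening is chosen.
  llvm::BuildMI(MBB, InsertPt, llvm::DebugLoc(), TII->get(llvm::X86::LFENCE));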
llvm::SmallPtrSetImplBase::empty
LLVM_NODISCARD bool empty() const
Definition: SmallPtrSet.h:91
llvm::MachineOperand::setMBB
void setMBB(MachineBasicBlock *MBB)
Definition: MachineOperand.h:686
llvm::SmallSet::clear
void clear()
Definition: SmallSet.h:218
EnablePostLoadHardening
static cl::opt< bool > EnablePostLoadHardening(PASS_KEY "-post-load", cl::desc("Harden the value loaded *after* it is loaded by " "flushing the loaded bits to 1. This is hard to do " "in general but can be done easily for GPRs."), cl::init(true), cl::Hidden)
ScopeExit.h
llvm::SmallVectorImpl< MachineInstr * >
MachineOperand.h
EnableSpeculativeLoadHardening
static cl::opt< bool > EnableSpeculativeLoadHardening("x86-speculative-load-hardening", cl::desc("Force enable speculative load hardening"), cl::init(false), cl::Hidden)
llvm::SmallPtrSetImpl
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:343
llvm::SmallSetVector
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:307
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::DebugLoc
A debug info location.
Definition: DebugLoc.h:33
llvm::cl::desc
Definition: CommandLine.h:411
raw_ostream.h
isEFLAGSLive
static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const TargetRegisterInfo &TRI)
Definition: X86SpeculativeLoadHardening.cpp:1213
MachineFunction.h
X86InstrInfo.h
llvm::MachineInstrBundleIterator< MachineInstr >
isEFLAGSDefLive
static bool isEFLAGSDefLive(const MachineInstr &MI)
Definition: X86SpeculativeLoadHardening.cpp:1206
TargetRegisterInfo.h
llvm::MachineBasicBlock::getName
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
Definition: MachineBasicBlock.cpp:311
Debug.h
llvm::MachineBasicBlock::end
iterator end()
Definition: MachineBasicBlock.h:270
hasVulnerableLoad
static bool hasVulnerableLoad(MachineFunction &MF)
Helper to scan a function for loads vulnerable to misspeculation that we want to harden.
Definition: X86SpeculativeLoadHardening.cpp:372
llvm::X86::COND_NE
@ COND_NE
Definition: X86BaseInfo.h:86
SmallSet.h
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38