File: lib/Target/X86/X86SpeculativeLoadHardening.cpp
Warning: line 2257, column 10: The right operand of '==' is a garbage value due to array index out of bounds

//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// Provide a pass which mitigates speculative execution attacks which operate
/// by speculating incorrectly past some predicate (a type check, bounds check,
/// or other condition) to reach a load with invalid inputs and leak the data
/// accessed by that load using a side channel out of the speculative domain.
///
/// For details on the attacks, see the first variant in both the Project Zero
/// writeup and the Spectre paper:
/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
/// https://spectreattack.com/spectre.pdf
///
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define PASS_KEY "x86-slh"
#define DEBUG_TYPE PASS_KEY

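// Note that PASS_KEY is a plain string literal, so the option names below are
// built via adjacent string literal concatenation, e.g.:
// ```
//   PASS_KEY "-lfence"     // yields "x86-slh-lfence"
//   PASS_KEY "-post-load"  // yields "x86-slh-post-load"
// ```
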
STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
STATISTIC(NumAddrRegsHardened,
          "Number of address mode used registers hardened");
STATISTIC(NumPostLoadRegsHardened,
          "Number of post-load register values hardened");
STATISTIC(NumCallsOrJumpsHardened,
          "Number of calls or jumps requiring extra hardening");
STATISTIC(NumInstsInserted, "Number of instructions inserted");
STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");

static cl::opt<bool> EnableSpeculativeLoadHardening(
    "x86-speculative-load-hardening",
    cl::desc("Force enable speculative load hardening"), cl::init(false),
    cl::Hidden);

static cl::opt<bool> HardenEdgesWithLFENCE(
    PASS_KEY "-lfence",
    cl::desc(
        "Use LFENCE along each conditional edge to harden against speculative "
        "loads rather than conditional movs and poisoned pointers."),
    cl::init(false), cl::Hidden);

static cl::opt<bool> EnablePostLoadHardening(
    PASS_KEY "-post-load",
    cl::desc("Harden the value loaded *after* it is loaded by "
             "flushing the loaded bits to 1. This is hard to do "
             "in general but can be done easily for GPRs."),
    cl::init(true), cl::Hidden);

static cl::opt<bool> FenceCallAndRet(
    PASS_KEY "-fence-call-and-ret",
    cl::desc("Use a full speculation fence to harden both call and ret edges "
             "rather than a lighter weight mitigation."),
    cl::init(false), cl::Hidden);

static cl::opt<bool> HardenInterprocedurally(
    PASS_KEY "-ip",
    cl::desc("Harden interprocedurally by passing our state in and out of "
             "functions in the high bits of the stack pointer."),
    cl::init(true), cl::Hidden);

static cl::opt<bool>
    HardenLoads(PASS_KEY "-loads",
                cl::desc("Sanitize loads from memory. When disabled, no "
                         "significant security is provided."),
                cl::init(true), cl::Hidden);

static cl::opt<bool> HardenIndirectCallsAndJumps(
    PASS_KEY "-indirect",
    cl::desc("Harden indirect calls and jumps against using speculatively "
             "stored attacker controlled addresses. This is designed to "
             "mitigate Spectre v1.2 style attacks."),
    cl::init(true), cl::Hidden);

namespace {

class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
public:
  X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) {
    initializeX86SpeculativeLoadHardeningPassPass(
        *PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "X86 speculative load hardening";
  }
  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Pass identification, replacement for typeid.
  static char ID;

private:
  /// The information about a block's conditional terminators needed to trace
  /// our predicate state through the exiting edges.
  struct BlockCondInfo {
    MachineBasicBlock *MBB;

    // We mostly have one conditional branch, and in extremely rare cases have
    // two. Three and more are so rare as to be unimportant for compile time.
    SmallVector<MachineInstr *, 2> CondBrs;

    MachineInstr *UncondBr;
  };

  /// Manages the predicate state traced through the program.
  struct PredState {
    unsigned InitialReg;
    unsigned PoisonReg;

    const TargetRegisterClass *RC;
    MachineSSAUpdater SSA;

    PredState(MachineFunction &MF, const TargetRegisterClass *RC)
        : RC(RC), SSA(MF) {}
  };

  const X86Subtarget *Subtarget;
  MachineRegisterInfo *MRI;
  const X86InstrInfo *TII;
  const TargetRegisterInfo *TRI;

  Optional<PredState> PS;

  void hardenEdgesWithLFENCE(MachineFunction &MF);

  SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);

  SmallVector<MachineInstr *, 16>
  tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);

  void unfoldCallAndJumpLoads(MachineFunction &MF);

  SmallVector<MachineInstr *, 16>
  tracePredStateThroughIndirectBranches(MachineFunction &MF);

  void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);

  unsigned saveEFLAGS(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator InsertPt, DebugLoc Loc);
  void restoreEFLAGS(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
                     unsigned OFReg);

  void mergePredStateIntoSP(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
                            unsigned PredStateReg);
  unsigned extractPredStateFromSP(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator InsertPt,
                                  DebugLoc Loc);

  void
  hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
                 MachineOperand &IndexMO,
                 SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
  MachineInstr *
  sinkPostLoadHardenedInst(MachineInstr &MI,
                           SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
  bool canHardenRegister(unsigned Reg);
  unsigned hardenValueInRegister(unsigned Reg, MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator InsertPt,
                                 DebugLoc Loc);
  unsigned hardenPostLoad(MachineInstr &MI);
  void hardenReturnInstr(MachineInstr &MI);
  void tracePredStateThroughCall(MachineInstr &MI);
  void hardenIndirectCallOrJumpInstr(
      MachineInstr &MI,
      SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg);
};

} // end anonymous namespace

char X86SpeculativeLoadHardeningPass::ID = 0;

void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  MachineFunctionPass::getAnalysisUsage(AU);
}

static MachineBasicBlock &splitEdge(MachineBasicBlock &MBB,
                                    MachineBasicBlock &Succ, int SuccCount,
                                    MachineInstr *Br, MachineInstr *&UncondBr,
                                    const X86InstrInfo &TII) {
  assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");

  MachineFunction &MF = *MBB.getParent();

  MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();

  // We have to insert the new block immediately after the current one as we
  // don't know what layout-successor relationships the successor has and we
  // may not be able to (and generally don't want to) try to fix those up.
  MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);

  // Update the branch instruction if necessary.
  if (Br) {
    assert(Br->getOperand(0).getMBB() == &Succ &&
           "Didn't start with the right target!");
    Br->getOperand(0).setMBB(&NewMBB);

    // If this successor was reached through a branch rather than fallthrough,
    // we might have *broken* fallthrough and so need to inject a new
    // unconditional branch.
    if (!UncondBr) {
      MachineBasicBlock &OldLayoutSucc =
          *std::next(MachineFunction::iterator(&NewMBB));
      assert(MBB.isSuccessor(&OldLayoutSucc) &&
             "Without an unconditional branch, the old layout successor should "
             "be an actual successor!");
      auto BrBuilder =
          BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
      // Update the unconditional branch now that we've added one.
      UncondBr = &*BrBuilder;
    }

    // Insert unconditional "jump Succ" instruction in the new block if
    // necessary.
    if (!NewMBB.isLayoutSuccessor(&Succ)) {
      SmallVector<MachineOperand, 4> Cond;
      TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
    }
  } else {
    assert(!UncondBr &&
           "Cannot have a branchless successor and an unconditional branch!");
    assert(NewMBB.isLayoutSuccessor(&Succ) &&
           "A non-branch successor must have been a layout successor before "
           "and now is a layout successor of the new block.");
  }

  // If this is the only edge to the successor, we can just replace it in the
  // CFG. Otherwise we need to add a new entry in the CFG for the new
  // successor.
  if (SuccCount == 1) {
    MBB.replaceSuccessor(&Succ, &NewMBB);
  } else {
    MBB.splitSuccessor(&Succ, &NewMBB);
  }

  // Hook up the edge from the new basic block to the old successor in the CFG.
  NewMBB.addSuccessor(&Succ);

  // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
  for (MachineInstr &MI : Succ) {
    if (!MI.isPHI())
      break;
    for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
         OpIdx += 2) {
      MachineOperand &OpV = MI.getOperand(OpIdx);
      MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
      assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
      if (OpMBB.getMBB() != &MBB)
        continue;

      // If this is the last edge to the successor, just replace MBB in the
      // PHI.
      if (SuccCount == 1) {
        OpMBB.setMBB(&NewMBB);
        break;
      }

      // Otherwise, append a new pair of operands for the new incoming edge.
      MI.addOperand(MF, OpV);
      MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
      break;
    }
  }

  // Inherit live-ins from the successor.
  for (auto &LI : Succ.liveins())
    NewMBB.addLiveIn(LI);

  LLVM_DEBUG(dbgs() << "  Split edge from '" << MBB.getName() << "' to '"
                    << Succ.getName() << "'.\n");
  return NewMBB;
}
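
// A rough sketch of what splitEdge does (hand-written illustration, not pass
// output): given a conditional edge from %bb.0 to %bb.2,
// ```
//   bb.0: ... ; jCC %bb.2 ; jmp %bb.1
// ```
// a fresh block is interposed on that edge and the branch is retargeted:
// ```
//   bb.0: ... ; jCC %bb.3 ; jmp %bb.1
//   bb.3: jmp %bb.2        ; new block whose only predecessor is %bb.0
// ```
// PHI operands in %bb.2 that referenced %bb.0 are updated (or duplicated) to
// reference %bb.3 instead.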

/// Remove duplicate PHI operands to leave the PHI in a canonical and
/// predictable form.
///
/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
/// isn't what you might expect. We may have multiple entries in PHI nodes for
/// a single predecessor. This makes CFG-updating extremely complex, so here we
/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
/// one entry per predecessor, regardless of how many edges there are.
static void canonicalizePHIOperands(MachineFunction &MF) {
  SmallPtrSet<MachineBasicBlock *, 4> Preds;
  SmallVector<int, 4> DupIndices;
  for (auto &MBB : MF)
    for (auto &MI : MBB) {
      if (!MI.isPHI())
        break;

      // First we scan the operands of the PHI looking for duplicate entries
      // for a particular predecessor. We retain the operand index of each
      // duplicate entry found.
      for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
           OpIdx += 2)
        if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
          DupIndices.push_back(OpIdx);

      // Now walk the duplicate indices, removing both the block and value.
      // Note that these are stored as a vector making this element-wise
      // removal potentially quadratic.
      //
      // FIXME: It is really frustrating that we have to use a quadratic
      // removal algorithm here. There should be a better way, but the use-def
      // updates required make that impossible using the public API.
      //
      // Note that we have to process these backwards so that we don't
      // invalidate other indices with each removal.
      while (!DupIndices.empty()) {
        int OpIdx = DupIndices.pop_back_val();
        // Remove both the block and value operand, again in reverse order to
        // preserve indices.
        MI.RemoveOperand(OpIdx + 1);
        MI.RemoveOperand(OpIdx);
      }

      Preds.clear();
    }
}
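
// Illustrative example (hand-written, simplified MIR): a PHI with duplicate
// entries for the same predecessor, such as
// ```
//   %v = PHI %a, %bb.1, %b, %bb.1, %c, %bb.2
// ```
// is rewritten above to keep only the first entry for each predecessor:
// ```
//   %v = PHI %a, %bb.1, %c, %bb.2
// ```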

/// Helper to scan a function for loads vulnerable to misspeculation that we
/// want to harden.
///
/// We use this to avoid making changes to functions where there is nothing we
/// need to do to harden against misspeculation.
static bool hasVulnerableLoad(MachineFunction &MF) {
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      // Loads within this basic block after an LFENCE are not at risk of
      // speculatively executing with invalid predicates from prior control
      // flow. So break out of this block but continue scanning the function.
      if (MI.getOpcode() == X86::LFENCE)
        break;

      // Looking for loads only.
      if (!MI.mayLoad())
        continue;

      // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
      if (MI.getOpcode() == X86::MFENCE)
        continue;

      // We found a load.
      return true;
    }
  }

  // No loads found.
  return false;
}

bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
    MachineFunction &MF) {
  LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
                    << " **********\n");

  // Only run if this pass is forced enabled or we detect the relevant function
  // attribute requesting SLH.
  if (!EnableSpeculativeLoadHardening &&
      !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
    return false;

  Subtarget = &MF.getSubtarget<X86Subtarget>();
  MRI = &MF.getRegInfo();
  TII = Subtarget->getInstrInfo();
  TRI = Subtarget->getRegisterInfo();

  // FIXME: Support for 32-bit.
  PS.emplace(MF, &X86::GR64_NOSPRegClass);

  if (MF.begin() == MF.end())
    // Nothing to do for a degenerate empty function...
    return false;

  // We support an alternative hardening technique based on a debug flag.
  if (HardenEdgesWithLFENCE) {
    hardenEdgesWithLFENCE(MF);
    return true;
  }

  // Create a dummy debug loc to use for all the generated code here.
  DebugLoc Loc;

  MachineBasicBlock &Entry = *MF.begin();
  auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());

  // Do a quick scan to see if we have any checkable loads.
  bool HasVulnerableLoad = hasVulnerableLoad(MF);

  // See if we have any conditional branching blocks that we will need to trace
  // predicate state through.
  SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);

  // If we have no interesting conditions or loads, nothing to do here.
  if (!HasVulnerableLoad && Infos.empty())
    return true;

  // The poison value is required to be an all-ones value for many aspects of
  // this mitigation.
  const int PoisonVal = -1;
  PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
  BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
      .addImm(PoisonVal);
  ++NumInstsInserted;

  // If we have loads being hardened and we've asked for call and ret edges to
  // get a full fence-based mitigation, inject that fence.
  if (HasVulnerableLoad && FenceCallAndRet) {
    // We need to insert an LFENCE at the start of the function to suspend any
    // incoming misspeculation from the caller. This helps two-fold: the caller
    // may not have been protected as this code has been, and this code gets to
    // not take any specific action to protect across calls.
    // FIXME: We could skip this for functions which unconditionally return
    // a constant.
    BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
  }

  // If we guarded the entry with an LFENCE and have no conditionals to protect
  // in blocks, then we're done.
  if (FenceCallAndRet && Infos.empty())
    // We may have changed the function's code at this point to insert fences.
    return true;

  if (HardenInterprocedurally && !FenceCallAndRet) {
    // Set up the predicate state by extracting it from the incoming stack
    // pointer so we pick up any misspeculation in our caller.
    PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
  } else {
    // Otherwise, just build the predicate state itself by zeroing a register
    // as we don't need any initial state.
    PS->InitialReg = MRI->createVirtualRegister(PS->RC);
    unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
    auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
                         PredStateSubReg);
    ++NumInstsInserted;
    MachineOperand *ZeroEFLAGSDefOp =
        ZeroI->findRegisterDefOperand(X86::EFLAGS);
    assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
           "Must have an implicit def of EFLAGS!");
    ZeroEFLAGSDefOp->setIsDead(true);
    BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
            PS->InitialReg)
        .addImm(0)
        .addReg(PredStateSubReg)
        .addImm(X86::sub_32bit);
  }
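
  // For exposition, a sketch of the interprocedural convention relied on just
  // above (the exact sequences live in mergePredStateIntoSP and
  // extractPredStateFromSP; this is a simplified, hand-written rendering):
  // the all-zeros/all-ones predicate state travels in the otherwise-unused
  // high bits of %rsp across calls and returns, roughly:
  // ```
  //   shlq $47, %rax      # state shifted into the high bits...
  //   orq  %rax, %rsp     # ...merged into the stack pointer before a call
  //   ...
  //   movq %rsp, %rax     # on entry or after return, recover the state
  //   sarq $63, %rax      # arithmetic shift smears the sign bit: 0 or -1
  // ```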

  // We're going to need to trace predicate state throughout the function's
  // CFG. Prepare for this by setting up our initial state of PHIs with unique
  // predecessor entries and all the initial predicate state.
  canonicalizePHIOperands(MF);

  // Track the updated values in an SSA updater to rewrite into SSA form at the
  // end.
  PS->SSA.Initialize(PS->InitialReg);
  PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);

  // Trace through the CFG.
  auto CMovs = tracePredStateThroughCFG(MF, Infos);

  // We may also enter basic blocks in this function via exception handling
  // control flow. Here, if we are hardening interprocedurally, we need to
  // re-capture the predicate state from the throwing code. In the Itanium ABI,
  // the throw will always look like a call to __cxa_throw and will have the
  // predicate state in the stack pointer, so extract fresh predicate state
  // from the stack pointer and make it available in SSA.
  // FIXME: Handle non-Itanium ABI EH models.
  if (HardenInterprocedurally) {
    for (MachineBasicBlock &MBB : MF) {
      assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
      assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
      assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
      if (!MBB.isEHPad())
        continue;
      PS->SSA.AddAvailableValue(
          &MBB,
          extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
    }
  }

  if (HardenIndirectCallsAndJumps) {
    // If we are going to harden calls and jumps we need to unfold their memory
    // operands.
    unfoldCallAndJumpLoads(MF);

    // Then we trace predicate state through the indirect branches.
    auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
    CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
  }

  // Now that we have the predicate state available at the start of each block
  // in the CFG, trace it through each block, hardening vulnerable instructions
  // as we go.
  tracePredStateThroughBlocksAndHarden(MF);

  // Now rewrite all the uses of the pred state using the SSA updater to insert
  // PHIs connecting the state between blocks along the CFG edges.
  for (MachineInstr *CMovI : CMovs)
    for (MachineOperand &Op : CMovI->operands()) {
      if (!Op.isReg() || Op.getReg() != PS->InitialReg)
        continue;

      PS->SSA.RewriteUse(Op);
    }

  LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n";
             MF.dump(); dbgs() << "\n"; MF.verify(this));
  return true;
}

/// Implements the naive hardening approach of putting an LFENCE after every
/// potentially mis-predicted control flow construct.
///
/// We include this as an alternative mostly for the purpose of comparison. The
/// performance impact of this is expected to be extremely severe and not
/// practical for any real-world users.
void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
    MachineFunction &MF) {
  // First, we scan the function looking for blocks that are reached along
  // edges that we might want to harden.
  SmallSetVector<MachineBasicBlock *, 8> Blocks;
  for (MachineBasicBlock &MBB : MF) {
    // If there are no or only one successor, nothing to do here.
    if (MBB.succ_size() <= 1)
      continue;

    // Skip blocks unless their terminators start with a branch. Other
    // terminators don't seem interesting for guarding against misspeculation.
    auto TermIt = MBB.getFirstTerminator();
    if (TermIt == MBB.end() || !TermIt->isBranch())
      continue;

    // Add all the non-EH-pad successors to the blocks we want to harden. We
    // skip EH pads because there isn't really a condition of interest on
    // entering.
    for (MachineBasicBlock *SuccMBB : MBB.successors())
      if (!SuccMBB->isEHPad())
        Blocks.insert(SuccMBB);
  }

  for (MachineBasicBlock *MBB : Blocks) {
    auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
    BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
  }
}
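
// For exposition (hand-written sketch): in this mode, a block ending in
// ```
//   jne .LBB0_2
//   # falls through to .LBB0_1
// ```
// gets an LFENCE at the top of each non-EH-pad successor, cutting off any
// speculation past the branch:
// ```
// .LBB0_1: lfence ; ...
// .LBB0_2: lfence ; ...
// ```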

SmallVector<X86SpeculativeLoadHardeningPass::BlockCondInfo, 16>
X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
  SmallVector<BlockCondInfo, 16> Infos;

  // Walk the function and build up a summary for each block's conditions that
  // we need to trace through.
  for (MachineBasicBlock &MBB : MF) {
    // If there are no or only one successor, nothing to do here.
    if (MBB.succ_size() <= 1)
      continue;

    // We want to reliably handle any conditional branch terminators in the
    // MBB, so we manually analyze the branch. We can handle all of the
    // permutations here, including ones that analyzeBranch cannot.
    //
    // The approach is to walk backwards across the terminators, resetting at
    // any unconditional non-indirect branch, and track all conditional edges
    // to basic blocks as well as the fallthrough or unconditional successor
    // edge. For each conditional edge, we track the target and the opposite
    // condition code in order to inject a "no-op" cmov into that successor
    // that will harden the predicate. For the fallthrough/unconditional
    // edge, we inject a separate cmov for each conditional branch with
    // matching condition codes. This effectively implements an "and" of the
    // condition flags, even if there isn't a single condition flag that would
    // directly implement that. We don't bother trying to optimize either of
    // these cases because if such an optimization is possible, LLVM should
    // have optimized the conditional *branches* in that way already to reduce
    // instruction count. This late, we simply assume the minimal number of
    // branch instructions is being emitted and use that to guide our cmov
    // insertion.

    BlockCondInfo Info = {&MBB, {}, nullptr};

    // Now walk backwards through the terminators and build up successors they
    // reach and the conditions.
    for (MachineInstr &MI : llvm::reverse(MBB)) {
      // Once we've handled all the terminators, we're done.
      if (!MI.isTerminator())
        break;

      // If we see a non-branch terminator, we can't handle anything so bail.
      if (!MI.isBranch()) {
        Info.CondBrs.clear();
        break;
      }

      // If we see an unconditional branch, reset our state, clear any
      // fallthrough, and set this as the "else" successor.
      if (MI.getOpcode() == X86::JMP_1) {
        Info.CondBrs.clear();
        Info.UncondBr = &MI;
        continue;
      }

      // If we get an invalid condition, we have an indirect branch or some
      // other unanalyzable "fallthrough" case. We model this as a nullptr for
      // the destination so we can still guard any conditional successors.
      // Consider code sequences like:
      // ```
      //   jCC L1
      //   jmpq *%rax
      // ```
      // We still want to harden the edge to `L1`.
      if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
        Info.CondBrs.clear();
        Info.UncondBr = &MI;
        continue;
      }

      // We have a vanilla conditional branch, add it to our list.
      Info.CondBrs.push_back(&MI);
    }
    if (Info.CondBrs.empty()) {
      ++NumBranchesUntraced;
      LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
                 MBB.dump());
      continue;
    }

    Infos.push_back(Info);
  }

  return Infos;
}
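
// A worked example (hand-written) of the backwards terminator walk above: for
// a block ending in
// ```
//   jl  .Lless
//   jg  .Lgreater
//   jmp .Lother
// ```
// the walk first records `jmp .Lother` as UncondBr, then collects `jg` and
// `jl` into CondBrs. A trailing `jmpq *%rax` in place of the `jmp` would also
// be recorded as UncondBr, but with no recoverable target; it is modeled as a
// null unconditional successor in tracePredStateThroughCFG below.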

/// Trace the predicate state through the CFG, instrumenting each conditional
/// branch such that misspeculation through an edge will poison the predicate
/// state.
///
/// Returns the list of inserted CMov instructions so that they can have their
/// uses of the predicate state rewritten into proper SSA form once it is
/// complete.
SmallVector<MachineInstr *, 16>
X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
    MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
  // Collect the inserted cmov instructions so we can rewrite their uses of the
  // predicate state into SSA form.
  SmallVector<MachineInstr *, 16> CMovs;

  // Now walk all of the basic blocks looking for ones that end in conditional
  // jumps where we need to update this register along each edge.
  for (const BlockCondInfo &Info : Infos) {
    MachineBasicBlock &MBB = *Info.MBB;
    const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
    MachineInstr *UncondBr = Info.UncondBr;

    LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
                      << "\n");
    ++NumCondBranchesTraced;

    // Compute the non-conditional successor as either the target of any
    // unconditional branch or the layout successor.
    MachineBasicBlock *UncondSucc =
        UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
                        ? UncondBr->getOperand(0).getMBB()
                        : nullptr)
                 : &*std::next(MachineFunction::iterator(&MBB));

    // Count how many edges there are to any given successor.
    SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
    if (UncondSucc)
      ++SuccCounts[UncondSucc];
    for (auto *CondBr : CondBrs)
      ++SuccCounts[CondBr->getOperand(0).getMBB()];

    // A lambda to insert cmov instructions into a block checking all of the
    // condition codes in a sequence.
    auto BuildCheckingBlockForSuccAndConds =
        [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
            MachineInstr *Br, MachineInstr *&UncondBr,
            ArrayRef<X86::CondCode> Conds) {
          // First, we split the edge to insert the checking block into a safe
          // location.
          auto &CheckingMBB =
              (SuccCount == 1 && Succ.pred_size() == 1)
                  ? Succ
                  : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);

          bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
          if (!LiveEFLAGS)
            CheckingMBB.addLiveIn(X86::EFLAGS);

          // Now insert the cmovs to implement the checks.
          auto InsertPt = CheckingMBB.begin();
          assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
                 "Should never have a PHI in the initial checking block as it "
                 "always has a single predecessor!");

          // We will wire each cmov to each other, but need to start with the
          // incoming pred state.
          unsigned CurStateReg = PS->InitialReg;

          for (X86::CondCode Cond : Conds) {
            int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
            auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);

            unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
            // Note that we intentionally use an empty debug location so that
            // this picks up the preceding location.
            auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
                                 TII->get(CMovOp), UpdatedStateReg)
                             .addReg(CurStateReg)
                             .addReg(PS->PoisonReg)
                             .addImm(Cond);
            // If this is the last cmov and the EFLAGS weren't originally
            // live-in, mark them as killed.
            if (!LiveEFLAGS && Cond == Conds.back())
              CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);

            ++NumInstsInserted;
            LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump();
                       dbgs() << "\n");

            // The first one of the cmovs will be using the top level
            // `PredStateReg` and need to get rewritten into SSA form.
            if (CurStateReg == PS->InitialReg)
              CMovs.push_back(&*CMovI);

            // The next cmov should start from this one's def.
            CurStateReg = UpdatedStateReg;
          }

          // And put the last one into the available values for SSA form of our
          // predicate state.
          PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
        };
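
    // For exposition (hand-written sketch of the pattern the lambda above
    // emits, not actual pass output): if the original block ends in
    // ```
    //   testq %rdi, %rdi
    //   je   .LBB0_2        # taken when ZF is set
    // ```
    // the checking block on the taken edge receives a cmov on the *opposite*
    // condition, so it only fires under misspeculation:
    // ```
    // .LBB0_2:
    //   cmovneq %rX, %rS    # %rX holds the all-ones poison, %rS the
    //                       # predicate state; register names here are
    //                       # placeholders for allocator-chosen vregs
    // ```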

    std::vector<X86::CondCode> UncondCodeSeq;
    for (auto *CondBr : CondBrs) {
      MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
      int &SuccCount = SuccCounts[&Succ];

      X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
      X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
      UncondCodeSeq.push_back(Cond);

      BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
                                        {InvCond});

      // Decrement the successor count now that we've split one of the edges.
      // We need to keep the count of edges to the successor accurate in order
      // to know above when to *replace* the successor in the CFG vs. just
      // adding the new successor.
      --SuccCount;
    }

    // Since we may have split edges and changed the number of successors,
    // normalize the probabilities. This avoids doing it each time we split an
    // edge.
    MBB.normalizeSuccProbs();

    // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
    // need to intersect the other condition codes. We can do this by just
    // doing a cmov for each one.
    if (!UncondSucc)
      // If we have no fallthrough to protect (perhaps it is an indirect jump?)
      // just skip this and continue.
      continue;

    assert(SuccCounts[UncondSucc] == 1 &&
           "We should never have more than one edge to the unconditional "
           "successor at this point because every other edge must have been "
           "split above!");

    // Sort and unique the codes to minimize them.
    llvm::sort(UncondCodeSeq);
    UncondCodeSeq.erase(std::unique(UncondCodeSeq.begin(), UncondCodeSeq.end()),
                        UncondCodeSeq.end());

    // Build a checking version of the successor.
    BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
                                      UncondBr, UncondBr, UncondCodeSeq);
  }

  return CMovs;
}
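
// For exposition (hand-written sketch): for the fallthrough successor, the
// recorded non-inverted condition codes are applied as a chain of cmovs,
// effectively AND-ing together "none of the branches should have been taken".
// Given
// ```
//   ja .L1
//   jb .L2
//   # fallthrough to .L3
// ```
// the checking code for .L3 looks roughly like:
// ```
// .L3:
//   cmovaq %rX, %rS    # poison if the first branch's condition held
//   cmovbq %rX, %rS    # poison if the second branch's condition held
// ```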
836 | ||||
837 | /// Compute the register class for the unfolded load. | |||
838 | /// | |||
839 | /// FIXME: This should probably live in X86InstrInfo, potentially by adding | |||
840 | /// a way to unfold into a newly created vreg rather than requiring a register | |||
841 | /// input. | |||
842 | static const TargetRegisterClass * | |||
843 | getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII, | |||
844 | unsigned Opcode) { | |||
845 | unsigned Index; | |||
846 | unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold( | |||
847 | Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index); | |||
848 | const MCInstrDesc &MCID = TII.get(UnfoldedOpc); | |||
849 | return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF); | |||
850 | } | |||
851 | ||||
852 | void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads( | |||
853 | MachineFunction &MF) { | |||
854 | for (MachineBasicBlock &MBB : MF) | |||
855 | for (auto MII = MBB.instr_begin(), MIE = MBB.instr_end(); MII != MIE;) { | |||
856 | // Grab a reference and increment the iterator so we can remove this | |||
857 | // instruction if needed without disturbing the iteration. | |||
858 | MachineInstr &MI = *MII++; | |||
859 | ||||
860 | // Must either be a call or a branch. | |||
861 | if (!MI.isCall() && !MI.isBranch()) | |||
862 | continue; | |||
863 | // We only care about loading variants of these instructions. | |||
864 | if (!MI.mayLoad()) | |||
865 | continue; | |||
866 | ||||
867 | switch (MI.getOpcode()) { | |||
868 | default: { | |||
869 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-slh")) { dbgs() << "ERROR: Found an unexpected loading branch or call " "instruction:\n"; MI.dump(); dbgs() << "\n"; } } while (false) | |||
870 | dbgs() << "ERROR: Found an unexpected loading branch or call "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-slh")) { dbgs() << "ERROR: Found an unexpected loading branch or call " "instruction:\n"; MI.dump(); dbgs() << "\n"; } } while (false) | |||
871 | "instruction:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-slh")) { dbgs() << "ERROR: Found an unexpected loading branch or call " "instruction:\n"; MI.dump(); dbgs() << "\n"; } } while (false) | |||
872 | MI.dump(); dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-slh")) { dbgs() << "ERROR: Found an unexpected loading branch or call " "instruction:\n"; MI.dump(); dbgs() << "\n"; } } while (false); | |||
873 | report_fatal_error("Unexpected loading branch or call!"); | |||
874 | } | |||
875 | ||||
876 | case X86::FARCALL16m: | |||
877 | case X86::FARCALL32m: | |||
878 | case X86::FARCALL64: | |||
879 | case X86::FARJMP16m: | |||
880 | case X86::FARJMP32m: | |||
881 | case X86::FARJMP64: | |||
882 | // We cannot mitigate far jumps or calls, but we also don't expect them | |||
883 | // to be vulnerable to Spectre v1.2 style attacks. | |||
884 | continue; | |||
885 | ||||
886 | case X86::CALL16m: | |||
887 | case X86::CALL16m_NT: | |||
888 | case X86::CALL32m: | |||
889 | case X86::CALL32m_NT: | |||
890 | case X86::CALL64m: | |||
891 | case X86::CALL64m_NT: | |||
892 | case X86::JMP16m: | |||
893 | case X86::JMP16m_NT: | |||
894 | case X86::JMP32m: | |||
895 | case X86::JMP32m_NT: | |||
896 | case X86::JMP64m: | |||
897 | case X86::JMP64m_NT: | |||
898 | case X86::TAILJMPm64: | |||
899 | case X86::TAILJMPm64_REX: | |||
900 | case X86::TAILJMPm: | |||
901 | case X86::TCRETURNmi64: | |||
902 | case X86::TCRETURNmi: { | |||
903 | // Use the generic unfold logic now that we know we're dealing with | |||
904 | // expected instructions. | |||
905 | // FIXME: We don't have test coverage for all of these! | |||
906 | auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode()); | |||
907 | if (!UnfoldedRC) { | |||
908 | LLVM_DEBUG(dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-slh")) { dbgs() << "ERROR: Unable to unfold load from instruction:\n" ; MI.dump(); dbgs() << "\n"; } } while (false) | |||
909 | << "ERROR: Unable to unfold load from instruction:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-slh")) { dbgs() << "ERROR: Unable to unfold load from instruction:\n" ; MI.dump(); dbgs() << "\n"; } } while (false) | |||
910 | MI.dump(); dbgs() << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("x86-slh")) { dbgs() << "ERROR: Unable to unfold load from instruction:\n" ; MI.dump(); dbgs() << "\n"; } } while (false); | |||
911 | report_fatal_error("Unable to unfold load!"); | |||
912 | } | |||
913 | unsigned Reg = MRI->createVirtualRegister(UnfoldedRC); | |||
914 | SmallVector<MachineInstr *, 2> NewMIs; | |||
915 | // If we were able to compute an unfolded reg class, any failure here | |||
916 | // is just a programming error so just assert. | |||
917 | bool Unfolded = | |||
918 | TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true, | |||
919 | /*UnfoldStore*/ false, NewMIs); | |||
920 | (void)Unfolded; | |||
921 | assert(Unfolded && | |||
922 | "Computed unfolded register class but failed to unfold"); | |||
923 | // Now stitch the new instructions into place and erase the old one. | |||
924 | for (auto *NewMI : NewMIs) | |||
925 | MBB.insert(MI.getIterator(), NewMI); | |||
926 | MI.eraseFromParent(); | |||
927 | LLVM_DEBUG({ | |||
928 | dbgs() << "Unfolded load successfully into:\n"; | |||
929 | for (auto *NewMI : NewMIs) { | |||
930 | NewMI->dump(); | |||
931 | dbgs() << "\n"; | |||
932 | } | |||
933 | }); | |||
934 | continue; | |||
935 | } | |||
936 | } | |||
937 | llvm_unreachable("Escaped switch with default!"); | |||
938 | } | |||
939 | } | |||
940 | ||||
941 | /// Trace the predicate state through indirect branches, instrumenting them to | |||
942 | /// poison the state if a target is reached that does not match the expected | |||
943 | /// target. | |||
944 | /// | |||
945 | /// This is designed to mitigate Spectre variant 1 attacks where an indirect | |||
946 | /// branch is trained to predict a particular target and then mispredicts that | |||
947 | /// target in a way that can leak data. Despite using an indirect branch, this | |||
948 | /// is really a variant 1 style attack: it does not steer execution to an | |||
949 | /// arbitrary or attacker controlled address, and it does not require any | |||
950 | /// special code executing next to the victim. This attack can also be mitigated | |||
951 | /// through retpolines, but those require either replacing indirect branches | |||
952 | /// with conditional direct branches or lowering them through a device that | |||
953 | /// blocks speculation. This mitigation can replace these retpoline-style | |||
954 | /// mitigations for jump tables and other indirect branches within a function | |||
955 | /// when variant 2 isn't a risk while allowing limited speculation. Indirect | |||
956 | /// calls, however, cannot be mitigated through this technique without changing | |||
957 | /// the ABI in a fundamental way. | |||
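 | /// | |||
 | /// As a sketch (not the exact emitted sequence, which depends on the code | |||
 | /// model and is built below), each target of an indirect branch is | |||
 | /// instrumented roughly as: | |||
 | /// | |||
 | ///   leaq <this block>(%rip), %tmp    # or a direct 32-bit immediate | |||
 | ///   cmpq %tmp, %target_addr | |||
 | ///   cmovneq %poison, %pred_state     # poison the state on a wrong target | |||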
958 | SmallVector<MachineInstr *, 16> | |||
959 | X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches( | |||
960 | MachineFunction &MF) { | |||
961 | // We use the SSAUpdater to insert PHI nodes for the target addresses of | |||
962 | // indirect branches. We don't actually need the full power of the SSA updater | |||
963 | // in this particular case as we always have immediately available values, but | |||
964 | // this avoids us having to re-implement the PHI construction logic. | |||
965 | MachineSSAUpdater TargetAddrSSA(MF); | |||
966 | TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass)); | |||
967 | ||||
968 | // Track which blocks were terminated with an indirect branch. | |||
969 | SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs; | |||
970 | ||||
971 | // We need to know what blocks end up reached via indirect branches. We | |||
972 | // expect this to be a subset of those whose address is taken and so track it | |||
973 | // directly via the CFG. | |||
974 | SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs; | |||
975 | ||||
976 | // Walk all the blocks which end in an indirect branch and make the | |||
977 | // target address available. | |||
978 | for (MachineBasicBlock &MBB : MF) { | |||
979 | // Find the last terminator. | |||
980 | auto MII = MBB.instr_rbegin(); | |||
981 | while (MII != MBB.instr_rend() && MII->isDebugInstr()) | |||
982 | ++MII; | |||
983 | if (MII == MBB.instr_rend()) | |||
984 | continue; | |||
985 | MachineInstr &TI = *MII; | |||
986 | if (!TI.isTerminator() || !TI.isBranch()) | |||
987 | // No terminator or non-branch terminator. | |||
988 | continue; | |||
989 | ||||
990 | unsigned TargetReg; | |||
991 | ||||
992 | switch (TI.getOpcode()) { | |||
993 | default: | |||
994 | // Direct branch or conditional branch (leading to fallthrough). | |||
995 | continue; | |||
996 | ||||
997 | case X86::FARJMP16m: | |||
998 | case X86::FARJMP32m: | |||
999 | case X86::FARJMP64: | |||
1000 | // We cannot mitigate far jumps or calls, but we also don't expect them | |||
1001 | // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks. | |||
1002 | continue; | |||
1003 | ||||
1004 | case X86::JMP16m: | |||
1005 | case X86::JMP16m_NT: | |||
1006 | case X86::JMP32m: | |||
1007 | case X86::JMP32m_NT: | |||
1008 | case X86::JMP64m: | |||
1009 | case X86::JMP64m_NT: | |||
1010 | // Mostly as documentation. | |||
1011 | report_fatal_error("Memory operand jumps should have been unfolded!"); | |||
1012 | ||||
1013 | case X86::JMP16r: | |||
1014 | report_fatal_error( | |||
1015 | "Support for 16-bit indirect branches is not implemented."); | |||
1016 | case X86::JMP32r: | |||
1017 | report_fatal_error( | |||
1018 | "Support for 32-bit indirect branches is not implemented."); | |||
1019 | ||||
1020 | case X86::JMP64r: | |||
1021 | TargetReg = TI.getOperand(0).getReg(); | |||
1022 | } | |||
1023 | ||||
1024 | // We have definitely found an indirect branch. Verify that there are no | |||
1025 | // preceding conditional branches as we don't yet support that. | |||
1026 | if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) { | |||
1027 | return !OtherTI.isDebugInstr() && &OtherTI != &TI; | |||
1028 | })) { | |||
1029 | LLVM_DEBUG({ | |||
1030 | dbgs() << "ERROR: Found other terminators in a block with an indirect " | |||
1031 | "branch! This is not yet supported! Terminator sequence:\n"; | |||
1032 | for (MachineInstr &MI : MBB.terminators()) { | |||
1033 | MI.dump(); | |||
1034 | dbgs() << '\n'; | |||
1035 | } | |||
1036 | }); | |||
1037 | report_fatal_error("Unimplemented terminator sequence!"); | |||
1038 | } | |||
1039 | ||||
1040 | // Make the target register an available value for this block. | |||
1041 | TargetAddrSSA.AddAvailableValue(&MBB, TargetReg); | |||
1042 | IndirectTerminatedMBBs.insert(&MBB); | |||
1043 | ||||
1044 | // Add all the successors to our target candidates. | |||
1045 | for (MachineBasicBlock *Succ : MBB.successors()) | |||
1046 | IndirectTargetMBBs.insert(Succ); | |||
1047 | } | |||
1048 | ||||
1049 | // Keep track of the cmov instructions we insert so we can return them. | |||
1050 | SmallVector<MachineInstr *, 16> CMovs; | |||
1051 | ||||
1052 | // If we didn't find any indirect branches with targets, nothing to do here. | |||
1053 | if (IndirectTargetMBBs.empty()) | |||
1054 | return CMovs; | |||
1055 | ||||
1056 | // We found indirect branches and targets that need to be instrumented to | |||
1057 | // harden loads within them. Walk the blocks of the function (to get a stable | |||
1058 | // ordering) and instrument each target of an indirect branch. | |||
1059 | for (MachineBasicBlock &MBB : MF) { | |||
1060 | // Skip the blocks that aren't candidate targets. | |||
1061 | if (!IndirectTargetMBBs.count(&MBB)) | |||
1062 | continue; | |||
1063 | ||||
1064 | // We don't expect EH pads to ever be reached via an indirect branch. If | |||
1065 | // this is desired for some reason, we could simply skip them here rather | |||
1066 | // than asserting. | |||
1067 | assert(!MBB.isEHPad() && | |||
1068 | "Unexpected EH pad as target of an indirect branch!"); | |||
1069 | ||||
1070 | // We should never end up threading EFLAGS into a block to harden | |||
1071 | // conditional jumps as there would be an additional successor via the | |||
1072 | // indirect branch. As a consequence, all such edges would be split before | |||
1073 | // reaching here, and the inserted block will handle the EFLAGS-based | |||
1074 | // hardening. | |||
1075 | assert(!MBB.isLiveIn(X86::EFLAGS) && | |||
1076 | "Cannot check within a block that already has live-in EFLAGS!"); | |||
1077 | ||||
1078 | // We can't handle having non-indirect edges into this block unless this is | |||
1079 | // the only successor and we can synthesize the necessary target address. | |||
1080 | for (MachineBasicBlock *Pred : MBB.predecessors()) { | |||
1081 | // If we've already handled this by extracting the target directly, | |||
1082 | // nothing to do. | |||
1083 | if (IndirectTerminatedMBBs.count(Pred)) | |||
1084 | continue; | |||
1085 | ||||
1086 | // Otherwise, we have to be the only successor. We generally expect this | |||
1087 | // to be true as conditional branches should have had a critical edge | |||
1088 | // split already. We don't however need to worry about EH pad successors | |||
1089 | // as they'll happily ignore the target and their hardening strategy is | |||
1090 | // resilient to all ways in which they could be reached speculatively. | |||
1091 | if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) { | |||
1092 | return Succ->isEHPad() || Succ == &MBB; | |||
1093 | })) { | |||
1094 | LLVM_DEBUG({ | |||
1095 | dbgs() << "ERROR: Found conditional entry to target of indirect " | |||
1096 | "branch!\n"; | |||
1097 | Pred->dump(); | |||
1098 | MBB.dump(); | |||
1099 | }); | |||
1100 | report_fatal_error("Cannot harden a conditional entry to a target of " | |||
1101 | "an indirect branch!"); | |||
1102 | } | |||
1103 | ||||
1104 | // Now we need to compute the address of this block and install it as a | |||
1105 | // synthetic target in the predecessor. We do this at the bottom of the | |||
1106 | // predecessor. | |||
1107 | auto InsertPt = Pred->getFirstTerminator(); | |||
1108 | unsigned TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass); | |||
1109 | if (MF.getTarget().getCodeModel() == CodeModel::Small && | |||
1110 | !Subtarget->isPositionIndependent()) { | |||
1111 | // Directly materialize it into an immediate. | |||
1112 | auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), | |||
1113 | TII->get(X86::MOV64ri32), TargetReg) | |||
1114 | .addMBB(&MBB); | |||
1115 | ++NumInstsInserted; | |||
1116 | (void)AddrI; | |||
1117 | LLVM_DEBUG(dbgs() << "  Inserting mov: "; AddrI->dump(); | |||
1118 | dbgs() << "\n"); | |||
1119 | } else { | |||
1120 | auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r), | |||
1121 | TargetReg) | |||
1122 | .addReg(/*Base*/ X86::RIP) | |||
1123 | .addImm(/*Scale*/ 1) | |||
1124 | .addReg(/*Index*/ 0) | |||
1125 | .addMBB(&MBB) | |||
1126 | .addReg(/*Segment*/ 0); | |||
1127 | ++NumInstsInserted; | |||
1128 | (void)AddrI; | |||
1129 | LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump(); | |||
1130 | dbgs() << "\n"); | |||
1131 | } | |||
1132 | // And make this available. | |||
1133 | TargetAddrSSA.AddAvailableValue(Pred, TargetReg); | |||
1134 | } | |||
1135 | ||||
1136 | // Materialize the needed SSA value of the target. Note that we need the | |||
1137 | // middle of the block, as the block might end with an indirect branch back | |||
1138 | // to itself. We can do this here because at this point, every | |||
1139 | // predecessor of this block has an available value. This is basically just | |||
1140 | // automating the construction of a PHI node for this target. | |||
1141 | unsigned TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB); | |||
1142 | ||||
1143 | // Insert a comparison of the incoming target register with this block's | |||
1144 | // address. This also requires us to mark the block as having its address | |||
1145 | // taken explicitly. | |||
1146 | MBB.setHasAddressTaken(); | |||
1147 | auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin()); | |||
1148 | if (MF.getTarget().getCodeModel() == CodeModel::Small && | |||
1149 | !Subtarget->isPositionIndependent()) { | |||
1150 | // Check directly against a relocated immediate when we can. | |||
1151 | auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32)) | |||
1152 | .addReg(TargetReg, RegState::Kill) | |||
1153 | .addMBB(&MBB); | |||
1154 | ++NumInstsInserted; | |||
1155 | (void)CheckI; | |||
1156 | LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n"); | |||
1157 | } else { | |||
1158 | // Otherwise compute the address into a register first. | |||
1159 | unsigned AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass); | |||
1160 | auto AddrI = | |||
1161 | BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg) | |||
1162 | .addReg(/*Base*/ X86::RIP) | |||
1163 | .addImm(/*Scale*/ 1) | |||
1164 | .addReg(/*Index*/ 0) | |||
1165 | .addMBB(&MBB) | |||
1166 | .addReg(/*Segment*/ 0); | |||
1167 | ++NumInstsInserted; | |||
1168 | (void)AddrI; | |||
1169 | LLVM_DEBUG(dbgs() << "  Inserting lea: "; AddrI->dump(); dbgs() << "\n"); | |||
1170 | auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr)) | |||
1171 | .addReg(TargetReg, RegState::Kill) | |||
1172 | .addReg(AddrReg, RegState::Kill); | |||
1173 | ++NumInstsInserted; | |||
1174 | (void)CheckI; | |||
1175 | LLVM_DEBUG(dbgs() << "  Inserting cmp: "; CheckI->dump(); dbgs() << "\n"); | |||
1176 | } | |||
1177 | ||||
1178 | // Now cmov over the predicate if the comparison wasn't equal. | |||
1179 | int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8; | |||
1180 | auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes); | |||
1181 | unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC); | |||
1182 | auto CMovI = | |||
1183 | BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg) | |||
1184 | .addReg(PS->InitialReg) | |||
1185 | .addReg(PS->PoisonReg) | |||
1186 | .addImm(X86::COND_NE); | |||
1187 | CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true); | |||
1188 | ++NumInstsInserted; | |||
1189 | LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n"); | |||
1190 | CMovs.push_back(&*CMovI); | |||
1191 | ||||
1192 | // And put the new value into the available values for SSA form of our | |||
1193 | // predicate state. | |||
1194 | PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg); | |||
1195 | } | |||
1196 | ||||
1197 | // Return all the newly inserted cmov instructions of the predicate state. | |||
1198 | return CMovs; | |||
1199 | } | |||
1200 | ||||
1201 | /// Returns true if the instruction has no behavior (specified or otherwise) | |||
1202 | /// that is based on the value of any of its register operands. | |||
1203 | /// | |||
1204 | /// A classical example of something that is inherently not data invariant is an | |||
1205 | /// indirect jump -- the destination is loaded into icache based on the bits set | |||
1206 | /// in the jump destination register. | |||
1207 | /// | |||
1208 | /// FIXME: This should become part of our instruction tables. | |||
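 | /// | |||
 | /// As a hypothetical illustration, DIV is (correctly) not listed below: its | |||
 | /// latency can vary with the operand values, so it simply falls into the | |||
 | /// default case and is treated as not data invariant. | |||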
1209 | static bool isDataInvariant(MachineInstr &MI) { | |||
1210 | switch (MI.getOpcode()) { | |||
1211 | default: | |||
1212 | // By default, assume that the instruction is not data invariant. | |||
1213 | return false; | |||
1214 | ||||
1215 | // Some target-independent operations that trivially lower to data-invariant | |||
1216 | // instructions. | |||
1217 | case TargetOpcode::COPY: | |||
1218 | case TargetOpcode::INSERT_SUBREG: | |||
1219 | case TargetOpcode::SUBREG_TO_REG: | |||
1220 | return true; | |||
1221 | ||||
1222 | // On x86 it is believed that imul is constant time w.r.t. its inputs. | |||
1223 | // However, these instructions set flags and are perhaps the most | |||
1224 | // surprisingly constant-time operations, so we call them out here separately. | |||
1225 | case X86::IMUL16rr: | |||
1226 | case X86::IMUL16rri8: | |||
1227 | case X86::IMUL16rri: | |||
1228 | case X86::IMUL32rr: | |||
1229 | case X86::IMUL32rri8: | |||
1230 | case X86::IMUL32rri: | |||
1231 | case X86::IMUL64rr: | |||
1232 | case X86::IMUL64rri32: | |||
1233 | case X86::IMUL64rri8: | |||
1234 | ||||
1235 | // Bit scanning and counting instructions are somewhat surprising: they | |||
1236 | // scan across bits and do other fairly complex operations like popcnt, | |||
1237 | // yet are believed to be constant time on x86. | |||
1238 | // However, these set flags. | |||
1239 | case X86::BSF16rr: | |||
1240 | case X86::BSF32rr: | |||
1241 | case X86::BSF64rr: | |||
1242 | case X86::BSR16rr: | |||
1243 | case X86::BSR32rr: | |||
1244 | case X86::BSR64rr: | |||
1245 | case X86::LZCNT16rr: | |||
1246 | case X86::LZCNT32rr: | |||
1247 | case X86::LZCNT64rr: | |||
1248 | case X86::POPCNT16rr: | |||
1249 | case X86::POPCNT32rr: | |||
1250 | case X86::POPCNT64rr: | |||
1251 | case X86::TZCNT16rr: | |||
1252 | case X86::TZCNT32rr: | |||
1253 | case X86::TZCNT64rr: | |||
1254 | ||||
1255 | // Bit manipulation instructions are effectively combinations of basic | |||
1256 | // arithmetic ops, and should still execute in constant time. These also | |||
1257 | // set flags. | |||
1258 | case X86::BLCFILL32rr: | |||
1259 | case X86::BLCFILL64rr: | |||
1260 | case X86::BLCI32rr: | |||
1261 | case X86::BLCI64rr: | |||
1262 | case X86::BLCIC32rr: | |||
1263 | case X86::BLCIC64rr: | |||
1264 | case X86::BLCMSK32rr: | |||
1265 | case X86::BLCMSK64rr: | |||
1266 | case X86::BLCS32rr: | |||
1267 | case X86::BLCS64rr: | |||
1268 | case X86::BLSFILL32rr: | |||
1269 | case X86::BLSFILL64rr: | |||
1270 | case X86::BLSI32rr: | |||
1271 | case X86::BLSI64rr: | |||
1272 | case X86::BLSIC32rr: | |||
1273 | case X86::BLSIC64rr: | |||
1274 | case X86::BLSMSK32rr: | |||
1275 | case X86::BLSMSK64rr: | |||
1276 | case X86::BLSR32rr: | |||
1277 | case X86::BLSR64rr: | |||
1278 | case X86::TZMSK32rr: | |||
1279 | case X86::TZMSK64rr: | |||
1280 | ||||
1281 | // Bit extracting and clearing instructions should execute in constant time, | |||
1282 | // and set flags. | |||
1283 | case X86::BEXTR32rr: | |||
1284 | case X86::BEXTR64rr: | |||
1285 | case X86::BEXTRI32ri: | |||
1286 | case X86::BEXTRI64ri: | |||
1287 | case X86::BZHI32rr: | |||
1288 | case X86::BZHI64rr: | |||
1289 | ||||
1290 | // Shift and rotate. | |||
1291 | case X86::ROL8r1: case X86::ROL16r1: case X86::ROL32r1: case X86::ROL64r1: | |||
1292 | case X86::ROL8rCL: case X86::ROL16rCL: case X86::ROL32rCL: case X86::ROL64rCL: | |||
1293 | case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri: | |||
1294 | case X86::ROR8r1: case X86::ROR16r1: case X86::ROR32r1: case X86::ROR64r1: | |||
1295 | case X86::ROR8rCL: case X86::ROR16rCL: case X86::ROR32rCL: case X86::ROR64rCL: | |||
1296 | case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri: | |||
1297 | case X86::SAR8r1: case X86::SAR16r1: case X86::SAR32r1: case X86::SAR64r1: | |||
1298 | case X86::SAR8rCL: case X86::SAR16rCL: case X86::SAR32rCL: case X86::SAR64rCL: | |||
1299 | case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri: | |||
1300 | case X86::SHL8r1: case X86::SHL16r1: case X86::SHL32r1: case X86::SHL64r1: | |||
1301 | case X86::SHL8rCL: case X86::SHL16rCL: case X86::SHL32rCL: case X86::SHL64rCL: | |||
1302 | case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: | |||
1303 | case X86::SHR8r1: case X86::SHR16r1: case X86::SHR32r1: case X86::SHR64r1: | |||
1304 | case X86::SHR8rCL: case X86::SHR16rCL: case X86::SHR32rCL: case X86::SHR64rCL: | |||
1305 | case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri: | |||
1306 | case X86::SHLD16rrCL: case X86::SHLD32rrCL: case X86::SHLD64rrCL: | |||
1307 | case X86::SHLD16rri8: case X86::SHLD32rri8: case X86::SHLD64rri8: | |||
1308 | case X86::SHRD16rrCL: case X86::SHRD32rrCL: case X86::SHRD64rrCL: | |||
1309 | case X86::SHRD16rri8: case X86::SHRD32rri8: case X86::SHRD64rri8: | |||
1310 | ||||
1311 | // Basic arithmetic is constant time on the input but does set flags. | |||
1312 | case X86::ADC8rr: case X86::ADC8ri: | |||
1313 | case X86::ADC16rr: case X86::ADC16ri: case X86::ADC16ri8: | |||
1314 | case X86::ADC32rr: case X86::ADC32ri: case X86::ADC32ri8: | |||
1315 | case X86::ADC64rr: case X86::ADC64ri8: case X86::ADC64ri32: | |||
1316 | case X86::ADD8rr: case X86::ADD8ri: | |||
1317 | case X86::ADD16rr: case X86::ADD16ri: case X86::ADD16ri8: | |||
1318 | case X86::ADD32rr: case X86::ADD32ri: case X86::ADD32ri8: | |||
1319 | case X86::ADD64rr: case X86::ADD64ri8: case X86::ADD64ri32: | |||
1320 | case X86::AND8rr: case X86::AND8ri: | |||
1321 | case X86::AND16rr: case X86::AND16ri: case X86::AND16ri8: | |||
1322 | case X86::AND32rr: case X86::AND32ri: case X86::AND32ri8: | |||
1323 | case X86::AND64rr: case X86::AND64ri8: case X86::AND64ri32: | |||
1324 | case X86::OR8rr: case X86::OR8ri: | |||
1325 | case X86::OR16rr: case X86::OR16ri: case X86::OR16ri8: | |||
1326 | case X86::OR32rr: case X86::OR32ri: case X86::OR32ri8: | |||
1327 | case X86::OR64rr: case X86::OR64ri8: case X86::OR64ri32: | |||
1328 | case X86::SBB8rr: case X86::SBB8ri: | |||
1329 | case X86::SBB16rr: case X86::SBB16ri: case X86::SBB16ri8: | |||
1330 | case X86::SBB32rr: case X86::SBB32ri: case X86::SBB32ri8: | |||
1331 | case X86::SBB64rr: case X86::SBB64ri8: case X86::SBB64ri32: | |||
1332 | case X86::SUB8rr: case X86::SUB8ri: | |||
1333 | case X86::SUB16rr: case X86::SUB16ri: case X86::SUB16ri8: | |||
1334 | case X86::SUB32rr: case X86::SUB32ri: case X86::SUB32ri8: | |||
1335 | case X86::SUB64rr: case X86::SUB64ri8: case X86::SUB64ri32: | |||
1336 | case X86::XOR8rr: case X86::XOR8ri: | |||
1337 | case X86::XOR16rr: case X86::XOR16ri: case X86::XOR16ri8: | |||
1338 | case X86::XOR32rr: case X86::XOR32ri: case X86::XOR32ri8: | |||
1339 | case X86::XOR64rr: case X86::XOR64ri8: case X86::XOR64ri32: | |||
1340 | // Arithmetic with just 32-bit and 64-bit variants and no immediates. | |||
1341 | case X86::ADCX32rr: case X86::ADCX64rr: | |||
1342 | case X86::ADOX32rr: case X86::ADOX64rr: | |||
1343 | case X86::ANDN32rr: case X86::ANDN64rr: | |||
1344 | // Unary arithmetic operations. | |||
1345 | case X86::DEC8r: case X86::DEC16r: case X86::DEC32r: case X86::DEC64r: | |||
1346 | case X86::INC8r: case X86::INC16r: case X86::INC32r: case X86::INC64r: | |||
1347 | case X86::NEG8r: case X86::NEG16r: case X86::NEG32r: case X86::NEG64r: | |||
1348 | // Check whether the EFLAGS implicit-def is dead. We assume that this will | |||
1349 | // always find the implicit-def because this code should only be reached | |||
1350 | // for instructions that do in fact implicitly def this. | |||
1351 | if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) { | |||
1352 | // If we would clobber EFLAGS that are used, just bail for now. | |||
1353 | LLVM_DEBUG(dbgs() << "  Unable to harden post-load due to EFLAGS: "; | |||
1354 | MI.dump(); dbgs() << "\n"); | |||
1355 | return false; | |||
1356 | } | |||
1357 | ||||
1358 | // Otherwise, fallthrough to handle these the same as instructions that | |||
1359 | // don't set EFLAGS. | |||
1360 | LLVM_FALLTHROUGH; | |||
1361 | ||||
1362 | // Unlike other arithmetic, NOT doesn't set EFLAGS. | |||
1363 | case X86::NOT8r: case X86::NOT16r: case X86::NOT32r: case X86::NOT64r: | |||
1364 | ||||
1365 | // Various move instructions used to zero or sign extend things. Note that we | |||
1366 | // intentionally don't support the _NOREX variants as we can't handle that | |||
1367 | // register constraint anyway. | |||
1368 | case X86::MOVSX16rr8: | |||
1369 | case X86::MOVSX32rr8: case X86::MOVSX32rr16: | |||
1370 | case X86::MOVSX64rr8: case X86::MOVSX64rr16: case X86::MOVSX64rr32: | |||
1371 | case X86::MOVZX16rr8: | |||
1372 | case X86::MOVZX32rr8: case X86::MOVZX32rr16: | |||
1373 | case X86::MOVZX64rr8: case X86::MOVZX64rr16: | |||
1374 | case X86::MOV32rr: | |||
1375 | ||||
1376 | // Arithmetic instructions that are both constant time and don't set flags. | |||
1377 | case X86::RORX32ri: | |||
1378 | case X86::RORX64ri: | |||
1379 | case X86::SARX32rr: | |||
1380 | case X86::SARX64rr: | |||
1381 | case X86::SHLX32rr: | |||
1382 | case X86::SHLX64rr: | |||
1383 | case X86::SHRX32rr: | |||
1384 | case X86::SHRX64rr: | |||
1385 | ||||
1386 | // LEA doesn't actually access memory, and its arithmetic is constant time. | |||
1387 | case X86::LEA16r: | |||
1388 | case X86::LEA32r: | |||
1389 | case X86::LEA64_32r: | |||
1390 | case X86::LEA64r: | |||
1391 | return true; | |||
1392 | } | |||
1393 | } | |||
1394 | ||||
1395 | /// Returns true if the instruction has no behavior (specified or otherwise) | |||
1396 | /// that is based on the value loaded from memory or the value of any | |||
1397 | /// non-address register operands. | |||
1398 | /// | |||
1399 | /// For example, this returns false if the latency of the instruction depends | |||
1400 | /// on the particular bits set in any of the registers *or* on any of the bits | |||
1401 | /// loaded from memory. | |||
1402 | /// | |||
1403 | /// A classical example of something that is inherently not data invariant is an | |||
1404 | /// indirect jump -- the destination is loaded into icache based on the bits set | |||
1405 | /// in the jump destination register. | |||
1406 | /// | |||
1407 | /// FIXME: This should become part of our instruction tables. | |||
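 | /// | |||
 | /// As a sketch of how this is used: for a load such as | |||
 | ///   movq (%rbx,%rcx,8), %rax | |||
 | /// returning true lets the pass harden %rax *after* the load (by folding the | |||
 | /// predicate state into it) rather than hardening %rbx and %rcx beforehand. | |||
 | /// The registers here are purely illustrative. | |||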
1408 | static bool isDataInvariantLoad(MachineInstr &MI) { | |||
1409 | switch (MI.getOpcode()) { | |||
1410 | default: | |||
1411 | // By default, assume that the load will immediately leak. | |||
1412 | return false; | |||
1413 | ||||
1414 | // On x86 it is believed that imul is constant time w.r.t. the loaded data. | |||
1415 | // However, these instructions set flags and are perhaps the most | |||
1416 | // surprisingly constant-time operations, so we call them out here separately. | |||
1417 | case X86::IMUL16rm: | |||
1418 | case X86::IMUL16rmi8: | |||
1419 | case X86::IMUL16rmi: | |||
1420 | case X86::IMUL32rm: | |||
1421 | case X86::IMUL32rmi8: | |||
1422 | case X86::IMUL32rmi: | |||
1423 | case X86::IMUL64rm: | |||
1424 | case X86::IMUL64rmi32: | |||
1425 | case X86::IMUL64rmi8: | |||
1426 | ||||
1427 | // Bit scanning and counting instructions are somewhat surprising: they | |||
1428 | // scan across bits and do other fairly complex operations like popcnt, | |||
1429 | // yet are believed to be constant time on x86. | |||
1430 | // However, these set flags. | |||
1431 | case X86::BSF16rm: | |||
1432 | case X86::BSF32rm: | |||
1433 | case X86::BSF64rm: | |||
1434 | case X86::BSR16rm: | |||
1435 | case X86::BSR32rm: | |||
1436 | case X86::BSR64rm: | |||
1437 | case X86::LZCNT16rm: | |||
1438 | case X86::LZCNT32rm: | |||
1439 | case X86::LZCNT64rm: | |||
1440 | case X86::POPCNT16rm: | |||
1441 | case X86::POPCNT32rm: | |||
1442 | case X86::POPCNT64rm: | |||
1443 | case X86::TZCNT16rm: | |||
1444 | case X86::TZCNT32rm: | |||
1445 | case X86::TZCNT64rm: | |||
1446 | ||||
1447 | // Bit manipulation instructions are effectively combinations of basic | |||
1448 | // arithmetic ops, and should still execute in constant time. These also | |||
1449 | // set flags. | |||
1450 | case X86::BLCFILL32rm: | |||
1451 | case X86::BLCFILL64rm: | |||
1452 | case X86::BLCI32rm: | |||
1453 | case X86::BLCI64rm: | |||
1454 | case X86::BLCIC32rm: | |||
1455 | case X86::BLCIC64rm: | |||
1456 | case X86::BLCMSK32rm: | |||
1457 | case X86::BLCMSK64rm: | |||
1458 | case X86::BLCS32rm: | |||
1459 | case X86::BLCS64rm: | |||
1460 | case X86::BLSFILL32rm: | |||
1461 | case X86::BLSFILL64rm: | |||
1462 | case X86::BLSI32rm: | |||
1463 | case X86::BLSI64rm: | |||
1464 | case X86::BLSIC32rm: | |||
1465 | case X86::BLSIC64rm: | |||
1466 | case X86::BLSMSK32rm: | |||
1467 | case X86::BLSMSK64rm: | |||
1468 | case X86::BLSR32rm: | |||
1469 | case X86::BLSR64rm: | |||
1470 | case X86::TZMSK32rm: | |||
1471 | case X86::TZMSK64rm: | |||
1472 | ||||
1473 | // Bit extracting and clearing instructions should execute in constant time, | |||
1474 | // and set flags. | |||
1475 | case X86::BEXTR32rm: | |||
1476 | case X86::BEXTR64rm: | |||
1477 | case X86::BEXTRI32mi: | |||
1478 | case X86::BEXTRI64mi: | |||
1479 | case X86::BZHI32rm: | |||
1480 | case X86::BZHI64rm: | |||
1481 | ||||
1482 | // Basic arithmetic is constant time on the input but does set flags. | |||
1483 | case X86::ADC8rm: | |||
1484 | case X86::ADC16rm: | |||
1485 | case X86::ADC32rm: | |||
1486 | case X86::ADC64rm: | |||
1487 | case X86::ADCX32rm: | |||
1488 | case X86::ADCX64rm: | |||
1489 | case X86::ADD8rm: | |||
1490 | case X86::ADD16rm: | |||
1491 | case X86::ADD32rm: | |||
1492 | case X86::ADD64rm: | |||
1493 | case X86::ADOX32rm: | |||
1494 | case X86::ADOX64rm: | |||
1495 | case X86::AND8rm: | |||
1496 | case X86::AND16rm: | |||
1497 | case X86::AND32rm: | |||
1498 | case X86::AND64rm: | |||
1499 | case X86::ANDN32rm: | |||
1500 | case X86::ANDN64rm: | |||
1501 | case X86::OR8rm: | |||
1502 | case X86::OR16rm: | |||
1503 | case X86::OR32rm: | |||
1504 | case X86::OR64rm: | |||
1505 | case X86::SBB8rm: | |||
1506 | case X86::SBB16rm: | |||
1507 | case X86::SBB32rm: | |||
1508 | case X86::SBB64rm: | |||
1509 | case X86::SUB8rm: | |||
1510 | case X86::SUB16rm: | |||
1511 | case X86::SUB32rm: | |||
1512 | case X86::SUB64rm: | |||
1513 | case X86::XOR8rm: | |||
1514 | case X86::XOR16rm: | |||
1515 | case X86::XOR32rm: | |||
1516 | case X86::XOR64rm: | |||
1517 | // Check whether the EFLAGS implicit-def is dead. We assume that this will | |||
1518 | // always find the implicit-def because this code should only be reached | |||
1519 | // for instructions that do in fact implicitly def this. | |||
1520 | if (!MI.findRegisterDefOperand(X86::EFLAGS)->isDead()) { | |||
1521 | // If we would clobber EFLAGS that are used, just bail for now. | |||
1522 | LLVM_DEBUG(dbgs() << "  Unable to harden post-load due to EFLAGS: "; | |||
1523 | MI.dump(); dbgs() << "\n"); | |||
1524 | return false; | |||
1525 | } | |||
1526 | ||||
1527 | // Otherwise, fallthrough to handle these the same as instructions that | |||
1528 | // don't set EFLAGS. | |||
1529 | LLVM_FALLTHROUGH; | |||
1530 | ||||
1531 | // Integer multiply w/o affecting flags is still believed to be constant | |||
1532 | // time on x86. Called out separately as this is among the most surprising | |||
1533 | // instructions to exhibit that behavior. | |||
1534 | case X86::MULX32rm: | |||
1535 | case X86::MULX64rm: | |||
1536 | ||||
1537 | // Arithmetic instructions that are both constant time and don't set flags. | |||
1538 | case X86::RORX32mi: | |||
1539 | case X86::RORX64mi: | |||
1540 | case X86::SARX32rm: | |||
1541 | case X86::SARX64rm: | |||
1542 | case X86::SHLX32rm: | |||
1543 | case X86::SHLX64rm: | |||
1544 | case X86::SHRX32rm: | |||
1545 | case X86::SHRX64rm: | |||
1546 | ||||
1547 | // Conversions are believed to be constant time and don't set flags. | |||
1548 | case X86::CVTTSD2SI64rm: case X86::VCVTTSD2SI64rm: case X86::VCVTTSD2SI64Zrm: | |||
1549 | case X86::CVTTSD2SIrm: case X86::VCVTTSD2SIrm: case X86::VCVTTSD2SIZrm: | |||
1550 | case X86::CVTTSS2SI64rm: case X86::VCVTTSS2SI64rm: case X86::VCVTTSS2SI64Zrm: | |||
1551 | case X86::CVTTSS2SIrm: case X86::VCVTTSS2SIrm: case X86::VCVTTSS2SIZrm: | |||
1552 | case X86::CVTSI2SDrm: case X86::VCVTSI2SDrm: case X86::VCVTSI2SDZrm: | |||
1553 | case X86::CVTSI2SSrm: case X86::VCVTSI2SSrm: case X86::VCVTSI2SSZrm: | |||
1554 | case X86::CVTSI642SDrm: case X86::VCVTSI642SDrm: case X86::VCVTSI642SDZrm: | |||
1555 | case X86::CVTSI642SSrm: case X86::VCVTSI642SSrm: case X86::VCVTSI642SSZrm: | |||
1556 | case X86::CVTSS2SDrm: case X86::VCVTSS2SDrm: case X86::VCVTSS2SDZrm: | |||
1557 | case X86::CVTSD2SSrm: case X86::VCVTSD2SSrm: case X86::VCVTSD2SSZrm: | |||
1558 | // AVX512 added unsigned integer conversions. | |||
1559 | case X86::VCVTTSD2USI64Zrm: | |||
1560 | case X86::VCVTTSD2USIZrm: | |||
1561 | case X86::VCVTTSS2USI64Zrm: | |||
1562 | case X86::VCVTTSS2USIZrm: | |||
1563 | case X86::VCVTUSI2SDZrm: | |||
1564 | case X86::VCVTUSI642SDZrm: | |||
1565 | case X86::VCVTUSI2SSZrm: | |||
1566 | case X86::VCVTUSI642SSZrm: | |||
1567 | ||||
1568 | // Loads to register don't set flags. | |||
1569 | case X86::MOV8rm: | |||
1570 | case X86::MOV8rm_NOREX: | |||
1571 | case X86::MOV16rm: | |||
1572 | case X86::MOV32rm: | |||
1573 | case X86::MOV64rm: | |||
1574 | case X86::MOVSX16rm8: | |||
1575 | case X86::MOVSX32rm16: | |||
1576 | case X86::MOVSX32rm8: | |||
1577 | case X86::MOVSX32rm8_NOREX: | |||
1578 | case X86::MOVSX64rm16: | |||
1579 | case X86::MOVSX64rm32: | |||
1580 | case X86::MOVSX64rm8: | |||
1581 | case X86::MOVZX16rm8: | |||
1582 | case X86::MOVZX32rm16: | |||
1583 | case X86::MOVZX32rm8: | |||
1584 | case X86::MOVZX32rm8_NOREX: | |||
1585 | case X86::MOVZX64rm16: | |||
1586 | case X86::MOVZX64rm8: | |||
1587 | return true; | |||
1588 | } | |||
1589 | } | |||
1590 | ||||
1591 | static bool isEFLAGSLive(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, | |||
1592 | const TargetRegisterInfo &TRI) { | |||
1593 | // Check if EFLAGS are alive by seeing if there is a def of them or they | |||
1594 | // live-in, and then seeing if that def is in turn used. | |||
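 | // Note that the range below is visited in reverse, i.e. from I backwards | |||
 | // towards the start of the block, so the first def or kill we encounter is | |||
 | // the one nearest the query point. | |||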
1595 | for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) { | |||
1596 | if (MachineOperand *DefOp = MI.findRegisterDefOperand(X86::EFLAGS)) { | |||
1597 | // If the def is dead, then EFLAGS is not live. | |||
1598 | if (DefOp->isDead()) | |||
1599 | return false; | |||
1600 | ||||
1601 | // Otherwise we've def'ed it, and it is live. | |||
1602 | return true; | |||
1603 | } | |||
1604 | // While at this instruction, also check if we use and kill EFLAGS | |||
1605 | // which means it isn't live. | |||
1606 | if (MI.killsRegister(X86::EFLAGS, &TRI)) | |||
1607 | return false; | |||
1608 | } | |||
1609 | ||||
1610 | // If we didn't find anything conclusive (neither definitely alive nor | |||
1611 | // definitely dead), return whether EFLAGS lives into the block. | |||
1612 | return MBB.isLiveIn(X86::EFLAGS); | |||
1613 | } | |||
1614 | ||||
1615 | /// Trace the predicate state through each of the blocks in the function, | |||
1616 | /// hardening everything necessary along the way. | |||
1617 | /// | |||
1618 | /// We call this routine once the initial predicate state has been established | |||
1619 | /// for each basic block in the function in the SSA updater. This routine traces | |||
1620 | /// it through the instructions within each basic block, and for non-returning | |||
1621 | /// blocks informs the SSA updater about the final state that lives out of the | |||
1622 | /// block. Along the way, it hardens any vulnerable instruction using the | |||
1623 | /// currently valid predicate state. We have to do these two things together | |||
1624 | /// because the SSA updater only works across blocks. Within a block, we track | |||
1625 | /// the current predicate state directly and update it as it changes. | |||
1626 | /// | |||
1627 | /// This operates in two passes over each block. First, we analyze the loads in | |||
1628 | /// the block to determine which strategy will be used to harden them: hardening | |||
1629 | /// the address or hardening the loaded value when loaded into a register | |||
1630 | /// amenable to hardening. We have to process these first because the two | |||
1631 | /// strategies may interact -- later hardening may change what strategy we wish | |||
1632 | /// to use. We also will analyze data dependencies between loads and avoid | |||
1633 | /// hardening those loads that are data dependent on a load with a hardened | |||
1634 | /// address. We also skip hardening loads already behind an LFENCE as that is | |||
1635 | /// sufficient to harden them against misspeculation. | |||
1636 | /// | |||
1637 | /// Second, we actively trace the predicate state through the block, applying | |||
1638 | /// the hardening steps we determined necessary in the first pass as we go. | |||
1639 | /// | |||
1640 | /// These two passes are applied to each basic block. We operate one block at a | |||
1641 | /// time to simplify reasoning about reachability and sequencing. | |||
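 | /// | |||
 | /// As an illustrative sketch (register names are hypothetical), hardening a | |||
 | /// load's *address* folds the predicate state into a pointer before the | |||
 | /// access: | |||
 | /// | |||
 | ///   orq %pred_state, %rbx            # pointer becomes garbage if | |||
 | ///   movq (%rbx,%rcx,8), %rax         # we are misspeculating | |||
 | /// | |||
 | /// whereas post-load hardening instead folds the state into %rax afterwards. | |||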
1642 | void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden( | |||
1643 | MachineFunction &MF) { | |||
1644 | SmallPtrSet<MachineInstr *, 16> HardenPostLoad; | |||
1645 | SmallPtrSet<MachineInstr *, 16> HardenLoadAddr; | |||
1646 | ||||
1647 | SmallSet<unsigned, 16> HardenedAddrRegs; | |||
1648 | ||||
1649 | SmallDenseMap<unsigned, unsigned, 32> AddrRegToHardenedReg; | |||
1650 | ||||
1651 | // Track the set of load-dependent registers through the basic block. Because | |||
1652 | // the values of these registers have an existing data dependency on a loaded | |||
1653 | // value which we would have checked, we can omit any checks on them. | |||
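 | // For example (with illustrative virtual registers): if %v is defined by a | |||
 | // load whose address was hardened, then %w defined as `add %v, 4` is | |||
 | // load-dependent, and a later load through %w needs no separate check. | |||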
1654 | SparseBitVector<> LoadDepRegs; | |||
1655 | ||||
1656 | for (MachineBasicBlock &MBB : MF) { | |||
1657 | // The first pass over the block: collect all the loads which can have their | |||
1658 | // loaded value hardened and all the loads that instead need their address | |||
1659 | // hardened. During this walk we propagate load dependence for address | |||
1660 | // hardened loads and also look for LFENCE to stop hardening wherever | |||
1661 | // possible. When deciding whether or not to harden the loaded value, | |||
1662 | // we check to see if any registers used in the address will have been | |||
1663 | // hardened at this point and if so, harden any remaining address registers | |||
1664 | // as that often successfully re-uses hardened addresses and minimizes | |||
1665 | // instructions. | |||
1666 | // | |||
1667 | // FIXME: We should consider an aggressive mode where we keep as many loads | |||
1668 | // value-hardened as possible even when some address register hardening would | |||
1669 | // be free (due to reuse). | |||
1670 | // | |||
1671 | // Note that we only need this pass if we are actually hardening loads. | |||
1672 | if (HardenLoads) | |||
| ||||
1673 | for (MachineInstr &MI : MBB) { | |||
1674 | // We naively assume that all def'ed registers of an instruction have | |||
1675 | // a data dependency on all of their operands. | |||
1676 | // FIXME: Do a more careful analysis of x86 to build a conservative | |||
1677 | // model here. | |||
1678 | if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) { | |||
1679 | return Op.isReg() && LoadDepRegs.test(Op.getReg()); | |||
1680 | })) | |||
1681 | for (MachineOperand &Def : MI.defs()) | |||
1682 | if (Def.isReg()) | |||
1683 | LoadDepRegs.set(Def.getReg()); | |||
1684 | ||||
1685 | // Both Intel and AMD have indicated that the semantics of LFENCE will be | |||
1686 | // strengthened to a full speculation barrier, so if we see an LFENCE, there | |||
1687 | // is no further need to guard things in this block. | |||
1688 | if (MI.getOpcode() == X86::LFENCE) | |||
1689 | break; | |||
1690 | ||||
1691 | // If this instruction cannot load, nothing to do. | |||
1692 | if (!MI.mayLoad()) | |||
1693 | continue; | |||
1694 | ||||
1695 | // Some instructions which "load" are trivially safe or unimportant. | |||
1696 | if (MI.getOpcode() == X86::MFENCE) | |||
1697 | continue; | |||
1698 | ||||
1699 | // Extract the memory operand information about this instruction. | |||
1700 | // FIXME: This doesn't handle loading pseudo instructions which we often | |||
1701 | // could handle with similarly generic logic. We probably need to add an | |||
1702 | // MI-layer routine similar to the MC-layer one we use here which maps | |||
1703 | // pseudos much like this maps real instructions. | |||
1704 | const MCInstrDesc &Desc = MI.getDesc(); | |||
1705 | int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); | |||
1706 | if (MemRefBeginIdx < 0) { | |||
1707 | LLVM_DEBUG(dbgs() | |||
1708 | << "WARNING: unable to harden loading instruction: "; | |||
1709 | MI.dump()); | |||
1710 | continue; | |||
1711 | } | |||
1712 | ||||
1713 | MemRefBeginIdx += X86II::getOperandBias(Desc); | |||
1714 | ||||
1715 | MachineOperand &BaseMO = | |||
1716 | MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); | |||
1717 | MachineOperand &IndexMO = | |||
1718 | MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); | |||
1719 | ||||
1720 | // If we have at least one (non-frame-index, non-RIP) register operand, | |||
1721 | // and neither operand is load-dependent, we need to check the load. | |||
1722 | unsigned BaseReg = 0, IndexReg = 0; | |||
1723 | if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP && | |||
1724 | BaseMO.getReg() != X86::NoRegister) | |||
1725 | BaseReg = BaseMO.getReg(); | |||
1726 | if (IndexMO.getReg() != X86::NoRegister) | |||
1727 | IndexReg = IndexMO.getReg(); | |||
1728 | ||||
1729 | if (!BaseReg && !IndexReg) | |||
1730 | // No register operands! | |||
1731 | continue; | |||
1732 | ||||
1733 | // If any register operand is dependent, this load is dependent and we | |||
1734 | // needn't check it. | |||
1735 | // FIXME: Is this true in the case where we are hardening loads after | |||
1736 | // they complete? Unclear, need to investigate. | |||
1737 | if ((BaseReg && LoadDepRegs.test(BaseReg)) || | |||
1738 | (IndexReg && LoadDepRegs.test(IndexReg))) | |||
1739 | continue; | |||
1740 | ||||
1741 | // If post-load hardening is enabled, this load is compatible with | |||
1742 | // post-load hardening, and we aren't already going to harden one of the | |||
1743 | // address registers, queue it up to be hardened post-load. Notably, | |||
1744 | // even once hardened this won't introduce a useful dependency that | |||
1745 | // could prune out subsequent loads. | |||
1746 | if (EnablePostLoadHardening && isDataInvariantLoad(MI) && | |||
1747 | MI.getDesc().getNumDefs() == 1 && MI.getOperand(0).isReg() && | |||
1748 | canHardenRegister(MI.getOperand(0).getReg()) && | |||
1749 | !HardenedAddrRegs.count(BaseReg) && | |||
1750 | !HardenedAddrRegs.count(IndexReg)) { | |||
1751 | HardenPostLoad.insert(&MI); | |||
1752 | HardenedAddrRegs.insert(MI.getOperand(0).getReg()); | |||
1753 | continue; | |||
1754 | } | |||
1755 | ||||
1756 | // Record this instruction for address hardening and record its register | |||
1757 | // operands as being address-hardened. | |||
1758 | HardenLoadAddr.insert(&MI); | |||
1759 | if (BaseReg) | |||
1760 | HardenedAddrRegs.insert(BaseReg); | |||
1761 | if (IndexReg) | |||
1762 | HardenedAddrRegs.insert(IndexReg); | |||
1763 | ||||
1764 | for (MachineOperand &Def : MI.defs()) | |||
1765 | if (Def.isReg()) | |||
1766 | LoadDepRegs.set(Def.getReg()); | |||
1767 | } | |||
1768 | ||||
1769 | // Now re-walk the instructions in the basic block, and apply whichever | |||
1770 | // hardening strategy we have elected. Note that we do this in a second | |||
1771 | // pass specifically so that we have the complete set of instructions for | |||
1772 | // which we will do post-load hardening and can defer it in certain | |||
1773 | // circumstances. | |||
1774 | for (MachineInstr &MI : MBB) { | |||
1775 | if (HardenLoads) { | |||
1776 | // We cannot both require hardening the def of a load and its address. | |||
1777 | assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) && | |||
1778 | "Requested to harden both the address and def of a load!"); | |||
1779 | ||||
1780 | // Check if this is a load whose address needs to be hardened. | |||
1781 | if (HardenLoadAddr.erase(&MI)) { | |||
1782 | const MCInstrDesc &Desc = MI.getDesc(); | |||
1783 | int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags); | |||
1784 | assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!"); | |||
1785 | ||||
1786 | MemRefBeginIdx += X86II::getOperandBias(Desc); | |||
1787 | ||||
1788 | MachineOperand &BaseMO = | |||
1789 | MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg); | |||
1790 | MachineOperand &IndexMO = | |||
1791 | MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg); | |||
1792 | hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg); | |||
1793 | continue; | |||
1794 | } | |||
1795 | ||||
1796 | // Test if this instruction is one of our post load instructions (and | |||
1797 | // remove it from the set if so). | |||
1798 | if (HardenPostLoad.erase(&MI)) { | |||
1799 | assert(!MI.isCall() && "Must not try to post-load harden a call!"); | |||
1800 | ||||
1801 | // If this is a data-invariant load, we want to try and sink any | |||
1802 | // hardening as far as possible. | |||
1803 | if (isDataInvariantLoad(MI)) { | |||
1804 | // Sink the instruction we'll need to harden as far as we can down | |||
1805 | // the graph. | |||
1806 | MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad); | |||
1807 | ||||
1808 | // If we managed to sink this instruction, update everything so we | |||
1809 | // harden that instruction when we reach it in the instruction | |||
1810 | // sequence. | |||
1811 | if (SunkMI != &MI) { | |||
1812 | // If in sinking there was no instruction needing to be hardened, | |||
1813 | // we're done. | |||
1814 | if (!SunkMI) | |||
1815 | continue; | |||
1816 | ||||
1817 | // Otherwise, add this to the set of defs we harden. | |||
1818 | HardenPostLoad.insert(SunkMI); | |||
1819 | continue; | |||
1820 | } | |||
1821 | } | |||
1822 | ||||
1823 | unsigned HardenedReg = hardenPostLoad(MI); | |||
1824 | ||||
1825 | // Mark the resulting hardened register as such so we don't re-harden. | |||
1826 | AddrRegToHardenedReg[HardenedReg] = HardenedReg; | |||
1827 | ||||
1828 | continue; | |||
1829 | } | |||
1830 | ||||
1831 | // Check for an indirect call or branch that may need its input hardened | |||
1832 | // even if we couldn't find the specific load used, or were able to | |||
1833 | // avoid hardening it for some reason. Note that here we cannot break | |||
1834 | // out afterward as we may still need to handle any call aspect of this | |||
1835 | // instruction. | |||
1836 | if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps) | |||
1837 | hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg); | |||
1838 | } | |||
1839 | ||||

      // After we finish hardening loads, we handle interprocedural hardening
      // if enabled and relevant for this instruction.
      if (!HardenInterprocedurally)
        continue;
      if (!MI.isCall() && !MI.isReturn())
        continue;

      // If this is a direct return (i.e., not a tail call), just directly
      // harden it.
      if (MI.isReturn() && !MI.isCall()) {
        hardenReturnInstr(MI);
        continue;
      }

      // Otherwise we have a call. We need to handle transferring the predicate
      // state into a call and recovering it after the call returns (unless
      // this is a tail call).
      assert(MI.isCall() && "Should only reach here for calls!");
      tracePredStateThroughCall(MI);
    }

    HardenPostLoad.clear();
    HardenLoadAddr.clear();
    HardenedAddrRegs.clear();
    AddrRegToHardenedReg.clear();

    // Currently, we only track data-dependent loads within a basic block.
    // FIXME: We should see if this is necessary or if we could be more
    // aggressive here without opening up attack avenues.
    LoadDepRegs.clear();
  }
}

/// Save EFLAGS into the returned GPR. This can in turn be restored with
/// `restoreEFLAGS`.
///
/// Note that LLVM can only lower very simple patterns of saved and restored
/// EFLAGS registers. The restore should always be within the same basic block
/// as the save so that no PHI nodes are inserted.
unsigned X86SpeculativeLoadHardeningPass::saveEFLAGS(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
    DebugLoc Loc) {
  // FIXME: Hard coding this to a 32-bit register class seems weird, but
  // matches what instruction selection does.
  unsigned Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
  // We directly copy the FLAGS register and rely on later lowering to clean
  // this up into the appropriate setCC instructions.
  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
  ++NumInstsInserted;
  return Reg;
}

/// Restore EFLAGS from the provided GPR. This should be produced by
/// `saveEFLAGS`.
///
/// This must be done within the same basic block as the save in order to
/// reliably lower.
void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
    unsigned Reg) {
  BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
  ++NumInstsInserted;
}
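
// A minimal usage sketch for the save/restore pair above, as it appears at
// the hardening sites later in this file (`MBB`, `InsertPt`, and `Loc` come
// from the instruction being hardened):
//
//   unsigned FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
//   ... emit EFLAGS-clobbering hardening instructions ...
//   restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);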

/// Takes the current predicate state (in a register) and merges it into the
/// stack pointer. The state is essentially a single bit, but we merge this in
/// a way that won't form non-canonical pointers and also will be preserved
/// across normal stack adjustments.
void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt, DebugLoc Loc,
    unsigned PredStateReg) {
  unsigned TmpReg = MRI->createVirtualRegister(PS->RC);
  // FIXME: This hard codes a shift distance based on the number of bits needed
  // to stay canonical on 64-bit. We should compute this somehow and support
  // 32-bit as part of that.
  auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
                    .addReg(PredStateReg, RegState::Kill)
                    .addImm(47);
  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
  ++NumInstsInserted;
  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
                 .addReg(X86::RSP)
                 .addReg(TmpReg, RegState::Kill);
  OrI->addRegisterDead(X86::EFLAGS, TRI);
  ++NumInstsInserted;
}

/// Extracts the predicate state stored in the high bits of the stack pointer.
unsigned X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
    DebugLoc Loc) {
  unsigned PredStateReg = MRI->createVirtualRegister(PS->RC);
  unsigned TmpReg = MRI->createVirtualRegister(PS->RC);

  // We know that the stack pointer will have any preserved predicate state in
  // its high bit. We just want to smear this across the other bits. Turns out,
  // this is exactly what an arithmetic right shift does.
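  // In effect (a sketch):
  //   movq %rsp, %tmp
  //   sarq $63, %tmp   # all-zeros if the high bit is clear, all-ones if set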
  BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
      .addReg(X86::RSP);
  auto ShiftI =
      BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
          .addReg(TmpReg, RegState::Kill)
          .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
  ShiftI->addRegisterDead(X86::EFLAGS, TRI);
  ++NumInstsInserted;

  return PredStateReg;
}

void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
    MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
    SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  // Check if EFLAGS are alive by seeing if there is a def of them or they
  // live-in, and then seeing if that def is in turn used.
  bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);

  SmallVector<MachineOperand *, 2> HardenOpRegs;

  if (BaseMO.isFI()) {
    // A frame index is never a dynamically controllable load, so only
    // harden it if we're covering fixed address loads as well.
    LLVM_DEBUG(
        dbgs() << "  Skipping hardening base of explicit stack frame load: ";
        MI.dump(); dbgs() << "\n");
  } else if (BaseMO.getReg() == X86::RSP) {
    // Some idempotent atomic operations are lowered directly to a locked
    // OR with 0 to the top of stack (or slightly offset from top) which uses
    // an explicit RSP register as the base.
    assert(IndexMO.getReg() == X86::NoRegister &&
           "Explicit RSP access with dynamic index!");
    LLVM_DEBUG(
        dbgs() << "  Cannot harden base of explicit RSP offset in a load!");
  } else if (BaseMO.getReg() == X86::RIP ||
             BaseMO.getReg() == X86::NoRegister) {
    // For both RIP-relative addressed loads or absolute loads, we cannot
    // meaningfully harden them because the address being loaded has no
    // dynamic component.
    //
    // FIXME: When using a segment base (like TLS does) we end up with the
    // dynamic address being the base plus -1 because we can't mutate the
    // segment register here. This allows the signed 32-bit offset to point at
    // valid segment-relative addresses and load them successfully.
    LLVM_DEBUG(
        dbgs() << "  Cannot harden base of "
               << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
               << " address in a load!");
  } else {
    assert(BaseMO.isReg() &&
           "Only allowed to have a frame index or register base.");
    HardenOpRegs.push_back(&BaseMO);
  }

  if (IndexMO.getReg() != X86::NoRegister &&
      (HardenOpRegs.empty() ||
       HardenOpRegs.front()->getReg() != IndexMO.getReg()))
    HardenOpRegs.push_back(&IndexMO);

  assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
         "Should have exactly one or two registers to harden!");
  assert((HardenOpRegs.size() == 1 ||
          HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
         "Should not have two of the same registers!");

  // Remove any registers that have already been checked.
  llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
    // See if this operand's register has already been checked.
    auto It = AddrRegToHardenedReg.find(Op->getReg());
    if (It == AddrRegToHardenedReg.end())
      // Not checked, so retain this one.
      return false;

    // Otherwise, we can directly update this operand and remove it.
    Op->setReg(It->second);
    return true;
  });
  // If there are none left, we're done.
  if (HardenOpRegs.empty())
    return;

  // Compute the current predicate state.
  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);

  auto InsertPt = MI.getIterator();

  // If EFLAGS are live and we don't have access to instructions that avoid
  // clobbering EFLAGS we need to save and restore them. This in turn makes
  // the EFLAGS no longer live.
  unsigned FlagsReg = 0;
  if (EFLAGSLive && !Subtarget->hasBMI2()) {
    EFLAGSLive = false;
    FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
  }

  for (MachineOperand *Op : HardenOpRegs) {
    unsigned OpReg = Op->getReg();
    auto *OpRC = MRI->getRegClass(OpReg);
    unsigned TmpReg = MRI->createVirtualRegister(OpRC);

    // If this is a vector register, we'll need somewhat custom logic to handle
    // hardening it.
    if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
                                 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
      assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
      bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);

      // Move our state into a vector register.
      // FIXME: We could skip this at the cost of longer encodings with AVX-512
      // but that doesn't seem likely worth it.
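      // For a 256-bit operand, the three instructions below emit, in effect
      // (a sketch with hypothetical register names):
      //   vmovq        %state, %xmm_tmp
      //   vpbroadcastq %xmm_tmp, %ymm_state
      //   vpor         %ymm_op, %ymm_state, %ymm_hardened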
      unsigned VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
      auto MovI =
          BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
              .addReg(StateReg);
      (void)MovI;
      ++NumInstsInserted;
      LLVM_DEBUG(dbgs() << "  Inserting mov: "; MovI->dump(); dbgs() << "\n");

      // Broadcast it across the vector register.
      unsigned VBStateReg = MRI->createVirtualRegister(OpRC);
      auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
                                TII->get(Is128Bit ? X86::VPBROADCASTQrr
                                                  : X86::VPBROADCASTQYrr),
                                VBStateReg)
                            .addReg(VStateReg);
      (void)BroadcastI;
      ++NumInstsInserted;
      LLVM_DEBUG(dbgs() << "  Inserting broadcast: "; BroadcastI->dump();
                 dbgs() << "\n");

      // Merge our potential poison state into the value with a vector or.
      auto OrI =
          BuildMI(MBB, InsertPt, Loc,
                  TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
              .addReg(VBStateReg)
              .addReg(OpReg);
      (void)OrI;
      ++NumInstsInserted;
      LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
    } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
               OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
               OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
      assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
      bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
      bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
      if (Is128Bit || Is256Bit)
        assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");

      // Broadcast our state into a vector register.
      unsigned VStateReg = MRI->createVirtualRegister(OpRC);
      unsigned BroadcastOp =
          Is128Bit ? X86::VPBROADCASTQrZ128r
                   : Is256Bit ? X86::VPBROADCASTQrZ256r : X86::VPBROADCASTQrZr;
      auto BroadcastI =
          BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
              .addReg(StateReg);
      (void)BroadcastI;
      ++NumInstsInserted;
      LLVM_DEBUG(dbgs() << "  Inserting broadcast: "; BroadcastI->dump();
                 dbgs() << "\n");

      // Merge our potential poison state into the value with a vector or.
      unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
                               : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
      auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
                     .addReg(VStateReg)
                     .addReg(OpReg);
      (void)OrI;
      ++NumInstsInserted;
      LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
    } else {
      // FIXME: Need to support GR32 here for 32-bit code.
      assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
             "Not a supported register class for address hardening!");

      if (!EFLAGSLive) {
        // Merge our potential poison state into the value with an or.
        auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
                       .addReg(StateReg)
                       .addReg(OpReg);
        OrI->addRegisterDead(X86::EFLAGS, TRI);
        ++NumInstsInserted;
        LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");
      } else {
        // We need to avoid touching EFLAGS so shift out all but the least
        // significant bit using the instruction that doesn't update flags.
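        // In effect (a sketch): `shrxq %state, %op, %tmp`. The shift count is
        // 0 on the correct path (leaving the address intact) and 63 during
        // misspeculation (destroying all but one bit of the address).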
        auto ShiftI =
            BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
                .addReg(OpReg)
                .addReg(StateReg);
        (void)ShiftI;
        ++NumInstsInserted;
        LLVM_DEBUG(dbgs() << "  Inserting shrx: "; ShiftI->dump();
                   dbgs() << "\n");
      }
    }

    // Record this register as checked and update the operand.
    assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
           "Should not have checked this register yet!");
    AddrRegToHardenedReg[Op->getReg()] = TmpReg;
    Op->setReg(TmpReg);
    ++NumAddrRegsHardened;
  }

  // And restore the flags if needed.
  if (FlagsReg)
    restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
}

MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
    MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
  assert(isDataInvariantLoad(InitialMI) &&
         "Cannot get here with a non-invariant load!");

  // See if we can sink hardening the loaded value.
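  // For example (a sketch): given `%v = MOV64rm ...` whose single use is a
  // data-invariant `%w = ADD64rr %v, %x` in the same block, we prefer to
  // harden %w rather than %v, letting the hardening sink past the add.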
  auto SinkCheckToSingleUse =
      [&](MachineInstr &MI) -> Optional<MachineInstr *> {
    unsigned DefReg = MI.getOperand(0).getReg();

    // We need to find a single use to which we can sink the check. We can
    // primarily do this because many uses may already end up checked on their
    // own.
    MachineInstr *SingleUseMI = nullptr;
    for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
      // If we're already going to harden this use, it is data invariant and
      // within our block.
      if (HardenedInstrs.count(&UseMI)) {
        if (!isDataInvariantLoad(UseMI)) {
          // If we've already decided to harden a non-load, we must have sunk
          // some other post-load hardened instruction to it and it must itself
          // be data-invariant.
          assert(isDataInvariant(UseMI) &&
                 "Data variant instruction being hardened!");
          continue;
        }

        // Otherwise, this is a load and the load component can't be data
        // invariant so check how this register is being used.
        const MCInstrDesc &Desc = UseMI.getDesc();
        int MemRefBeginIdx = X86II::getMemoryOperandNo(Desc.TSFlags);
        assert(MemRefBeginIdx >= 0 &&
               "Should always have mem references here!");
        MemRefBeginIdx += X86II::getOperandBias(Desc);

        MachineOperand &BaseMO =
            UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
        MachineOperand &IndexMO =
            UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
        if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
            (IndexMO.isReg() && IndexMO.getReg() == DefReg))
          // The load uses the register as part of its address, making it not
          // invariant.
          return {};

        continue;
      }

      if (SingleUseMI)
        // We already have a single use; this would make two. Bail.
        return {};

      // If this single use isn't data invariant, isn't in this block, or has
      // interfering EFLAGS, we can't sink the hardening to it.
      if (!isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent())
        return {};

      // If this instruction defines multiple registers, bail as we won't
      // harden all of them.
      if (UseMI.getDesc().getNumDefs() > 1)
        return {};

      // If this register isn't a virtual register, we can't walk its uses
      // sanely, so just bail. Also check that its register class is one of
      // the ones we can harden.
      unsigned UseDefReg = UseMI.getOperand(0).getReg();
      if (!TRI->isVirtualRegister(UseDefReg) ||
          !canHardenRegister(UseDefReg))
        return {};

      SingleUseMI = &UseMI;
    }

    // If SingleUseMI is still null, there is no use that needs its own
    // checking. Otherwise, it is the single use that needs checking.
    return {SingleUseMI};
  };

  MachineInstr *MI = &InitialMI;
  while (Optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
    // Update which MI we're checking now.
    MI = *SingleUse;
    if (!MI)
      break;
  }

  return MI;
}

bool X86SpeculativeLoadHardeningPass::canHardenRegister(unsigned Reg) {
  auto *RC = MRI->getRegClass(Reg);
  int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
  if (RegBytes > 8)
    // We don't support post-load hardening of vectors.
    return false;

  // If this register class is explicitly constrained to a class that doesn't
  // require REX prefix, we may not be able to satisfy that constraint when
  // emitting the hardening instructions, so bail out here.
  // FIXME: This seems like a pretty lame hack. The way this comes up is when
  // we end up both with a NOREX and REX-only register as operands to the
  // hardening instructions. It would be better to fix that code to handle
  // this situation rather than hack around it in this way.
  const TargetRegisterClass *NOREXRegClasses[] = {
      &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
      &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
  if (RC == NOREXRegClasses[Log2_32(RegBytes)])
    return false;

  const TargetRegisterClass *GPRRegClasses[] = {
      &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
      &X86::GR64RegClass};
  return RC->hasSuperClassEq(GPRRegClasses[Log2_32(RegBytes)]);
}

/// Harden a value in a register.
///
/// This is the low-level logic to fully harden a value sitting in a register
/// against leaking during speculative execution.
///
/// Unlike hardening an address that is used by a load, this routine is
/// required to hide *all* incoming bits in the register.
///
/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
/// larger than the predicate state register. FIXME: We should support vector
/// registers here by broadcasting the predicate state.
///
/// The new, hardened virtual register is returned. It will have the same
/// register class as `Reg`.
unsigned X86SpeculativeLoadHardeningPass::hardenValueInRegister(
    unsigned Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
    DebugLoc Loc) {
  assert(canHardenRegister(Reg) && "Cannot harden this register!");
  assert(TRI->isVirtualRegister(Reg) && "Cannot harden a physical register!");

  auto *RC = MRI->getRegClass(Reg);
  int Bytes = TRI->getRegSizeInBits(*RC) / 8;

  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);

  // FIXME: Need to teach this about 32-bit mode.
  if (Bytes != 8) {
    unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
    unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
    unsigned NarrowStateReg = MRI->createVirtualRegister(RC);
    BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
        .addReg(StateReg, 0, SubRegImm);
    StateReg = NarrowStateReg;
  }
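
  // At this point StateReg matches the width of Reg, so the `or` below forces
  // every bit of Reg to one exactly when the predicate state is the all-ones
  // poison value, and leaves Reg unchanged when the state is zero.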
  unsigned FlagsReg = 0;
  if (isEFLAGSLive(MBB, InsertPt, *TRI))
    FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);

  unsigned NewReg = MRI->createVirtualRegister(RC);
  unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
  unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
  auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
                 .addReg(StateReg)
                 .addReg(Reg);
  OrI->addRegisterDead(X86::EFLAGS, TRI);
  ++NumInstsInserted;
  LLVM_DEBUG(dbgs() << "  Inserting or: "; OrI->dump(); dbgs() << "\n");

  if (FlagsReg)
    restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);

  return NewReg;
}

/// Harden a load by hardening the loaded value in the defined register.
///
/// We can harden a non-leaking load into a register without touching the
/// address by just hiding all of the loaded bits during misspeculation. We use
/// an `or` instruction to do this because we set up our poison value as all
/// ones. And the goal is just for the loaded bits to not be exposed to
/// execution and coercing them to one is sufficient.
///
/// Returns the newly hardened register.
unsigned X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();

  auto &DefOp = MI.getOperand(0);
  unsigned OldDefReg = DefOp.getReg();
  auto *DefRC = MRI->getRegClass(OldDefReg);

  // Because we want to completely replace the uses of this def'ed value with
  // the hardened value, create a dedicated new register that will only be used
  // to communicate the unhardened value to the hardening.
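  // Sketch of the rewrite, for a load `%old = MOV64rm ...` (hypothetical
  // virtual register names):
  //   %unhardened = MOV64rm ...                 # original load, def renamed
  //   %hardened   = OR64rr %state, %unhardened  # inserted after the load
  // and every prior use of %old is rewritten to use %hardened.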
  unsigned UnhardenedReg = MRI->createVirtualRegister(DefRC);
  DefOp.setReg(UnhardenedReg);

  // Now harden this register's value, getting a hardened reg that is safe to
  // use. Note that we insert the instructions to compute this *after* the
  // defining instruction, not before it.
  unsigned HardenedReg = hardenValueInRegister(
      UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);

  // Finally, replace the old register (which now only has the uses of the
  // original def) with the hardened register.
  MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);

  ++NumPostLoadRegsHardened;
  return HardenedReg;
}

/// Harden a return instruction.
///
/// Returns implicitly perform a load which we need to harden. Without
/// hardening this load, an attacker may speculatively write over the return
/// address to steer speculation of the return to an attacker controlled
/// address. This is called Spectre v1.1 or Bounds Check Bypass Store (BCBS)
/// and is described in this paper:
/// https://people.csail.mit.edu/vlk/spectre11.pdf
///
/// We can harden this by introducing an LFENCE that will delay any load of the
/// return address until prior instructions have retired (and thus are not
/// being speculated), or we can harden the address used by the implicit load:
/// the stack pointer.
///
/// If we are not using an LFENCE, hardening the stack pointer has an
/// additional benefit: it allows us to pass the predicate state accumulated in
/// this function back to the caller. In the absence of a BCBS attack on the
/// return, the caller will typically be resumed and speculatively executed due
/// to the Return Stack Buffer (RSB) prediction, which is very accurate and has
/// a high priority. It is possible that some code from the caller will be
/// executed speculatively even during a BCBS-attacked return until the
/// steering takes effect. Whenever this happens, the caller can recover the
/// (poisoned) predicate state from the stack pointer and continue to harden
/// loads.
void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  DebugLoc Loc = MI.getDebugLoc();
  auto InsertPt = MI.getIterator();

  if (FenceCallAndRet)
    // No need to fence here as we'll fence at the return site itself. That
    // handles more cases than we can handle here.
    return;

  // Take our predicate state, shift it to the high 17 bits (so that we keep
  // pointers canonical) and merge it into RSP. This will allow the caller to
  // extract it when we return (speculatively).
  mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
}

/// Trace the predicate state through a call.
///
/// There are several layers of this needed to handle the full complexity of
/// calls.
///
/// First, we need to send the predicate state into the called function. We do
/// this by merging it into the high bits of the stack pointer.
///
/// For tail calls, this is all we need to do.
///
/// For calls where we might return and resume the control flow, we need to
/// extract the predicate state from the high bits of the stack pointer after
/// control returns from the called function.
///
/// We also need to verify that we intended to return to this location in the
/// code. An attacker might arrange for the processor to mispredict the return
/// to this valid but incorrect return address in the program rather than the
/// correct one. See the paper on this attack, called "ret2spec" by the
/// researchers, here:
/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
///
/// The way we verify that we returned to the correct location is by preserving
/// the expected return address across the call. One technique involves taking
/// advantage of the red-zone to load the return address from `8(%rsp)` where
/// it was left by the RET instruction when it popped `%rsp`. Alternatively, we
/// can directly save the address into a register that will be preserved across
/// the call. We compare this intended return address against the address
/// immediately following the call (the observed return address). If these
/// mismatch, we have detected misspeculation and can poison our predicate
/// state.
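///
/// A sketch of the sequence emitted for the common case (red zone available,
/// small code model, not position independent; register names hypothetical):
///
///   callq callee                   # tagged with label .Lslh_ret_addr<N>
///   movq -8(%rsp), %expected       # reload the return address (red zone)
///   movq %rsp, %state              # extract the predicate state...
///   sarq $63, %state               # ...from the stack pointer
///   cmpq $.Lslh_ret_addr<N>, %expected
///   cmovneq %poison, %state        # poison the state on a mismatched return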
void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
    MachineInstr &MI) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  auto InsertPt = MI.getIterator();
  DebugLoc Loc = MI.getDebugLoc();

  if (FenceCallAndRet) {
    if (MI.isReturn())
      // Tail call, we don't return to this function.
      // FIXME: We should also handle noreturn calls.
      return;

    // We don't need to fence before the call because the function should fence
    // in its entry. However, we do need to fence after the call returns.
    // Fencing before the return doesn't correctly handle cases where the
    // return itself is mispredicted.
    BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
    ++NumInstsInserted;
    ++NumLFENCEsInserted;
    return;
  }

  // First, we transfer the predicate state into the called function by merging
  // it into the stack pointer. This will kill the current def of the state.
  unsigned StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
  mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);

  // If this call is also a return, it is a tail call and we don't need
  // anything else to handle it so just return. Also, if there are no further
  // instructions and no successors, this call does not return so we can also
  // bail.
  if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
    return;

  // Create a symbol to track the return address and attach it to the call
  // machine instruction. We will lower extra symbols attached to call
  // instructions as labels immediately following the call.
  MCSymbol *RetSymbol =
      MF.getContext().createTempSymbol("slh_ret_addr",
                                       /*AlwaysAddSuffix*/ true);
  MI.setPostInstrSymbol(MF, RetSymbol);

  const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
  unsigned ExpectedRetAddrReg = 0;

  // If we have no red zones or if the function returns twice (possibly without
  // using the `ret` instruction) like setjmp, we need to save the expected
  // return address prior to the call.
  if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
      MF.exposesReturnsTwice()) {
    // If we don't have red zones, we need to compute the expected return
    // address prior to the call and store it in a register that lives across
    // the call.
    //
    // In some ways, this is doubly satisfying as a mitigation because it will
    // also successfully detect stack smashing bugs in some cases (typically,
    // when a callee-saved register is used and the callee doesn't push it onto
    // the stack). But that isn't our primary goal, so we only use it as
    // a fallback.
    //
    // FIXME: It isn't clear that this is reliable in the face of
    // rematerialization in the register allocator. We somehow need to force
    // that to not occur for this particular instruction, and instead to spill
    // or otherwise preserve the value computed *prior* to the call.
    //
    // FIXME: It is even less clear why MachineCSE can't just fold this when we
    // end up having to use identical instructions both before and after the
    // call to feed the comparison.
    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
    if (MF.getTarget().getCodeModel() == CodeModel::Small &&
        !Subtarget->isPositionIndependent()) {
      BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
          .addSym(RetSymbol);
    } else {
      BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
          .addReg(/*Base*/ X86::RIP)
          .addImm(/*Scale*/ 1)
          .addReg(/*Index*/ 0)
          .addSym(RetSymbol)
          .addReg(/*Segment*/ 0);
    }
  }

  // Step past the call to handle when it returns.
  ++InsertPt;

  // If we didn't pre-compute the expected return address into a register, then
  // red zones are enabled and the return address is still available on the
  // stack immediately after the call. As the very first instruction, we load
  // it into a register.
  if (!ExpectedRetAddrReg) {
    ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
        .addReg(/*Base*/ X86::RSP)
        .addImm(/*Scale*/ 1)
        .addReg(/*Index*/ 0)
        .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
                                     // the return address is 8-bytes past it.
        .addReg(/*Segment*/ 0);
  }

  // Now we extract the callee's predicate state from the stack pointer.
  unsigned NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);

  // Test the expected return address against our actual address. If we can
  // form this basic block's address as an immediate, this is easy. Otherwise
  // we compute it.
  if (MF.getTarget().getCodeModel() == CodeModel::Small &&
      !Subtarget->isPositionIndependent()) {
    // FIXME: Could we fold this with the load? It would require careful EFLAGS
    // management.
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
        .addReg(ExpectedRetAddrReg, RegState::Kill)
        .addSym(RetSymbol);
  } else {
    unsigned ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
        .addReg(/*Base*/ X86::RIP)
        .addImm(/*Scale*/ 1)
        .addReg(/*Index*/ 0)
        .addSym(RetSymbol)
        .addReg(/*Segment*/ 0);
    BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
        .addReg(ExpectedRetAddrReg, RegState::Kill)
        .addReg(ActualRetAddrReg, RegState::Kill);
  }

  // Now conditionally update the predicate state we just extracted if we ended
  // up at a different return address than expected.
  int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
  auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);

  unsigned UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
  auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
                   .addReg(NewStateReg, RegState::Kill)
                   .addReg(PS->PoisonReg)
                   .addImm(X86::COND_NE);
  CMovI->findRegisterUseOperand(X86::EFLAGS)->setIsKill(true);
  ++NumInstsInserted;
  LLVM_DEBUG(dbgs() << "  Inserting cmov: "; CMovI->dump(); dbgs() << "\n");

  PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
}

/// An attacker may speculatively store over a value that is then speculatively
/// loaded and used as the target of an indirect call or jump instruction. This
/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
/// in this paper:
/// https://people.csail.mit.edu/vlk/spectre11.pdf
///
/// When this happens, the speculative execution of the call or jump will end
/// up being steered to this attacker controlled address. While most such loads
/// will be adequately hardened already, we want to ensure that they are
/// definitively treated as needing post-load hardening. While address
/// hardening is sufficient to prevent secret data from leaking to the
/// attacker, it may not be sufficient to prevent an attacker from steering
/// speculative execution. We forcibly unfolded all relevant loads above and so
/// will always have an opportunity to post-load harden here; we just need to
/// scan for cases not already flagged and add them.
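///
/// For example, an indirect branch `jmpq *%rax` becomes, in effect (a sketch;
/// the `or` may be elided when %rax already has a hardened copy on record):
///
///   orq  %state, %rax
///   jmpq *%rax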
void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
    MachineInstr &MI,
    SmallDenseMap<unsigned, unsigned, 32> &AddrRegToHardenedReg) {
  switch (MI.getOpcode()) {
  case X86::FARCALL16m:
  case X86::FARCALL32m:
  case X86::FARCALL64:
  case X86::FARJMP16m:
  case X86::FARJMP32m:
  case X86::FARJMP64:
    // We don't need to harden either far calls or far jumps as they are
    // safe from Spectre.
    return;

  default:
    break;
  }

  // We should never see a loading instruction at this point, as those should
  // have been unfolded.
  assert(!MI.mayLoad() && "Found a lingering loading instruction!");

  // If the first operand isn't a register, this is a branch or call
  // instruction with an immediate operand which doesn't need to be hardened.
  if (!MI.getOperand(0).isReg())
    return;

  // For all of these, the target register is the first operand of the
  // instruction.
  auto &TargetOp = MI.getOperand(0);
  unsigned OldTargetReg = TargetOp.getReg();

  // Try to lookup a hardened version of this register. We retain a reference
  // here as we want to update the map to track any newly computed hardened
  // register.
  unsigned &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];

  // If we don't have a hardened register yet, compute one. Otherwise, just use
  // the already hardened register.
  //
  // FIXME: It is a little suspect that we use partially hardened registers
  // that only feed addresses. The complexity of partial hardening with SHRX
  // continues to pile up. Should definitively measure its value and consider
  // eliminating it.
  if (!HardenedTargetReg)
    HardenedTargetReg = hardenValueInRegister(
        OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());

  // Set the target operand to the hardened register.
  TargetOp.setReg(HardenedTargetReg);

  ++NumCallsOrJumpsHardened;
}

INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
                      "X86 speculative load hardener", false, false)
INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
                    "X86 speculative load hardener", false, false)

FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
  return new X86SpeculativeLoadHardeningPass();
}