LLVM 20.0.0git
X86FixupInstTuning.cpp
Go to the documentation of this file.
1//===-- X86FixupInstTuning.cpp - replace instructions ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file does a tuning pass replacing slower machine instructions
10// with faster ones. We do this here, as opposed to during normal ISel, as
11// attempting to get the "right" instruction can break patterns. This pass
12// is not meant to search for special cases where an instruction can be
13// transformed to another; it is only meant to do transformations where the old
14// instruction is always replaceable with the new instruction. For example:
15//
16// `vpermq ymm` -> `vshufd ymm`
17// -- BAD, not always valid (lane cross/non-repeated mask)
18//
19// `vpermilps ymm` -> `vshufps ymm`
20// -- GOOD, always replaceable
21//
22//===----------------------------------------------------------------------===//
23
24#include "X86.h"
25#include "X86InstrInfo.h"
26#include "X86Subtarget.h"
27#include "llvm/ADT/Statistic.h"
31
32using namespace llvm;
33
34#define DEBUG_TYPE "x86-fixup-inst-tuning"
35
36STATISTIC(NumInstChanges, "Number of instructions changes");
37
38namespace {
39class X86FixupInstTuningPass : public MachineFunctionPass {
40public:
41 static char ID;
42
43 X86FixupInstTuningPass() : MachineFunctionPass(ID) {}
44
45 StringRef getPassName() const override { return "X86 Fixup Inst Tuning"; }
46
47 bool runOnMachineFunction(MachineFunction &MF) override;
48 bool processInstruction(MachineFunction &MF, MachineBasicBlock &MBB,
50
51 // This pass runs after regalloc and doesn't support VReg operands.
54 MachineFunctionProperties::Property::NoVRegs);
55 }
56
57private:
58 const X86InstrInfo *TII = nullptr;
59 const X86Subtarget *ST = nullptr;
60 const MCSchedModel *SM = nullptr;
61};
62} // end anonymous namespace
63
64char X86FixupInstTuningPass::ID = 0;
65
66INITIALIZE_PASS(X86FixupInstTuningPass, DEBUG_TYPE, DEBUG_TYPE, false, false)
67
69 return new X86FixupInstTuningPass();
70}
71
/// Three-way "is the new value strictly smaller?" comparison on optionals.
///
/// \param NewVal optional metric of the candidate.
/// \param CurVal optional metric of the current choice.
/// \returns `true` if both values are present and `*NewVal < *CurVal`,
///          `false` if both are present and `*NewVal > *CurVal`, and
///          `std::nullopt` if either value is absent or the two are equal
///          (i.e. the comparison is inconclusive).
template <typename T>
static std::optional<bool> CmpOptionals(T NewVal, T CurVal) {
  if (NewVal.has_value() && CurVal.has_value() && *NewVal != *CurVal)
    return *NewVal < *CurVal;

  return std::nullopt;
}
79
80bool X86FixupInstTuningPass::processInstruction(
83 MachineInstr &MI = *I;
84 unsigned Opc = MI.getOpcode();
85 unsigned NumOperands = MI.getDesc().getNumOperands();
86
87 auto GetInstTput = [&](unsigned Opcode) -> std::optional<double> {
88 // We already checked that SchedModel exists in `NewOpcPreferable`.
90 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
91 };
92
93 auto GetInstLat = [&](unsigned Opcode) -> std::optional<double> {
94 // We already checked that SchedModel exists in `NewOpcPreferable`.
96 *ST, *(SM->getSchedClassDesc(TII->get(Opcode).getSchedClass())));
97 };
98
99 auto GetInstSize = [&](unsigned Opcode) -> std::optional<unsigned> {
100 if (unsigned Size = TII->get(Opcode).getSize())
101 return Size;
102 // Zero size means we where unable to compute it.
103 return std::nullopt;
104 };
105
106 auto NewOpcPreferable = [&](unsigned NewOpc,
107 bool ReplaceInTie = true) -> bool {
108 std::optional<bool> Res;
109 if (SM->hasInstrSchedModel()) {
110 // Compare tput -> lat -> code size.
111 Res = CmpOptionals(GetInstTput(NewOpc), GetInstTput(Opc));
112 if (Res.has_value())
113 return *Res;
114
115 Res = CmpOptionals(GetInstLat(NewOpc), GetInstLat(Opc));
116 if (Res.has_value())
117 return *Res;
118 }
119
120 Res = CmpOptionals(GetInstSize(Opc), GetInstSize(NewOpc));
121 if (Res.has_value())
122 return *Res;
123
124 // We either have either were unable to get tput/lat/codesize or all values
125 // were equal. Return specified option for a tie.
126 return ReplaceInTie;
127 };
128
129 // `vpermilpd r, i` -> `vshufpd r, r, i`
130 // `vpermilpd r, i, k` -> `vshufpd r, r, i, k`
131 // `vshufpd` is always as fast or faster than `vpermilpd` and takes
132 // 1 less byte of code size for VEX and EVEX encoding.
133 auto ProcessVPERMILPDri = [&](unsigned NewOpc) -> bool {
134 if (!NewOpcPreferable(NewOpc))
135 return false;
136 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
137 MI.removeOperand(NumOperands - 1);
138 MI.addOperand(MI.getOperand(NumOperands - 2));
139 MI.setDesc(TII->get(NewOpc));
140 MI.addOperand(MachineOperand::CreateImm(MaskImm));
141 return true;
142 };
143
144 // `vpermilps r, i` -> `vshufps r, r, i`
145 // `vpermilps r, i, k` -> `vshufps r, r, i, k`
146 // `vshufps` is always as fast or faster than `vpermilps` and takes
147 // 1 less byte of code size for VEX and EVEX encoding.
148 auto ProcessVPERMILPSri = [&](unsigned NewOpc) -> bool {
149 if (!NewOpcPreferable(NewOpc))
150 return false;
151 unsigned MaskImm = MI.getOperand(NumOperands - 1).getImm();
152 MI.removeOperand(NumOperands - 1);
153 MI.addOperand(MI.getOperand(NumOperands - 2));
154 MI.setDesc(TII->get(NewOpc));
155 MI.addOperand(MachineOperand::CreateImm(MaskImm));
156 return true;
157 };
158
159 // `vpermilps m, i` -> `vpshufd m, i` iff no domain delay penalty on shuffles.
160 // `vpshufd` is always as fast or faster than `vpermilps` and takes 1 less
161 // byte of code size.
162 auto ProcessVPERMILPSmi = [&](unsigned NewOpc) -> bool {
163 // TODO: Might be work adding bypass delay if -Os/-Oz is enabled as
164 // `vpshufd` saves a byte of code size.
165 if (!ST->hasNoDomainDelayShuffle() ||
166 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
167 return false;
168 MI.setDesc(TII->get(NewOpc));
169 return true;
170 };
171
172 // `vunpcklpd/vmovlhps r, r` -> `vunpcklqdq r, r`/`vshufpd r, r, 0x00`
173 // `vunpckhpd/vmovlhps r, r` -> `vunpckhqdq r, r`/`vshufpd r, r, 0xff`
174 // `vunpcklpd r, r, k` -> `vunpcklqdq r, r, k`/`vshufpd r, r, k, 0x00`
175 // `vunpckhpd r, r, k` -> `vunpckhqdq r, r, k`/`vshufpd r, r, k, 0xff`
176 // `vunpcklpd r, m` -> `vunpcklqdq r, m, k`
177 // `vunpckhpd r, m` -> `vunpckhqdq r, m, k`
178 // `vunpcklpd r, m, k` -> `vunpcklqdq r, m, k`
179 // `vunpckhpd r, m, k` -> `vunpckhqdq r, m, k`
180 // 1) If no bypass delay and `vunpck{l|h}qdq` faster than `vunpck{l|h}pd`
181 // -> `vunpck{l|h}qdq`
182 // 2) If `vshufpd` faster than `vunpck{l|h}pd`
183 // -> `vshufpd`
184 //
185 // `vunpcklps` -> `vunpckldq` (for all operand types if no bypass delay)
186 auto ProcessUNPCK = [&](unsigned NewOpc, unsigned MaskImm) -> bool {
187 if (!NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
188 return false;
189
190 MI.setDesc(TII->get(NewOpc));
191 MI.addOperand(MachineOperand::CreateImm(MaskImm));
192 return true;
193 };
194
195 auto ProcessUNPCKToIntDomain = [&](unsigned NewOpc) -> bool {
196 // TODO it may be worth it to set ReplaceInTie to `true` as there is no real
197 // downside to the integer unpck, but if someone doesn't specify exact
198 // target we won't find it faster.
199 if (!ST->hasNoDomainDelayShuffle() ||
200 !NewOpcPreferable(NewOpc, /*ReplaceInTie*/ false))
201 return false;
202 MI.setDesc(TII->get(NewOpc));
203 return true;
204 };
205
206 auto ProcessUNPCKLPDrr = [&](unsigned NewOpcIntDomain,
207 unsigned NewOpc) -> bool {
208 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
209 return true;
210 return ProcessUNPCK(NewOpc, 0x00);
211 };
212 auto ProcessUNPCKHPDrr = [&](unsigned NewOpcIntDomain,
213 unsigned NewOpc) -> bool {
214 if (ProcessUNPCKToIntDomain(NewOpcIntDomain))
215 return true;
216 return ProcessUNPCK(NewOpc, 0xff);
217 };
218
219 auto ProcessUNPCKPDrm = [&](unsigned NewOpcIntDomain) -> bool {
220 return ProcessUNPCKToIntDomain(NewOpcIntDomain);
221 };
222
223 auto ProcessUNPCKPS = [&](unsigned NewOpc) -> bool {
224 return ProcessUNPCKToIntDomain(NewOpc);
225 };
226
227 switch (Opc) {
228 case X86::VPERMILPDri:
229 return ProcessVPERMILPDri(X86::VSHUFPDrri);
230 case X86::VPERMILPDYri:
231 return ProcessVPERMILPDri(X86::VSHUFPDYrri);
232 case X86::VPERMILPDZ128ri:
233 return ProcessVPERMILPDri(X86::VSHUFPDZ128rri);
234 case X86::VPERMILPDZ256ri:
235 return ProcessVPERMILPDri(X86::VSHUFPDZ256rri);
236 case X86::VPERMILPDZri:
237 return ProcessVPERMILPDri(X86::VSHUFPDZrri);
238 case X86::VPERMILPDZ128rikz:
239 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrikz);
240 case X86::VPERMILPDZ256rikz:
241 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrikz);
242 case X86::VPERMILPDZrikz:
243 return ProcessVPERMILPDri(X86::VSHUFPDZrrikz);
244 case X86::VPERMILPDZ128rik:
245 return ProcessVPERMILPDri(X86::VSHUFPDZ128rrik);
246 case X86::VPERMILPDZ256rik:
247 return ProcessVPERMILPDri(X86::VSHUFPDZ256rrik);
248 case X86::VPERMILPDZrik:
249 return ProcessVPERMILPDri(X86::VSHUFPDZrrik);
250
251 case X86::VPERMILPSri:
252 return ProcessVPERMILPSri(X86::VSHUFPSrri);
253 case X86::VPERMILPSYri:
254 return ProcessVPERMILPSri(X86::VSHUFPSYrri);
255 case X86::VPERMILPSZ128ri:
256 return ProcessVPERMILPSri(X86::VSHUFPSZ128rri);
257 case X86::VPERMILPSZ256ri:
258 return ProcessVPERMILPSri(X86::VSHUFPSZ256rri);
259 case X86::VPERMILPSZri:
260 return ProcessVPERMILPSri(X86::VSHUFPSZrri);
261 case X86::VPERMILPSZ128rikz:
262 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrikz);
263 case X86::VPERMILPSZ256rikz:
264 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrikz);
265 case X86::VPERMILPSZrikz:
266 return ProcessVPERMILPSri(X86::VSHUFPSZrrikz);
267 case X86::VPERMILPSZ128rik:
268 return ProcessVPERMILPSri(X86::VSHUFPSZ128rrik);
269 case X86::VPERMILPSZ256rik:
270 return ProcessVPERMILPSri(X86::VSHUFPSZ256rrik);
271 case X86::VPERMILPSZrik:
272 return ProcessVPERMILPSri(X86::VSHUFPSZrrik);
273 case X86::VPERMILPSmi:
274 return ProcessVPERMILPSmi(X86::VPSHUFDmi);
275 case X86::VPERMILPSYmi:
276 // TODO: See if there is a more generic way we can test if the replacement
277 // instruction is supported.
278 return ST->hasAVX2() ? ProcessVPERMILPSmi(X86::VPSHUFDYmi) : false;
279 case X86::VPERMILPSZ128mi:
280 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mi);
281 case X86::VPERMILPSZ256mi:
282 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mi);
283 case X86::VPERMILPSZmi:
284 return ProcessVPERMILPSmi(X86::VPSHUFDZmi);
285 case X86::VPERMILPSZ128mikz:
286 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mikz);
287 case X86::VPERMILPSZ256mikz:
288 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mikz);
289 case X86::VPERMILPSZmikz:
290 return ProcessVPERMILPSmi(X86::VPSHUFDZmikz);
291 case X86::VPERMILPSZ128mik:
292 return ProcessVPERMILPSmi(X86::VPSHUFDZ128mik);
293 case X86::VPERMILPSZ256mik:
294 return ProcessVPERMILPSmi(X86::VPSHUFDZ256mik);
295 case X86::VPERMILPSZmik:
296 return ProcessVPERMILPSmi(X86::VPSHUFDZmik);
297
298 case X86::MOVLHPSrr:
299 case X86::UNPCKLPDrr:
300 return ProcessUNPCKLPDrr(X86::PUNPCKLQDQrr, X86::SHUFPDrri);
301 case X86::VMOVLHPSrr:
302 case X86::VUNPCKLPDrr:
303 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQrr, X86::VSHUFPDrri);
304 case X86::VUNPCKLPDYrr:
305 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQYrr, X86::VSHUFPDYrri);
306 // VMOVLHPS is always 128 bits.
307 case X86::VMOVLHPSZrr:
308 case X86::VUNPCKLPDZ128rr:
309 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rr, X86::VSHUFPDZ128rri);
310 case X86::VUNPCKLPDZ256rr:
311 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rr, X86::VSHUFPDZ256rri);
312 case X86::VUNPCKLPDZrr:
313 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrr, X86::VSHUFPDZrri);
314 case X86::VUNPCKLPDZ128rrk:
315 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrk, X86::VSHUFPDZ128rrik);
316 case X86::VUNPCKLPDZ256rrk:
317 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrk, X86::VSHUFPDZ256rrik);
318 case X86::VUNPCKLPDZrrk:
319 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrk, X86::VSHUFPDZrrik);
320 case X86::VUNPCKLPDZ128rrkz:
321 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
322 case X86::VUNPCKLPDZ256rrkz:
323 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
324 case X86::VUNPCKLPDZrrkz:
325 return ProcessUNPCKLPDrr(X86::VPUNPCKLQDQZrrkz, X86::VSHUFPDZrrikz);
326 case X86::UNPCKHPDrr:
327 return ProcessUNPCKHPDrr(X86::PUNPCKHQDQrr, X86::SHUFPDrri);
328 case X86::VUNPCKHPDrr:
329 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQrr, X86::VSHUFPDrri);
330 case X86::VUNPCKHPDYrr:
331 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQYrr, X86::VSHUFPDYrri);
332 case X86::VUNPCKHPDZ128rr:
333 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rr, X86::VSHUFPDZ128rri);
334 case X86::VUNPCKHPDZ256rr:
335 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rr, X86::VSHUFPDZ256rri);
336 case X86::VUNPCKHPDZrr:
337 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrr, X86::VSHUFPDZrri);
338 case X86::VUNPCKHPDZ128rrk:
339 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrk, X86::VSHUFPDZ128rrik);
340 case X86::VUNPCKHPDZ256rrk:
341 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrk, X86::VSHUFPDZ256rrik);
342 case X86::VUNPCKHPDZrrk:
343 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrk, X86::VSHUFPDZrrik);
344 case X86::VUNPCKHPDZ128rrkz:
345 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ128rrkz, X86::VSHUFPDZ128rrikz);
346 case X86::VUNPCKHPDZ256rrkz:
347 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZ256rrkz, X86::VSHUFPDZ256rrikz);
348 case X86::VUNPCKHPDZrrkz:
349 return ProcessUNPCKHPDrr(X86::VPUNPCKHQDQZrrkz, X86::VSHUFPDZrrikz);
350 case X86::UNPCKLPDrm:
351 return ProcessUNPCKPDrm(X86::PUNPCKLQDQrm);
352 case X86::VUNPCKLPDrm:
353 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQrm);
354 case X86::VUNPCKLPDYrm:
355 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQYrm);
356 case X86::VUNPCKLPDZ128rm:
357 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rm);
358 case X86::VUNPCKLPDZ256rm:
359 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rm);
360 case X86::VUNPCKLPDZrm:
361 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrm);
362 case X86::VUNPCKLPDZ128rmk:
363 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmk);
364 case X86::VUNPCKLPDZ256rmk:
365 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmk);
366 case X86::VUNPCKLPDZrmk:
367 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmk);
368 case X86::VUNPCKLPDZ128rmkz:
369 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ128rmkz);
370 case X86::VUNPCKLPDZ256rmkz:
371 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZ256rmkz);
372 case X86::VUNPCKLPDZrmkz:
373 return ProcessUNPCKPDrm(X86::VPUNPCKLQDQZrmkz);
374 case X86::UNPCKHPDrm:
375 return ProcessUNPCKPDrm(X86::PUNPCKHQDQrm);
376 case X86::VUNPCKHPDrm:
377 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQrm);
378 case X86::VUNPCKHPDYrm:
379 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQYrm);
380 case X86::VUNPCKHPDZ128rm:
381 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rm);
382 case X86::VUNPCKHPDZ256rm:
383 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rm);
384 case X86::VUNPCKHPDZrm:
385 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrm);
386 case X86::VUNPCKHPDZ128rmk:
387 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmk);
388 case X86::VUNPCKHPDZ256rmk:
389 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmk);
390 case X86::VUNPCKHPDZrmk:
391 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmk);
392 case X86::VUNPCKHPDZ128rmkz:
393 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ128rmkz);
394 case X86::VUNPCKHPDZ256rmkz:
395 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZ256rmkz);
396 case X86::VUNPCKHPDZrmkz:
397 return ProcessUNPCKPDrm(X86::VPUNPCKHQDQZrmkz);
398
399 case X86::UNPCKLPSrr:
400 return ProcessUNPCKPS(X86::PUNPCKLDQrr);
401 case X86::VUNPCKLPSrr:
402 return ProcessUNPCKPS(X86::VPUNPCKLDQrr);
403 case X86::VUNPCKLPSYrr:
404 return ProcessUNPCKPS(X86::VPUNPCKLDQYrr);
405 case X86::VUNPCKLPSZ128rr:
406 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rr);
407 case X86::VUNPCKLPSZ256rr:
408 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rr);
409 case X86::VUNPCKLPSZrr:
410 return ProcessUNPCKPS(X86::VPUNPCKLDQZrr);
411 case X86::VUNPCKLPSZ128rrk:
412 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrk);
413 case X86::VUNPCKLPSZ256rrk:
414 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrk);
415 case X86::VUNPCKLPSZrrk:
416 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrk);
417 case X86::VUNPCKLPSZ128rrkz:
418 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rrkz);
419 case X86::VUNPCKLPSZ256rrkz:
420 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rrkz);
421 case X86::VUNPCKLPSZrrkz:
422 return ProcessUNPCKPS(X86::VPUNPCKLDQZrrkz);
423 case X86::UNPCKHPSrr:
424 return ProcessUNPCKPS(X86::PUNPCKHDQrr);
425 case X86::VUNPCKHPSrr:
426 return ProcessUNPCKPS(X86::VPUNPCKHDQrr);
427 case X86::VUNPCKHPSYrr:
428 return ProcessUNPCKPS(X86::VPUNPCKHDQYrr);
429 case X86::VUNPCKHPSZ128rr:
430 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rr);
431 case X86::VUNPCKHPSZ256rr:
432 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rr);
433 case X86::VUNPCKHPSZrr:
434 return ProcessUNPCKPS(X86::VPUNPCKHDQZrr);
435 case X86::VUNPCKHPSZ128rrk:
436 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrk);
437 case X86::VUNPCKHPSZ256rrk:
438 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrk);
439 case X86::VUNPCKHPSZrrk:
440 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrk);
441 case X86::VUNPCKHPSZ128rrkz:
442 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rrkz);
443 case X86::VUNPCKHPSZ256rrkz:
444 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rrkz);
445 case X86::VUNPCKHPSZrrkz:
446 return ProcessUNPCKPS(X86::VPUNPCKHDQZrrkz);
447 case X86::UNPCKLPSrm:
448 return ProcessUNPCKPS(X86::PUNPCKLDQrm);
449 case X86::VUNPCKLPSrm:
450 return ProcessUNPCKPS(X86::VPUNPCKLDQrm);
451 case X86::VUNPCKLPSYrm:
452 return ProcessUNPCKPS(X86::VPUNPCKLDQYrm);
453 case X86::VUNPCKLPSZ128rm:
454 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rm);
455 case X86::VUNPCKLPSZ256rm:
456 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rm);
457 case X86::VUNPCKLPSZrm:
458 return ProcessUNPCKPS(X86::VPUNPCKLDQZrm);
459 case X86::VUNPCKLPSZ128rmk:
460 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmk);
461 case X86::VUNPCKLPSZ256rmk:
462 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmk);
463 case X86::VUNPCKLPSZrmk:
464 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmk);
465 case X86::VUNPCKLPSZ128rmkz:
466 return ProcessUNPCKPS(X86::VPUNPCKLDQZ128rmkz);
467 case X86::VUNPCKLPSZ256rmkz:
468 return ProcessUNPCKPS(X86::VPUNPCKLDQZ256rmkz);
469 case X86::VUNPCKLPSZrmkz:
470 return ProcessUNPCKPS(X86::VPUNPCKLDQZrmkz);
471 case X86::UNPCKHPSrm:
472 return ProcessUNPCKPS(X86::PUNPCKHDQrm);
473 case X86::VUNPCKHPSrm:
474 return ProcessUNPCKPS(X86::VPUNPCKHDQrm);
475 case X86::VUNPCKHPSYrm:
476 return ProcessUNPCKPS(X86::VPUNPCKHDQYrm);
477 case X86::VUNPCKHPSZ128rm:
478 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rm);
479 case X86::VUNPCKHPSZ256rm:
480 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rm);
481 case X86::VUNPCKHPSZrm:
482 return ProcessUNPCKPS(X86::VPUNPCKHDQZrm);
483 case X86::VUNPCKHPSZ128rmk:
484 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmk);
485 case X86::VUNPCKHPSZ256rmk:
486 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmk);
487 case X86::VUNPCKHPSZrmk:
488 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmk);
489 case X86::VUNPCKHPSZ128rmkz:
490 return ProcessUNPCKPS(X86::VPUNPCKHDQZ128rmkz);
491 case X86::VUNPCKHPSZ256rmkz:
492 return ProcessUNPCKPS(X86::VPUNPCKHDQZ256rmkz);
493 case X86::VUNPCKHPSZrmkz:
494 return ProcessUNPCKPS(X86::VPUNPCKHDQZrmkz);
495 default:
496 return false;
497 }
498}
499
500bool X86FixupInstTuningPass::runOnMachineFunction(MachineFunction &MF) {
501 LLVM_DEBUG(dbgs() << "Start X86FixupInstTuning\n";);
502 bool Changed = false;
504 TII = ST->getInstrInfo();
505 SM = &ST->getSchedModel();
506
507 for (MachineBasicBlock &MBB : MF) {
508 for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
509 if (processInstruction(MF, MBB, I)) {
510 ++NumInstChanges;
511 Changed = true;
512 }
513 }
514 }
515 LLVM_DEBUG(dbgs() << "End X86FixupInstTuning\n";);
516 return Changed;
517}
MachineBasicBlock & MBB
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static std::optional< bool > CmpOptionals(T NewVal, T CurVal)
#define DEBUG_TYPE
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
unsigned getSize(const MachineInstr &MI) const
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
Definition: MachineInstr.h:69
static MachineOperand CreateImm(int64_t Val)
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
FunctionPass * createX86FixupInstTuning()
Return a pass that replaces equivalent slower instructions with faster ones.
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:253
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
Definition: MCSchedule.cpp:42
static double getReciprocalThroughput(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Definition: MCSchedule.cpp:90