//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/WithColor.h"

#define DEBUG_TYPE "llvm-mca"

namespace llvm {
namespace mca {

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
      FirstReturnInst(true) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}
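
// Usage sketch (illustrative, not part of this file): a driver such as
// llvm-mca owns the MC objects and lowers each parsed MCInst through this
// builder. The name `SourceInsts` below is hypothetical.
//
//   mca::InstrBuilder IB(*STI, *MCII, *MRI, MCIA);
//   for (const MCInst &Inst : SourceInsts) {
//     Expected<std::unique_ptr<mca::Instruction>> InstOrErr =
//         IB.createInstruction(Inst);
//     if (!InstOrErr)
//       return InstOrErr.takeError(); // e.g. an unsupported instruction.
//   }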

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  std::vector<ResourcePlusCycles> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->Cycles) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->Cycles, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->Cycles;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = countPopulation(A.first);
    unsigned popcntB = countPopulation(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;

  // Remove cycles contributed by smaller resources.
  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(countPopulation(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= PowerOf2Floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;
    if (countPopulation(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= PowerOf2Floor(NormalizedMask);
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (countPopulation(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }
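
// Worked example (hypothetical masks): let HWPort0 = 0x1, HWPort1 = 0x2, and
// let group HWPort01 carry mask 0x7 (a leading group bit 0x4 plus the unit
// bits 0x3). For a write consuming [HWPort0: 2cy, HWPort01: 4cy], the sorted
// worklist visits the unit HWPort0 first, and the loop above subtracts its 2
// cycles from HWPort01, leaving 2 group cycles that may issue on either port.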

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ResourceCycles = [2, 2, 3];
  //  }
  //
  // This means:
  // Resource units HWPort0 and HWPort1 are both used for 2cy.
  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  // will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
      uint64_t MaxResourceUnits = countPopulation(Mask);
      if (RPC.second.NumUnits > countPopulation(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }
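
// Tying this to the Haswell example above: during the subtraction pass the
// group HWPort01 has NumUnits bumped once for HWPort0 and once for HWPort1,
// so NumUnits ends up exceeding the 2 physical units contained in the group,
// and the group is marked as reserved for its remaining cycles.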

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency (100cy).
    ID.MaxLatency = 100U;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}
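
// Example (illustrative): if MCDesc.getNumDefs() reports 2 register
// definitions but the lowered MCInst only carries one register operand, the
// counting loop above leaves NumExplicitDefs != 0 and an InstructionError is
// returned, instead of letting populateWrites() mis-index the operand list.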

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  // 1. The number of explicit and implicit register definitions in an MCInst
  //    matches the number of explicit and implicit definitions according to
  //    the opcode descriptor (MCInstrDesc).
  // 2. Uses start at index #(MCDesc.getNumDefs()).
  // 3. There can only be a single optional register definition, and it is
  //    either the last operand of the sequence (excluding extra operands
  //    contributed by variadic opcodes) or one of the explicit register
  //    definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  //   vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                              @  <MCOperand Reg:59>
  //                              @  <MCOperand Imm:0>     (!!)
  //                              @  <MCOperand Reg:67>
  //                              @  <MCOperand Imm:0>
  //                              @  <MCOperand Imm:14>
  //                              @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2., register reads start at #(NumExplicitDefs-1).
  // That means register R1 from the example is both read and written.
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register operands.
  // The first NumExplicitDefs register operands are expected to be register
  // definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.OpInfo[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def]    OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operand in the variadic sequence is not a register
  // definition.
  //
  // Otherwise, we conservatively assume that any register operand from the
  // variadic sequence is both a register read and a register write.
  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}
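
// Illustrative case (assumption about ARM lowering): a store-multiple such as
// `stm r0, {r1, r2, r3}` is variadic and mayStore() with no unmodeled side
// effects, so AssumeUsesOnly holds and the list registers r1-r3 are modeled
// purely as reads by populateReads() rather than as extra definitions here.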

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use]    OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.getImplicitUses()[I];
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
  // 'unmodeledSideEffects', then this logic optimistically assumes that any
  // extra register operand in the variadic sequence is a register definition,
  // and not a register read.
  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
                        !MCDesc.hasUnmodeledSideEffects();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}
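
// Counterpart example (assumption about ARM lowering): a load-multiple such
// as `ldm r0, {r1, r2, r3}` is variadic and mayLoad() with no unmodeled side
// effects, so AssumeDefsOnly holds here and the list registers are modeled
// only as definitions by populateWrites(), not as input operands.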

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // in CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  unsigned SchedClassID = MCDesc.getSchedClass();
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence.",
        MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of 100cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  ID->MayLoad = MCDesc.mayLoad();
  ID->MayStore = MCDesc.mayStore();
  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
  ID->BeginGroup = SCDesc.BeginGroup;
  ID->EndGroup = SCDesc.EndGroup;
  ID->RetireOOO = SCDesc.RetireOOO;

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Sanity check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if (!IsVariadic && !IsVariant) {
    Descriptors[MCI.getOpcode()] = std::move(ID);
    return *Descriptors[MCI.getOpcode()];
  }

  VariantDescriptors[&MCI] = std::move(ID);
  return *VariantDescriptors[&MCI];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
    return *Descriptors[MCI.getOpcode()];

  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
    return *VariantDescriptors[&MCI];

  return createInstrDescImpl(MCI);
}
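
// Caching note with a sketch (hypothetical driver loop): descriptors for
// non-variant, non-variadic opcodes are shared per opcode, so re-lowering the
// same block across iterations pays the scheduling-model analysis cost once.
//
//   for (unsigned Iter = 0; Iter != NumIterations; ++Iter)
//     for (const MCInst &Inst : Block)    // `Block` is hypothetical.
//       (void)IB.createInstruction(Inst); // mostly hits the Descriptors map.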

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    NewIS->getUses().emplace_back(RD, RegID);
    ReadState &RS = NewIS->getUses().back();

    if (IsDepBreaking) {
      // A mask of all zeroes means: explicit input operands are not
      // independent.
      if (Mask.isNullValue()) {
        if (!RD.isImplicitRead())
          RS.setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. This mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS.setIndependentFromDef();
        }
      }
    }
  }

  // Early exit if there are no writes.
  if (D.Writes.empty())
    return std::move(NewIS);

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);

  // Initialize writes.
  unsigned WriteIndex = 0;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg.
    if (WD.IsOptionalDef && !RegID) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    NewIS->getDefs().emplace_back(WD, RegID,
                                  /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                  /* WritesZero */ IsZeroIdiom);
    ++WriteIndex;
  }

  return std::move(NewIS);
}
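
// Dependency-breaking example (x86, illustrative): `xorl %eax, %eax` is a
// zero idiom on most modern subtargets. MCIA->isZeroIdiom() reports it, so
// the explicit ReadStates created above are marked independent from any
// in-flight writer, and the definition of %eax is created with
// WritesZero = true.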
} // namespace mca
} // namespace llvm