LLVM  10.0.0svn
InstrBuilder.cpp
Go to the documentation of this file.
1 //===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 /// \file
9 ///
10 /// This file implements the InstrBuilder interface.
11 ///
12 //===----------------------------------------------------------------------===//
13 
#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"
21 
22 #define DEBUG_TYPE "llvm-mca"
23 
24 namespace llvm {
25 namespace mca {
26 
27 InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
28  const llvm::MCInstrInfo &mcii,
29  const llvm::MCRegisterInfo &mri,
30  const llvm::MCInstrAnalysis *mcia)
31  : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), FirstCallInst(true),
32  FirstReturnInst(true) {
33  const MCSchedModel &SM = STI.getSchedModel();
34  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
35  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
36 }
37 
39  const MCSchedClassDesc &SCDesc,
40  const MCSubtargetInfo &STI,
41  ArrayRef<uint64_t> ProcResourceMasks) {
42  const MCSchedModel &SM = STI.getSchedModel();
43 
44  // Populate resources consumed.
45  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
46  std::vector<ResourcePlusCycles> Worklist;
47 
48  // Track cycles contributed by resources that are in a "Super" relationship.
49  // This is required if we want to correctly match the behavior of method
50  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
51  // of "consumed" processor resources and resource cycles, the logic in
52  // ExpandProcResource() doesn't update the number of resource cycles
53  // contributed by a "Super" resource to a group.
54  // We need to take this into account when we find that a processor resource is
55  // part of a group, and it is also used as the "Super" of other resources.
56  // This map stores the number of cycles contributed by sub-resources that are
57  // part of a "Super" resource. The key value is the "Super" resource mask ID.
58  DenseMap<uint64_t, unsigned> SuperResources;
59 
60  unsigned NumProcResources = SM.getNumProcResourceKinds();
61  APInt Buffers(NumProcResources, 0);
62 
63  bool AllInOrderResources = true;
64  bool AnyDispatchHazards = false;
65  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
66  const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
68  if (!PRE->Cycles) {
69 #ifndef NDEBUG
71  << "Ignoring invalid write of zero cycles on processor resource "
72  << PR.Name << "\n";
73  WithColor::note() << "found in scheduling class " << SCDesc.Name
74  << " (write index #" << I << ")\n";
75 #endif
76  continue;
77  }
78 
79  uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
80  if (PR.BufferSize < 0) {
81  AllInOrderResources = false;
82  } else {
83  Buffers.setBit(getResourceStateIndex(Mask));
84  AnyDispatchHazards |= (PR.BufferSize == 0);
85  AllInOrderResources &= (PR.BufferSize <= 1);
86  }
87 
88  CycleSegment RCy(0, PRE->Cycles, false);
89  Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
90  if (PR.SuperIdx) {
91  uint64_t Super = ProcResourceMasks[PR.SuperIdx];
92  SuperResources[Super] += PRE->Cycles;
93  }
94  }
95 
96  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;
97 
98  // Sort elements by mask popcount, so that we prioritize resource units over
99  // resource groups, and smaller groups over larger groups.
100  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
101  unsigned popcntA = countPopulation(A.first);
102  unsigned popcntB = countPopulation(B.first);
103  if (popcntA < popcntB)
104  return true;
105  if (popcntA > popcntB)
106  return false;
107  return A.first < B.first;
108  });
109 
110  uint64_t UsedResourceUnits = 0;
111  uint64_t UsedResourceGroups = 0;
112 
113  // Remove cycles contributed by smaller resources.
114  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
115  ResourcePlusCycles &A = Worklist[I];
116  if (!A.second.size()) {
117  assert(countPopulation(A.first) > 1 && "Expected a group!");
118  UsedResourceGroups |= PowerOf2Floor(A.first);
119  continue;
120  }
121 
122  ID.Resources.emplace_back(A);
123  uint64_t NormalizedMask = A.first;
124  if (countPopulation(A.first) == 1) {
125  UsedResourceUnits |= A.first;
126  } else {
127  // Remove the leading 1 from the resource group mask.
128  NormalizedMask ^= PowerOf2Floor(NormalizedMask);
129  UsedResourceGroups |= (A.first ^ NormalizedMask);
130  }
131 
132  for (unsigned J = I + 1; J < E; ++J) {
133  ResourcePlusCycles &B = Worklist[J];
134  if ((NormalizedMask & B.first) == NormalizedMask) {
135  B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
136  if (countPopulation(B.first) > 1)
137  B.second.NumUnits++;
138  }
139  }
140  }
141 
142  // A SchedWrite may specify a number of cycles in which a resource group
143  // is reserved. For example (on target x86; cpu Haswell):
144  //
145  // SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
146  // let ResourceCycles = [2, 2, 3];
147  // }
148  //
149  // This means:
150  // Resource units HWPort0 and HWPort1 are both used for 2cy.
151  // Resource group HWPort01 is the union of HWPort0 and HWPort1.
152  // Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
153  // will not be usable for 2 entire cycles from instruction issue.
154  //
155  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
156  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
157  // extra delay on top of the 2 cycles latency.
158  // During those extra cycles, HWPort01 is not usable by other instructions.
159  for (ResourcePlusCycles &RPC : ID.Resources) {
160  if (countPopulation(RPC.first) > 1 && !RPC.second.isReserved()) {
161  // Remove the leading 1 from the resource group mask.
162  uint64_t Mask = RPC.first ^ PowerOf2Floor(RPC.first);
163  if ((Mask & UsedResourceUnits) == Mask)
164  RPC.second.setReserved();
165  }
166  }
167 
168  // Identify extra buffers that are consumed through super resources.
169  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
170  for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
171  const MCProcResourceDesc &PR = *SM.getProcResource(I);
172  if (PR.BufferSize == -1)
173  continue;
174 
175  uint64_t Mask = ProcResourceMasks[I];
176  if (Mask != SR.first && ((Mask & SR.first) == SR.first))
177  Buffers.setBit(getResourceStateIndex(Mask));
178  }
179  }
180 
181  ID.UsedBuffers = Buffers.getZExtValue();
182  ID.UsedProcResUnits = UsedResourceUnits;
183  ID.UsedProcResGroups = UsedResourceGroups;
184 
185  LLVM_DEBUG({
186  for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
187  dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
188  << "Reserved=" << R.second.isReserved() << ", "
189  << "#Units=" << R.second.NumUnits << ", "
190  << "cy=" << R.second.size() << '\n';
191  uint64_t BufferIDs = ID.UsedBuffers;
192  while (BufferIDs) {
193  uint64_t Current = BufferIDs & (-BufferIDs);
194  dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
195  BufferIDs ^= Current;
196  }
197  dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
198  dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
199  << '\n';
200  });
201 }
202 
203 static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
204  const MCSchedClassDesc &SCDesc,
205  const MCSubtargetInfo &STI) {
206  if (MCDesc.isCall()) {
207  // We cannot estimate how long this call will take.
208  // Artificially set an arbitrarily high latency (100cy).
209  ID.MaxLatency = 100U;
210  return;
211  }
212 
213  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
214  // If latency is unknown, then conservatively assume a MaxLatency of 100cy.
215  ID.MaxLatency = Latency < 0 ? 100U : static_cast<unsigned>(Latency);
216 }
217 
218 static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
219  // Count register definitions, and skip non register operands in the process.
220  unsigned I, E;
221  unsigned NumExplicitDefs = MCDesc.getNumDefs();
222  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
223  const MCOperand &Op = MCI.getOperand(I);
224  if (Op.isReg())
225  --NumExplicitDefs;
226  }
227 
228  if (NumExplicitDefs) {
229  return make_error<InstructionError<MCInst>>(
230  "Expected more register operand definitions.", MCI);
231  }
232 
233  if (MCDesc.hasOptionalDef()) {
234  // Always assume that the optional definition is the last operand.
235  const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
236  if (I == MCI.getNumOperands() || !Op.isReg()) {
237  std::string Message =
238  "expected a register operand for an optional definition. Instruction "
239  "has not been correctly analyzed.";
240  return make_error<InstructionError<MCInst>>(Message, MCI);
241  }
242  }
243 
244  return ErrorSuccess();
245 }
246 
247 void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
248  unsigned SchedClassID) {
249  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
250  const MCSchedModel &SM = STI.getSchedModel();
251  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
252 
253  // Assumptions made by this algorithm:
254  // 1. The number of explicit and implicit register definitions in a MCInst
255  // matches the number of explicit and implicit definitions according to
256  // the opcode descriptor (MCInstrDesc).
257  // 2. Uses start at index #(MCDesc.getNumDefs()).
258  // 3. There can only be a single optional register definition, an it is
259  // always the last operand of the sequence (excluding extra operands
260  // contributed by variadic opcodes).
261  //
262  // These assumptions work quite well for most out-of-order in-tree targets
263  // like x86. This is mainly because the vast majority of instructions is
264  // expanded to MCInst using a straightforward lowering logic that preserves
265  // the ordering of the operands.
266  //
267  // About assumption 1.
268  // The algorithm allows non-register operands between register operand
269  // definitions. This helps to handle some special ARM instructions with
270  // implicit operand increment (-mtriple=armv7):
271  //
272  // vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed
273  // @ <MCOperand Reg:59>
274  // @ <MCOperand Imm:0> (!!)
275  // @ <MCOperand Reg:67>
276  // @ <MCOperand Imm:0>
277  // @ <MCOperand Imm:14>
278  // @ <MCOperand Reg:0>>
279  //
280  // MCDesc reports:
281  // 6 explicit operands.
282  // 1 optional definition
283  // 2 explicit definitions (!!)
284  //
285  // The presence of an 'Imm' operand between the two register definitions
286  // breaks the assumption that "register definitions are always at the
287  // beginning of the operand sequence".
288  //
289  // To workaround this issue, this algorithm ignores (i.e. skips) any
290  // non-register operands between register definitions. The optional
291  // definition is still at index #(NumOperands-1).
292  //
293  // According to assumption 2. register reads start at #(NumExplicitDefs-1).
294  // That means, register R1 from the example is both read and written.
295  unsigned NumExplicitDefs = MCDesc.getNumDefs();
296  unsigned NumImplicitDefs = MCDesc.getNumImplicitDefs();
297  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
298  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
299  if (MCDesc.hasOptionalDef())
300  TotalDefs++;
301 
302  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
303  ID.Writes.resize(TotalDefs + NumVariadicOps);
304  // Iterate over the operands list, and skip non-register operands.
305  // The first NumExplictDefs register operands are expected to be register
306  // definitions.
307  unsigned CurrentDef = 0;
308  unsigned i = 0;
309  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
310  const MCOperand &Op = MCI.getOperand(i);
311  if (!Op.isReg())
312  continue;
313 
314  WriteDescriptor &Write = ID.Writes[CurrentDef];
315  Write.OpIndex = i;
316  if (CurrentDef < NumWriteLatencyEntries) {
317  const MCWriteLatencyEntry &WLE =
318  *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
319  // Conservatively default to MaxLatency.
320  Write.Latency =
321  WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
323  } else {
324  // Assign a default latency for this write.
325  Write.Latency = ID.MaxLatency;
326  Write.SClassOrWriteResourceID = 0;
327  }
328  Write.IsOptionalDef = false;
329  LLVM_DEBUG({
330  dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
331  << ", Latency=" << Write.Latency
332  << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
333  });
334  CurrentDef++;
335  }
336 
337  assert(CurrentDef == NumExplicitDefs &&
338  "Expected more register operand definitions.");
339  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
340  unsigned Index = NumExplicitDefs + CurrentDef;
341  WriteDescriptor &Write = ID.Writes[Index];
342  Write.OpIndex = ~CurrentDef;
343  Write.RegisterID = MCDesc.getImplicitDefs()[CurrentDef];
344  if (Index < NumWriteLatencyEntries) {
345  const MCWriteLatencyEntry &WLE =
346  *STI.getWriteLatencyEntry(&SCDesc, Index);
347  // Conservatively default to MaxLatency.
348  Write.Latency =
349  WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
351  } else {
352  // Assign a default latency for this write.
353  Write.Latency = ID.MaxLatency;
354  Write.SClassOrWriteResourceID = 0;
355  }
356 
357  Write.IsOptionalDef = false;
358  assert(Write.RegisterID != 0 && "Expected a valid phys register!");
359  LLVM_DEBUG({
360  dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
361  << ", PhysReg=" << MRI.getName(Write.RegisterID)
362  << ", Latency=" << Write.Latency
363  << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
364  });
365  }
366 
367  if (MCDesc.hasOptionalDef()) {
368  WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
369  Write.OpIndex = MCDesc.getNumOperands() - 1;
370  // Assign a default latency for this write.
371  Write.Latency = ID.MaxLatency;
372  Write.SClassOrWriteResourceID = 0;
373  Write.IsOptionalDef = true;
374  LLVM_DEBUG({
375  dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
376  << ", Latency=" << Write.Latency
377  << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
378  });
379  }
380 
381  if (!NumVariadicOps)
382  return;
383 
384  // FIXME: if an instruction opcode is flagged 'mayStore', and it has no
385  // "unmodeledSideEffects', then this logic optimistically assumes that any
386  // extra register operands in the variadic sequence is not a register
387  // definition.
388  //
389  // Otherwise, we conservatively assume that any register operand from the
390  // variadic sequence is both a register read and a register write.
391  bool AssumeUsesOnly = MCDesc.mayStore() && !MCDesc.mayLoad() &&
392  !MCDesc.hasUnmodeledSideEffects();
393  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
394  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
395  I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
396  const MCOperand &Op = MCI.getOperand(OpIndex);
397  if (!Op.isReg())
398  continue;
399 
400  WriteDescriptor &Write = ID.Writes[CurrentDef];
401  Write.OpIndex = OpIndex;
402  // Assign a default latency for this write.
403  Write.Latency = ID.MaxLatency;
404  Write.SClassOrWriteResourceID = 0;
405  Write.IsOptionalDef = false;
406  ++CurrentDef;
407  LLVM_DEBUG({
408  dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
409  << ", Latency=" << Write.Latency
410  << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
411  });
412  }
413 
414  ID.Writes.resize(CurrentDef);
415 }
416 
417 void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
418  unsigned SchedClassID) {
419  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
420  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
421  unsigned NumImplicitUses = MCDesc.getNumImplicitUses();
422  // Remove the optional definition.
423  if (MCDesc.hasOptionalDef())
424  --NumExplicitUses;
425  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
426  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
427  ID.Reads.resize(TotalUses);
428  unsigned CurrentUse = 0;
429  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
430  ++I, ++OpIndex) {
431  const MCOperand &Op = MCI.getOperand(OpIndex);
432  if (!Op.isReg())
433  continue;
434 
435  ReadDescriptor &Read = ID.Reads[CurrentUse];
436  Read.OpIndex = OpIndex;
437  Read.UseIndex = I;
438  Read.SchedClassID = SchedClassID;
439  ++CurrentUse;
440  LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
441  << ", UseIndex=" << Read.UseIndex << '\n');
442  }
443 
444  // For the purpose of ReadAdvance, implicit uses come directly after explicit
445  // uses. The "UseIndex" must be updated according to that implicit layout.
446  for (unsigned I = 0; I < NumImplicitUses; ++I) {
447  ReadDescriptor &Read = ID.Reads[CurrentUse + I];
448  Read.OpIndex = ~I;
449  Read.UseIndex = NumExplicitUses + I;
450  Read.RegisterID = MCDesc.getImplicitUses()[I];
451  Read.SchedClassID = SchedClassID;
452  LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
453  << ", UseIndex=" << Read.UseIndex << ", RegisterID="
454  << MRI.getName(Read.RegisterID) << '\n');
455  }
456 
457  CurrentUse += NumImplicitUses;
458 
459  // FIXME: If an instruction opcode is marked as 'mayLoad', and it has no
460  // "unmodeledSideEffects", then this logic optimistically assumes that any
461  // extra register operand in the variadic sequence is not a register
462  // definition.
463  bool AssumeDefsOnly = !MCDesc.mayStore() && MCDesc.mayLoad() &&
464  !MCDesc.hasUnmodeledSideEffects();
465  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
466  I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
467  const MCOperand &Op = MCI.getOperand(OpIndex);
468  if (!Op.isReg())
469  continue;
470 
471  ReadDescriptor &Read = ID.Reads[CurrentUse];
472  Read.OpIndex = OpIndex;
473  Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
474  Read.SchedClassID = SchedClassID;
475  ++CurrentUse;
476  LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
477  << ", UseIndex=" << Read.UseIndex << '\n');
478  }
479 
480  ID.Reads.resize(CurrentUse);
481 }
482 
483 Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
484  const MCInst &MCI) const {
485  if (ID.NumMicroOps != 0)
486  return ErrorSuccess();
487 
488  bool UsesMemory = ID.MayLoad || ID.MayStore;
489  bool UsesBuffers = ID.UsedBuffers;
490  bool UsesResources = !ID.Resources.empty();
491  if (!UsesMemory && !UsesBuffers && !UsesResources)
492  return ErrorSuccess();
493 
494  StringRef Message;
495  if (UsesMemory) {
496  Message = "found an inconsistent instruction that decodes "
497  "into zero opcodes and that consumes load/store "
498  "unit resources.";
499  } else {
500  Message = "found an inconsistent instruction that decodes "
501  "to zero opcodes and that consumes scheduler "
502  "resources.";
503  }
504 
505  return make_error<InstructionError<MCInst>>(Message, MCI);
506 }
507 
509 InstrBuilder::createInstrDescImpl(const MCInst &MCI) {
511  "Itineraries are not yet supported!");
512 
513  // Obtain the instruction descriptor from the opcode.
514  unsigned short Opcode = MCI.getOpcode();
515  const MCInstrDesc &MCDesc = MCII.get(Opcode);
516  const MCSchedModel &SM = STI.getSchedModel();
517 
518  // Then obtain the scheduling class information from the instruction.
519  unsigned SchedClassID = MCDesc.getSchedClass();
520  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();
521 
522  // Try to solve variant scheduling classes.
523  if (IsVariant) {
524  unsigned CPUID = SM.getProcessorID();
525  while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
526  SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, CPUID);
527 
528  if (!SchedClassID) {
529  return make_error<InstructionError<MCInst>>(
530  "unable to resolve scheduling class for write variant.", MCI);
531  }
532  }
533 
534  // Check if this instruction is supported. Otherwise, report an error.
535  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
536  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
537  return make_error<InstructionError<MCInst>>(
538  "found an unsupported instruction in the input assembly sequence.",
539  MCI);
540  }
541 
542  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
543  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
544 
545  // Create a new empty descriptor.
546  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
547  ID->NumMicroOps = SCDesc.NumMicroOps;
548  ID->SchedClassID = SchedClassID;
549 
550  if (MCDesc.isCall() && FirstCallInst) {
551  // We don't correctly model calls.
552  WithColor::warning() << "found a call in the input assembly sequence.\n";
553  WithColor::note() << "call instructions are not correctly modeled. "
554  << "Assume a latency of 100cy.\n";
555  FirstCallInst = false;
556  }
557 
558  if (MCDesc.isReturn() && FirstReturnInst) {
559  WithColor::warning() << "found a return instruction in the input"
560  << " assembly sequence.\n";
561  WithColor::note() << "program counter updates are ignored.\n";
562  FirstReturnInst = false;
563  }
564 
565  ID->MayLoad = MCDesc.mayLoad();
566  ID->MayStore = MCDesc.mayStore();
567  ID->HasSideEffects = MCDesc.hasUnmodeledSideEffects();
568  ID->BeginGroup = SCDesc.BeginGroup;
569  ID->EndGroup = SCDesc.EndGroup;
570 
571  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
572  computeMaxLatency(*ID, MCDesc, SCDesc, STI);
573 
574  if (Error Err = verifyOperands(MCDesc, MCI))
575  return std::move(Err);
576 
577  populateWrites(*ID, MCI, SchedClassID);
578  populateReads(*ID, MCI, SchedClassID);
579 
580  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
581  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');
582 
583  // Sanity check on the instruction descriptor.
584  if (Error Err = verifyInstrDesc(*ID, MCI))
585  return std::move(Err);
586 
587  // Now add the new descriptor.
588  bool IsVariadic = MCDesc.isVariadic();
589  if (!IsVariadic && !IsVariant) {
590  Descriptors[MCI.getOpcode()] = std::move(ID);
591  return *Descriptors[MCI.getOpcode()];
592  }
593 
594  VariantDescriptors[&MCI] = std::move(ID);
595  return *VariantDescriptors[&MCI];
596 }
597 
599 InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI) {
600  if (Descriptors.find_as(MCI.getOpcode()) != Descriptors.end())
601  return *Descriptors[MCI.getOpcode()];
602 
603  if (VariantDescriptors.find(&MCI) != VariantDescriptors.end())
604  return *VariantDescriptors[&MCI];
605 
606  return createInstrDescImpl(MCI);
607 }
608 
611  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI);
612  if (!DescOrErr)
613  return DescOrErr.takeError();
614  const InstrDesc &D = *DescOrErr;
615  std::unique_ptr<Instruction> NewIS = std::make_unique<Instruction>(D);
616 
617  // Check if this is a dependency breaking instruction.
618  APInt Mask;
619 
620  bool IsZeroIdiom = false;
621  bool IsDepBreaking = false;
622  if (MCIA) {
623  unsigned ProcID = STI.getSchedModel().getProcessorID();
624  IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
625  IsDepBreaking =
626  IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
627  if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
628  NewIS->setOptimizableMove();
629  }
630 
631  // Initialize Reads first.
632  MCPhysReg RegID = 0;
633  for (const ReadDescriptor &RD : D.Reads) {
634  if (!RD.isImplicitRead()) {
635  // explicit read.
636  const MCOperand &Op = MCI.getOperand(RD.OpIndex);
637  // Skip non-register operands.
638  if (!Op.isReg())
639  continue;
640  RegID = Op.getReg();
641  } else {
642  // Implicit read.
643  RegID = RD.RegisterID;
644  }
645 
646  // Skip invalid register operands.
647  if (!RegID)
648  continue;
649 
650  // Okay, this is a register operand. Create a ReadState for it.
651  NewIS->getUses().emplace_back(RD, RegID);
652  ReadState &RS = NewIS->getUses().back();
653 
654  if (IsDepBreaking) {
655  // A mask of all zeroes means: explicit input operands are not
656  // independent.
657  if (Mask.isNullValue()) {
658  if (!RD.isImplicitRead())
660  } else {
661  // Check if this register operand is independent according to `Mask`.
662  // Note that Mask may not have enough bits to describe all explicit and
663  // implicit input operands. If this register operand doesn't have a
664  // corresponding bit in Mask, then conservatively assume that it is
665  // dependent.
666  if (Mask.getBitWidth() > RD.UseIndex) {
667  // Okay. This map describe register use `RD.UseIndex`.
668  if (Mask[RD.UseIndex])
670  }
671  }
672  }
673  }
674 
675  // Early exit if there are no writes.
676  if (D.Writes.empty())
677  return std::move(NewIS);
678 
679  // Track register writes that implicitly clear the upper portion of the
680  // underlying super-registers using an APInt.
681  APInt WriteMask(D.Writes.size(), 0);
682 
683  // Now query the MCInstrAnalysis object to obtain information about which
684  // register writes implicitly clear the upper portion of a super-register.
685  if (MCIA)
686  MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
687 
688  // Initialize writes.
689  unsigned WriteIndex = 0;
690  for (const WriteDescriptor &WD : D.Writes) {
691  RegID = WD.isImplicitWrite() ? WD.RegisterID
692  : MCI.getOperand(WD.OpIndex).getReg();
693  // Check if this is a optional definition that references NoReg.
694  if (WD.IsOptionalDef && !RegID) {
695  ++WriteIndex;
696  continue;
697  }
698 
699  assert(RegID && "Expected a valid register ID!");
700  NewIS->getDefs().emplace_back(WD, RegID,
701  /* ClearsSuperRegs */ WriteMask[WriteIndex],
702  /* WritesZero */ IsZeroIdiom);
703  ++WriteIndex;
704  }
705 
706  return std::move(NewIS);
707 }
708 } // namespace mca
709 } // namespace llvm
Expected< std::unique_ptr< Instruction > > createInstruction(const MCInst &MCI)
unsigned getNumImplicitUses() const
Return the number of implicit uses this instruction has.
Definition: MCInstrDesc.h:551
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1571
A sequence of cycles.
Definition: Instruction.h:290
unsigned getNumImplicitDefs() const
Return the number of implicit defs this instruction has.
Definition: MCInstrDesc.h:573
This class represents lattice values for constants.
Definition: AllocatorList.h:23
FormattedNumber format_hex(uint64_t N, unsigned Width, bool Upper=false)
format_hex - Output N as a fixed width hexadecimal.
Definition: Format.h:186
Subclass of Error for the sole purpose of identifying the success path in the type system...
Definition: Error.h:324
bool isImplicitRead() const
Definition: Instruction.h:79
A register read descriptor.
Definition: Instruction.h:65
const MCPhysReg * getImplicitUses() const
Return a list of registers that are potentially read by any instance of this machine instruction...
Definition: MCInstrDesc.h:548
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:179
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by other flags.
Definition: MCInstrDesc.h:443
const MCProcResourceDesc * getProcResource(unsigned ProcResourceIdx) const
Definition: MCSchedule.h:339
bool isReg() const
Definition: MCInst.h:57
static raw_ostream & warning()
Convenience method for printing "warning: " to stderr.
Definition: WithColor.cpp:62
block Block Frequency true
unsigned getProcessorID() const
Definition: MCSchedule.h:317
const MCSchedClassDesc * getSchedClassDesc(unsigned SchedClassIdx) const
Definition: MCSchedule.h:346
Error takeError()
Take ownership of the stored error.
Definition: Error.h:552
bool mayLoad() const
Return true if this instruction could possibly read memory.
Definition: MCInstrDesc.h:418
bool isReturn() const
Return true if the instruction is a return.
Definition: MCInstrDesc.h:265
void setBit(unsigned BitPosition)
Set a given bit to 1.
Definition: APInt.h:1402
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:226
Tracks register operand latency in cycles.
Definition: Instruction.h:227
SmallVector< ReadDescriptor, 4 > Reads
Definition: Instruction.h:350
static raw_ostream & note()
Convenience method for printing "note: " to stderr.
Definition: WithColor.cpp:64
Tagged union holding either a T or a Error.
Definition: yaml2obj.h:21
This file implements a class to represent arbitrary precision integral constant values and operations...
unsigned getReg() const
Returns the register number.
Definition: MCInst.h:64
virtual bool isOptimizableRegisterMove(const MCInst &MI, unsigned CPUID) const
Returns true if MI is a candidate for move elimination.
void setIndependentFromDef()
Definition: Instruction.h:273
Helper used by class InstrDesc to describe how hardware resources are used.
Definition: Instruction.h:337
uint64_t UsedProcResUnits
Definition: Instruction.h:360
uint16_t NumWriteProcResEntries
Definition: MCSchedule.h:121
const MCWriteLatencyEntry * getWriteLatencyEntry(const MCSchedClassDesc *SC, unsigned DefIdx) const
A register write descriptor.
Definition: Instruction.h:37
void computeProcResourceMasks(const MCSchedModel &SM, MutableArrayRef< uint64_t > Masks)
Populates vector Masks with processor resource masks.
Definition: Support.cpp:39
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:158
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition: MCRegister.h:19
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
uint64_t UsedProcResGroups
Definition: Instruction.h:363
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:601
const MCPhysReg * getImplicitDefs() const
Return a list of registers that are potentially written by any instance of this machine instruction...
Definition: MCInstrDesc.h:570
virtual bool clearsSuperRegisters(const MCRegisterInfo &MRI, const MCInst &Inst, APInt &Writes) const
Returns true if at least one of the register writes performed by the instruction implicitly clears the upper portion of all super-registers.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
bool hasOptionalDef() const
Set if this instruction has an optional definition, e.g.
Definition: MCInstrDesc.h:258
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:64
bool isVariadic() const
Return true if this instruction can have a variable number of operands.
Definition: MCInstrDesc.h:254
static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Summarize the scheduling resources required for an instruction of a particular scheduling class...
Definition: MCSchedule.h:110
Interface to description of machine instruction set.
Definition: MCInstrInfo.h:23
virtual bool isDependencyBreaking(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking instruction for the subtarget associated with CPUID ...
unsigned getNumOperands() const
Definition: MCInst.h:181
SmallVector< std::pair< uint64_t, ResourceUsage >, 4 > Resources
Definition: Instruction.h:354
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:50
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1095
static const unsigned short InvalidNumMicroOps
Definition: MCSchedule.h:111
bool hasInstrSchedModel() const
Does this machine model include instruction-level scheduling.
Definition: MCSchedule.h:320
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:556
Specify the latency in cpu cycles for a particular scheduling class and def index.
Definition: MCSchedule.h:78
bool isImplicitWrite() const
Definition: Instruction.h:61
Define a kind of processor resource that will be modeled by the scheduler.
Definition: MCSchedule.h:32
const MCOperand & getOperand(unsigned i) const
Definition: MCInst.h:179
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:241
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isVariant() const
Definition: MCSchedule.h:130
const char * getName(MCRegister RegNo) const
Return the human-readable symbolic target-specific name for the specified physical register...
bool mayStore() const
Return true if this instruction could possibly modify memory.
Definition: MCInstrDesc.h:424
virtual bool isZeroIdiom(const MCInst &MI, APInt &Mask, unsigned CPUID) const
Returns true if MI is a dependency breaking zero-idiom for the given subtarget.
An instruction descriptor.
Definition: Instruction.h:348
static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI)
static int computeInstrLatency(const MCSubtargetInfo &STI, const MCSchedClassDesc &SCDesc)
Returns the latency value for the scheduling class.
Definition: MCSchedule.cpp:40
unsigned getResourceStateIndex(uint64_t Mask)
Definition: Support.h:99
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode...
Definition: MCInstrInfo.h:44
#define I(x, y, z)
Definition: MD5.cpp:58
bool isCall() const
Return true if the instruction is a call.
Definition: MCInstrDesc.h:277
Generic base class for all target subtargets.
virtual unsigned resolveVariantSchedClass(unsigned SchedClass, const MCInst *MI, unsigned CPUID) const
Resolve a variant scheduling class for the given MCInst and CPU.
const MCWriteProcResEntry * getWriteProcResBegin(const MCSchedClassDesc *SC) const
Return an iterator at the first process resource consumed by the given scheduling class...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
A builder class for instructions that are statically analyzed by llvm-mca.
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:684
Lightweight error class with error context and mandatory checking.
Definition: Error.h:157
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
uint16_t NumWriteLatencyEntries
Definition: MCSchedule.h:123
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
unsigned getOpcode() const
Definition: MCInst.h:171
#define LLVM_DEBUG(X)
Definition: Debug.h:122
Instances of this class represent operands of the MCInst class.
Definition: MCInst.h:34
static void initializeUsedResources(InstrDesc &ID, const MCSchedClassDesc &SCDesc, const MCSubtargetInfo &STI, ArrayRef< uint64_t > ProcResourceMasks)
Machine model for scheduling, bundling, and heuristics.
Definition: MCSchedule.h:244
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
unsigned getNumProcResourceKinds() const
Definition: MCSchedule.h:335
SmallVector< WriteDescriptor, 4 > Writes
Definition: Instruction.h:349
void resize(size_type N)
Definition: SmallVector.h:344