//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
///
/// This file implements the InstrBuilder interface.
///
//===----------------------------------------------------------------------===//

#include "llvm/MCA/InstrBuilder.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#define DEBUG_TYPE "llvm-mca-instrbuilder"

namespace llvm {
namespace mca {

char RecycledInstErr::ID = 0;

InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,
                           const llvm::MCInstrInfo &mcii,
                           const llvm::MCRegisterInfo &mri,
                           const llvm::MCInstrAnalysis *mcia,
                           const mca::InstrumentManager &im, unsigned cl)
    : STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),
      FirstReturnInst(true), CallLatency(cl) {
  const MCSchedModel &SM = STI.getSchedModel();
  ProcResourceMasks.resize(SM.getNumProcResourceKinds());
  computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);
}

static void initializeUsedResources(InstrDesc &ID,
                                    const MCSchedClassDesc &SCDesc,
                                    const MCSubtargetInfo &STI,
                                    ArrayRef<uint64_t> ProcResourceMasks) {
  const MCSchedModel &SM = STI.getSchedModel();

  // Populate resources consumed.
  using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;
  SmallVector<ResourcePlusCycles, 4> Worklist;

  // Track cycles contributed by resources that are in a "Super" relationship.
  // This is required if we want to correctly match the behavior of method
  // SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set
  // of "consumed" processor resources and resource cycles, the logic in
  // ExpandProcResource() doesn't update the number of resource cycles
  // contributed by a "Super" resource to a group.
  // We need to take this into account when we find that a processor resource
  // is part of a group, and it is also used as the "Super" of other resources.
  // This map stores the number of cycles contributed by sub-resources that are
  // part of a "Super" resource. The key value is the "Super" resource mask ID.
  DenseMap<uint64_t, unsigned> SuperResources;
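
  // For illustration (hypothetical resources, not taken from a real model):
  // assume unit S names unit X as its "Super", and X also belongs to group G.
  // A write that consumes S for 2cy and X for 3cy leaves
  // SuperResources[mask(X)] == 2; the subtraction loop further down then
  // removes only 3 - 2 = 1cy from G on X's behalf, mirroring how
  // SubtargetEmitter::ExpandProcResource() accounts for these cycles.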

  unsigned NumProcResources = SM.getNumProcResourceKinds();
  APInt Buffers(NumProcResources, 0);

  bool AllInOrderResources = true;
  bool AnyDispatchHazards = false;
  for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {
    const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;
    const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);
    if (!PRE->ReleaseAtCycle) {
#ifndef NDEBUG
      WithColor::warning()
          << "Ignoring invalid write of zero cycles on processor resource "
          << PR.Name << "\n";
      WithColor::note() << "found in scheduling class " << SCDesc.Name
                        << " (write index #" << I << ")\n";
#endif
      continue;
    }

    uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];
    if (PR.BufferSize < 0) {
      AllInOrderResources = false;
    } else {
      Buffers.setBit(getResourceStateIndex(Mask));
      AnyDispatchHazards |= (PR.BufferSize == 0);
      AllInOrderResources &= (PR.BufferSize <= 1);
    }

    CycleSegment RCy(0, PRE->ReleaseAtCycle, false);
    Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));
    if (PR.SuperIdx) {
      uint64_t Super = ProcResourceMasks[PR.SuperIdx];
      SuperResources[Super] += PRE->ReleaseAtCycle;
    }
  }

  ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;

  // Sort elements by mask popcount, so that we prioritize resource units over
  // resource groups, and smaller groups over larger groups.
  sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {
    unsigned popcntA = llvm::popcount(A.first);
    unsigned popcntB = llvm::popcount(B.first);
    if (popcntA < popcntB)
      return true;
    if (popcntA > popcntB)
      return false;
    return A.first < B.first;
  });
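
  // A sketch of the resulting order (hypothetical masks): unit P0 = 0b0001 and
  // unit P1 = 0b0010 sort first (popcount 1), followed by group P01 = 0b1011,
  // whose mask carries its two unit bits plus its own leading group bit
  // (popcount 3). Units therefore come first, which lets the loop below
  // subtract their cycles from the groups that contain them.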

  uint64_t UsedResourceUnits = 0;
  uint64_t UsedResourceGroups = 0;
  uint64_t UnitsFromResourceGroups = 0;

  // Remove cycles contributed by smaller resources, and check if there
  // are partially overlapping resource groups.
  ID.HasPartiallyOverlappingGroups = false;

  for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {
    ResourcePlusCycles &A = Worklist[I];
    if (!A.second.size()) {
      assert(llvm::popcount(A.first) > 1 && "Expected a group!");
      UsedResourceGroups |= llvm::bit_floor(A.first);
      continue;
    }

    ID.Resources.emplace_back(A);
    uint64_t NormalizedMask = A.first;

    if (llvm::popcount(A.first) == 1) {
      UsedResourceUnits |= A.first;
    } else {
      // Remove the leading 1 from the resource group mask.
      NormalizedMask ^= llvm::bit_floor(NormalizedMask);
      if (UnitsFromResourceGroups & NormalizedMask)
        ID.HasPartiallyOverlappingGroups = true;

      UnitsFromResourceGroups |= NormalizedMask;
      UsedResourceGroups |= (A.first ^ NormalizedMask);
    }

    for (unsigned J = I + 1; J < E; ++J) {
      ResourcePlusCycles &B = Worklist[J];
      if ((NormalizedMask & B.first) == NormalizedMask) {
        B.second.CS.subtract(A.second.size() - SuperResources[A.first]);
        if (llvm::popcount(B.first) > 1)
          B.second.NumUnits++;
      }
    }
  }

  // A SchedWrite may specify a number of cycles in which a resource group
  // is reserved. For example (on target x86; cpu Haswell):
  //
  //  SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {
  //    let ReleaseAtCycles = [2, 2, 3];
  //  }
  //
  // This means:
  //  Resource units HWPort0 and HWPort1 are both used for 2cy.
  //  Resource group HWPort01 is the union of HWPort0 and HWPort1.
  //  Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01
  //  will not be usable for 2 entire cycles from instruction issue.
  //
  // On top of those 2cy, SchedWriteRes explicitly specifies an extra latency
  // of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an
  // extra delay on top of the 2 cycles latency.
  // During those extra cycles, HWPort01 is not usable by other instructions.
  for (ResourcePlusCycles &RPC : ID.Resources) {
    if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {
      // Remove the leading 1 from the resource group mask.
      uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);
      uint64_t MaxResourceUnits = llvm::popcount(Mask);
      if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {
        RPC.second.setReserved();
        RPC.second.NumUnits = MaxResourceUnits;
      }
    }
  }
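
  // Continuing the Haswell example above (a sketch, assuming ResourceUsage
  // entries start out with NumUnits = 1): the subtraction loop bumped
  // HWPort01's NumUnits once for HWPort0 and once for HWPort1, so 1 + 2 = 3
  // exceeds the group's 2 units and HWPort01 gets marked as reserved here.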

  // Identify extra buffers that are consumed through super resources.
  for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {
    for (unsigned I = 1, E = NumProcResources; I < E; ++I) {
      const MCProcResourceDesc &PR = *SM.getProcResource(I);
      if (PR.BufferSize == -1)
        continue;

      uint64_t Mask = ProcResourceMasks[I];
      if (Mask != SR.first && ((Mask & SR.first) == SR.first))
        Buffers.setBit(getResourceStateIndex(Mask));
    }
  }

  ID.UsedBuffers = Buffers.getZExtValue();
  ID.UsedProcResUnits = UsedResourceUnits;
  ID.UsedProcResGroups = UsedResourceGroups;

  LLVM_DEBUG({
    for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)
      dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "
             << "Reserved=" << R.second.isReserved() << ", "
             << "#Units=" << R.second.NumUnits << ", "
             << "cy=" << R.second.size() << '\n';
    uint64_t BufferIDs = ID.UsedBuffers;
    while (BufferIDs) {
      uint64_t Current = BufferIDs & (-BufferIDs);
      dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';
      BufferIDs ^= Current;
    }
    dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';
    dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)
           << '\n';
    dbgs() << "\t\tHasPartiallyOverlappingGroups="
           << ID.HasPartiallyOverlappingGroups << '\n';
  });
}

static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,
                              const MCSchedClassDesc &SCDesc,
                              const MCSubtargetInfo &STI,
                              unsigned CallLatency) {
  if (MCDesc.isCall()) {
    // We cannot estimate how long this call will take.
    // Artificially set an arbitrarily high latency.
    ID.MaxLatency = CallLatency;
    return;
  }

  int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);
  // If latency is unknown, then conservatively assume the MaxLatency set for
  // calls.
  ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);
}

static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {
  // Count register definitions, and skip non-register operands in the process.
  unsigned I, E;
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {
    const MCOperand &Op = MCI.getOperand(I);
    if (Op.isReg())
      --NumExplicitDefs;
  }

  if (NumExplicitDefs) {
    return make_error<InstructionError<MCInst>>(
        "Expected more register operand definitions.", MCI);
  }

  if (MCDesc.hasOptionalDef()) {
    // Always assume that the optional definition is the last operand.
    const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);
    if (I == MCI.getNumOperands() || !Op.isReg()) {
      std::string Message =
          "expected a register operand for an optional definition. Instruction "
          "has not been correctly analyzed.";
      return make_error<InstructionError<MCInst>>(Message, MCI);
    }
  }

  return ErrorSuccess();
}

void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,
                                  unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedModel &SM = STI.getSchedModel();
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);

  // Assumptions made by this algorithm:
  // 1. The number of explicit and implicit register definitions in an MCInst
  //    matches the number of explicit and implicit definitions according to
  //    the opcode descriptor (MCInstrDesc).
  // 2. Uses start at index #(MCDesc.getNumDefs()).
  // 3. There can only be a single optional register definition, and it is
  //    either the last operand of the sequence (excluding extra operands
  //    contributed by variadic opcodes) or one of the explicit register
  //    definitions. The latter occurs for some Thumb1 instructions.
  //
  // These assumptions work quite well for most out-of-order in-tree targets
  // like x86. This is mainly because the vast majority of instructions are
  // expanded to MCInst using a straightforward lowering logic that preserves
  // the ordering of the operands.
  //
  // About assumption 1.
  // The algorithm allows non-register operands between register operand
  // definitions. This helps to handle some special ARM instructions with
  // implicit operand increment (-mtriple=armv7):
  //
  // vld1.32 {d18, d19}, [r1]!  @ <MCInst #1463 VLD1q32wb_fixed
  //                            @  <MCOperand Reg:59>
  //                            @  <MCOperand Imm:0>     (!!)
  //                            @  <MCOperand Reg:67>
  //                            @  <MCOperand Imm:0>
  //                            @  <MCOperand Imm:14>
  //                            @  <MCOperand Reg:0>>
  //
  // MCDesc reports:
  //  6 explicit operands.
  //  1 optional definition
  //  2 explicit definitions (!!)
  //
  // The presence of an 'Imm' operand between the two register definitions
  // breaks the assumption that "register definitions are always at the
  // beginning of the operand sequence".
  //
  // To work around this issue, this algorithm ignores (i.e. skips) any
  // non-register operands between register definitions. The optional
  // definition is still at index #(NumOperands-1).
  //
  // According to assumption 2, register reads start at #(MCDesc.getNumDefs()).
  // That means register R1 from the example is both read and written.
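  //
  // Working through the VLD1 example above (a sketch of what the code below
  // computes): the two explicit definitions map to Writes[0].OpIndex = 0
  // (Reg:59) and Writes[1].OpIndex = 2 (Reg:67); the Imm at operand #1 is
  // skipped; the optional definition (Reg:0, i.e. NoReg) stays at operand
  // index #(NumOperands-1) = 5.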
  unsigned NumExplicitDefs = MCDesc.getNumDefs();
  unsigned NumImplicitDefs = MCDesc.implicit_defs().size();
  unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;
  unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;
  if (MCDesc.hasOptionalDef())
    TotalDefs++;

  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  ID.Writes.resize(TotalDefs + NumVariadicOps);
  // Iterate over the operands list, and skip non-register or constant register
  // operands. The first NumExplicitDefs register operands are expected to be
  // register definitions.
  unsigned CurrentDef = 0;
  unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;
  unsigned i = 0;
  for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {
    const MCOperand &Op = MCI.getOperand(i);
    if (!Op.isReg())
      continue;

    if (MCDesc.operands()[CurrentDef].isOptionalDef()) {
      OptionalDefIdx = CurrentDef++;
      continue;
    }
    if (MRI.isConstant(Op.getReg())) {
      CurrentDef++;
      continue;
    }

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = i;
    if (CurrentDef < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, CurrentDef);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }
    Write.IsOptionalDef = false;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
    CurrentDef++;
  }

  assert(CurrentDef == NumExplicitDefs &&
         "Expected more register operand definitions.");
  for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {
    unsigned Index = NumExplicitDefs + CurrentDef;
    WriteDescriptor &Write = ID.Writes[Index];
    Write.OpIndex = ~CurrentDef;
    Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];
    if (Index < NumWriteLatencyEntries) {
      const MCWriteLatencyEntry &WLE =
          *STI.getWriteLatencyEntry(&SCDesc, Index);
      // Conservatively default to MaxLatency.
      Write.Latency =
          WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);
      Write.SClassOrWriteResourceID = WLE.WriteResourceID;
    } else {
      // Assign a default latency for this write.
      Write.Latency = ID.MaxLatency;
      Write.SClassOrWriteResourceID = 0;
    }

    Write.IsOptionalDef = false;
    assert(Write.RegisterID != 0 && "Expected a valid phys register!");
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex
             << ", PhysReg=" << MRI.getName(Write.RegisterID)
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (MCDesc.hasOptionalDef()) {
    WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];
    Write.OpIndex = OptionalDefIdx;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = true;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  if (!NumVariadicOps)
    return;

  bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();
  CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    WriteDescriptor &Write = ID.Writes[CurrentDef];
    Write.OpIndex = OpIndex;
    // Assign a default latency for this write.
    Write.Latency = ID.MaxLatency;
    Write.SClassOrWriteResourceID = 0;
    Write.IsOptionalDef = false;
    ++CurrentDef;
    LLVM_DEBUG({
      dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex
             << ", Latency=" << Write.Latency
             << ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';
    });
  }

  ID.Writes.resize(CurrentDef);
}

void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,
                                 unsigned SchedClassID) {
  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();
  unsigned NumImplicitUses = MCDesc.implicit_uses().size();
  // Remove the optional definition.
  if (MCDesc.hasOptionalDef())
    --NumExplicitUses;
  unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();
  unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;
  ID.Reads.resize(TotalUses);
  unsigned CurrentUse = 0;
  for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;
       ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;
    if (MRI.isConstant(Op.getReg()))
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  // For the purpose of ReadAdvance, implicit uses come directly after explicit
  // uses. The "UseIndex" must be updated according to that implicit layout.
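  // For instance (hypothetical counts, not from a real opcode): with two
  // explicit uses and one implicit use, the implicit read is assigned
  // UseIndex = 2, right after explicit UseIndex 0 and 1, so ReadAdvance
  // entries keyed by use index keep lining up with the right operands.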
  for (unsigned I = 0; I < NumImplicitUses; ++I) {
    ReadDescriptor &Read = ID.Reads[CurrentUse + I];
    Read.OpIndex = ~I;
    Read.UseIndex = NumExplicitUses + I;
    Read.RegisterID = MCDesc.implicit_uses()[I];
    if (MRI.isConstant(Read.RegisterID))
      continue;
    Read.SchedClassID = SchedClassID;
    LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << ", RegisterID="
                      << MRI.getName(Read.RegisterID) << '\n');
  }

  CurrentUse += NumImplicitUses;

  bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();
  for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();
       I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {
    const MCOperand &Op = MCI.getOperand(OpIndex);
    if (!Op.isReg())
      continue;

    ReadDescriptor &Read = ID.Reads[CurrentUse];
    Read.OpIndex = OpIndex;
    Read.UseIndex = NumExplicitUses + NumImplicitUses + I;
    Read.SchedClassID = SchedClassID;
    ++CurrentUse;
    LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex
                      << ", UseIndex=" << Read.UseIndex << '\n');
  }

  ID.Reads.resize(CurrentUse);
}

Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,
                                    const MCInst &MCI) const {
  if (ID.NumMicroOps != 0)
    return ErrorSuccess();

  bool UsesBuffers = ID.UsedBuffers;
  bool UsesResources = !ID.Resources.empty();
  if (!UsesBuffers && !UsesResources)
    return ErrorSuccess();

  // FIXME: see PR44797. We should revisit these checks and possibly move them
  // to CodeGenSchedule.cpp.
  StringRef Message = "found an inconsistent instruction that decodes to zero "
                      "micro opcodes and that consumes scheduler resources.";
  return make_error<InstructionError<MCInst>>(std::string(Message), MCI);
}

Expected<const InstrDesc &>
InstrBuilder::createInstrDescImpl(const MCInst &MCI,
                                  const SmallVector<Instrument *> &IVec) {
  assert(STI.getSchedModel().hasInstrSchedModel() &&
         "Itineraries are not yet supported!");

  // Obtain the instruction descriptor from the opcode.
  unsigned short Opcode = MCI.getOpcode();
  const MCInstrDesc &MCDesc = MCII.get(Opcode);
  const MCSchedModel &SM = STI.getSchedModel();

  // Then obtain the scheduling class information from the instruction.
  // Allow InstrumentManager to override and use a different SchedClassID.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);
  bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();

  // Try to solve variant scheduling classes.
  if (IsVariant) {
    unsigned CPUID = SM.getProcessorID();
    while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())
      SchedClassID =
          STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);

    if (!SchedClassID) {
      return make_error<InstructionError<MCInst>>(
          "unable to resolve scheduling class for write variant.", MCI);
    }
  }

  // Check if this instruction is supported. Otherwise, report an error.
  const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);
  if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {
    return make_error<InstructionError<MCInst>>(
        "found an unsupported instruction in the input assembly sequence", MCI);
  }

  LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');
  LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');
  LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');

  // Create a new empty descriptor.
  std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();
  ID->NumMicroOps = SCDesc.NumMicroOps;
  ID->SchedClassID = SchedClassID;

  if (MCDesc.isCall() && FirstCallInst) {
    // We don't correctly model calls.
    WithColor::warning() << "found a call in the input assembly sequence.\n";
    WithColor::note() << "call instructions are not correctly modeled. "
                      << "Assume a latency of " << CallLatency << "cy.\n";
    FirstCallInst = false;
  }

  if (MCDesc.isReturn() && FirstReturnInst) {
    WithColor::warning() << "found a return instruction in the input"
                         << " assembly sequence.\n";
    WithColor::note() << "program counter updates are ignored.\n";
    FirstReturnInst = false;
  }

  initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);
  computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);

  if (Error Err = verifyOperands(MCDesc, MCI))
    return std::move(Err);

  populateWrites(*ID, MCI, SchedClassID);
  populateReads(*ID, MCI, SchedClassID);

  LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');
  LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');

  // Validation check on the instruction descriptor.
  if (Error Err = verifyInstrDesc(*ID, MCI))
    return std::move(Err);

  // Now add the new descriptor.
  bool IsVariadic = MCDesc.isVariadic();
  if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {
    auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
    Descriptors[DKey] = std::move(ID);
    return *Descriptors[DKey];
  }

  auto VDKey = std::make_pair(&MCI, SchedClassID);
  VariantDescriptors[VDKey] = std::move(ID);
  return *VariantDescriptors[VDKey];
}

Expected<const InstrDesc &>
InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,
                                   const SmallVector<Instrument *> &IVec) {
  // Cache lookup using the SchedClassID from instrumentation.
  unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);

  auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);
  if (Descriptors.find_as(DKey) != Descriptors.end())
    return *Descriptors[DKey];

  unsigned CPUID = STI.getSchedModel().getProcessorID();
  SchedClassID = STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);
  auto VDKey = std::make_pair(&MCI, SchedClassID);
  if (VariantDescriptors.contains(VDKey))
    return *VariantDescriptors[VDKey];

  return createInstrDescImpl(MCI, IVec);
}

STATISTIC(NumVariantInst, "Number of MCInsts that don't have a static Desc");

Expected<std::unique_ptr<Instruction>>
InstrBuilder::createInstruction(const MCInst &MCI,
                                const SmallVector<Instrument *> &IVec) {
  Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);
  if (!DescOrErr)
    return DescOrErr.takeError();
  const InstrDesc &D = *DescOrErr;
  Instruction *NewIS = nullptr;
  std::unique_ptr<Instruction> CreatedIS;
  bool IsInstRecycled = false;

  if (!D.IsRecyclable)
    ++NumVariantInst;

  if (D.IsRecyclable && InstRecycleCB) {
    if (auto *I = InstRecycleCB(D)) {
      NewIS = I;
      NewIS->reset();
      IsInstRecycled = true;
    }
  }
  if (!IsInstRecycled) {
    CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());
    NewIS = CreatedIS.get();
  }

  const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());
  const MCSchedClassDesc &SCDesc =
      *STI.getSchedModel().getSchedClassDesc(D.SchedClassID);

  NewIS->setMayLoad(MCDesc.mayLoad());
  NewIS->setMayStore(MCDesc.mayStore());
  NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());
  NewIS->setBeginGroup(SCDesc.BeginGroup);
  NewIS->setEndGroup(SCDesc.EndGroup);
  NewIS->setRetireOOO(SCDesc.RetireOOO);

  // Check if this is a dependency breaking instruction.
  APInt Mask;

  bool IsZeroIdiom = false;
  bool IsDepBreaking = false;
  if (MCIA) {
    unsigned ProcID = STI.getSchedModel().getProcessorID();
    IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);
    IsDepBreaking =
        IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);
    if (MCIA->isOptimizableRegisterMove(MCI, ProcID))
      NewIS->setOptimizableMove();
  }
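
  // For example, on most x86 processors `xor eax, eax` is a zero idiom: the
  // result is always zero no matter what EAX held, so MCIA reports it as
  // dependency breaking and the reads created below can be marked independent
  // from their last writers.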

  // Initialize Reads first.
  MCPhysReg RegID = 0;
  size_t Idx = 0U;
  for (const ReadDescriptor &RD : D.Reads) {
    if (!RD.isImplicitRead()) {
      // Explicit read.
      const MCOperand &Op = MCI.getOperand(RD.OpIndex);
      // Skip non-register operands.
      if (!Op.isReg())
        continue;
      RegID = Op.getReg();
    } else {
      // Implicit read.
      RegID = RD.RegisterID;
    }

    // Skip invalid register operands.
    if (!RegID)
      continue;

    // Okay, this is a register operand. Create a ReadState for it.
    ReadState *RS = nullptr;
    if (IsInstRecycled && Idx < NewIS->getUses().size()) {
      NewIS->getUses()[Idx] = ReadState(RD, RegID);
      RS = &NewIS->getUses()[Idx++];
    } else {
      NewIS->getUses().emplace_back(RD, RegID);
      RS = &NewIS->getUses().back();
      ++Idx;
    }

    if (IsDepBreaking) {
      // A mask of all zeroes is a special case: it means that all the
      // explicit input operands are independent, while implicit reads stay
      // dependent.
      if (Mask.isZero()) {
        if (!RD.isImplicitRead())
          RS->setIndependentFromDef();
      } else {
        // Check if this register operand is independent according to `Mask`.
        // Note that Mask may not have enough bits to describe all explicit and
        // implicit input operands. If this register operand doesn't have a
        // corresponding bit in Mask, then conservatively assume that it is
        // dependent.
        if (Mask.getBitWidth() > RD.UseIndex) {
          // Okay. Mask describes register use `RD.UseIndex`.
          if (Mask[RD.UseIndex])
            RS->setIndependentFromDef();
        }
      }
    }
  }
  if (IsInstRecycled && Idx < NewIS->getUses().size())
    NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);

  // Early exit if there are no writes.
  if (D.Writes.empty()) {
    if (IsInstRecycled)
      return llvm::make_error<RecycledInstErr>(NewIS);
    else
      return std::move(CreatedIS);
  }

  // Track register writes that implicitly clear the upper portion of the
  // underlying super-registers using an APInt.
  APInt WriteMask(D.Writes.size(), 0);

  // Now query the MCInstrAnalysis object to obtain information about which
  // register writes implicitly clear the upper portion of a super-register.
  if (MCIA)
    MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);
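
  // As an illustration: on x86-64, writing EAX implicitly zeroes the upper
  // half of RAX, so the corresponding WriteMask bit is set and the WriteState
  // created below is constructed with ClearsSuperRegs = true.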

  // Initialize writes.
  unsigned WriteIndex = 0;
  Idx = 0U;
  for (const WriteDescriptor &WD : D.Writes) {
    RegID = WD.isImplicitWrite() ? WD.RegisterID
                                 : MCI.getOperand(WD.OpIndex).getReg();
    // Check if this is an optional definition that references NoReg or a write
    // to a constant register.
    if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {
      ++WriteIndex;
      continue;
    }

    assert(RegID && "Expected a valid register ID!");
    if (IsInstRecycled && Idx < NewIS->getDefs().size()) {
      NewIS->getDefs()[Idx++] =
          WriteState(WD, RegID,
                     /* ClearsSuperRegs */ WriteMask[WriteIndex],
                     /* WritesZero */ IsZeroIdiom);
    } else {
      NewIS->getDefs().emplace_back(WD, RegID,
                                    /* ClearsSuperRegs */ WriteMask[WriteIndex],
                                    /* WritesZero */ IsZeroIdiom);
      ++Idx;
    }
    ++WriteIndex;
  }
  if (IsInstRecycled && Idx < NewIS->getDefs().size())
    NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);

  if (IsInstRecycled)
    return llvm::make_error<RecycledInstErr>(NewIS);
  else
    return std::move(CreatedIS);
}
} // namespace mca
} // namespace llvm