LLVM 17.0.0git
X86InstrFoldTables.cpp
Go to the documentation of this file.
1//===-- X86InstrFoldTables.cpp - X86 Instruction Folding Tables -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the X86 memory folding tables.
10//
11//===----------------------------------------------------------------------===//
12
13#include "X86InstrFoldTables.h"
14#include "X86InstrInfo.h"
15#include "llvm/ADT/STLExtras.h"
16#include <atomic>
17#include <vector>
18
19using namespace llvm;
20
21// These tables are sorted by their RegOp value allowing them to be binary
22// searched at runtime without the need for additional storage. The enum values
23// are currently emitted in X86GenInstrInfo.inc in alphabetical order. Which
24// makes sorting these tables a simple matter of alphabetizing the table.
25//
26// We also have a tablegen emitter that tries to autogenerate these tables
27// by comparing encoding information. This can be enabled by passing
28// X86_GEN_FOLD_TABLES=ON to cmake which will produce X86GenFoldTables.inc
29// in the build area. There are currently some bugs in the autogenerated table
30// that require a manual review to copy them from the autogenerated table into
31// this table. It is unclear if we will ever be able to fully automate this
32// because as new instructions are added into holes in the X86 opcode map they
33// potentially pair up with old instructions and create new entries in the
34// tables that would be incorrect. The manual review process allows us a chance
35// to catch these before they become observable bugs.
36#include "X86MemFoldTables.inc"
38 { X86::VADDPDZ128rr, X86::VADDPDZ128rmb, TB_BCAST_SD },
39 { X86::VADDPDZ256rr, X86::VADDPDZ256rmb, TB_BCAST_SD },
40 { X86::VADDPDZrr, X86::VADDPDZrmb, TB_BCAST_SD },
41 { X86::VADDPSZ128rr, X86::VADDPSZ128rmb, TB_BCAST_SS },
42 { X86::VADDPSZ256rr, X86::VADDPSZ256rmb, TB_BCAST_SS },
43 { X86::VADDPSZrr, X86::VADDPSZrmb, TB_BCAST_SS },
44 { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmbi, TB_BCAST_SD },
45 { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmbi, TB_BCAST_SD },
46 { X86::VCMPPDZrri, X86::VCMPPDZrmbi, TB_BCAST_SD },
47 { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmbi, TB_BCAST_SS },
48 { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmbi, TB_BCAST_SS },
49 { X86::VCMPPSZrri, X86::VCMPPSZrmbi, TB_BCAST_SS },
50 { X86::VDIVPDZ128rr, X86::VDIVPDZ128rmb, TB_BCAST_SD },
51 { X86::VDIVPDZ256rr, X86::VDIVPDZ256rmb, TB_BCAST_SD },
52 { X86::VDIVPDZrr, X86::VDIVPDZrmb, TB_BCAST_SD },
53 { X86::VDIVPSZ128rr, X86::VDIVPSZ128rmb, TB_BCAST_SS },
54 { X86::VDIVPSZ256rr, X86::VDIVPSZ256rmb, TB_BCAST_SS },
55 { X86::VDIVPSZrr, X86::VDIVPSZrmb, TB_BCAST_SS },
56 { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rmb, TB_BCAST_SD },
57 { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rmb, TB_BCAST_SD },
58 { X86::VMAXCPDZrr, X86::VMAXCPDZrmb, TB_BCAST_SD },
59 { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rmb, TB_BCAST_SS },
60 { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rmb, TB_BCAST_SS },
61 { X86::VMAXCPSZrr, X86::VMAXCPSZrmb, TB_BCAST_SS },
62 { X86::VMAXPDZ128rr, X86::VMAXPDZ128rmb, TB_BCAST_SD },
63 { X86::VMAXPDZ256rr, X86::VMAXPDZ256rmb, TB_BCAST_SD },
64 { X86::VMAXPDZrr, X86::VMAXPDZrmb, TB_BCAST_SD },
65 { X86::VMAXPSZ128rr, X86::VMAXPSZ128rmb, TB_BCAST_SS },
66 { X86::VMAXPSZ256rr, X86::VMAXPSZ256rmb, TB_BCAST_SS },
67 { X86::VMAXPSZrr, X86::VMAXPSZrmb, TB_BCAST_SS },
68 { X86::VMINCPDZ128rr, X86::VMINCPDZ128rmb, TB_BCAST_SD },
69 { X86::VMINCPDZ256rr, X86::VMINCPDZ256rmb, TB_BCAST_SD },
70 { X86::VMINCPDZrr, X86::VMINCPDZrmb, TB_BCAST_SD },
71 { X86::VMINCPSZ128rr, X86::VMINCPSZ128rmb, TB_BCAST_SS },
72 { X86::VMINCPSZ256rr, X86::VMINCPSZ256rmb, TB_BCAST_SS },
73 { X86::VMINCPSZrr, X86::VMINCPSZrmb, TB_BCAST_SS },
74 { X86::VMINPDZ128rr, X86::VMINPDZ128rmb, TB_BCAST_SD },
75 { X86::VMINPDZ256rr, X86::VMINPDZ256rmb, TB_BCAST_SD },
76 { X86::VMINPDZrr, X86::VMINPDZrmb, TB_BCAST_SD },
77 { X86::VMINPSZ128rr, X86::VMINPSZ128rmb, TB_BCAST_SS },
78 { X86::VMINPSZ256rr, X86::VMINPSZ256rmb, TB_BCAST_SS },
79 { X86::VMINPSZrr, X86::VMINPSZrmb, TB_BCAST_SS },
80 { X86::VMULPDZ128rr, X86::VMULPDZ128rmb, TB_BCAST_SD },
81 { X86::VMULPDZ256rr, X86::VMULPDZ256rmb, TB_BCAST_SD },
82 { X86::VMULPDZrr, X86::VMULPDZrmb, TB_BCAST_SD },
83 { X86::VMULPSZ128rr, X86::VMULPSZ128rmb, TB_BCAST_SS },
84 { X86::VMULPSZ256rr, X86::VMULPSZ256rmb, TB_BCAST_SS },
85 { X86::VMULPSZrr, X86::VMULPSZrmb, TB_BCAST_SS },
86 { X86::VPADDDZ128rr, X86::VPADDDZ128rmb, TB_BCAST_D },
87 { X86::VPADDDZ256rr, X86::VPADDDZ256rmb, TB_BCAST_D },
88 { X86::VPADDDZrr, X86::VPADDDZrmb, TB_BCAST_D },
89 { X86::VPADDQZ128rr, X86::VPADDQZ128rmb, TB_BCAST_Q },
90 { X86::VPADDQZ256rr, X86::VPADDQZ256rmb, TB_BCAST_Q },
91 { X86::VPADDQZrr, X86::VPADDQZrmb, TB_BCAST_Q },
92 { X86::VPANDDZ128rr, X86::VPANDDZ128rmb, TB_BCAST_D },
93 { X86::VPANDDZ256rr, X86::VPANDDZ256rmb, TB_BCAST_D },
94 { X86::VPANDDZrr, X86::VPANDDZrmb, TB_BCAST_D },
95 { X86::VPANDNDZ128rr, X86::VPANDNDZ128rmb, TB_BCAST_D },
96 { X86::VPANDNDZ256rr, X86::VPANDNDZ256rmb, TB_BCAST_D },
97 { X86::VPANDNDZrr, X86::VPANDNDZrmb, TB_BCAST_D },
98 { X86::VPANDNQZ128rr, X86::VPANDNQZ128rmb, TB_BCAST_Q },
99 { X86::VPANDNQZ256rr, X86::VPANDNQZ256rmb, TB_BCAST_Q },
100 { X86::VPANDNQZrr, X86::VPANDNQZrmb, TB_BCAST_Q },
101 { X86::VPANDQZ128rr, X86::VPANDQZ128rmb, TB_BCAST_Q },
102 { X86::VPANDQZ256rr, X86::VPANDQZ256rmb, TB_BCAST_Q },
103 { X86::VPANDQZrr, X86::VPANDQZrmb, TB_BCAST_Q },
104 { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmib, TB_BCAST_D },
105 { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmib, TB_BCAST_D },
106 { X86::VPCMPDZrri, X86::VPCMPDZrmib, TB_BCAST_D },
107 { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rmb, TB_BCAST_D },
108 { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rmb, TB_BCAST_D },
109 { X86::VPCMPEQDZrr, X86::VPCMPEQDZrmb, TB_BCAST_D },
110 { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rmb, TB_BCAST_Q },
111 { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rmb, TB_BCAST_Q },
112 { X86::VPCMPEQQZrr, X86::VPCMPEQQZrmb, TB_BCAST_Q },
113 { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rmb, TB_BCAST_D },
114 { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rmb, TB_BCAST_D },
115 { X86::VPCMPGTDZrr, X86::VPCMPGTDZrmb, TB_BCAST_D },
116 { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rmb, TB_BCAST_Q },
117 { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rmb, TB_BCAST_Q },
118 { X86::VPCMPGTQZrr, X86::VPCMPGTQZrmb, TB_BCAST_Q },
119 { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmib, TB_BCAST_Q },
120 { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmib, TB_BCAST_Q },
121 { X86::VPCMPQZrri, X86::VPCMPQZrmib, TB_BCAST_Q },
122 { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmib, TB_BCAST_D },
123 { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmib, TB_BCAST_D },
124 { X86::VPCMPUDZrri, X86::VPCMPUDZrmib, TB_BCAST_D },
125 { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmib, TB_BCAST_Q },
126 { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmib, TB_BCAST_Q },
127 { X86::VPCMPUQZrri, X86::VPCMPUQZrmib, TB_BCAST_Q },
128 { X86::VPMAXSDZ128rr, X86::VPMAXSDZ128rmb, TB_BCAST_D },
129 { X86::VPMAXSDZ256rr, X86::VPMAXSDZ256rmb, TB_BCAST_D },
130 { X86::VPMAXSDZrr, X86::VPMAXSDZrmb, TB_BCAST_D },
131 { X86::VPMAXSQZ128rr, X86::VPMAXSQZ128rmb, TB_BCAST_Q },
132 { X86::VPMAXSQZ256rr, X86::VPMAXSQZ256rmb, TB_BCAST_Q },
133 { X86::VPMAXSQZrr, X86::VPMAXSQZrmb, TB_BCAST_Q },
134 { X86::VPMAXUDZ128rr, X86::VPMAXUDZ128rmb, TB_BCAST_D },
135 { X86::VPMAXUDZ256rr, X86::VPMAXUDZ256rmb, TB_BCAST_D },
136 { X86::VPMAXUDZrr, X86::VPMAXUDZrmb, TB_BCAST_D },
137 { X86::VPMAXUQZ128rr, X86::VPMAXUQZ128rmb, TB_BCAST_Q },
138 { X86::VPMAXUQZ256rr, X86::VPMAXUQZ256rmb, TB_BCAST_Q },
139 { X86::VPMAXUQZrr, X86::VPMAXUQZrmb, TB_BCAST_Q },
140 { X86::VPMINSDZ128rr, X86::VPMINSDZ128rmb, TB_BCAST_D },
141 { X86::VPMINSDZ256rr, X86::VPMINSDZ256rmb, TB_BCAST_D },
142 { X86::VPMINSDZrr, X86::VPMINSDZrmb, TB_BCAST_D },
143 { X86::VPMINSQZ128rr, X86::VPMINSQZ128rmb, TB_BCAST_Q },
144 { X86::VPMINSQZ256rr, X86::VPMINSQZ256rmb, TB_BCAST_Q },
145 { X86::VPMINSQZrr, X86::VPMINSQZrmb, TB_BCAST_Q },
146 { X86::VPMINUDZ128rr, X86::VPMINUDZ128rmb, TB_BCAST_D },
147 { X86::VPMINUDZ256rr, X86::VPMINUDZ256rmb, TB_BCAST_D },
148 { X86::VPMINUDZrr, X86::VPMINUDZrmb, TB_BCAST_D },
149 { X86::VPMINUQZ128rr, X86::VPMINUQZ128rmb, TB_BCAST_Q },
150 { X86::VPMINUQZ256rr, X86::VPMINUQZ256rmb, TB_BCAST_Q },
151 { X86::VPMINUQZrr, X86::VPMINUQZrmb, TB_BCAST_Q },
152 { X86::VPMULLDZ128rr, X86::VPMULLDZ128rmb, TB_BCAST_D },
153 { X86::VPMULLDZ256rr, X86::VPMULLDZ256rmb, TB_BCAST_D },
154 { X86::VPMULLDZrr, X86::VPMULLDZrmb, TB_BCAST_D },
155 { X86::VPMULLQZ128rr, X86::VPMULLQZ128rmb, TB_BCAST_Q },
156 { X86::VPMULLQZ256rr, X86::VPMULLQZ256rmb, TB_BCAST_Q },
157 { X86::VPMULLQZrr, X86::VPMULLQZrmb, TB_BCAST_Q },
158 { X86::VPORDZ128rr, X86::VPORDZ128rmb, TB_BCAST_D },
159 { X86::VPORDZ256rr, X86::VPORDZ256rmb, TB_BCAST_D },
160 { X86::VPORDZrr, X86::VPORDZrmb, TB_BCAST_D },
161 { X86::VPORQZ128rr, X86::VPORQZ128rmb, TB_BCAST_Q },
162 { X86::VPORQZ256rr, X86::VPORQZ256rmb, TB_BCAST_Q },
163 { X86::VPORQZrr, X86::VPORQZrmb, TB_BCAST_Q },
164 { X86::VPTESTMDZ128rr, X86::VPTESTMDZ128rmb, TB_BCAST_D },
165 { X86::VPTESTMDZ256rr, X86::VPTESTMDZ256rmb, TB_BCAST_D },
166 { X86::VPTESTMDZrr, X86::VPTESTMDZrmb, TB_BCAST_D },
167 { X86::VPTESTMQZ128rr, X86::VPTESTMQZ128rmb, TB_BCAST_Q },
168 { X86::VPTESTMQZ256rr, X86::VPTESTMQZ256rmb, TB_BCAST_Q },
169 { X86::VPTESTMQZrr, X86::VPTESTMQZrmb, TB_BCAST_Q },
170 { X86::VPTESTNMDZ128rr,X86::VPTESTNMDZ128rmb,TB_BCAST_D },
171 { X86::VPTESTNMDZ256rr,X86::VPTESTNMDZ256rmb,TB_BCAST_D },
172 { X86::VPTESTNMDZrr, X86::VPTESTNMDZrmb, TB_BCAST_D },
173 { X86::VPTESTNMQZ128rr,X86::VPTESTNMQZ128rmb,TB_BCAST_Q },
174 { X86::VPTESTNMQZ256rr,X86::VPTESTNMQZ256rmb,TB_BCAST_Q },
175 { X86::VPTESTNMQZrr, X86::VPTESTNMQZrmb, TB_BCAST_Q },
176 { X86::VPXORDZ128rr, X86::VPXORDZ128rmb, TB_BCAST_D },
177 { X86::VPXORDZ256rr, X86::VPXORDZ256rmb, TB_BCAST_D },
178 { X86::VPXORDZrr, X86::VPXORDZrmb, TB_BCAST_D },
179 { X86::VPXORQZ128rr, X86::VPXORQZ128rmb, TB_BCAST_Q },
180 { X86::VPXORQZ256rr, X86::VPXORQZ256rmb, TB_BCAST_Q },
181 { X86::VPXORQZrr, X86::VPXORQZrmb, TB_BCAST_Q },
182 { X86::VSUBPDZ128rr, X86::VSUBPDZ128rmb, TB_BCAST_SD },
183 { X86::VSUBPDZ256rr, X86::VSUBPDZ256rmb, TB_BCAST_SD },
184 { X86::VSUBPDZrr, X86::VSUBPDZrmb, TB_BCAST_SD },
185 { X86::VSUBPSZ128rr, X86::VSUBPSZ128rmb, TB_BCAST_SS },
186 { X86::VSUBPSZ256rr, X86::VSUBPSZ256rmb, TB_BCAST_SS },
187 { X86::VSUBPSZrr, X86::VSUBPSZrmb, TB_BCAST_SS },
188};
189
191 { X86::VFMADD132PDZ128r, X86::VFMADD132PDZ128mb, TB_BCAST_SD },
192 { X86::VFMADD132PDZ256r, X86::VFMADD132PDZ256mb, TB_BCAST_SD },
193 { X86::VFMADD132PDZr, X86::VFMADD132PDZmb, TB_BCAST_SD },
194 { X86::VFMADD132PSZ128r, X86::VFMADD132PSZ128mb, TB_BCAST_SS },
195 { X86::VFMADD132PSZ256r, X86::VFMADD132PSZ256mb, TB_BCAST_SS },
196 { X86::VFMADD132PSZr, X86::VFMADD132PSZmb, TB_BCAST_SS },
197 { X86::VFMADD213PDZ128r, X86::VFMADD213PDZ128mb, TB_BCAST_SD },
198 { X86::VFMADD213PDZ256r, X86::VFMADD213PDZ256mb, TB_BCAST_SD },
199 { X86::VFMADD213PDZr, X86::VFMADD213PDZmb, TB_BCAST_SD },
200 { X86::VFMADD213PSZ128r, X86::VFMADD213PSZ128mb, TB_BCAST_SS },
201 { X86::VFMADD213PSZ256r, X86::VFMADD213PSZ256mb, TB_BCAST_SS },
202 { X86::VFMADD213PSZr, X86::VFMADD213PSZmb, TB_BCAST_SS },
203 { X86::VFMADD231PDZ128r, X86::VFMADD231PDZ128mb, TB_BCAST_SD },
204 { X86::VFMADD231PDZ256r, X86::VFMADD231PDZ256mb, TB_BCAST_SD },
205 { X86::VFMADD231PDZr, X86::VFMADD231PDZmb, TB_BCAST_SD },
206 { X86::VFMADD231PSZ128r, X86::VFMADD231PSZ128mb, TB_BCAST_SS },
207 { X86::VFMADD231PSZ256r, X86::VFMADD231PSZ256mb, TB_BCAST_SS },
208 { X86::VFMADD231PSZr, X86::VFMADD231PSZmb, TB_BCAST_SS },
209 { X86::VFMADDSUB132PDZ128r, X86::VFMADDSUB132PDZ128mb, TB_BCAST_SD },
210 { X86::VFMADDSUB132PDZ256r, X86::VFMADDSUB132PDZ256mb, TB_BCAST_SD },
211 { X86::VFMADDSUB132PDZr, X86::VFMADDSUB132PDZmb, TB_BCAST_SD },
212 { X86::VFMADDSUB132PSZ128r, X86::VFMADDSUB132PSZ128mb, TB_BCAST_SS },
213 { X86::VFMADDSUB132PSZ256r, X86::VFMADDSUB132PSZ256mb, TB_BCAST_SS },
214 { X86::VFMADDSUB132PSZr, X86::VFMADDSUB132PSZmb, TB_BCAST_SS },
215 { X86::VFMADDSUB213PDZ128r, X86::VFMADDSUB213PDZ128mb, TB_BCAST_SD },
216 { X86::VFMADDSUB213PDZ256r, X86::VFMADDSUB213PDZ256mb, TB_BCAST_SD },
217 { X86::VFMADDSUB213PDZr, X86::VFMADDSUB213PDZmb, TB_BCAST_SD },
218 { X86::VFMADDSUB213PSZ128r, X86::VFMADDSUB213PSZ128mb, TB_BCAST_SS },
219 { X86::VFMADDSUB213PSZ256r, X86::VFMADDSUB213PSZ256mb, TB_BCAST_SS },
220 { X86::VFMADDSUB213PSZr, X86::VFMADDSUB213PSZmb, TB_BCAST_SS },
221 { X86::VFMADDSUB231PDZ128r, X86::VFMADDSUB231PDZ128mb, TB_BCAST_SD },
222 { X86::VFMADDSUB231PDZ256r, X86::VFMADDSUB231PDZ256mb, TB_BCAST_SD },
223 { X86::VFMADDSUB231PDZr, X86::VFMADDSUB231PDZmb, TB_BCAST_SD },
224 { X86::VFMADDSUB231PSZ128r, X86::VFMADDSUB231PSZ128mb, TB_BCAST_SS },
225 { X86::VFMADDSUB231PSZ256r, X86::VFMADDSUB231PSZ256mb, TB_BCAST_SS },
226 { X86::VFMADDSUB231PSZr, X86::VFMADDSUB231PSZmb, TB_BCAST_SS },
227 { X86::VFMSUB132PDZ128r, X86::VFMSUB132PDZ128mb, TB_BCAST_SD },
228 { X86::VFMSUB132PDZ256r, X86::VFMSUB132PDZ256mb, TB_BCAST_SD },
229 { X86::VFMSUB132PDZr, X86::VFMSUB132PDZmb, TB_BCAST_SD },
230 { X86::VFMSUB132PSZ128r, X86::VFMSUB132PSZ128mb, TB_BCAST_SS },
231 { X86::VFMSUB132PSZ256r, X86::VFMSUB132PSZ256mb, TB_BCAST_SS },
232 { X86::VFMSUB132PSZr, X86::VFMSUB132PSZmb, TB_BCAST_SS },
233 { X86::VFMSUB213PDZ128r, X86::VFMSUB213PDZ128mb, TB_BCAST_SD },
234 { X86::VFMSUB213PDZ256r, X86::VFMSUB213PDZ256mb, TB_BCAST_SD },
235 { X86::VFMSUB213PDZr, X86::VFMSUB213PDZmb, TB_BCAST_SD },
236 { X86::VFMSUB213PSZ128r, X86::VFMSUB213PSZ128mb, TB_BCAST_SS },
237 { X86::VFMSUB213PSZ256r, X86::VFMSUB213PSZ256mb, TB_BCAST_SS },
238 { X86::VFMSUB213PSZr, X86::VFMSUB213PSZmb, TB_BCAST_SS },
239 { X86::VFMSUB231PDZ128r, X86::VFMSUB231PDZ128mb, TB_BCAST_SD },
240 { X86::VFMSUB231PDZ256r, X86::VFMSUB231PDZ256mb, TB_BCAST_SD },
241 { X86::VFMSUB231PDZr, X86::VFMSUB231PDZmb, TB_BCAST_SD },
242 { X86::VFMSUB231PSZ128r, X86::VFMSUB231PSZ128mb, TB_BCAST_SS },
243 { X86::VFMSUB231PSZ256r, X86::VFMSUB231PSZ256mb, TB_BCAST_SS },
244 { X86::VFMSUB231PSZr, X86::VFMSUB231PSZmb, TB_BCAST_SS },
245 { X86::VFMSUBADD132PDZ128r, X86::VFMSUBADD132PDZ128mb, TB_BCAST_SD },
246 { X86::VFMSUBADD132PDZ256r, X86::VFMSUBADD132PDZ256mb, TB_BCAST_SD },
247 { X86::VFMSUBADD132PDZr, X86::VFMSUBADD132PDZmb, TB_BCAST_SD },
248 { X86::VFMSUBADD132PSZ128r, X86::VFMSUBADD132PSZ128mb, TB_BCAST_SS },
249 { X86::VFMSUBADD132PSZ256r, X86::VFMSUBADD132PSZ256mb, TB_BCAST_SS },
250 { X86::VFMSUBADD132PSZr, X86::VFMSUBADD132PSZmb, TB_BCAST_SS },
251 { X86::VFMSUBADD213PDZ128r, X86::VFMSUBADD213PDZ128mb, TB_BCAST_SD },
252 { X86::VFMSUBADD213PDZ256r, X86::VFMSUBADD213PDZ256mb, TB_BCAST_SD },
253 { X86::VFMSUBADD213PDZr, X86::VFMSUBADD213PDZmb, TB_BCAST_SD },
254 { X86::VFMSUBADD213PSZ128r, X86::VFMSUBADD213PSZ128mb, TB_BCAST_SS },
255 { X86::VFMSUBADD213PSZ256r, X86::VFMSUBADD213PSZ256mb, TB_BCAST_SS },
256 { X86::VFMSUBADD213PSZr, X86::VFMSUBADD213PSZmb, TB_BCAST_SS },
257 { X86::VFMSUBADD231PDZ128r, X86::VFMSUBADD231PDZ128mb, TB_BCAST_SD },
258 { X86::VFMSUBADD231PDZ256r, X86::VFMSUBADD231PDZ256mb, TB_BCAST_SD },
259 { X86::VFMSUBADD231PDZr, X86::VFMSUBADD231PDZmb, TB_BCAST_SD },
260 { X86::VFMSUBADD231PSZ128r, X86::VFMSUBADD231PSZ128mb, TB_BCAST_SS },
261 { X86::VFMSUBADD231PSZ256r, X86::VFMSUBADD231PSZ256mb, TB_BCAST_SS },
262 { X86::VFMSUBADD231PSZr, X86::VFMSUBADD231PSZmb, TB_BCAST_SS },
263 { X86::VFNMADD132PDZ128r, X86::VFNMADD132PDZ128mb, TB_BCAST_SD },
264 { X86::VFNMADD132PDZ256r, X86::VFNMADD132PDZ256mb, TB_BCAST_SD },
265 { X86::VFNMADD132PDZr, X86::VFNMADD132PDZmb, TB_BCAST_SD },
266 { X86::VFNMADD132PSZ128r, X86::VFNMADD132PSZ128mb, TB_BCAST_SS },
267 { X86::VFNMADD132PSZ256r, X86::VFNMADD132PSZ256mb, TB_BCAST_SS },
268 { X86::VFNMADD132PSZr, X86::VFNMADD132PSZmb, TB_BCAST_SS },
269 { X86::VFNMADD213PDZ128r, X86::VFNMADD213PDZ128mb, TB_BCAST_SD },
270 { X86::VFNMADD213PDZ256r, X86::VFNMADD213PDZ256mb, TB_BCAST_SD },
271 { X86::VFNMADD213PDZr, X86::VFNMADD213PDZmb, TB_BCAST_SD },
272 { X86::VFNMADD213PSZ128r, X86::VFNMADD213PSZ128mb, TB_BCAST_SS },
273 { X86::VFNMADD213PSZ256r, X86::VFNMADD213PSZ256mb, TB_BCAST_SS },
274 { X86::VFNMADD213PSZr, X86::VFNMADD213PSZmb, TB_BCAST_SS },
275 { X86::VFNMADD231PDZ128r, X86::VFNMADD231PDZ128mb, TB_BCAST_SD },
276 { X86::VFNMADD231PDZ256r, X86::VFNMADD231PDZ256mb, TB_BCAST_SD },
277 { X86::VFNMADD231PDZr, X86::VFNMADD231PDZmb, TB_BCAST_SD },
278 { X86::VFNMADD231PSZ128r, X86::VFNMADD231PSZ128mb, TB_BCAST_SS },
279 { X86::VFNMADD231PSZ256r, X86::VFNMADD231PSZ256mb, TB_BCAST_SS },
280 { X86::VFNMADD231PSZr, X86::VFNMADD231PSZmb, TB_BCAST_SS },
281 { X86::VFNMSUB132PDZ128r, X86::VFNMSUB132PDZ128mb, TB_BCAST_SD },
282 { X86::VFNMSUB132PDZ256r, X86::VFNMSUB132PDZ256mb, TB_BCAST_SD },
283 { X86::VFNMSUB132PDZr, X86::VFNMSUB132PDZmb, TB_BCAST_SD },
284 { X86::VFNMSUB132PSZ128r, X86::VFNMSUB132PSZ128mb, TB_BCAST_SS },
285 { X86::VFNMSUB132PSZ256r, X86::VFNMSUB132PSZ256mb, TB_BCAST_SS },
286 { X86::VFNMSUB132PSZr, X86::VFNMSUB132PSZmb, TB_BCAST_SS },
287 { X86::VFNMSUB213PDZ128r, X86::VFNMSUB213PDZ128mb, TB_BCAST_SD },
288 { X86::VFNMSUB213PDZ256r, X86::VFNMSUB213PDZ256mb, TB_BCAST_SD },
289 { X86::VFNMSUB213PDZr, X86::VFNMSUB213PDZmb, TB_BCAST_SD },
290 { X86::VFNMSUB213PSZ128r, X86::VFNMSUB213PSZ128mb, TB_BCAST_SS },
291 { X86::VFNMSUB213PSZ256r, X86::VFNMSUB213PSZ256mb, TB_BCAST_SS },
292 { X86::VFNMSUB213PSZr, X86::VFNMSUB213PSZmb, TB_BCAST_SS },
293 { X86::VFNMSUB231PDZ128r, X86::VFNMSUB231PDZ128mb, TB_BCAST_SD },
294 { X86::VFNMSUB231PDZ256r, X86::VFNMSUB231PDZ256mb, TB_BCAST_SD },
295 { X86::VFNMSUB231PDZr, X86::VFNMSUB231PDZmb, TB_BCAST_SD },
296 { X86::VFNMSUB231PSZ128r, X86::VFNMSUB231PSZ128mb, TB_BCAST_SS },
297 { X86::VFNMSUB231PSZ256r, X86::VFNMSUB231PSZ256mb, TB_BCAST_SS },
298 { X86::VFNMSUB231PSZr, X86::VFNMSUB231PSZmb, TB_BCAST_SS },
299 { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmbi, TB_BCAST_D },
300 { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmbi, TB_BCAST_D },
301 { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmbi, TB_BCAST_D },
302 { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmbi, TB_BCAST_Q },
303 { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmbi, TB_BCAST_Q },
304 { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmbi, TB_BCAST_Q },
305};
306
307static const X86MemoryFoldTableEntry *
309#ifndef NDEBUG
310 // Make sure the tables are sorted.
311 static std::atomic<bool> FoldTablesChecked(false);
312 if (!FoldTablesChecked.load(std::memory_order_relaxed)) {
313 assert(llvm::is_sorted(MemoryFoldTable2Addr) &&
314 std::adjacent_find(std::begin(MemoryFoldTable2Addr),
315 std::end(MemoryFoldTable2Addr)) ==
316 std::end(MemoryFoldTable2Addr) &&
317 "MemoryFoldTable2Addr is not sorted and unique!");
318 assert(llvm::is_sorted(MemoryFoldTable0) &&
319 std::adjacent_find(std::begin(MemoryFoldTable0),
320 std::end(MemoryFoldTable0)) ==
321 std::end(MemoryFoldTable0) &&
322 "MemoryFoldTable0 is not sorted and unique!");
323 assert(llvm::is_sorted(MemoryFoldTable1) &&
324 std::adjacent_find(std::begin(MemoryFoldTable1),
325 std::end(MemoryFoldTable1)) ==
326 std::end(MemoryFoldTable1) &&
327 "MemoryFoldTable1 is not sorted and unique!");
328 assert(llvm::is_sorted(MemoryFoldTable2) &&
329 std::adjacent_find(std::begin(MemoryFoldTable2),
330 std::end(MemoryFoldTable2)) ==
331 std::end(MemoryFoldTable2) &&
332 "MemoryFoldTable2 is not sorted and unique!");
333 assert(llvm::is_sorted(MemoryFoldTable3) &&
334 std::adjacent_find(std::begin(MemoryFoldTable3),
335 std::end(MemoryFoldTable3)) ==
336 std::end(MemoryFoldTable3) &&
337 "MemoryFoldTable3 is not sorted and unique!");
338 assert(llvm::is_sorted(MemoryFoldTable4) &&
339 std::adjacent_find(std::begin(MemoryFoldTable4),
340 std::end(MemoryFoldTable4)) ==
341 std::end(MemoryFoldTable4) &&
342 "MemoryFoldTable4 is not sorted and unique!");
344 std::adjacent_find(std::begin(BroadcastFoldTable2),
345 std::end(BroadcastFoldTable2)) ==
346 std::end(BroadcastFoldTable2) &&
347 "BroadcastFoldTable2 is not sorted and unique!");
349 std::adjacent_find(std::begin(BroadcastFoldTable3),
350 std::end(BroadcastFoldTable3)) ==
351 std::end(BroadcastFoldTable3) &&
352 "BroadcastFoldTable3 is not sorted and unique!");
353 FoldTablesChecked.store(true, std::memory_order_relaxed);
354 }
355#endif
356
357 const X86MemoryFoldTableEntry *Data = llvm::lower_bound(Table, RegOp);
358 if (Data != Table.end() && Data->KeyOp == RegOp &&
359 !(Data->Flags & TB_NO_FORWARD))
360 return Data;
361 return nullptr;
362}
363
366 return lookupFoldTableImpl(MemoryFoldTable2Addr, RegOp);
367}
368
370llvm::lookupFoldTable(unsigned RegOp, unsigned OpNum) {
372 if (OpNum == 0)
373 FoldTable = ArrayRef(MemoryFoldTable0);
374 else if (OpNum == 1)
375 FoldTable = ArrayRef(MemoryFoldTable1);
376 else if (OpNum == 2)
377 FoldTable = ArrayRef(MemoryFoldTable2);
378 else if (OpNum == 3)
379 FoldTable = ArrayRef(MemoryFoldTable3);
380 else if (OpNum == 4)
381 FoldTable = ArrayRef(MemoryFoldTable4);
382 else
383 return nullptr;
384
385 return lookupFoldTableImpl(FoldTable, RegOp);
386}
387
388namespace {
389
390// This class stores the memory unfolding tables. It is instantiated as a
391// function scope static variable to lazily init the unfolding table.
392struct X86MemUnfoldTable {
393 // Stores memory unfolding tables entries sorted by opcode.
394 std::vector<X86MemoryFoldTableEntry> Table;
395
396 X86MemUnfoldTable() {
397 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2Addr)
398 // Index 0, folded load and store, no alignment requirement.
399 addTableEntry(Entry, TB_INDEX_0 | TB_FOLDED_LOAD | TB_FOLDED_STORE);
400
401 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable0)
402 // Index 0, mix of loads and stores.
403 addTableEntry(Entry, TB_INDEX_0);
404
405 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable1)
406 // Index 1, folded load
407 addTableEntry(Entry, TB_INDEX_1 | TB_FOLDED_LOAD);
408
409 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable2)
410 // Index 2, folded load
411 addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD);
412
413 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable3)
414 // Index 3, folded load
415 addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD);
416
417 for (const X86MemoryFoldTableEntry &Entry : MemoryFoldTable4)
418 // Index 4, folded load
419 addTableEntry(Entry, TB_INDEX_4 | TB_FOLDED_LOAD);
420
421 // Broadcast tables.
423 // Index 2, folded broadcast
424 addTableEntry(Entry, TB_INDEX_2 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
425
427 // Index 3, folded broadcast
428 addTableEntry(Entry, TB_INDEX_3 | TB_FOLDED_LOAD | TB_FOLDED_BCAST);
429
430 // Sort the memory->reg unfold table.
431 array_pod_sort(Table.begin(), Table.end());
432
433 // Now that it's sorted, ensure its unique.
434 assert(std::adjacent_find(Table.begin(), Table.end()) == Table.end() &&
435 "Memory unfolding table is not unique!");
436 }
437
438 void addTableEntry(const X86MemoryFoldTableEntry &Entry,
439 uint16_t ExtraFlags) {
440 // NOTE: This swaps the KeyOp and DstOp in the table so we can sort it.
441 if ((Entry.Flags & TB_NO_REVERSE) == 0)
442 Table.push_back({Entry.DstOp, Entry.KeyOp,
443 static_cast<uint16_t>(Entry.Flags | ExtraFlags) });
444 }
445};
446}
447
450 static X86MemUnfoldTable MemUnfoldTable;
451 auto &Table = MemUnfoldTable.Table;
452 auto I = llvm::lower_bound(Table, MemOp);
453 if (I != Table.end() && I->KeyOp == MemOp)
454 return &*I;
455 return nullptr;
456}
457
#define I(x, y, z)
Definition: MD5.cpp:58
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static const X86MemoryFoldTableEntry * lookupFoldTableImpl(ArrayRef< X86MemoryFoldTableEntry > Table, unsigned RegOp)
static const X86MemoryFoldTableEntry BroadcastFoldTable3[]
static const X86MemoryFoldTableEntry BroadcastFoldTable2[]
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
iterator end() const
Definition: ArrayRef.h:152
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
const X86MemoryFoldTableEntry * lookupTwoAddrFoldTable(unsigned RegOp)
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1962
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:2001
const X86MemoryFoldTableEntry * lookupFoldTable(unsigned RegOp, unsigned OpNum)
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition: STLExtras.h:1690
const X86MemoryFoldTableEntry * lookupUnfoldTable(unsigned MemOp)