Bug Summary

File: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 12576, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
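
The flagged expression shifts an unsigned long long by a count that can reach 64; since that type is 64 bits wide, C++ leaves such a shift undefined. A minimal sketch of the pattern and a guarded variant, using hypothetical helper names (this is not the exact code at line 12576):

#include <cassert>
#include <cstdint>

// Undefined when Width == 64: the shift count equals the bit width of
// unsigned long long.
uint64_t maskUB(unsigned Width) { return (1ULL << Width) - 1; }

// Guarded variant: handle the full-width case explicitly instead of shifting.
uint64_t maskSafe(unsigned Width) {
  assert(Width <= 64 && "mask wider than 64 bits");
  return Width == 64 ? ~0ULL : (1ULL << Width) - 1;
}

LLVM's MathExtras helpers (for example maskTrailingOnes<uint64_t>(Width)) and APInt::getLowBitsSet avoid the unguarded shift in the same way.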

Annotated Source Code

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AArch64 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/ObjCARCUtil.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/Analysis.h"
35#include "llvm/CodeGen/CallingConvLower.h"
36#include "llvm/CodeGen/MachineBasicBlock.h"
37#include "llvm/CodeGen/MachineFrameInfo.h"
38#include "llvm/CodeGen/MachineFunction.h"
39#include "llvm/CodeGen/MachineInstr.h"
40#include "llvm/CodeGen/MachineInstrBuilder.h"
41#include "llvm/CodeGen/MachineMemOperand.h"
42#include "llvm/CodeGen/MachineRegisterInfo.h"
43#include "llvm/CodeGen/RuntimeLibcalls.h"
44#include "llvm/CodeGen/SelectionDAG.h"
45#include "llvm/CodeGen/SelectionDAGNodes.h"
46#include "llvm/CodeGen/TargetCallingConv.h"
47#include "llvm/CodeGen/TargetInstrInfo.h"
48#include "llvm/CodeGen/ValueTypes.h"
49#include "llvm/IR/Attributes.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugLoc.h"
53#include "llvm/IR/DerivedTypes.h"
54#include "llvm/IR/Function.h"
55#include "llvm/IR/GetElementPtrTypeIterator.h"
56#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/Instruction.h"
59#include "llvm/IR/Instructions.h"
60#include "llvm/IR/IntrinsicInst.h"
61#include "llvm/IR/Intrinsics.h"
62#include "llvm/IR/IntrinsicsAArch64.h"
63#include "llvm/IR/Module.h"
64#include "llvm/IR/OperandTraits.h"
65#include "llvm/IR/PatternMatch.h"
66#include "llvm/IR/Type.h"
67#include "llvm/IR/Use.h"
68#include "llvm/IR/Value.h"
69#include "llvm/MC/MCRegisterInfo.h"
70#include "llvm/Support/Casting.h"
71#include "llvm/Support/CodeGen.h"
72#include "llvm/Support/CommandLine.h"
73#include "llvm/Support/Compiler.h"
74#include "llvm/Support/Debug.h"
75#include "llvm/Support/ErrorHandling.h"
76#include "llvm/Support/KnownBits.h"
77#include "llvm/Support/MachineValueType.h"
78#include "llvm/Support/MathExtras.h"
79#include "llvm/Support/raw_ostream.h"
80#include "llvm/Target/TargetMachine.h"
81#include "llvm/Target/TargetOptions.h"
82#include <algorithm>
83#include <bitset>
84#include <cassert>
85#include <cctype>
86#include <cstdint>
87#include <cstdlib>
88#include <iterator>
89#include <limits>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace llvm::PatternMatch;
96
97#define DEBUG_TYPE "aarch64-lower"
98
99STATISTIC(NumTailCalls, "Number of tail calls");
100STATISTIC(NumShiftInserts, "Number of vector shift inserts");
101STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
102
103// FIXME: The necessary dtprel relocations don't seem to be supported
104// well in the GNU bfd and gold linkers at the moment. Therefore, by
105// default, for now, fall back to GeneralDynamic code generation.
106cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
107 "aarch64-elf-ldtls-generation", cl::Hidden,
108 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
109 cl::init(false));
110
111static cl::opt<bool>
112EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
113 cl::desc("Enable AArch64 logical imm instruction "
114 "optimization"),
115 cl::init(true));
116
117// Temporary option added for the purpose of testing functionality added
118// to DAGCombiner.cpp in D92230. It is expected that this can be removed
119// in future when both implementations will be based off MGATHER rather
120// than the GLD1 nodes added for the SVE gather load intrinsics.
121static cl::opt<bool>
122EnableCombineMGatherIntrinsics("aarch64-enable-mgather-combine", cl::Hidden,
123 cl::desc("Combine extends of AArch64 masked "
124 "gather intrinsics"),
125 cl::init(true));
126
127/// Value type used for condition codes.
128static const MVT MVT_CC = MVT::i32;
129
130static inline EVT getPackedSVEVectorVT(EVT VT) {
131 switch (VT.getSimpleVT().SimpleTy) {
132 default:
133 llvm_unreachable("unexpected element type for vector");
134 case MVT::i8:
135 return MVT::nxv16i8;
136 case MVT::i16:
137 return MVT::nxv8i16;
138 case MVT::i32:
139 return MVT::nxv4i32;
140 case MVT::i64:
141 return MVT::nxv2i64;
142 case MVT::f16:
143 return MVT::nxv8f16;
144 case MVT::f32:
145 return MVT::nxv4f32;
146 case MVT::f64:
147 return MVT::nxv2f64;
148 case MVT::bf16:
149 return MVT::nxv8bf16;
150 }
151}
152
153// NOTE: Currently there's only a need to return integer vector types. If this
154// changes then just add an extra "type" parameter.
155static inline EVT getPackedSVEVectorVT(ElementCount EC) {
156 switch (EC.getKnownMinValue()) {
157 default:
158 llvm_unreachable("unexpected element count for vector");
159 case 16:
160 return MVT::nxv16i8;
161 case 8:
162 return MVT::nxv8i16;
163 case 4:
164 return MVT::nxv4i32;
165 case 2:
166 return MVT::nxv2i64;
167 }
168}
169
170static inline EVT getPromotedVTForPredicate(EVT VT) {
171 assert(VT.isScalableVector() && (VT.getVectorElementType() == MVT::i1) &&
172 "Expected scalable predicate vector type!");
173 switch (VT.getVectorMinNumElements()) {
174 default:
175 llvm_unreachable("unexpected element count for vector");
176 case 2:
177 return MVT::nxv2i64;
178 case 4:
179 return MVT::nxv4i32;
180 case 8:
181 return MVT::nxv8i16;
182 case 16:
183 return MVT::nxv16i8;
184 }
185}
186
187/// Returns true if VT's elements occupy the lowest bit positions of its
188/// associated register class without any intervening space.
189///
190/// For example, nxv2f16, nxv4f16 and nxv8f16 are legal types that belong to the
191/// same register class, but only nxv8f16 can be treated as a packed vector.
192static inline bool isPackedVectorType(EVT VT, SelectionDAG &DAG) {
193 assert(VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
194 "Expected legal vector type!");
195 return VT.isFixedLengthVector() ||
196 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock;
197}
198
199// Returns true for ####_MERGE_PASSTHRU opcodes, whose operands have a leading
200// predicate and end with a passthru value matching the result type.
201static bool isMergePassthruOpcode(unsigned Opc) {
202 switch (Opc) {
203 default:
204 return false;
205 case AArch64ISD::BITREVERSE_MERGE_PASSTHRU:
206 case AArch64ISD::BSWAP_MERGE_PASSTHRU:
207 case AArch64ISD::CTLZ_MERGE_PASSTHRU:
208 case AArch64ISD::CTPOP_MERGE_PASSTHRU:
209 case AArch64ISD::DUP_MERGE_PASSTHRU:
210 case AArch64ISD::ABS_MERGE_PASSTHRU:
211 case AArch64ISD::NEG_MERGE_PASSTHRU:
212 case AArch64ISD::FNEG_MERGE_PASSTHRU:
213 case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
214 case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
215 case AArch64ISD::FCEIL_MERGE_PASSTHRU:
216 case AArch64ISD::FFLOOR_MERGE_PASSTHRU:
217 case AArch64ISD::FNEARBYINT_MERGE_PASSTHRU:
218 case AArch64ISD::FRINT_MERGE_PASSTHRU:
219 case AArch64ISD::FROUND_MERGE_PASSTHRU:
220 case AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU:
221 case AArch64ISD::FTRUNC_MERGE_PASSTHRU:
222 case AArch64ISD::FP_ROUND_MERGE_PASSTHRU:
223 case AArch64ISD::FP_EXTEND_MERGE_PASSTHRU:
224 case AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU:
225 case AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU:
226 case AArch64ISD::FCVTZU_MERGE_PASSTHRU:
227 case AArch64ISD::FCVTZS_MERGE_PASSTHRU:
228 case AArch64ISD::FSQRT_MERGE_PASSTHRU:
229 case AArch64ISD::FRECPX_MERGE_PASSTHRU:
230 case AArch64ISD::FABS_MERGE_PASSTHRU:
231 return true;
232 }
233}
234
235AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
236 const AArch64Subtarget &STI)
237 : TargetLowering(TM), Subtarget(&STI) {
238 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
239 // we have to make something up. Arbitrarily, choose ZeroOrOne.
240 setBooleanContents(ZeroOrOneBooleanContent);
241 // When comparing vectors the result sets the different elements in the
242 // vector to all-one or all-zero.
243 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
244
245 // Set up the register classes.
246 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
247 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
248
249 if (Subtarget->hasLS64()) {
250 addRegisterClass(MVT::i64x8, &AArch64::GPR64x8ClassRegClass);
251 setOperationAction(ISD::LOAD, MVT::i64x8, Custom);
252 setOperationAction(ISD::STORE, MVT::i64x8, Custom);
253 }
254
255 if (Subtarget->hasFPARMv8()) {
256 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
257 addRegisterClass(MVT::bf16, &AArch64::FPR16RegClass);
258 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
259 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
260 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
261 }
262
263 if (Subtarget->hasNEON()) {
264 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
265 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
266 // Someone set us up the NEON.
267 addDRTypeForNEON(MVT::v2f32);
268 addDRTypeForNEON(MVT::v8i8);
269 addDRTypeForNEON(MVT::v4i16);
270 addDRTypeForNEON(MVT::v2i32);
271 addDRTypeForNEON(MVT::v1i64);
272 addDRTypeForNEON(MVT::v1f64);
273 addDRTypeForNEON(MVT::v4f16);
274 if (Subtarget->hasBF16())
275 addDRTypeForNEON(MVT::v4bf16);
276
277 addQRTypeForNEON(MVT::v4f32);
278 addQRTypeForNEON(MVT::v2f64);
279 addQRTypeForNEON(MVT::v16i8);
280 addQRTypeForNEON(MVT::v8i16);
281 addQRTypeForNEON(MVT::v4i32);
282 addQRTypeForNEON(MVT::v2i64);
283 addQRTypeForNEON(MVT::v8f16);
284 if (Subtarget->hasBF16())
285 addQRTypeForNEON(MVT::v8bf16);
286 }
287
288 if (Subtarget->hasSVE()) {
289 // Add legal sve predicate types
290 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
291 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
292 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
293 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
294
295 // Add legal sve data types
296 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
297 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
298 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
299 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
300
301 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
302 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
303 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
304 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
305 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
306 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
307
308 if (Subtarget->hasBF16()) {
309 addRegisterClass(MVT::nxv2bf16, &AArch64::ZPRRegClass);
310 addRegisterClass(MVT::nxv4bf16, &AArch64::ZPRRegClass);
311 addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
312 }
313
314 if (Subtarget->useSVEForFixedLengthVectors()) {
315 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
316 if (useSVEForFixedLengthVectorVT(VT))
317 addRegisterClass(VT, &AArch64::ZPRRegClass);
318
319 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
320 if (useSVEForFixedLengthVectorVT(VT))
321 addRegisterClass(VT, &AArch64::ZPRRegClass);
322 }
323
324 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
325 setOperationAction(ISD::SADDSAT, VT, Legal);
326 setOperationAction(ISD::UADDSAT, VT, Legal);
327 setOperationAction(ISD::SSUBSAT, VT, Legal);
328 setOperationAction(ISD::USUBSAT, VT, Legal);
329 setOperationAction(ISD::UREM, VT, Expand);
330 setOperationAction(ISD::SREM, VT, Expand);
331 setOperationAction(ISD::SDIVREM, VT, Expand);
332 setOperationAction(ISD::UDIVREM, VT, Expand);
333 }
334
335 for (auto VT :
336 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
337 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
338 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
339
340 for (auto VT :
341 { MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32, MVT::nxv4f32,
342 MVT::nxv2f64 }) {
343 setCondCodeAction(ISD::SETO, VT, Expand);
344 setCondCodeAction(ISD::SETOLT, VT, Expand);
345 setCondCodeAction(ISD::SETLT, VT, Expand);
346 setCondCodeAction(ISD::SETOLE, VT, Expand);
347 setCondCodeAction(ISD::SETLE, VT, Expand);
348 setCondCodeAction(ISD::SETULT, VT, Expand);
349 setCondCodeAction(ISD::SETULE, VT, Expand);
350 setCondCodeAction(ISD::SETUGE, VT, Expand);
351 setCondCodeAction(ISD::SETUGT, VT, Expand);
352 setCondCodeAction(ISD::SETUEQ, VT, Expand);
353 setCondCodeAction(ISD::SETUNE, VT, Expand);
354
355 setOperationAction(ISD::FREM, VT, Expand);
356 setOperationAction(ISD::FPOW, VT, Expand);
357 setOperationAction(ISD::FPOWI, VT, Expand);
358 setOperationAction(ISD::FCOS, VT, Expand);
359 setOperationAction(ISD::FSIN, VT, Expand);
360 setOperationAction(ISD::FSINCOS, VT, Expand);
361 setOperationAction(ISD::FEXP, VT, Expand);
362 setOperationAction(ISD::FEXP2, VT, Expand);
363 setOperationAction(ISD::FLOG, VT, Expand);
364 setOperationAction(ISD::FLOG2, VT, Expand);
365 setOperationAction(ISD::FLOG10, VT, Expand);
366 }
367 }
368
369 // Compute derived properties from the register classes
370 computeRegisterProperties(Subtarget->getRegisterInfo());
371
372 // Provide all sorts of operation actions
373 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
374 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
375 setOperationAction(ISD::SETCC, MVT::i32, Custom);
376 setOperationAction(ISD::SETCC, MVT::i64, Custom);
377 setOperationAction(ISD::SETCC, MVT::f16, Custom);
378 setOperationAction(ISD::SETCC, MVT::f32, Custom);
379 setOperationAction(ISD::SETCC, MVT::f64, Custom);
380 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
381 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
382 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
383 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
384 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
385 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
386 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
387 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
388 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
389 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
390 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
391 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
392 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
393 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
394 setOperationAction(ISD::SELECT, MVT::i32, Custom);
395 setOperationAction(ISD::SELECT, MVT::i64, Custom);
396 setOperationAction(ISD::SELECT, MVT::f16, Custom);
397 setOperationAction(ISD::SELECT, MVT::f32, Custom);
398 setOperationAction(ISD::SELECT, MVT::f64, Custom);
399 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
400 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
401 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
402 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
403 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
404 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
405 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
406
407 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
408 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
409 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
410
411 setOperationAction(ISD::FREM, MVT::f32, Expand);
412 setOperationAction(ISD::FREM, MVT::f64, Expand);
413 setOperationAction(ISD::FREM, MVT::f80, Expand);
414
415 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
416
417 // Custom lowering hooks are needed for XOR
418 // to fold it into CSINC/CSINV.
419 setOperationAction(ISD::XOR, MVT::i32, Custom);
420 setOperationAction(ISD::XOR, MVT::i64, Custom);
421
422 // Virtually no operation on f128 is legal, but LLVM can't expand them when
423 // there's a valid register class, so we need custom operations in most cases.
424 setOperationAction(ISD::FABS, MVT::f128, Expand);
425 setOperationAction(ISD::FADD, MVT::f128, LibCall);
426 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
427 setOperationAction(ISD::FCOS, MVT::f128, Expand);
428 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
429 setOperationAction(ISD::FMA, MVT::f128, Expand);
430 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
431 setOperationAction(ISD::FNEG, MVT::f128, Expand);
432 setOperationAction(ISD::FPOW, MVT::f128, Expand);
433 setOperationAction(ISD::FREM, MVT::f128, Expand);
434 setOperationAction(ISD::FRINT, MVT::f128, Expand);
435 setOperationAction(ISD::FSIN, MVT::f128, Expand);
436 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
437 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
438 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
439 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
440 setOperationAction(ISD::SETCC, MVT::f128, Custom);
441 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
442 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
443 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
444 setOperationAction(ISD::SELECT, MVT::f128, Custom);
445 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
446 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
447
448 // Lowering for many of the conversions is actually specified by the non-f128
449 // type. The LowerXXX function will be trivial when f128 isn't involved.
450 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
451 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
452 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
453 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
454 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
455 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
456 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
457 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
458 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
459 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
460 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
461 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
462 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
463 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
464 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
465 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
466 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
467 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
468 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
469 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
470 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
471 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
472 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
473 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
474 setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
475 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
476 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
477 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
478 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
479 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
480
481 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i32, Custom);
482 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
483 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i32, Custom);
484 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
485
486 // Variable arguments.
487 setOperationAction(ISD::VASTART, MVT::Other, Custom);
488 setOperationAction(ISD::VAARG, MVT::Other, Custom);
489 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
490 setOperationAction(ISD::VAEND, MVT::Other, Expand);
491
492 // Variable-sized objects.
493 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
494 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
495
496 if (Subtarget->isTargetWindows())
497 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
498 else
499 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
500
501 // Constant pool entries
502 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
503
504 // BlockAddress
505 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
506
507 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
508 setOperationAction(ISD::ADDC, MVT::i32, Custom);
509 setOperationAction(ISD::ADDE, MVT::i32, Custom);
510 setOperationAction(ISD::SUBC, MVT::i32, Custom);
511 setOperationAction(ISD::SUBE, MVT::i32, Custom);
512 setOperationAction(ISD::ADDC, MVT::i64, Custom);
513 setOperationAction(ISD::ADDE, MVT::i64, Custom);
514 setOperationAction(ISD::SUBC, MVT::i64, Custom);
515 setOperationAction(ISD::SUBE, MVT::i64, Custom);
516
517 // AArch64 lacks both left-rotate and popcount instructions.
518 setOperationAction(ISD::ROTL, MVT::i32, Expand);
519 setOperationAction(ISD::ROTL, MVT::i64, Expand);
520 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
521 setOperationAction(ISD::ROTL, VT, Expand);
522 setOperationAction(ISD::ROTR, VT, Expand);
523 }
524
525 // AArch64 doesn't have i32 MULH{S|U}.
526 setOperationAction(ISD::MULHU, MVT::i32, Expand);
527 setOperationAction(ISD::MULHS, MVT::i32, Expand);
528
529 // AArch64 doesn't have {U|S}MUL_LOHI.
530 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
531 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
532
533 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
534 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
535 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
536
537 setOperationAction(ISD::ABS, MVT::i32, Custom);
538 setOperationAction(ISD::ABS, MVT::i64, Custom);
539
540 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
541 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
542 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
543 setOperationAction(ISD::SDIVREM, VT, Expand);
544 setOperationAction(ISD::UDIVREM, VT, Expand);
545 }
546 setOperationAction(ISD::SREM, MVT::i32, Expand);
547 setOperationAction(ISD::SREM, MVT::i64, Expand);
548 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
549 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
550 setOperationAction(ISD::UREM, MVT::i32, Expand);
551 setOperationAction(ISD::UREM, MVT::i64, Expand);
552
553 // Custom lower Add/Sub/Mul with overflow.
554 setOperationAction(ISD::SADDO, MVT::i32, Custom);
555 setOperationAction(ISD::SADDO, MVT::i64, Custom);
556 setOperationAction(ISD::UADDO, MVT::i32, Custom);
557 setOperationAction(ISD::UADDO, MVT::i64, Custom);
558 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
559 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
560 setOperationAction(ISD::USUBO, MVT::i32, Custom);
561 setOperationAction(ISD::USUBO, MVT::i64, Custom);
562 setOperationAction(ISD::SMULO, MVT::i32, Custom);
563 setOperationAction(ISD::SMULO, MVT::i64, Custom);
564 setOperationAction(ISD::UMULO, MVT::i32, Custom);
565 setOperationAction(ISD::UMULO, MVT::i64, Custom);
566
567 setOperationAction(ISD::FSIN, MVT::f32, Expand);
568 setOperationAction(ISD::FSIN, MVT::f64, Expand);
569 setOperationAction(ISD::FCOS, MVT::f32, Expand);
570 setOperationAction(ISD::FCOS, MVT::f64, Expand);
571 setOperationAction(ISD::FPOW, MVT::f32, Expand);
572 setOperationAction(ISD::FPOW, MVT::f64, Expand);
573 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
574 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
575 if (Subtarget->hasFullFP16())
576 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
577 else
578 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
579
580 setOperationAction(ISD::FREM, MVT::f16, Promote);
581 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
582 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
583 setOperationAction(ISD::FPOW, MVT::f16, Promote);
584 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
585 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
586 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
587 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
588 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
589 setOperationAction(ISD::FCOS, MVT::f16, Promote);
590 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
591 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
592 setOperationAction(ISD::FSIN, MVT::f16, Promote);
593 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
594 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
595 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
596 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
597 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
598 setOperationAction(ISD::FEXP, MVT::f16, Promote);
599 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
600 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
601 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
602 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
603 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
604 setOperationAction(ISD::FLOG, MVT::f16, Promote);
605 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
606 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
607 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
608 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
609 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
610 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
611 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
612 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
613
614 if (!Subtarget->hasFullFP16()) {
615 setOperationAction(ISD::SELECT, MVT::f16, Promote);
616 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
617 setOperationAction(ISD::SETCC, MVT::f16, Promote);
618 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
619 setOperationAction(ISD::FADD, MVT::f16, Promote);
620 setOperationAction(ISD::FSUB, MVT::f16, Promote);
621 setOperationAction(ISD::FMUL, MVT::f16, Promote);
622 setOperationAction(ISD::FDIV, MVT::f16, Promote);
623 setOperationAction(ISD::FMA, MVT::f16, Promote);
624 setOperationAction(ISD::FNEG, MVT::f16, Promote);
625 setOperationAction(ISD::FABS, MVT::f16, Promote);
626 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
627 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
628 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
629 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
630 setOperationAction(ISD::FRINT, MVT::f16, Promote);
631 setOperationAction(ISD::FROUND, MVT::f16, Promote);
632 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Promote);
633 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
634 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
635 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
636 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
637 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
638
639 // promote v4f16 to v4f32 when that is known to be safe.
640 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
641 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
642 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
643 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
644 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
645 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
646 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
647 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
648
649 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
650 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
651 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
652 setOperationAction(ISD::FROUNDEVEN, MVT::v4f16, Expand);
653 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
654 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
655 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
656 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
657 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
658 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
659 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
660 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
661 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
662 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
663 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
664 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
665
666 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
667 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
668 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
669 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
670 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
671 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
672 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
673 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
674 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
675 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
676 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
677 setOperationAction(ISD::FROUNDEVEN, MVT::v8f16, Expand);
678 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
679 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
680 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
681 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
682 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
683 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
684 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
685 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
686 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
687 }
688
689 // AArch64 has implementations of a lot of rounding-like FP operations.
690 for (MVT Ty : {MVT::f32, MVT::f64}) {
691 setOperationAction(ISD::FFLOOR, Ty, Legal);
692 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
693 setOperationAction(ISD::FCEIL, Ty, Legal);
694 setOperationAction(ISD::FRINT, Ty, Legal);
695 setOperationAction(ISD::FTRUNC, Ty, Legal);
696 setOperationAction(ISD::FROUND, Ty, Legal);
697 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
698 setOperationAction(ISD::FMINNUM, Ty, Legal);
699 setOperationAction(ISD::FMAXNUM, Ty, Legal);
700 setOperationAction(ISD::FMINIMUM, Ty, Legal);
701 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
702 setOperationAction(ISD::LROUND, Ty, Legal);
703 setOperationAction(ISD::LLROUND, Ty, Legal);
704 setOperationAction(ISD::LRINT, Ty, Legal);
705 setOperationAction(ISD::LLRINT, Ty, Legal);
706 }
707
708 if (Subtarget->hasFullFP16()) {
709 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
710 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
711 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
712 setOperationAction(ISD::FRINT, MVT::f16, Legal);
713 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
714 setOperationAction(ISD::FROUND, MVT::f16, Legal);
715 setOperationAction(ISD::FROUNDEVEN, MVT::f16, Legal);
716 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
717 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
718 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
719 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
720 }
721
722 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
723
724 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
725 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
726
727 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
728 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
729 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
730 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
731 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
732
733 // Generate outline atomics library calls only if LSE was not specified for
734 // subtarget
735 if (Subtarget->outlineAtomics() && !Subtarget->hasLSE()) {
736 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, LibCall);
737 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, LibCall);
738 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, LibCall);
739 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, LibCall);
740 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, LibCall);
741 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, LibCall);
742 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, LibCall);
743 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, LibCall);
744 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, LibCall);
745 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, LibCall);
746 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, LibCall);
747 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, LibCall);
748 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, LibCall);
749 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, LibCall);
750 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, LibCall);
751 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, LibCall);
752 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, LibCall);
753 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i8, LibCall);
754 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i16, LibCall);
755 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i32, LibCall);
756 setOperationAction(ISD::ATOMIC_LOAD_CLR, MVT::i64, LibCall);
757 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, LibCall);
758 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
759 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
760 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
761#define LCALLNAMES(A, B, N) \
762 setLibcallName(A##N##_RELAX, #B #N "_relax"); \
763 setLibcallName(A##N##_ACQ, #B #N "_acq"); \
764 setLibcallName(A##N##_REL, #B #N "_rel"); \
765 setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
766#define LCALLNAME4(A, B) \
767 LCALLNAMES(A, B, 1) \
768 LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
769#define LCALLNAME5(A, B) \
770 LCALLNAMES(A, B, 1) \
771 LCALLNAMES(A, B, 2) \
772 LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
773 LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
774 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
775 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
776 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
777 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
778 LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
779#undef LCALLNAMES
780#undef LCALLNAME4
781#undef LCALLNAME5
782 }
783
784 // 128-bit loads and stores can be done without expanding
785 setOperationAction(ISD::LOAD, MVT::i128, Custom);
786 setOperationAction(ISD::STORE, MVT::i128, Custom);
787
788 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
789 // custom lowering, as there are no un-paired non-temporal stores and
790 // legalization will break up 256 bit inputs.
791 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
792 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
793 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
794 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
795 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
796 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
797 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
798
799 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
800 // This requires the Performance Monitors extension.
801 if (Subtarget->hasPerfMon())
802 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
803
804 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
805 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
806 // Issue __sincos_stret if available.
807 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
808 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
809 } else {
810 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
811 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
812 }
813
814 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
815 // MSVCRT doesn't have powi; fall back to pow
816 setLibcallName(RTLIB::POWI_F32, nullptr);
817 setLibcallName(RTLIB::POWI_F64, nullptr);
818 }
819
820 // Make floating-point constants legal for the large code model, so they don't
821 // become loads from the constant pool.
822 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
823 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
824 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
825 }
826
827 // AArch64 does not have floating-point extending loads, i1 sign-extending
828 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
829 for (MVT VT : MVT::fp_valuetypes()) {
830 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
831 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
832 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
833 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
834 }
835 for (MVT VT : MVT::integer_valuetypes())
836 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
837
838 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
839 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
840 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
841 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
842 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
843 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
844 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
845
846 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
847 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
848 setOperationAction(ISD::BITCAST, MVT::bf16, Custom);
849
850 // Indexed loads and stores are supported.
851 for (unsigned im = (unsigned)ISD::PRE_INC;
852 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
853 setIndexedLoadAction(im, MVT::i8, Legal);
854 setIndexedLoadAction(im, MVT::i16, Legal);
855 setIndexedLoadAction(im, MVT::i32, Legal);
856 setIndexedLoadAction(im, MVT::i64, Legal);
857 setIndexedLoadAction(im, MVT::f64, Legal);
858 setIndexedLoadAction(im, MVT::f32, Legal);
859 setIndexedLoadAction(im, MVT::f16, Legal);
860 setIndexedLoadAction(im, MVT::bf16, Legal);
861 setIndexedStoreAction(im, MVT::i8, Legal);
862 setIndexedStoreAction(im, MVT::i16, Legal);
863 setIndexedStoreAction(im, MVT::i32, Legal);
864 setIndexedStoreAction(im, MVT::i64, Legal);
865 setIndexedStoreAction(im, MVT::f64, Legal);
866 setIndexedStoreAction(im, MVT::f32, Legal);
867 setIndexedStoreAction(im, MVT::f16, Legal);
868 setIndexedStoreAction(im, MVT::bf16, Legal);
869 }
870
871 // Trap.
872 setOperationAction(ISD::TRAP, MVT::Other, Legal);
873 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
874 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
875
876 // We combine OR nodes for bitfield operations.
877 setTargetDAGCombine(ISD::OR);
878 // Try to create BICs for vector ANDs.
879 setTargetDAGCombine(ISD::AND);
880
881 // Vector add and sub nodes may conceal a high-half opportunity.
882 // Also, try to fold ADD into CSINC/CSINV..
883 setTargetDAGCombine(ISD::ADD);
884 setTargetDAGCombine(ISD::ABS);
885 setTargetDAGCombine(ISD::SUB);
886 setTargetDAGCombine(ISD::SRL);
887 setTargetDAGCombine(ISD::XOR);
888 setTargetDAGCombine(ISD::SINT_TO_FP);
889 setTargetDAGCombine(ISD::UINT_TO_FP);
890
891 // TODO: Do the same for FP_TO_*INT_SAT.
892 setTargetDAGCombine(ISD::FP_TO_SINT);
893 setTargetDAGCombine(ISD::FP_TO_UINT);
894 setTargetDAGCombine(ISD::FDIV);
895
896 // Try and combine setcc with csel
897 setTargetDAGCombine(ISD::SETCC);
898
899 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
900
901 setTargetDAGCombine(ISD::ANY_EXTEND);
902 setTargetDAGCombine(ISD::ZERO_EXTEND);
903 setTargetDAGCombine(ISD::SIGN_EXTEND);
904 setTargetDAGCombine(ISD::VECTOR_SPLICE);
905 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
906 setTargetDAGCombine(ISD::TRUNCATE);
907 setTargetDAGCombine(ISD::CONCAT_VECTORS);
908 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
909 setTargetDAGCombine(ISD::STORE);
910 if (Subtarget->supportsAddressTopByteIgnored())
911 setTargetDAGCombine(ISD::LOAD);
912
913 setTargetDAGCombine(ISD::MUL);
914
915 setTargetDAGCombine(ISD::SELECT);
916 setTargetDAGCombine(ISD::VSELECT);
917
918 setTargetDAGCombine(ISD::INTRINSIC_VOID);
919 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
920 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
921 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
922 setTargetDAGCombine(ISD::VECREDUCE_ADD);
923 setTargetDAGCombine(ISD::STEP_VECTOR);
924
925 setTargetDAGCombine(ISD::GlobalAddress);
926
927 // In case of strict alignment, avoid an excessive number of byte wide stores.
928 MaxStoresPerMemsetOptSize = 8;
929 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
930 ? MaxStoresPerMemsetOptSize : 32;
931
932 MaxGluedStoresPerMemcpy = 4;
933 MaxStoresPerMemcpyOptSize = 4;
934 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
935 ? MaxStoresPerMemcpyOptSize : 16;
936
937 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
938
939 MaxLoadsPerMemcmpOptSize = 4;
940 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
941 ? MaxLoadsPerMemcmpOptSize : 8;
942
943 setStackPointerRegisterToSaveRestore(AArch64::SP);
944
945 setSchedulingPreference(Sched::Hybrid);
946
947 EnableExtLdPromotion = true;
948
949 // Set required alignment.
950 setMinFunctionAlignment(Align(4));
951 // Set preferred alignments.
952 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
953 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
954
955 // Only change the limit for entries in a jump table if specified by
956 // the sub target, but not at the command line.
957 unsigned MaxJT = STI.getMaximumJumpTableSize();
958 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
959 setMaximumJumpTableSize(MaxJT);
960
961 setHasExtractBitsInsn(true);
962
963 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
964
965 if (Subtarget->hasNEON()) {
966 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
967 // silliness like this:
968 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
969 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
970 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
971 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
972 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
973 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
974 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
975 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
976 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
977 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
978 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
979 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
980 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
981 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
982 setOperationAction(ISD::FROUNDEVEN, MVT::v1f64, Expand);
983 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
984 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
985 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
986 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
987 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
988 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
989 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
990 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
991 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
992 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
993 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
994
995 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
996 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
997 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
998 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
999 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
1000
1001 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::v1i64, Expand);
1002 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::v1i64, Expand);
1003
1004 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
1005
1006 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
1007 // elements smaller than i32, so promote the input to i32 first.
1008 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
1009 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
1010 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
1011 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
1012 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i8, MVT::v16i32);
1013 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i8, MVT::v16i32);
1014
1015 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
1016 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1017 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1018 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
1019 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
1020 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
1021 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
1022 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
1023 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1024
1025 if (Subtarget->hasFullFP16()) {
1026 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
1027 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
1028 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
1029 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1030 } else {
1031 // when AArch64 doesn't have fullfp16 support, promote the input
1032 // to i32 first.
1033 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
1034 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
1035 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
1036 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
1037 }
1038
1039 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
1040 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
1041 setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
1042 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1043 setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
1044 setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
1045 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1046 setOperationAction(ISD::BITREVERSE, MVT::v2i64, Custom);
1047 for (auto VT : {MVT::v1i64, MVT::v2i64}) {
1048 setOperationAction(ISD::UMAX, VT, Custom);
1049 setOperationAction(ISD::SMAX, VT, Custom);
1050 setOperationAction(ISD::UMIN, VT, Custom);
1051 setOperationAction(ISD::SMIN, VT, Custom);
1052 }
1053
1054 // AArch64 doesn't have MUL.2d:
1055 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1056 // Custom handling for some quad-vector types to detect MULL.
1057 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
1058 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
1059 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1060
1061 // Saturates
1062 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1063 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1064 setOperationAction(ISD::SADDSAT, VT, Legal);
1065 setOperationAction(ISD::UADDSAT, VT, Legal);
1066 setOperationAction(ISD::SSUBSAT, VT, Legal);
1067 setOperationAction(ISD::USUBSAT, VT, Legal);
1068 }
1069
1070 for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1071 MVT::v4i32}) {
1072 setOperationAction(ISD::ABDS, VT, Legal);
1073 setOperationAction(ISD::ABDU, VT, Legal);
1074 }
1075
1076 // Vector reductions
1077 for (MVT VT : { MVT::v4f16, MVT::v2f32,
1078 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
1079 if (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()) {
1080 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1081 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1082
1083 setOperationAction(ISD::VECREDUCE_FADD, VT, Legal);
1084 }
1085 }
1086 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
1087 MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
1088 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1089 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1090 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1091 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1092 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1093 }
1094 setOperationAction(ISD::VECREDUCE_ADD, MVT::v2i64, Custom);
1095
1096 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
1097 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
1098 // Likewise, narrowing and extending vector loads/stores aren't handled
1099 // directly.
1100 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
1101 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
1102
1103 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
1104 setOperationAction(ISD::MULHS, VT, Legal);
1105 setOperationAction(ISD::MULHU, VT, Legal);
1106 } else {
1107 setOperationAction(ISD::MULHS, VT, Expand);
1108 setOperationAction(ISD::MULHU, VT, Expand);
1109 }
1110 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1111 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1112
1113 setOperationAction(ISD::BSWAP, VT, Expand);
1114 setOperationAction(ISD::CTTZ, VT, Expand);
1115
1116 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
1117 setTruncStoreAction(VT, InnerVT, Expand);
1118 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1119 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1120 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1121 }
1122 }
1123
1124 // AArch64 has implementations of a lot of rounding-like FP operations.
1125 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
1126 setOperationAction(ISD::FFLOOR, Ty, Legal);
1127 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1128 setOperationAction(ISD::FCEIL, Ty, Legal);
1129 setOperationAction(ISD::FRINT, Ty, Legal);
1130 setOperationAction(ISD::FTRUNC, Ty, Legal);
1131 setOperationAction(ISD::FROUND, Ty, Legal);
1132 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1133 }
1134
1135 if (Subtarget->hasFullFP16()) {
1136 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
1137 setOperationAction(ISD::FFLOOR, Ty, Legal);
1138 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
1139 setOperationAction(ISD::FCEIL, Ty, Legal);
1140 setOperationAction(ISD::FRINT, Ty, Legal);
1141 setOperationAction(ISD::FTRUNC, Ty, Legal);
1142 setOperationAction(ISD::FROUND, Ty, Legal);
1143 setOperationAction(ISD::FROUNDEVEN, Ty, Legal);
1144 }
1145 }
1146
1147 if (Subtarget->hasSVE())
1148 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
1149
1150 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
1151
1152 setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1153 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1154 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
1155 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1156 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1157 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
1158 }
1159
1160 if (Subtarget->hasSVE()) {
1161 for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
1162 setOperationAction(ISD::BITREVERSE, VT, Custom);
1163 setOperationAction(ISD::BSWAP, VT, Custom);
1164 setOperationAction(ISD::CTLZ, VT, Custom);
1165 setOperationAction(ISD::CTPOP, VT, Custom);
1166 setOperationAction(ISD::CTTZ, VT, Custom);
1167 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1168 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1169 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1170 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1171 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1172 setOperationAction(ISD::MGATHER, VT, Custom);
1173 setOperationAction(ISD::MSCATTER, VT, Custom);
1174 setOperationAction(ISD::MLOAD, VT, Custom);
1175 setOperationAction(ISD::MUL, VT, Custom);
1176 setOperationAction(ISD::MULHS, VT, Custom);
1177 setOperationAction(ISD::MULHU, VT, Custom);
1178 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1179 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1180 setOperationAction(ISD::SELECT, VT, Custom);
1181 setOperationAction(ISD::SETCC, VT, Custom);
1182 setOperationAction(ISD::SDIV, VT, Custom);
1183 setOperationAction(ISD::UDIV, VT, Custom);
1184 setOperationAction(ISD::SMIN, VT, Custom);
1185 setOperationAction(ISD::UMIN, VT, Custom);
1186 setOperationAction(ISD::SMAX, VT, Custom);
1187 setOperationAction(ISD::UMAX, VT, Custom);
1188 setOperationAction(ISD::SHL, VT, Custom);
1189 setOperationAction(ISD::SRL, VT, Custom);
1190 setOperationAction(ISD::SRA, VT, Custom);
1191 setOperationAction(ISD::ABS, VT, Custom);
1192 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1193 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1194 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1195 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1196 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1197 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1198 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1199 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1200
1201 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1202 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1203 setOperationAction(ISD::SELECT_CC, VT, Expand);
1204 setOperationAction(ISD::ROTL, VT, Expand);
1205 setOperationAction(ISD::ROTR, VT, Expand);
1206 }
1207
1208 // Illegal unpacked integer vector types.
1209 for (auto VT : {MVT::nxv8i8, MVT::nxv4i16, MVT::nxv2i32}) {
1210 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1211 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1212 }
1213
1214 // Legalize unpacked bitcasts to REINTERPRET_CAST.
1215 for (auto VT : {MVT::nxv2i16, MVT::nxv4i16, MVT::nxv2i32, MVT::nxv2bf16,
1216 MVT::nxv2f16, MVT::nxv4f16, MVT::nxv2f32})
1217 setOperationAction(ISD::BITCAST, VT, Custom);
1218
1219 for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
1220 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1221 setOperationAction(ISD::SELECT, VT, Custom);
1222 setOperationAction(ISD::SETCC, VT, Custom);
1223 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1224 setOperationAction(ISD::TRUNCATE, VT, Custom);
1225 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1226 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1227 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1228
1229 setOperationAction(ISD::SELECT_CC, VT, Expand);
1230 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1231 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1232
1233 // There are no legal MVT::nxv16f## based types.
1234 if (VT != MVT::nxv16i1) {
1235 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1236 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1237 }
1238 }
1239
1240 // NEON doesn't support masked loads/stores/gathers/scatters, but SVE does
1241 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v1f64,
1242 MVT::v2f64, MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1243 MVT::v2i32, MVT::v4i32, MVT::v1i64, MVT::v2i64}) {
1244 setOperationAction(ISD::MLOAD, VT, Custom);
1245 setOperationAction(ISD::MSTORE, VT, Custom);
1246 setOperationAction(ISD::MGATHER, VT, Custom);
1247 setOperationAction(ISD::MSCATTER, VT, Custom);
1248 }
1249
1250 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
1251 for (MVT InnerVT : MVT::fp_scalable_vector_valuetypes()) {
1252 // Avoid marking truncating FP stores as legal to prevent the
1253 // DAGCombiner from creating unsupported truncating stores.
1254 setTruncStoreAction(VT, InnerVT, Expand);
1255 // SVE does not have floating-point extending loads.
1256 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
1257 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
1258 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
1259 }
1260 }
1261
1262 // SVE supports truncating stores of 64 and 128-bit vectors
1263 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Custom);
1264 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Custom);
1265 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Custom);
1266 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
1267 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
1268
1269 for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
1270 MVT::nxv4f32, MVT::nxv2f64}) {
1271 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1272 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1273 setOperationAction(ISD::MGATHER, VT, Custom);
1274 setOperationAction(ISD::MSCATTER, VT, Custom);
1275 setOperationAction(ISD::MLOAD, VT, Custom);
1276 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1277 setOperationAction(ISD::SELECT, VT, Custom);
1278 setOperationAction(ISD::FADD, VT, Custom);
1279 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1280 setOperationAction(ISD::FDIV, VT, Custom);
1281 setOperationAction(ISD::FMA, VT, Custom);
1282 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1283 setOperationAction(ISD::FMAXNUM, VT, Custom);
1284 setOperationAction(ISD::FMINIMUM, VT, Custom);
1285 setOperationAction(ISD::FMINNUM, VT, Custom);
1286 setOperationAction(ISD::FMUL, VT, Custom);
1287 setOperationAction(ISD::FNEG, VT, Custom);
1288 setOperationAction(ISD::FSUB, VT, Custom);
1289 setOperationAction(ISD::FCEIL, VT, Custom);
1290 setOperationAction(ISD::FFLOOR, VT, Custom);
1291 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1292 setOperationAction(ISD::FRINT, VT, Custom);
1293 setOperationAction(ISD::FROUND, VT, Custom);
1294 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1295 setOperationAction(ISD::FTRUNC, VT, Custom);
1296 setOperationAction(ISD::FSQRT, VT, Custom);
1297 setOperationAction(ISD::FABS, VT, Custom);
1298 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1299 setOperationAction(ISD::FP_ROUND, VT, Custom);
1300 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1301 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1302 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1303 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1304 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1305
1306 setOperationAction(ISD::SELECT_CC, VT, Expand);
1307 }
1308
1309 for (auto VT : {MVT::nxv2bf16, MVT::nxv4bf16, MVT::nxv8bf16}) {
1310 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1311 setOperationAction(ISD::MGATHER, VT, Custom);
1312 setOperationAction(ISD::MSCATTER, VT, Custom);
1313 setOperationAction(ISD::MLOAD, VT, Custom);
1314 }
1315
1316 setOperationAction(ISD::SPLAT_VECTOR, MVT::nxv8bf16, Custom);
1317
1318 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
1319 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
1320
1321 // NOTE: Currently this has to happen after computeRegisterProperties rather
1322 // than the preferred option of combining it with the addRegisterClass call.
1323 if (Subtarget->useSVEForFixedLengthVectors()) {
1324 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
1325 if (useSVEForFixedLengthVectorVT(VT))
1326 addTypeForFixedLengthSVE(VT);
1327 for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
1328 if (useSVEForFixedLengthVectorVT(VT))
1329 addTypeForFixedLengthSVE(VT);
1330
1331 // 64-bit results can mean a bigger-than-NEON input.
1332 for (auto VT : {MVT::v8i8, MVT::v4i16})
1333 setOperationAction(ISD::TRUNCATE, VT, Custom);
1334 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
1335
1336 // 128-bit results imply a bigger-than-NEON input.
1337 for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
1338 setOperationAction(ISD::TRUNCATE, VT, Custom);
1339 for (auto VT : {MVT::v8f16, MVT::v4f32})
1340 setOperationAction(ISD::FP_ROUND, VT, Custom);
1341
1342 // These operations are not supported on NEON but SVE can do them.
1343 setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
1344 setOperationAction(ISD::CTLZ, MVT::v1i64, Custom);
1345 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1346 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
1347 setOperationAction(ISD::MUL, MVT::v1i64, Custom);
1348 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
1349 setOperationAction(ISD::MULHS, MVT::v1i64, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v2i64, Custom);
1351 setOperationAction(ISD::MULHU, MVT::v1i64, Custom);
1352 setOperationAction(ISD::MULHU, MVT::v2i64, Custom);
1353 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
1354 setOperationAction(ISD::SDIV, MVT::v16i8, Custom);
1355 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
1356 setOperationAction(ISD::SDIV, MVT::v8i16, Custom);
1357 setOperationAction(ISD::SDIV, MVT::v2i32, Custom);
1358 setOperationAction(ISD::SDIV, MVT::v4i32, Custom);
1359 setOperationAction(ISD::SDIV, MVT::v1i64, Custom);
1360 setOperationAction(ISD::SDIV, MVT::v2i64, Custom);
1361 setOperationAction(ISD::SMAX, MVT::v1i64, Custom);
1362 setOperationAction(ISD::SMAX, MVT::v2i64, Custom);
1363 setOperationAction(ISD::SMIN, MVT::v1i64, Custom);
1364 setOperationAction(ISD::SMIN, MVT::v2i64, Custom);
1365 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
1366 setOperationAction(ISD::UDIV, MVT::v16i8, Custom);
1367 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
1368 setOperationAction(ISD::UDIV, MVT::v8i16, Custom);
1369 setOperationAction(ISD::UDIV, MVT::v2i32, Custom);
1370 setOperationAction(ISD::UDIV, MVT::v4i32, Custom);
1371 setOperationAction(ISD::UDIV, MVT::v1i64, Custom);
1372 setOperationAction(ISD::UDIV, MVT::v2i64, Custom);
1373 setOperationAction(ISD::UMAX, MVT::v1i64, Custom);
1374 setOperationAction(ISD::UMAX, MVT::v2i64, Custom);
1375 setOperationAction(ISD::UMIN, MVT::v1i64, Custom);
1376 setOperationAction(ISD::UMIN, MVT::v2i64, Custom);
1377 setOperationAction(ISD::VECREDUCE_SMAX, MVT::v2i64, Custom);
1378 setOperationAction(ISD::VECREDUCE_SMIN, MVT::v2i64, Custom);
1379 setOperationAction(ISD::VECREDUCE_UMAX, MVT::v2i64, Custom);
1380 setOperationAction(ISD::VECREDUCE_UMIN, MVT::v2i64, Custom);
1381
1382 // Int operations with no NEON support.
1383 for (auto VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
1384 MVT::v2i32, MVT::v4i32, MVT::v2i64}) {
1385 setOperationAction(ISD::BITREVERSE, VT, Custom);
1386 setOperationAction(ISD::CTTZ, VT, Custom);
1387 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1388 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1389 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1390 }
1391
1392 // FP operations with no NEON support.
1393 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32,
1394 MVT::v1f64, MVT::v2f64})
1395 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1396
1397 // Use SVE for vectors with more than 2 elements.
1398 for (auto VT : {MVT::v4f16, MVT::v8f16, MVT::v4f32})
1399 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1400 }
1401
1402 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv2i1, MVT::nxv2i64);
1403 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv4i1, MVT::nxv4i32);
1404 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv8i1, MVT::nxv8i16);
1405 setOperationPromotedToType(ISD::VECTOR_SPLICE, MVT::nxv16i1, MVT::nxv16i8);
1406 }
1407
1408 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
1409}
1410
1411void AArch64TargetLowering::addTypeForNEON(MVT VT) {
1412 assert(VT.isVector() && "VT should be a vector type");
1413
1414 if (VT.isFloatingPoint()) {
1415 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
1416 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
1417 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
1418 }
1419
1420 // Mark vector float intrinsics as expand.
1421 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
1422 setOperationAction(ISD::FSIN, VT, Expand);
1423 setOperationAction(ISD::FCOS, VT, Expand);
1424 setOperationAction(ISD::FPOW, VT, Expand);
1425 setOperationAction(ISD::FLOG, VT, Expand);
1426 setOperationAction(ISD::FLOG2, VT, Expand);
1427 setOperationAction(ISD::FLOG10, VT, Expand);
1428 setOperationAction(ISD::FEXP, VT, Expand);
1429 setOperationAction(ISD::FEXP2, VT, Expand);
1430 }
1431
1432 // But we do support custom-lowering for FCOPYSIGN.
1433 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
1434 ((VT == MVT::v4f16 || VT == MVT::v8f16) && Subtarget->hasFullFP16()))
1435 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1436
1437 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1438 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1439 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1440 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1441 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1442 setOperationAction(ISD::SRA, VT, Custom);
1443 setOperationAction(ISD::SRL, VT, Custom);
1444 setOperationAction(ISD::SHL, VT, Custom);
1445 setOperationAction(ISD::OR, VT, Custom);
1446 setOperationAction(ISD::SETCC, VT, Custom);
1447 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
1448
1449 setOperationAction(ISD::SELECT, VT, Expand);
1450 setOperationAction(ISD::SELECT_CC, VT, Expand);
1451 setOperationAction(ISD::VSELECT, VT, Expand);
1452 for (MVT InnerVT : MVT::all_valuetypes())
1453 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
1454
1455 // CNT supports only B element sizes, then use UADDLP to widen.
1456 if (VT != MVT::v8i8 && VT != MVT::v16i8)
1457 setOperationAction(ISD::CTPOP, VT, Custom);
1458
1459 setOperationAction(ISD::UDIV, VT, Expand);
1460 setOperationAction(ISD::SDIV, VT, Expand);
1461 setOperationAction(ISD::UREM, VT, Expand);
1462 setOperationAction(ISD::SREM, VT, Expand);
1463 setOperationAction(ISD::FREM, VT, Expand);
1464
1465 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1466 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1467 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
1468 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
1469
1470 if (!VT.isFloatingPoint())
1471 setOperationAction(ISD::ABS, VT, Legal);
1472
1473 // [SU][MIN|MAX] are available for all NEON types apart from i64.
1474 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
1475 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
1476 setOperationAction(Opcode, VT, Legal);
1477
1478 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
1479 if (VT.isFloatingPoint() &&
1480 VT.getVectorElementType() != MVT::bf16 &&
1481 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
1482 for (unsigned Opcode :
1483 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
1484 setOperationAction(Opcode, VT, Legal);
1485
1486 if (Subtarget->isLittleEndian()) {
1487 for (unsigned im = (unsigned)ISD::PRE_INC;
1488 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
1489 setIndexedLoadAction(im, VT, Legal);
1490 setIndexedStoreAction(im, VT, Legal);
1491 }
1492 }
1493}
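The CTPOP handling in addTypeForNEON above follows the comment "CNT supports only B element sizes, then use UADDLP to widen". The standalone sketch below is a plain C++ model of that strategy for a single 16-bit lane; the lane value and helper names are illustrative only, not part of the lowering.

#include <cstdint>
#include <cassert>

int main() {
  uint16_t Lane = 0xF00F;                      // one 16-bit lane
  uint8_t B0 = Lane & 0xFF, B1 = Lane >> 8;    // its two bytes
  auto Cnt = [](uint8_t B) {                   // CNT: per-byte population count
    unsigned N = 0;
    for (; B; B &= B - 1)
      ++N;
    return N;
  };
  unsigned PopCnt = Cnt(B0) + Cnt(B1);         // UADDLP: pairwise widening add
  assert(PopCnt == 8);                         // 0xF00F has eight set bits
  return 0;
}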
1494
1495void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
1496 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
1497
1498 // By default everything must be expanded.
1499 for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
1500 setOperationAction(Op, VT, Expand);
1501
1502 // We use EXTRACT_SUBVECTOR to "cast" a scalable vector to a fixed length one.
1503 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1504
1505 if (VT.isFloatingPoint()) {
1506 setCondCodeAction(ISD::SETO, VT, Expand);
1507 setCondCodeAction(ISD::SETOLT, VT, Expand);
1508 setCondCodeAction(ISD::SETLT, VT, Expand);
1509 setCondCodeAction(ISD::SETOLE, VT, Expand);
1510 setCondCodeAction(ISD::SETLE, VT, Expand);
1511 setCondCodeAction(ISD::SETULT, VT, Expand);
1512 setCondCodeAction(ISD::SETULE, VT, Expand);
1513 setCondCodeAction(ISD::SETUGE, VT, Expand);
1514 setCondCodeAction(ISD::SETUGT, VT, Expand);
1515 setCondCodeAction(ISD::SETUEQ, VT, Expand);
1516 setCondCodeAction(ISD::SETUNE, VT, Expand);
1517 }
1518
1519 // Mark integer truncating stores as having custom lowering
1520 if (VT.isInteger()) {
1521 MVT InnerVT = VT.changeVectorElementType(MVT::i8);
1522 while (InnerVT != VT) {
1523 setTruncStoreAction(VT, InnerVT, Custom);
1524 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
1525 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
1526 InnerVT = InnerVT.changeVectorElementType(
1527 MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
1528 }
1529 }
1530
1531 // Lower fixed length vector operations to scalable equivalents.
1532 setOperationAction(ISD::ABS, VT, Custom);
1533 setOperationAction(ISD::ADD, VT, Custom);
1534 setOperationAction(ISD::AND, VT, Custom);
1535 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1536 setOperationAction(ISD::BITCAST, VT, Custom);
1537 setOperationAction(ISD::BITREVERSE, VT, Custom);
1538 setOperationAction(ISD::BSWAP, VT, Custom);
1539 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1540 setOperationAction(ISD::CTLZ, VT, Custom);
1541 setOperationAction(ISD::CTPOP, VT, Custom);
1542 setOperationAction(ISD::CTTZ, VT, Custom);
1543 setOperationAction(ISD::FABS, VT, Custom);
1544 setOperationAction(ISD::FADD, VT, Custom);
1545 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1546 setOperationAction(ISD::FCEIL, VT, Custom);
1547 setOperationAction(ISD::FDIV, VT, Custom);
1548 setOperationAction(ISD::FFLOOR, VT, Custom);
1549 setOperationAction(ISD::FMA, VT, Custom);
1550 setOperationAction(ISD::FMAXIMUM, VT, Custom);
1551 setOperationAction(ISD::FMAXNUM, VT, Custom);
1552 setOperationAction(ISD::FMINIMUM, VT, Custom);
1553 setOperationAction(ISD::FMINNUM, VT, Custom);
1554 setOperationAction(ISD::FMUL, VT, Custom);
1555 setOperationAction(ISD::FNEARBYINT, VT, Custom);
1556 setOperationAction(ISD::FNEG, VT, Custom);
1557 setOperationAction(ISD::FP_EXTEND, VT, Custom);
1558 setOperationAction(ISD::FP_ROUND, VT, Custom);
1559 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1560 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1561 setOperationAction(ISD::FRINT, VT, Custom);
1562 setOperationAction(ISD::FROUND, VT, Custom);
1563 setOperationAction(ISD::FROUNDEVEN, VT, Custom);
1564 setOperationAction(ISD::FSQRT, VT, Custom);
1565 setOperationAction(ISD::FSUB, VT, Custom);
1566 setOperationAction(ISD::FTRUNC, VT, Custom);
1567 setOperationAction(ISD::LOAD, VT, Custom);
1568 setOperationAction(ISD::MGATHER, VT, Custom);
1569 setOperationAction(ISD::MLOAD, VT, Custom);
1570 setOperationAction(ISD::MSCATTER, VT, Custom);
1571 setOperationAction(ISD::MSTORE, VT, Custom);
1572 setOperationAction(ISD::MUL, VT, Custom);
1573 setOperationAction(ISD::MULHS, VT, Custom);
1574 setOperationAction(ISD::MULHU, VT, Custom);
1575 setOperationAction(ISD::OR, VT, Custom);
1576 setOperationAction(ISD::SDIV, VT, Custom);
1577 setOperationAction(ISD::SELECT, VT, Custom);
1578 setOperationAction(ISD::SETCC, VT, Custom);
1579 setOperationAction(ISD::SHL, VT, Custom);
1580 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1581 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1582 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1583 setOperationAction(ISD::SMAX, VT, Custom);
1584 setOperationAction(ISD::SMIN, VT, Custom);
1585 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
1586 setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
1587 setOperationAction(ISD::SRA, VT, Custom);
1588 setOperationAction(ISD::SRL, VT, Custom);
1589 setOperationAction(ISD::STORE, VT, Custom);
1590 setOperationAction(ISD::SUB, VT, Custom);
1591 setOperationAction(ISD::TRUNCATE, VT, Custom);
1592 setOperationAction(ISD::UDIV, VT, Custom);
1593 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1594 setOperationAction(ISD::UMAX, VT, Custom);
1595 setOperationAction(ISD::UMIN, VT, Custom);
1596 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
1597 setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
1598 setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
1599 setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
1600 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
1601 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
1602 setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
1603 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1604 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
1605 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
1606 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
1607 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
1608 setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
1609 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1610 setOperationAction(ISD::VSELECT, VT, Custom);
1611 setOperationAction(ISD::XOR, VT, Custom);
1612 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1613}
1614
1615void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
1616 addRegisterClass(VT, &AArch64::FPR64RegClass);
1617 addTypeForNEON(VT);
1618}
1619
1620void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
1621 addRegisterClass(VT, &AArch64::FPR128RegClass);
1622 addTypeForNEON(VT);
1623}
1624
1625EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &,
1626 LLVMContext &C, EVT VT) const {
1627 if (!VT.isVector())
1628 return MVT::i32;
1629 if (VT.isScalableVector())
1630 return EVT::getVectorVT(C, MVT::i1, VT.getVectorElementCount());
1631 return VT.changeVectorElementTypeToInteger();
1632}
1633
1634static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
1635 const APInt &Demanded,
1636 TargetLowering::TargetLoweringOpt &TLO,
1637 unsigned NewOpc) {
1638 uint64_t OldImm = Imm, NewImm, Enc;
1639 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
1640
1641 // Return if the immediate is already all zeros, all ones, a bimm32 or a
1642 // bimm64.
1643 if (Imm == 0 || Imm == Mask ||
1644 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
1645 return false;
1646
1647 unsigned EltSize = Size;
1648 uint64_t DemandedBits = Demanded.getZExtValue();
1649
1650 // Clear bits that are not demanded.
1651 Imm &= DemandedBits;
1652
1653 while (true) {
1654 // The goal here is to set the non-demanded bits in a way that minimizes
1655 // the number of switching between 0 and 1. In order to achieve this goal,
1656 // we set the non-demanded bits to the value of the preceding demanded bits.
1657 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1658 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1659 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1660 // The final result is 0b11000011.
1661 uint64_t NonDemandedBits = ~DemandedBits;
1662 uint64_t InvertedImm = ~Imm & DemandedBits;
1663 uint64_t RotatedImm =
1664 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1665 NonDemandedBits;
1666 uint64_t Sum = RotatedImm + NonDemandedBits;
1667 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1668 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1669 NewImm = (Imm | Ones) & Mask;
1670
1671 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1672 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1673 // we halve the element size and continue the search.
1674 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1675 break;
1676
1677 // We cannot shrink the element size any further if it is 2 bits.
1678 if (EltSize == 2)
1679 return false;
1680
1681 EltSize /= 2;
1682 Mask >>= EltSize;
1683 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1684
1685 // Return if there is a mismatch in any of the demanded bits of Imm and Hi.
1686 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1687 return false;
1688
1689 // Merge the upper and lower halves of Imm and DemandedBits.
1690 Imm |= Hi;
1691 DemandedBits |= DemandedBitsHi;
1692 }
1693
1694 ++NumOptimizedImms;
1695
1696 // Replicate the element across the register width.
1697 while (EltSize < Size) {
1698 NewImm |= NewImm << EltSize;
1699 EltSize *= 2;
1700 }
1701
1702 (void)OldImm;
1703 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1704 "demanded bits should never be altered");
1705 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1706
1707 // Create the new constant immediate node.
1708 EVT VT = Op.getValueType();
1709 SDLoc DL(Op);
1710 SDValue New;
1711
1712 // If the new constant immediate is all-zeros or all-ones, let the target
1713 // independent DAG combine optimize this node.
1714 if (NewImm == 0 || NewImm == OrigMask) {
1715 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1716 TLO.DAG.getConstant(NewImm, DL, VT));
1717 // Otherwise, create a machine node so that target independent DAG combine
1718 // doesn't undo this optimization.
1719 } else {
1720 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1721 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1722 New = SDValue(
1723 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1724 }
1725
1726 return TLO.CombineTo(Op, New);
1727}
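As a reading aid for the loop in optimizeLogicalImm above, here is a minimal standalone sketch (an 8-bit toy, not the production helper) of the idea described in its comment: each non-demanded bit takes the value of the closest lower demanded bit, which minimises 0/1 transitions. It assumes bit 0 is demanded, so the wrap-around rotation in the real code is left out.

#include <cstdint>
#include <cassert>

// Fill every non-demanded bit with the value of the closest lower demanded bit.
static uint8_t fillNonDemanded(uint8_t Imm, uint8_t Demanded) {
  uint8_t Result = 0;
  bool Prev = Imm & 1;
  for (unsigned i = 0; i < 8; ++i) {
    if ((Demanded >> i) & 1)
      Prev = (Imm >> i) & 1;                     // demanded bits keep their value
    Result |= static_cast<uint8_t>(Prev) << i;   // non-demanded bits copy Prev
  }
  return Result;
}

int main() {
  // The comment's example: immediate 0bx10xx0x1, with 'x' marking non-demanded bits.
  uint8_t Imm = 0b01000001;      // values of the demanded bits
  uint8_t Demanded = 0b01100101; // 1 where a bit is demanded
  assert(fillNonDemanded(Imm, Demanded) == 0b11000011);
  return 0;
}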
1728
1729bool AArch64TargetLowering::targetShrinkDemandedConstant(
1730 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1731 TargetLoweringOpt &TLO) const {
1732 // Delay this optimization to as late as possible.
1733 if (!TLO.LegalOps)
1734 return false;
1735
1736 if (!EnableOptimizeLogicalImm)
1737 return false;
1738
1739 EVT VT = Op.getValueType();
1740 if (VT.isVector())
1741 return false;
1742
1743 unsigned Size = VT.getSizeInBits();
1744 assert((Size == 32 || Size == 64) &&
1745 "i32 or i64 is expected after legalization.");
1746
1747 // Exit early if we demand all bits.
1748 if (DemandedBits.countPopulation() == Size)
1749 return false;
1750
1751 unsigned NewOpc;
1752 switch (Op.getOpcode()) {
1753 default:
1754 return false;
1755 case ISD::AND:
1756 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1757 break;
1758 case ISD::OR:
1759 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1760 break;
1761 case ISD::XOR:
1762 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1763 break;
1764 }
1765 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1766 if (!C)
1767 return false;
1768 uint64_t Imm = C->getZExtValue();
1769 return optimizeLogicalImm(Op, Size, Imm, DemandedBits, TLO, NewOpc);
1770}
1771
1772/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1773/// Mask are known to be either zero or one and return them in Known.
1774void AArch64TargetLowering::computeKnownBitsForTargetNode(
1775 const SDValue Op, KnownBits &Known,
1776 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1777 switch (Op.getOpcode()) {
1778 default:
1779 break;
1780 case AArch64ISD::CSEL: {
1781 KnownBits Known2;
1782 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1783 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1784 Known = KnownBits::commonBits(Known, Known2);
1785 break;
1786 }
1787 case AArch64ISD::LOADgot:
1788 case AArch64ISD::ADDlow: {
1789 if (!Subtarget->isTargetILP32())
1790 break;
1791 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1792 Known.Zero = APInt::getHighBitsSet(64, 32);
1793 break;
1794 }
1795 case ISD::INTRINSIC_W_CHAIN: {
1796 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1797 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1798 switch (IntID) {
1799 default: return;
1800 case Intrinsic::aarch64_ldaxr:
1801 case Intrinsic::aarch64_ldxr: {
1802 unsigned BitWidth = Known.getBitWidth();
1803 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1804 unsigned MemBits = VT.getScalarSizeInBits();
1805 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1806 return;
1807 }
1808 }
1809 break;
1810 }
1811 case ISD::INTRINSIC_WO_CHAIN:
1812 case ISD::INTRINSIC_VOID: {
1813 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1814 switch (IntNo) {
1815 default:
1816 break;
1817 case Intrinsic::aarch64_neon_umaxv:
1818 case Intrinsic::aarch64_neon_uminv: {
1819 // Figure out the datatype of the vector operand. The UMINV instruction
1820 // will zero extend the result, so we can mark as known zero all the
1821 // bits larger than the element datatype. 32-bit or larger doesn't need
1822 // this as those are legal types and will be handled by isel directly.
1823 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1824 unsigned BitWidth = Known.getBitWidth();
1825 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1826 assert(BitWidth >= 8 && "Unexpected width!");
1827 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1828 Known.Zero |= Mask;
1829 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1830 assert(BitWidth >= 16 && "Unexpected width!");
1831 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1832 Known.Zero |= Mask;
1833 }
1834 break;
1835 } break;
1836 }
1837 }
1838 }
1839}
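The aarch64_neon_umaxv/uminv case above marks the bits above the element width as known zero. The standalone illustration below (hypothetical lane values, with std::max_element standing in for the intrinsic) shows why bits [31:8] of an i32 result taken from v8i8 lanes must be zero.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  uint8_t Lanes[8] = {3, 200, 17, 255, 0, 9, 128, 64};
  uint32_t UMaxV = *std::max_element(Lanes, Lanes + 8); // stands in for aarch64_neon_umaxv
  assert((UMaxV & 0xFFFFFF00u) == 0);                   // bits [31:8] are known zero
  return 0;
}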
1840
1841MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1842 EVT) const {
1843 return MVT::i64;
1844}
1845
1846bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1847 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1848 bool *Fast) const {
1849 if (Subtarget->requiresStrictAlign())
1850 return false;
1851
1852 if (Fast) {
1853 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1854 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1855 // See comments in performSTORECombine() for more details about
1856 // these conditions.
1857
1858 // Code that uses clang vector extensions can mark that it
1859 // wants unaligned accesses to be treated as fast by
1860 // underspecifying alignment to be 1 or 2.
1861 Alignment <= 2 ||
1862
1863 // Disregard v2i64. Memcpy lowering produces those and splitting
1864 // them regresses performance on micro-benchmarks and olden/bh.
1865 VT == MVT::v2i64;
1866 }
1867 return true;
1868}
1869
1870// Same as above but handling LLTs instead.
1871bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1872 LLT Ty, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1873 bool *Fast) const {
1874 if (Subtarget->requiresStrictAlign())
1875 return false;
1876
1877 if (Fast) {
1878 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1879 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1880 Ty.getSizeInBytes() != 16 ||
1881 // See comments in performSTORECombine() for more details about
1882 // these conditions.
1883
1884 // Code that uses clang vector extensions can mark that it
1885 // wants unaligned accesses to be treated as fast by
1886 // underspecifying alignment to be 1 or 2.
1887 Alignment <= 2 ||
1888
1889 // Disregard v2i64. Memcpy lowering produces those and splitting
1890 // them regresses performance on micro-benchmarks and olden/bh.
1891 Ty == LLT::fixed_vector(2, 64);
1892 }
1893 return true;
1894}
1895
1896FastISel *
1897AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1898 const TargetLibraryInfo *libInfo) const {
1899 return AArch64::createFastISel(funcInfo, libInfo);
1900}
1901
1902const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1903#define MAKE_CASE(V) \
1904 case V: \
1905 return #V;
1906 switch ((AArch64ISD::NodeType)Opcode) {
1907 case AArch64ISD::FIRST_NUMBER:
1908 break;
1909 MAKE_CASE(AArch64ISD::CALL)
1910 MAKE_CASE(AArch64ISD::ADRP)
1911 MAKE_CASE(AArch64ISD::ADR)
1912 MAKE_CASE(AArch64ISD::ADDlow)
1913 MAKE_CASE(AArch64ISD::LOADgot)
1914 MAKE_CASE(AArch64ISD::RET_FLAG)
1915 MAKE_CASE(AArch64ISD::BRCOND)
1916 MAKE_CASE(AArch64ISD::CSEL)
1917 MAKE_CASE(AArch64ISD::CSINV)
1918 MAKE_CASE(AArch64ISD::CSNEG)
1919 MAKE_CASE(AArch64ISD::CSINC)
1920 MAKE_CASE(AArch64ISD::THREAD_POINTER)
1921 MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
1922 MAKE_CASE(AArch64ISD::ADD_PRED)
1923 MAKE_CASE(AArch64ISD::MUL_PRED)
1924 MAKE_CASE(AArch64ISD::MULHS_PRED)
1925 MAKE_CASE(AArch64ISD::MULHU_PRED)
1926 MAKE_CASE(AArch64ISD::SDIV_PRED)
1927 MAKE_CASE(AArch64ISD::SHL_PRED)
1928 MAKE_CASE(AArch64ISD::SMAX_PRED)
1929 MAKE_CASE(AArch64ISD::SMIN_PRED)
1930 MAKE_CASE(AArch64ISD::SRA_PRED)
1931 MAKE_CASE(AArch64ISD::SRL_PRED)
1932 MAKE_CASE(AArch64ISD::SUB_PRED)
1933 MAKE_CASE(AArch64ISD::UDIV_PRED)
1934 MAKE_CASE(AArch64ISD::UMAX_PRED)
1935 MAKE_CASE(AArch64ISD::UMIN_PRED)
1936 MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
1937 MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
1938 MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
1939 MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
1940 MAKE_CASE(AArch64ISD::FFLOOR_MERGE_PASSTHRU)
1941 MAKE_CASE(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU)
1942 MAKE_CASE(AArch64ISD::FRINT_MERGE_PASSTHRU)
1943 MAKE_CASE(AArch64ISD::FROUND_MERGE_PASSTHRU)
1944 MAKE_CASE(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU)
1945 MAKE_CASE(AArch64ISD::FTRUNC_MERGE_PASSTHRU)
1946 MAKE_CASE(AArch64ISD::FP_ROUND_MERGE_PASSTHRU)
1947 MAKE_CASE(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU)
1948 MAKE_CASE(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU)
1949 MAKE_CASE(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU)
1950 MAKE_CASE(AArch64ISD::FCVTZU_MERGE_PASSTHRU)
1951 MAKE_CASE(AArch64ISD::FCVTZS_MERGE_PASSTHRU)
1952 MAKE_CASE(AArch64ISD::FSQRT_MERGE_PASSTHRU)
1953 MAKE_CASE(AArch64ISD::FRECPX_MERGE_PASSTHRU)
1954 MAKE_CASE(AArch64ISD::FABS_MERGE_PASSTHRU)
1955 MAKE_CASE(AArch64ISD::ABS_MERGE_PASSTHRU)
1956 MAKE_CASE(AArch64ISD::NEG_MERGE_PASSTHRU)
1957 MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
1958 MAKE_CASE(AArch64ISD::ADC)
1959 MAKE_CASE(AArch64ISD::SBC)
1960 MAKE_CASE(AArch64ISD::ADDS)
1961 MAKE_CASE(AArch64ISD::SUBS)
1962 MAKE_CASE(AArch64ISD::ADCS)
1963 MAKE_CASE(AArch64ISD::SBCS)
1964 MAKE_CASE(AArch64ISD::ANDS)
1965 MAKE_CASE(AArch64ISD::CCMP)
1966 MAKE_CASE(AArch64ISD::CCMN)
1967 MAKE_CASE(AArch64ISD::FCCMP)
1968 MAKE_CASE(AArch64ISD::FCMP)
1969 MAKE_CASE(AArch64ISD::STRICT_FCMP)
1970 MAKE_CASE(AArch64ISD::STRICT_FCMPE)
1971 MAKE_CASE(AArch64ISD::DUP)
1972 MAKE_CASE(AArch64ISD::DUPLANE8)
1973 MAKE_CASE(AArch64ISD::DUPLANE16)
1974 MAKE_CASE(AArch64ISD::DUPLANE32)
1975 MAKE_CASE(AArch64ISD::DUPLANE64)
1976 MAKE_CASE(AArch64ISD::MOVI)
1977 MAKE_CASE(AArch64ISD::MOVIshift)
1978 MAKE_CASE(AArch64ISD::MOVIedit)
1979 MAKE_CASE(AArch64ISD::MOVImsl)
1980 MAKE_CASE(AArch64ISD::FMOV)
1981 MAKE_CASE(AArch64ISD::MVNIshift)
1982 MAKE_CASE(AArch64ISD::MVNImsl)
1983 MAKE_CASE(AArch64ISD::BICi)
1984 MAKE_CASE(AArch64ISD::ORRi)
1985 MAKE_CASE(AArch64ISD::BSP)
1986 MAKE_CASE(AArch64ISD::EXTR)
1987 MAKE_CASE(AArch64ISD::ZIP1)
1988 MAKE_CASE(AArch64ISD::ZIP2)
1989 MAKE_CASE(AArch64ISD::UZP1)
1990 MAKE_CASE(AArch64ISD::UZP2)
1991 MAKE_CASE(AArch64ISD::TRN1)
1992 MAKE_CASE(AArch64ISD::TRN2)
1993 MAKE_CASE(AArch64ISD::REV16)
1994 MAKE_CASE(AArch64ISD::REV32)
1995 MAKE_CASE(AArch64ISD::REV64)
1996 MAKE_CASE(AArch64ISD::EXT)
1997 MAKE_CASE(AArch64ISD::SPLICE)
1998 MAKE_CASE(AArch64ISD::VSHL)
1999 MAKE_CASE(AArch64ISD::VLSHR)
2000 MAKE_CASE(AArch64ISD::VASHR)
2001 MAKE_CASE(AArch64ISD::VSLI)
2002 MAKE_CASE(AArch64ISD::VSRI)
2003 MAKE_CASE(AArch64ISD::CMEQ)
2004 MAKE_CASE(AArch64ISD::CMGE)
2005 MAKE_CASE(AArch64ISD::CMGT)
2006 MAKE_CASE(AArch64ISD::CMHI)
2007 MAKE_CASE(AArch64ISD::CMHS)
2008 MAKE_CASE(AArch64ISD::FCMEQ)
2009 MAKE_CASE(AArch64ISD::FCMGE)
2010 MAKE_CASE(AArch64ISD::FCMGT)
2011 MAKE_CASE(AArch64ISD::CMEQz)
2012 MAKE_CASE(AArch64ISD::CMGEz)
2013 MAKE_CASE(AArch64ISD::CMGTz)
2014 MAKE_CASE(AArch64ISD::CMLEz)
2015 MAKE_CASE(AArch64ISD::CMLTz)
2016 MAKE_CASE(AArch64ISD::FCMEQz)
2017 MAKE_CASE(AArch64ISD::FCMGEz)
2018 MAKE_CASE(AArch64ISD::FCMGTz)
2019 MAKE_CASE(AArch64ISD::FCMLEz)
2020 MAKE_CASE(AArch64ISD::FCMLTz)
2021 MAKE_CASE(AArch64ISD::SADDV)
2022 MAKE_CASE(AArch64ISD::UADDV)
2023 MAKE_CASE(AArch64ISD::SRHADD)
2024 MAKE_CASE(AArch64ISD::URHADD)
2025 MAKE_CASE(AArch64ISD::SHADD)
2026 MAKE_CASE(AArch64ISD::UHADD)
2027 MAKE_CASE(AArch64ISD::SDOT)
2028 MAKE_CASE(AArch64ISD::UDOT)
2029 MAKE_CASE(AArch64ISD::SMINV)
2030 MAKE_CASE(AArch64ISD::UMINV)
2031 MAKE_CASE(AArch64ISD::SMAXV)
2032 MAKE_CASE(AArch64ISD::UMAXV)
2033 MAKE_CASE(AArch64ISD::SADDV_PRED)
2034 MAKE_CASE(AArch64ISD::UADDV_PRED)
2035 MAKE_CASE(AArch64ISD::SMAXV_PRED)
2036 MAKE_CASE(AArch64ISD::UMAXV_PRED)
2037 MAKE_CASE(AArch64ISD::SMINV_PRED)
2038 MAKE_CASE(AArch64ISD::UMINV_PRED)
2039 MAKE_CASE(AArch64ISD::ORV_PRED)
2040 MAKE_CASE(AArch64ISD::EORV_PRED)
2041 MAKE_CASE(AArch64ISD::ANDV_PRED)
2042 MAKE_CASE(AArch64ISD::CLASTA_N)
2043 MAKE_CASE(AArch64ISD::CLASTB_N)
2044 MAKE_CASE(AArch64ISD::LASTA)
2045 MAKE_CASE(AArch64ISD::LASTB)
2046 MAKE_CASE(AArch64ISD::REINTERPRET_CAST)
2047 MAKE_CASE(AArch64ISD::LS64_BUILD)
2048 MAKE_CASE(AArch64ISD::LS64_EXTRACT)
2049 MAKE_CASE(AArch64ISD::TBL)
2050 MAKE_CASE(AArch64ISD::FADD_PRED)
2051 MAKE_CASE(AArch64ISD::FADDA_PRED)
2052 MAKE_CASE(AArch64ISD::FADDV_PRED)
2053 MAKE_CASE(AArch64ISD::FDIV_PRED)
2054 MAKE_CASE(AArch64ISD::FMA_PRED)
2055 MAKE_CASE(AArch64ISD::FMAX_PRED)
2056 MAKE_CASE(AArch64ISD::FMAXV_PRED)
2057 MAKE_CASE(AArch64ISD::FMAXNM_PRED)
2058 MAKE_CASE(AArch64ISD::FMAXNMV_PRED)
2059 MAKE_CASE(AArch64ISD::FMIN_PRED)
2060 MAKE_CASE(AArch64ISD::FMINV_PRED)
2061 MAKE_CASE(AArch64ISD::FMINNM_PRED)
2062 MAKE_CASE(AArch64ISD::FMINNMV_PRED)
2063 MAKE_CASE(AArch64ISD::FMUL_PRED)
2064 MAKE_CASE(AArch64ISD::FSUB_PRED)
2065 MAKE_CASE(AArch64ISD::BIC)
2066 MAKE_CASE(AArch64ISD::BIT)
2067 MAKE_CASE(AArch64ISD::CBZ)
2068 MAKE_CASE(AArch64ISD::CBNZ)
2069 MAKE_CASE(AArch64ISD::TBZ)
2070 MAKE_CASE(AArch64ISD::TBNZ)
2071 MAKE_CASE(AArch64ISD::TC_RETURN)
2072 MAKE_CASE(AArch64ISD::PREFETCH)
2073 MAKE_CASE(AArch64ISD::SITOF)
2074 MAKE_CASE(AArch64ISD::UITOF)
2075 MAKE_CASE(AArch64ISD::NVCAST)
2076 MAKE_CASE(AArch64ISD::MRS)
2077 MAKE_CASE(AArch64ISD::SQSHL_I)
2078 MAKE_CASE(AArch64ISD::UQSHL_I)
2079 MAKE_CASE(AArch64ISD::SRSHR_I)
2080 MAKE_CASE(AArch64ISD::URSHR_I)
2081 MAKE_CASE(AArch64ISD::SQSHLU_I)
2082 MAKE_CASE(AArch64ISD::WrapperLarge)
2083 MAKE_CASE(AArch64ISD::LD2post)
2084 MAKE_CASE(AArch64ISD::LD3post)
2085 MAKE_CASE(AArch64ISD::LD4post)
2086 MAKE_CASE(AArch64ISD::ST2post)
2087 MAKE_CASE(AArch64ISD::ST3post)
2088 MAKE_CASE(AArch64ISD::ST4post)
2089 MAKE_CASE(AArch64ISD::LD1x2post)
2090 MAKE_CASE(AArch64ISD::LD1x3post)
2091 MAKE_CASE(AArch64ISD::LD1x4post)
2092 MAKE_CASE(AArch64ISD::ST1x2post)
2093 MAKE_CASE(AArch64ISD::ST1x3post)
2094 MAKE_CASE(AArch64ISD::ST1x4post)
2095 MAKE_CASE(AArch64ISD::LD1DUPpost)
2096 MAKE_CASE(AArch64ISD::LD2DUPpost)
2097 MAKE_CASE(AArch64ISD::LD3DUPpost)
2098 MAKE_CASE(AArch64ISD::LD4DUPpost)
2099 MAKE_CASE(AArch64ISD::LD1LANEpost)
2100 MAKE_CASE(AArch64ISD::LD2LANEpost)
2101 MAKE_CASE(AArch64ISD::LD3LANEpost)
2102 MAKE_CASE(AArch64ISD::LD4LANEpost)
2103 MAKE_CASE(AArch64ISD::ST2LANEpost)
2104 MAKE_CASE(AArch64ISD::ST3LANEpost)
2105 MAKE_CASE(AArch64ISD::ST4LANEpost)
2106 MAKE_CASE(AArch64ISD::SMULL)
2107 MAKE_CASE(AArch64ISD::UMULL)
2108 MAKE_CASE(AArch64ISD::FRECPE)
2109 MAKE_CASE(AArch64ISD::FRECPS)
2110 MAKE_CASE(AArch64ISD::FRSQRTE)
2111 MAKE_CASE(AArch64ISD::FRSQRTS)
2112 MAKE_CASE(AArch64ISD::STG)
2113 MAKE_CASE(AArch64ISD::STZG)
2114 MAKE_CASE(AArch64ISD::ST2G)
2115 MAKE_CASE(AArch64ISD::STZ2G)
2116 MAKE_CASE(AArch64ISD::SUNPKHI)
2117 MAKE_CASE(AArch64ISD::SUNPKLO)
2118 MAKE_CASE(AArch64ISD::UUNPKHI)
2119 MAKE_CASE(AArch64ISD::UUNPKLO)
2120 MAKE_CASE(AArch64ISD::INSR)
2121 MAKE_CASE(AArch64ISD::PTEST)
2122 MAKE_CASE(AArch64ISD::PTRUE)
2123 MAKE_CASE(AArch64ISD::LD1_MERGE_ZERO)
2124 MAKE_CASE(AArch64ISD::LD1S_MERGE_ZERO)
2125 MAKE_CASE(AArch64ISD::LDNF1_MERGE_ZERO)
2126 MAKE_CASE(AArch64ISD::LDNF1S_MERGE_ZERO)
2127 MAKE_CASE(AArch64ISD::LDFF1_MERGE_ZERO)
2128 MAKE_CASE(AArch64ISD::LDFF1S_MERGE_ZERO)
2129 MAKE_CASE(AArch64ISD::LD1RQ_MERGE_ZERO)
2130 MAKE_CASE(AArch64ISD::LD1RO_MERGE_ZERO)
2131 MAKE_CASE(AArch64ISD::SVE_LD2_MERGE_ZERO)
2132 MAKE_CASE(AArch64ISD::SVE_LD3_MERGE_ZERO)
2133 MAKE_CASE(AArch64ISD::SVE_LD4_MERGE_ZERO)
2134 MAKE_CASE(AArch64ISD::GLD1_MERGE_ZERO)
2135 MAKE_CASE(AArch64ISD::GLD1_SCALED_MERGE_ZERO)
2136 MAKE_CASE(AArch64ISD::GLD1_SXTW_MERGE_ZERO)
2137 MAKE_CASE(AArch64ISD::GLD1_UXTW_MERGE_ZERO)
2138 MAKE_CASE(AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO)
2139 MAKE_CASE(AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO)
2140 MAKE_CASE(AArch64ISD::GLD1_IMM_MERGE_ZERO)
2141 MAKE_CASE(AArch64ISD::GLD1S_MERGE_ZERO)
2142 MAKE_CASE(AArch64ISD::GLD1S_SCALED_MERGE_ZERO)
2143 MAKE_CASE(AArch64ISD::GLD1S_SXTW_MERGE_ZERO)
2144 MAKE_CASE(AArch64ISD::GLD1S_UXTW_MERGE_ZERO)
2145 MAKE_CASE(AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO)
2146 MAKE_CASE(AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO)
2147 MAKE_CASE(AArch64ISD::GLD1S_IMM_MERGE_ZERO)
2148 MAKE_CASE(AArch64ISD::GLDFF1_MERGE_ZERO)
2149 MAKE_CASE(AArch64ISD::GLDFF1_SCALED_MERGE_ZERO)
2150 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_MERGE_ZERO)
2151 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_MERGE_ZERO)
2152 MAKE_CASE(AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO)
2153 MAKE_CASE(AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO)
2154 MAKE_CASE(AArch64ISD::GLDFF1_IMM_MERGE_ZERO)
2155 MAKE_CASE(AArch64ISD::GLDFF1S_MERGE_ZERO)
2156 MAKE_CASE(AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO)
2157 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO)
2158 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO)
2159 MAKE_CASE(AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO)
2160 MAKE_CASE(AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO)
2161 MAKE_CASE(AArch64ISD::GLDFF1S_IMM_MERGE_ZERO)
2162 MAKE_CASE(AArch64ISD::GLDNT1_MERGE_ZERO)
2163 MAKE_CASE(AArch64ISD::GLDNT1_INDEX_MERGE_ZERO)
2164 MAKE_CASE(AArch64ISD::GLDNT1S_MERGE_ZERO)
2165 MAKE_CASE(AArch64ISD::ST1_PRED)
2166 MAKE_CASE(AArch64ISD::SST1_PRED)
2167 MAKE_CASE(AArch64ISD::SST1_SCALED_PRED)
2168 MAKE_CASE(AArch64ISD::SST1_SXTW_PRED)
2169 MAKE_CASE(AArch64ISD::SST1_UXTW_PRED)
2170 MAKE_CASE(AArch64ISD::SST1_SXTW_SCALED_PRED)
2171 MAKE_CASE(AArch64ISD::SST1_UXTW_SCALED_PRED)
2172 MAKE_CASE(AArch64ISD::SST1_IMM_PRED)
2173 MAKE_CASE(AArch64ISD::SSTNT1_PRED)
2174 MAKE_CASE(AArch64ISD::SSTNT1_INDEX_PRED)
2175 MAKE_CASE(AArch64ISD::LDP)
2176 MAKE_CASE(AArch64ISD::STP)
2177 MAKE_CASE(AArch64ISD::STNP)
2178 MAKE_CASE(AArch64ISD::BITREVERSE_MERGE_PASSTHRU)
2179 MAKE_CASE(AArch64ISD::BSWAP_MERGE_PASSTHRU)
2180 MAKE_CASE(AArch64ISD::CTLZ_MERGE_PASSTHRU)
2181 MAKE_CASE(AArch64ISD::CTPOP_MERGE_PASSTHRU)
2182 MAKE_CASE(AArch64ISD::DUP_MERGE_PASSTHRU)
2183 MAKE_CASE(AArch64ISD::INDEX_VECTOR)
2184 MAKE_CASE(AArch64ISD::UADDLP)
2185 MAKE_CASE(AArch64ISD::CALL_RVMARKER)
2186 }
2187#undef MAKE_CASE
2188 return nullptr;
2189}
2190
2191MachineBasicBlock *
2192AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
2193 MachineBasicBlock *MBB) const {
2194 // We materialise the F128CSEL pseudo-instruction as some control flow and a
2195 // phi node:
2196
2197 // OrigBB:
2198 // [... previous instrs leading to comparison ...]
2199 // b.ne TrueBB
2200 // b EndBB
2201 // TrueBB:
2202 // ; Fallthrough
2203 // EndBB:
2204 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
2205
2206 MachineFunction *MF = MBB->getParent();
2207 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2208 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
2209 DebugLoc DL = MI.getDebugLoc();
2210 MachineFunction::iterator It = ++MBB->getIterator();
2211
2212 Register DestReg = MI.getOperand(0).getReg();
2213 Register IfTrueReg = MI.getOperand(1).getReg();
2214 Register IfFalseReg = MI.getOperand(2).getReg();
2215 unsigned CondCode = MI.getOperand(3).getImm();
2216 bool NZCVKilled = MI.getOperand(4).isKill();
2217
2218 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
2219 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
2220 MF->insert(It, TrueBB);
2221 MF->insert(It, EndBB);
2222
2223 // Transfer the rest of the current basic block to EndBB.
2224 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
2225 MBB->end());
2226 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
2227
2228 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
2229 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
2230 MBB->addSuccessor(TrueBB);
2231 MBB->addSuccessor(EndBB);
2232
2233 // TrueBB falls through to the end.
2234 TrueBB->addSuccessor(EndBB);
2235
2236 if (!NZCVKilled) {
2237 TrueBB->addLiveIn(AArch64::NZCV);
2238 EndBB->addLiveIn(AArch64::NZCV);
2239 }
2240
2241 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
2242 .addReg(IfTrueReg)
2243 .addMBB(TrueBB)
2244 .addReg(IfFalseReg)
2245 .addMBB(MBB);
2246
2247 MI.eraseFromParent();
2248 return EndBB;
2249}
2250
2251MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
2252 MachineInstr &MI, MachineBasicBlock *BB) const {
2253 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
2254 BB->getParent()->getFunction().getPersonalityFn())) &&
2255 "SEH does not use catchret!");
2256 return BB;
2257}
2258
2259MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
2260 MachineInstr &MI, MachineBasicBlock *BB) const {
2261 switch (MI.getOpcode()) {
2262 default:
2263#ifndef NDEBUG
2264 MI.dump();
2265#endif
2266 llvm_unreachable("Unexpected instruction for custom inserter!");
2267
2268 case AArch64::F128CSEL:
2269 return EmitF128CSEL(MI, BB);
2270
2271 case TargetOpcode::STACKMAP:
2272 case TargetOpcode::PATCHPOINT:
2273 case TargetOpcode::STATEPOINT:
2274 return emitPatchPoint(MI, BB);
2275
2276 case AArch64::CATCHRET:
2277 return EmitLoweredCatchRet(MI, BB);
2278 }
2279}
2280
2281//===----------------------------------------------------------------------===//
2282// AArch64 Lowering private implementation.
2283//===----------------------------------------------------------------------===//
2284
2285//===----------------------------------------------------------------------===//
2286// Lowering Code
2287//===----------------------------------------------------------------------===//
2288
2289// Forward declarations of SVE fixed length lowering helpers
2290static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT);
2291static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2292static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V);
2293static SDValue convertFixedMaskToScalableVector(SDValue Mask,
2294 SelectionDAG &DAG);
2295
2296/// isZerosVector - Check whether SDNode N is a zero-filled vector.
2297static bool isZerosVector(const SDNode *N) {
2298 // Look through a bit convert.
2299 while (N->getOpcode() == ISD::BITCAST)
2300 N = N->getOperand(0).getNode();
2301
2302 if (ISD::isConstantSplatVectorAllZeros(N))
2303 return true;
2304
2305 if (N->getOpcode() != AArch64ISD::DUP)
2306 return false;
2307
2308 auto Opnd0 = N->getOperand(0);
2309 auto *CINT = dyn_cast<ConstantSDNode>(Opnd0);
2310 auto *CFP = dyn_cast<ConstantFPSDNode>(Opnd0);
2311 return (CINT && CINT->isNullValue()) || (CFP && CFP->isZero());
2312}
2313
2314/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
2315/// CC
2316static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
2317 switch (CC) {
2318 default:
2319 llvm_unreachable("Unknown condition code!");
2320 case ISD::SETNE:
2321 return AArch64CC::NE;
2322 case ISD::SETEQ:
2323 return AArch64CC::EQ;
2324 case ISD::SETGT:
2325 return AArch64CC::GT;
2326 case ISD::SETGE:
2327 return AArch64CC::GE;
2328 case ISD::SETLT:
2329 return AArch64CC::LT;
2330 case ISD::SETLE:
2331 return AArch64CC::LE;
2332 case ISD::SETUGT:
2333 return AArch64CC::HI;
2334 case ISD::SETUGE:
2335 return AArch64CC::HS;
2336 case ISD::SETULT:
2337 return AArch64CC::LO;
2338 case ISD::SETULE:
2339 return AArch64CC::LS;
2340 }
2341}
2342
2343/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
2344static void changeFPCCToAArch64CC(ISD::CondCode CC,
2345 AArch64CC::CondCode &CondCode,
2346 AArch64CC::CondCode &CondCode2) {
2347 CondCode2 = AArch64CC::AL;
2348 switch (CC) {
2349 default:
2350 llvm_unreachable("Unknown FP condition!");
2351 case ISD::SETEQ:
2352 case ISD::SETOEQ:
2353 CondCode = AArch64CC::EQ;
2354 break;
2355 case ISD::SETGT:
2356 case ISD::SETOGT:
2357 CondCode = AArch64CC::GT;
2358 break;
2359 case ISD::SETGE:
2360 case ISD::SETOGE:
2361 CondCode = AArch64CC::GE;
2362 break;
2363 case ISD::SETOLT:
2364 CondCode = AArch64CC::MI;
2365 break;
2366 case ISD::SETOLE:
2367 CondCode = AArch64CC::LS;
2368 break;
2369 case ISD::SETONE:
2370 CondCode = AArch64CC::MI;
2371 CondCode2 = AArch64CC::GT;
2372 break;
2373 case ISD::SETO:
2374 CondCode = AArch64CC::VC;
2375 break;
2376 case ISD::SETUO:
2377 CondCode = AArch64CC::VS;
2378 break;
2379 case ISD::SETUEQ:
2380 CondCode = AArch64CC::EQ;
2381 CondCode2 = AArch64CC::VS;
2382 break;
2383 case ISD::SETUGT:
2384 CondCode = AArch64CC::HI;
2385 break;
2386 case ISD::SETUGE:
2387 CondCode = AArch64CC::PL;
2388 break;
2389 case ISD::SETLT:
2390 case ISD::SETULT:
2391 CondCode = AArch64CC::LT;
2392 break;
2393 case ISD::SETLE:
2394 case ISD::SETULE:
2395 CondCode = AArch64CC::LE;
2396 break;
2397 case ISD::SETNE:
2398 case ISD::SETUNE:
2399 CondCode = AArch64CC::NE;
2400 break;
2401 }
2402}
2403
2404/// Convert a DAG fp condition code to an AArch64 CC.
2405/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
2406/// should be AND'ed instead of OR'ed.
2407static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
2408 AArch64CC::CondCode &CondCode,
2409 AArch64CC::CondCode &CondCode2) {
2410 CondCode2 = AArch64CC::AL;
2411 switch (CC) {
2412 default:
2413 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2414 assert(CondCode2 == AArch64CC::AL);
2415 break;
2416 case ISD::SETONE:
2417 // (a one b)
2418 // == ((a olt b) || (a ogt b))
2419 // == ((a ord b) && (a une b))
2420 CondCode = AArch64CC::VC;
2421 CondCode2 = AArch64CC::NE;
2422 break;
2423 case ISD::SETUEQ:
2424 // (a ueq b)
2425 // == ((a uno b) || (a oeq b))
2426 // == ((a ule b) && (a uge b))
2427 CondCode = AArch64CC::PL;
2428 CondCode2 = AArch64CC::LE;
2429 break;
2430 }
2431}
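The SETONE and SETUEQ cases above rely on the identities spelled out in their comments. The short standalone check below (helper names are ad hoc, not LLVM APIs) verifies the SETONE form "(a ord b) && (a une b)" against a direct definition of "one", including NaN operands.

#include <cassert>
#include <cmath>

static bool one(double A, double B) { return !std::isnan(A) && !std::isnan(B) && A != B; }
static bool ord(double A, double B) { return !std::isnan(A) && !std::isnan(B); }
static bool une(double A, double B) { return std::isnan(A) || std::isnan(B) || A != B; }

int main() {
  const double Vals[] = {0.0, 1.0, -1.0, std::nan("")};
  for (double A : Vals)
    for (double B : Vals)
      assert(one(A, B) == (ord(A, B) && une(A, B)));
  return 0;
}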
2432
2433/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
2434/// CC usable with the vector instructions. Fewer operations are available
2435/// without a real NZCV register, so we have to use less efficient combinations
2436/// to get the same effect.
2437static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
2438 AArch64CC::CondCode &CondCode,
2439 AArch64CC::CondCode &CondCode2,
2440 bool &Invert) {
2441 Invert = false;
2442 switch (CC) {
2443 default:
2444 // Mostly the scalar mappings work fine.
2445 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
2446 break;
2447 case ISD::SETUO:
2448 Invert = true;
2449 LLVM_FALLTHROUGH;
2450 case ISD::SETO:
2451 CondCode = AArch64CC::MI;
2452 CondCode2 = AArch64CC::GE;
2453 break;
2454 case ISD::SETUEQ:
2455 case ISD::SETULT:
2456 case ISD::SETULE:
2457 case ISD::SETUGT:
2458 case ISD::SETUGE:
2459 // All of the compare-mask comparisons are ordered, but we can switch
2460 // between the two by a double inversion. E.g. ULE == !OGT.
2461 Invert = true;
2462 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
2463 CondCode, CondCode2);
2464 break;
2465 }
2466}
2467
2468static bool isLegalArithImmed(uint64_t C) {
2469 // Matches AArch64DAGToDAGISel::SelectArithImmed().
2470 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
2471 LLVM_DEBUG(dbgs() << "Is imm " << C
2472 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
2473 return IsLegal;
2474}
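// A few worked values for the predicate above (a 12-bit immediate, optionally
// shifted left by 12); these calls are purely illustrative, not code from this
// file:
//   isLegalArithImmed(0xFFF);      // true:  fits in the low 12 bits
//   isLegalArithImmed(0x1000);     // true:  low 12 bits clear, fits "#imm, lsl #12"
//   isLegalArithImmed(0x1001);     // false: needs bits from both halves
//   isLegalArithImmed(0xFFF000);   // true:  shifted 12-bit immediate
//   isLegalArithImmed(0x1000000);  // false: too wide even when shifted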
2475
2476// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
2477// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
2478// can be set differently by this operation. It comes down to whether
2479// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are, then
2480// everything is fine. If not, then the optimization is wrong. Thus general
2481// comparisons are only valid if op2 != 0.
2482//
2483// So, finally, the only LLVM-native comparisons that don't mention C and V
2484// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
2485// the absence of information about op2.
2486static bool isCMN(SDValue Op, ISD::CondCode CC) {
2487 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
2488 (CC == ISD::SETEQ || CC == ISD::SETNE);
2489}
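// As an illustrative sketch (not from this file): for
//   %neg = sub i64 0, %b
//   %cmp = icmp eq i64 %a, %neg
// the compare can become "cmn x0, x1", since only the Z flag matters for
// EQ/NE and it is the same for SUBS and ADDS here. For the other condition
// codes the C and V flags produced by the two forms can differ (see the
// op2 != 0 caveat above), so the fold is restricted to SETEQ/SETNE.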
2490
2491static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
2492 SelectionDAG &DAG, SDValue Chain,
2493 bool IsSignaling) {
2494 EVT VT = LHS.getValueType();
2495 assert(VT != MVT::f128);
2496 assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
2497 unsigned Opcode =
2498 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
2499 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
2500}
2501
2502static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2503 const SDLoc &dl, SelectionDAG &DAG) {
2504 EVT VT = LHS.getValueType();
2505 const bool FullFP16 =
2506 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2507
2508 if (VT.isFloatingPoint()) {
2509 assert(VT != MVT::f128);
2510 if (VT == MVT::f16 && !FullFP16) {
2511 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
2512 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
2513 VT = MVT::f32;
2514 }
2515 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
2516 }
2517
2518 // The CMP instruction is just an alias for SUBS, and representing it as
2519 // SUBS means that it's possible to get CSE with subtract operations.
2520 // A later phase can perform the optimization of setting the destination
2521 // register to WZR/XZR if it ends up being unused.
2522 unsigned Opcode = AArch64ISD::SUBS;
2523
2524 if (isCMN(RHS, CC)) {
2525 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
2526 Opcode = AArch64ISD::ADDS;
2527 RHS = RHS.getOperand(1);
2528 } else if (isCMN(LHS, CC)) {
2529 // As we are looking for EQ/NE compares, the operands can be commuted; can
2530 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
2531 Opcode = AArch64ISD::ADDS;
2532 LHS = LHS.getOperand(1);
2533 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
2534 if (LHS.getOpcode() == ISD::AND) {
2535 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
2536 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
2537 // of the signed comparisons.
2538 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
2539 DAG.getVTList(VT, MVT_CC),
2540 LHS.getOperand(0),
2541 LHS.getOperand(1));
2542 // Replace all users of (and X, Y) with newly generated (ands X, Y)
2543 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
2544 return ANDSNode.getValue(1);
2545 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
2546 // Use result of ANDS
2547 return LHS.getValue(1);
2548 }
2549 }
2550
2551 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
2552 .getValue(1);
2553}
2554
2555/// \defgroup AArch64CCMP CMP;CCMP matching
2556///
2557/// These functions deal with the formation of CMP;CCMP;... sequences.
2558/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
2559/// a comparison. They set the NZCV flags to a predefined value if their
2560/// predicate is false. This allows us to express arbitrary conjunctions, for
2561/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
2562/// expressed as:
2563/// cmp A
2564/// ccmp B, inv(CB), CA
2565/// check for CB flags
2566///
2567/// This naturally lets us implement chains of AND operations with SETCC
2568/// operands. And we can even implement some other situations by transforming
2569/// them:
2570/// - We can implement (NEG SETCC) i.e. negating a single comparison by
2571/// negating the flags used in a CCMP/FCCMP operation.
2572/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
2573/// by negating the flags we test for afterwards. i.e.
2574/// NEG (CMP CCMP CCCMP ...) can be implemented.
2575/// - Note that we can only ever negate all previously processed results.
2576/// What we can not implement by flipping the flags to test is a negation
2577/// of two sub-trees (because the negation affects all sub-trees emitted so
2578/// far, so the 2nd sub-tree we emit would also affect the first).
2579/// With those tools we can implement some OR operations:
2580/// - (OR (SETCC A) (SETCC B)) can be implemented via:
2581/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
2582/// - After transforming OR to NEG/AND combinations we may be able to use NEG
2583/// elimination rules from earlier to implement the whole thing as a
2584/// CCMP/FCCMP chain.
2585///
2586/// As complete example:
2587/// or (or (setCA (cmp A)) (setCB (cmp B)))
2588/// (and (setCC (cmp C)) (setCD (cmp D)))"
2589/// can be reassociated to:
2590/// or (and (setCC (cmp C)) (setCD (cmp D)))
2591/// (or (setCA (cmp A)) (setCB (cmp B)))
2592/// can be transformed to:
2593/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
2594/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
2595/// which can be implemented as:
2596/// cmp C
2597/// ccmp D, inv(CD), CC
2598/// ccmp A, CA, inv(CD)
2599/// ccmp B, CB, inv(CA)
2600/// check for CB flags
2601///
2602/// A counterexample is "or (and A B) (and C D)", which translates to
2603/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
2604/// can only implement one of the inner (not) operations, but not both!
2605/// @{
2606
2607/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
2608static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
2609 ISD::CondCode CC, SDValue CCOp,
2610 AArch64CC::CondCode Predicate,
2611 AArch64CC::CondCode OutCC,
2612 const SDLoc &DL, SelectionDAG &DAG) {
2613 unsigned Opcode = 0;
2614 const bool FullFP16 =
2615 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
2616
2617 if (LHS.getValueType().isFloatingPoint()) {
2618 assert(LHS.getValueType() != MVT::f128);
2619 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
2620 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
2621 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
2622 }
2623 Opcode = AArch64ISD::FCCMP;
2624 } else if (RHS.getOpcode() == ISD::SUB) {
2625 SDValue SubOp0 = RHS.getOperand(0);
2626 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
2627 // See emitComparison() on why we can only do this for SETEQ and SETNE.
2628 Opcode = AArch64ISD::CCMN;
2629 RHS = RHS.getOperand(1);
2630 }
2631 }
2632 if (Opcode == 0)
2633 Opcode = AArch64ISD::CCMP;
2634
2635 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
2636 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
2637 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
2638 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
2639 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
2640}
2641
2642/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
2643/// expressed as a conjunction. See \ref AArch64CCMP.
2644/// \param CanNegate Set to true if we can negate the whole sub-tree just by
2645/// changing the conditions on the SETCC tests.
2646/// (this means we can call emitConjunctionRec() with
2647/// Negate==true on this sub-tree)
2648/// \param MustBeFirst Set to true if this subtree needs to be negated and we
2649/// cannot do the negation naturally. We are required to
2650/// emit the subtree first in this case.
2651/// \param WillNegate Is true if we are called when the result of this
2652/// subexpression must be negated. This happens when the
2653/// outer expression is an OR. We can use this fact to know
2654/// that we have a double negation (or (or ...) ...) that
2655/// can be implemented for free.
2656static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
2657 bool &MustBeFirst, bool WillNegate,
2658 unsigned Depth = 0) {
2659 if (!Val.hasOneUse())
2660 return false;
2661 unsigned Opcode = Val->getOpcode();
2662 if (Opcode == ISD::SETCC) {
2663 if (Val->getOperand(0).getValueType() == MVT::f128)
2664 return false;
2665 CanNegate = true;
2666 MustBeFirst = false;
2667 return true;
2668 }
2669 // Protect against exponential runtime and stack overflow.
2670 if (Depth > 6)
2671 return false;
2672 if (Opcode == ISD::AND || Opcode == ISD::OR) {
2673 bool IsOR = Opcode == ISD::OR;
2674 SDValue O0 = Val->getOperand(0);
2675 SDValue O1 = Val->getOperand(1);
2676 bool CanNegateL;
2677 bool MustBeFirstL;
2678 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
2679 return false;
2680 bool CanNegateR;
2681 bool MustBeFirstR;
2682 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
2683 return false;
2684
2685 if (MustBeFirstL && MustBeFirstR)
2686 return false;
2687
2688 if (IsOR) {
2689 // For an OR expression we need to be able to naturally negate at least
2690 // one side or we cannot do the transformation at all.
2691 if (!CanNegateL && !CanNegateR)
2692 return false;
2693 // If the result of the OR will be negated and we can naturally negate
2694 // the leaves, then this sub-tree as a whole negates naturally.
2695 CanNegate = WillNegate && CanNegateL && CanNegateR;
2696 // If we cannot naturally negate the whole sub-tree, then this must be
2697 // emitted first.
2698 MustBeFirst = !CanNegate;
2699 } else {
2700 assert(Opcode == ISD::AND && "Must be OR or AND");
2701 // We cannot naturally negate an AND operation.
2702 CanNegate = false;
2703 MustBeFirst = MustBeFirstL || MustBeFirstR;
2704 }
2705 return true;
2706 }
2707 return false;
2708}
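// Illustrative tree shapes for the check above (assuming single-use SETCC
// leaves and no f128 operands; these examples are not from this file):
//   (and (setcc a) (setcc b))            -> emittable, CanNegate = false
//   (or  (setcc a) (setcc b))            -> emittable; under an outer OR
//                                           (WillNegate) it negates naturally
//   (or  (and (setcc a) (setcc b))
//        (and (setcc c) (setcc d)))      -> rejected: neither AND side of the
//                                           OR can be negated naturally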
2709
2710/// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a
2711/// chain of CCMP/FCCMP ops. See @ref AArch64CCMP.
2712/// Tries to transform the given i1-producing node @p Val to a series of
2713/// compare and conditional compare operations. @returns an NZCV-flags-producing
2714/// node and sets @p OutCC to the flags that should be tested, or returns
2715/// SDValue() if the transformation was not possible.
2716/// \p Negate is true if we want this sub-tree being negated just by changing
2717/// SETCC conditions.
2718static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
2719 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
2720 AArch64CC::CondCode Predicate) {
2721 // We're at a tree leaf, produce a conditional comparison operation.
2722 unsigned Opcode = Val->getOpcode();
2723 if (Opcode == ISD::SETCC) {
2724 SDValue LHS = Val->getOperand(0);
2725 SDValue RHS = Val->getOperand(1);
2726 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
2727 bool isInteger = LHS.getValueType().isInteger();
2728 if (Negate)
2729 CC = getSetCCInverse(CC, LHS.getValueType());
2730 SDLoc DL(Val);
2731 // Determine OutCC and handle FP special case.
2732 if (isInteger) {
2733 OutCC = changeIntCCToAArch64CC(CC);
2734 } else {
2735 assert(LHS.getValueType().isFloatingPoint());
2736 AArch64CC::CondCode ExtraCC;
2737 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
2738 // Some floating point conditions can't be tested with a single condition
2739 // code. Construct an additional comparison in this case.
2740 if (ExtraCC != AArch64CC::AL) {
2741 SDValue ExtraCmp;
2742 if (!CCOp.getNode())
2743 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
2744 else
2745 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
2746 ExtraCC, DL, DAG);
2747 CCOp = ExtraCmp;
2748 Predicate = ExtraCC;
2749 }
2750 }
2751
2752 // Produce a normal comparison if we are first in the chain
2753 if (!CCOp)
2754 return emitComparison(LHS, RHS, CC, DL, DAG);
2755 // Otherwise produce a ccmp.
2756 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2757 DAG);
2758 }
2759 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2760
2761 bool IsOR = Opcode == ISD::OR;
2762
2763 SDValue LHS = Val->getOperand(0);
2764 bool CanNegateL;
2765 bool MustBeFirstL;
2766 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2767 assert(ValidL && "Valid conjunction/disjunction tree");
2768 (void)ValidL;
2769
2770 SDValue RHS = Val->getOperand(1);
2771 bool CanNegateR;
2772 bool MustBeFirstR;
2773 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2774 assert(ValidR && "Valid conjunction/disjunction tree");
2775 (void)ValidR;
2776
2777 // Swap sub-tree that must come first to the right side.
2778 if (MustBeFirstL) {
2779 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2780 std::swap(LHS, RHS);
2781 std::swap(CanNegateL, CanNegateR);
2782 std::swap(MustBeFirstL, MustBeFirstR);
2783 }
2784
2785 bool NegateR;
2786 bool NegateAfterR;
2787 bool NegateL;
2788 bool NegateAfterAll;
2789 if (Opcode == ISD::OR) {
2790 // Swap the sub-tree that we can negate naturally to the left.
2791 if (!CanNegateL) {
2792 assert(CanNegateR && "at least one side must be negatable");
2793 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2794 assert(!Negate);
2795 std::swap(LHS, RHS);
2796 NegateR = false;
2797 NegateAfterR = true;
2798 } else {
2799 // Negate the left sub-tree if possible, otherwise negate the result.
2800 NegateR = CanNegateR;
2801 NegateAfterR = !CanNegateR;
2802 }
2803 NegateL = true;
2804 NegateAfterAll = !Negate;
2805 } else {
2806 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2807 assert(!Negate && "Valid conjunction/disjunction tree");
2808
2809 NegateL = false;
2810 NegateR = false;
2811 NegateAfterR = false;
2812 NegateAfterAll = false;
2813 }
2814
2815 // Emit sub-trees.
2816 AArch64CC::CondCode RHSCC;
2817 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2818 if (NegateAfterR)
2819 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2820 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2821 if (NegateAfterAll)
2822 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2823 return CmpL;
2824}
2825
2826/// Emit an expression as a conjunction (a series of CCMP/FCCMP ops).
2827/// In some cases this is even possible with OR operations in the expression.
2828/// See \ref AArch64CCMP.
2829/// \see emitConjunctionRec().
2830static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2831 AArch64CC::CondCode &OutCC) {
2832 bool DummyCanNegate;
2833 bool DummyMustBeFirst;
2834 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2835 return SDValue();
2836
2837 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2838}
2839
2840/// @}
2841
2842/// Returns how profitable it is to fold a comparison's operand's shift and/or
2843/// extension operations.
2844static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2845 auto isSupportedExtend = [&](SDValue V) {
2846 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2847 return true;
2848
2849 if (V.getOpcode() == ISD::AND)
2850 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2851 uint64_t Mask = MaskCst->getZExtValue();
2852 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2853 }
2854
2855 return false;
2856 };
2857
2858 if (!Op.hasOneUse())
2859 return 0;
2860
2861 if (isSupportedExtend(Op))
2862 return 1;
2863
2864 unsigned Opc = Op.getOpcode();
2865 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2866 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2867 uint64_t Shift = ShiftCst->getZExtValue();
2868 if (isSupportedExtend(Op.getOperand(0)))
2869 return (Shift <= 4) ? 2 : 1;
2870 EVT VT = Op.getValueType();
2871 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2872 return 1;
2873 }
2874
2875 return 0;
2876}
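// Rough scoring examples under the rules above (hypothetical operands, for
// illustration only):
//   (and x, 0xFF)                 -> 1   (zero-extension foldable, e.g. uxtb)
//   (shl (and x, 0xFFFF), 2)      -> 2   (extend plus small shift, e.g. uxth #2)
//   (shl x:i64, 40)               -> 1   (plain shifted register)
//   any operand with extra uses   -> 0   (folding would duplicate work)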
2877
2878static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2879 SDValue &AArch64cc, SelectionDAG &DAG,
2880 const SDLoc &dl) {
2881 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2882 EVT VT = RHS.getValueType();
2883 uint64_t C = RHSC->getZExtValue();
2884 if (!isLegalArithImmed(C)) {
2885 // Constant does not fit, try adjusting it by one?
2886 switch (CC) {
2887 default:
2888 break;
2889 case ISD::SETLT:
2890 case ISD::SETGE:
2891 if ((VT == MVT::i32 && C != 0x80000000 &&
2892 isLegalArithImmed((uint32_t)(C - 1))) ||
2893 (VT == MVT::i64 && C != 0x80000000ULL &&
2894 isLegalArithImmed(C - 1ULL))) {
2895 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2896 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2897 RHS = DAG.getConstant(C, dl, VT);
2898 }
2899 break;
2900 case ISD::SETULT:
2901 case ISD::SETUGE:
2902 if ((VT == MVT::i32 && C != 0 &&
2903 isLegalArithImmed((uint32_t)(C - 1))) ||
2904 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2905 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2906 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2907 RHS = DAG.getConstant(C, dl, VT);
2908 }
2909 break;
2910 case ISD::SETLE:
2911 case ISD::SETGT:
2912 if ((VT == MVT::i32 && C != INT32_MAX &&
2913 isLegalArithImmed((uint32_t)(C + 1))) ||
2914 (VT == MVT::i64 && C != INT64_MAX &&
2915 isLegalArithImmed(C + 1ULL))) {
2916 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2917 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2918 RHS = DAG.getConstant(C, dl, VT);
2919 }
2920 break;
2921 case ISD::SETULE:
2922 case ISD::SETUGT:
2923 if ((VT == MVT::i32 && C != UINT32_MAX &&
2924 isLegalArithImmed((uint32_t)(C + 1))) ||
2925 (VT == MVT::i64 && C != UINT64_MAX &&
2926 isLegalArithImmed(C + 1ULL))) {
2927 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2928 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2929 RHS = DAG.getConstant(C, dl, VT);
2930 }
2931 break;
2932 }
2933 }
2934 }
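// A concrete instance of the adjustment above (illustrative): "x s< 4097"
// cannot use 4097 as an ADDS/SUBS immediate, but it is equivalent to
// "x s<= 4096", and 4096 is encodable as "#1, lsl #12", so the condition and
// constant are rewritten together instead of materializing 4097 in a register.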
2935
2936 // Comparisons are canonicalized so that the RHS operand is simpler than the
2937 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2938 // can fold some shift+extend operations on the RHS operand, so swap the
2939 // operands if that can be done.
2940 //
2941 // For example:
2942 // lsl w13, w11, #1
2943 // cmp w13, w12
2944 // can be turned into:
2945 // cmp w12, w11, lsl #1
2946 if (!isa<ConstantSDNode>(RHS) ||
2947 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2948 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2949
2950 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2951 std::swap(LHS, RHS);
2952 CC = ISD::getSetCCSwappedOperands(CC);
2953 }
2954 }
2955
2956 SDValue Cmp;
2957 AArch64CC::CondCode AArch64CC;
2958 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2959 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2960
2961 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2962 // For the i8 operand, the largest immediate is 255, so this can be easily
2963 // encoded in the compare instruction. For the i16 operand, however, the
2964 // largest immediate cannot be encoded in the compare.
2965 // Therefore, use a sign extending load and cmn to avoid materializing the
2966 // -1 constant. For example,
2967 // movz w1, #65535
2968 // ldrh w0, [x0, #0]
2969 // cmp w0, w1
2970 // >
2971 // ldrsh w0, [x0, #0]
2972 // cmn w0, #1
2973 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2974 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2975 // ensure both the LHS and RHS are truly zero extended and to make sure the
2976 // transformation is profitable.
2977 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2978 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2979 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2980 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2981 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2982 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2983 SDValue SExt =
2984 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2985 DAG.getValueType(MVT::i16));
2986 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2987 RHS.getValueType()),
2988 CC, dl, DAG);
2989 AArch64CC = changeIntCCToAArch64CC(CC);
2990 }
2991 }
2992
2993 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2994 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2995 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2996 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2997 }
2998 }
2999 }
3000
3001 if (!Cmp) {
3002 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
3003 AArch64CC = changeIntCCToAArch64CC(CC);
3004 }
3005 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
3006 return Cmp;
3007}
3008
3009static std::pair<SDValue, SDValue>
3010getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
3011 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
3012 "Unsupported value type");
3013 SDValue Value, Overflow;
3014 SDLoc DL(Op);
3015 SDValue LHS = Op.getOperand(0);
3016 SDValue RHS = Op.getOperand(1);
3017 unsigned Opc = 0;
3018 switch (Op.getOpcode()) {
3019 default:
3020 llvm_unreachable("Unknown overflow instruction!");
3021 case ISD::SADDO:
3022 Opc = AArch64ISD::ADDS;
3023 CC = AArch64CC::VS;
3024 break;
3025 case ISD::UADDO:
3026 Opc = AArch64ISD::ADDS;
3027 CC = AArch64CC::HS;
3028 break;
3029 case ISD::SSUBO:
3030 Opc = AArch64ISD::SUBS;
3031 CC = AArch64CC::VS;
3032 break;
3033 case ISD::USUBO:
3034 Opc = AArch64ISD::SUBS;
3035 CC = AArch64CC::LO;
3036 break;
3037 // Multiply needs a little bit extra work.
3038 case ISD::SMULO:
3039 case ISD::UMULO: {
3040 CC = AArch64CC::NE;
3041 bool IsSigned = Op.getOpcode() == ISD::SMULO;
3042 if (Op.getValueType() == MVT::i32) {
3043 // Extend to 64-bits, then perform a 64-bit multiply.
3044 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3045 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
3046 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
3047 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3048 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3049
3050 // Check that the result fits into a 32-bit integer.
3051 SDVTList VTs = DAG.getVTList(MVT::i64, MVT_CC);
3052 if (IsSigned) {
3053 // cmp xreg, wreg, sxtw
3054 SDValue SExtMul = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Value);
3055 Overflow =
3056 DAG.getNode(AArch64ISD::SUBS, DL, VTs, Mul, SExtMul).getValue(1);
3057 } else {
3058 // tst xreg, #0xffffffff00000000
3059 SDValue UpperBits = DAG.getConstant(0xFFFFFFFF00000000, DL, MVT::i64);
3060 Overflow =
3061 DAG.getNode(AArch64ISD::ANDS, DL, VTs, Mul, UpperBits).getValue(1);
3062 }
3063 break;
3064 }
3065 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
3066 // For the 64 bit multiply
3067 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
3068 if (IsSigned) {
3069 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
3070 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
3071 DAG.getConstant(63, DL, MVT::i64));
3072 // It is important that LowerBits is last, otherwise the arithmetic
3073 // shift will not be folded into the compare (SUBS).
3074 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3075 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
3076 .getValue(1);
3077 } else {
3078 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
3079 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
3080 Overflow =
3081 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
3082 DAG.getConstant(0, DL, MVT::i64),
3083 UpperBits).getValue(1);
3084 }
3085 break;
3086 }
3087 } // switch (...)
3088
3089 if (Opc) {
3090 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
3091
3092 // Emit the AArch64 operation with overflow check.
3093 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
3094 Overflow = Value.getValue(1);
3095 }
3096 return std::make_pair(Value, Overflow);
3097}
3098
3099SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
3100 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
3101 return LowerToScalableOp(Op, DAG);
3102
3103 SDValue Sel = Op.getOperand(0);
3104 SDValue Other = Op.getOperand(1);
3105 SDLoc dl(Sel);
3106
3107 // If the operand is an overflow checking operation, invert the condition
3108 // code and kill the Not operation. I.e., transform:
3109 // (xor (overflow_op_bool, 1))
3110 // -->
3111 // (csel 1, 0, invert(cc), overflow_op_bool)
3112 // ... which later gets transformed to just a cset instruction with an
3113 // inverted condition code, rather than a cset + eor sequence.
3114 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
3115 // Only lower legal XALUO ops.
3116 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
3117 return SDValue();
3118
3119 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3120 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3121 AArch64CC::CondCode CC;
3122 SDValue Value, Overflow;
3123 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
3124 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3125 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
3126 CCVal, Overflow);
3127 }
3128 // If neither operand is a SELECT_CC, give up.
3129 if (Sel.getOpcode() != ISD::SELECT_CC)
3130 std::swap(Sel, Other);
3131 if (Sel.getOpcode() != ISD::SELECT_CC)
3132 return Op;
3133
3134 // The folding we want to perform is:
3135 // (xor x, (select_cc a, b, cc, 0, -1) )
3136 // -->
3137 // (csel x, (xor x, -1), cc ...)
3138 //
3139 // The latter will get matched to a CSINV instruction.
3140
3141 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
3142 SDValue LHS = Sel.getOperand(0);
3143 SDValue RHS = Sel.getOperand(1);
3144 SDValue TVal = Sel.getOperand(2);
3145 SDValue FVal = Sel.getOperand(3);
3146
3147 // FIXME: This could be generalized to non-integer comparisons.
3148 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
3149 return Op;
3150
3151 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
3152 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
3153
3154 // The values aren't constants, this isn't the pattern we're looking for.
3155 if (!CFVal || !CTVal)
3156 return Op;
3157
3158 // We can commute the SELECT_CC by inverting the condition. This
3159 // might be needed to make this fit into a CSINV pattern.
3160 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
3161 std::swap(TVal, FVal);
3162 std::swap(CTVal, CFVal);
3163 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
3164 }
3165
3166 // If the constants line up, perform the transform!
3167 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
3168 SDValue CCVal;
3169 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
3170
3171 FVal = Other;
3172 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
3173 DAG.getConstant(-1ULL, dl, Other.getValueType()));
3174
3175 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
3176 CCVal, Cmp);
3177 }
3178
3179 return Op;
3180}
3181
3182static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
3183 EVT VT = Op.getValueType();
3184
3185 // Let legalize expand this if it isn't a legal type yet.
3186 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3187 return SDValue();
3188
3189 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3190
3191 unsigned Opc;
3192 bool ExtraOp = false;
3193 switch (Op.getOpcode()) {
3194 default:
3195 llvm_unreachable("Invalid code");
3196 case ISD::ADDC:
3197 Opc = AArch64ISD::ADDS;
3198 break;
3199 case ISD::SUBC:
3200 Opc = AArch64ISD::SUBS;
3201 break;
3202 case ISD::ADDE:
3203 Opc = AArch64ISD::ADCS;
3204 ExtraOp = true;
3205 break;
3206 case ISD::SUBE:
3207 Opc = AArch64ISD::SBCS;
3208 ExtraOp = true;
3209 break;
3210 }
3211
3212 if (!ExtraOp)
3213 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
3214 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
3215 Op.getOperand(2));
3216}
3217
3218static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
3219 // Let legalize expand this if it isn't a legal type yet.
3220 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3221 return SDValue();
3222
3223 SDLoc dl(Op);
3224 AArch64CC::CondCode CC;
3225 // The actual operation that sets the overflow or carry flag.
3226 SDValue Value, Overflow;
3227 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
3228
3229 // We use 0 and 1 as false and true values.
3230 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3231 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3232
3233 // We use an inverted condition, because the conditional select is inverted
3234 // too. This will allow it to be selected to a single instruction:
3235 // CSINC Wd, WZR, WZR, invert(cond).
3236 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
3237 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
3238 CCVal, Overflow);
3239
3240 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3241 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3242}
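// For example (an illustrative sketch, not output captured from this file), a
// 32-bit @llvm.uadd.with.overflow.i32 would typically lower to:
//   adds w0, w0, w1        // value; C is set on unsigned overflow
//   cset w1, hs            // i.e. CSINC w1, wzr, wzr, lo, from the inverted
//                          // condition CSEL emitted above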
3243
3244// Prefetch operands are:
3245// 1: Address to prefetch
3246// 2: bool isWrite
3247// 3: int locality (0 = no locality ... 3 = extreme locality)
3248// 4: bool isDataCache
3249static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
3250 SDLoc DL(Op);
3251 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
3252 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
3253 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3254
3255 bool IsStream = !Locality;
3256 // When the locality number is set
3257 if (Locality) {
3258 // The front-end should have filtered out the out-of-range values
3259 assert(Locality <= 3 && "Prefetch locality out-of-range");
3260 // The locality degree is the opposite of the cache speed.
3261 // Put the number the other way around.
3262 // The encoding starts at 0 for level 1
3263 Locality = 3 - Locality;
3264 }
3265
3266 // Build the mask value encoding the expected behavior.
3267 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
3268 (!IsData << 3) | // IsDataCache bit
3269 (Locality << 1) | // Cache level bits
3270 (unsigned)IsStream; // Stream bit
3271 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
3272 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
3273}
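// Worked encodings of PrfOp under the scheme above (illustrative values):
//   read,  locality 3, data  -> IsStream=0, Locality=0 -> 0b00000 (PLDL1KEEP)
//   read,  locality 0, data  -> IsStream=1, Locality=0 -> 0b00001 (PLDL1STRM)
//   write, locality 1, data  -> IsStream=0, Locality=2 -> 0b10100 (PSTL3KEEP)
// matching the PRFM <prfop> field layout (type, target cache level, policy).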
3274
3275SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
3276 SelectionDAG &DAG) const {
3277 EVT VT = Op.getValueType();
3278 if (VT.isScalableVector())
3279 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_EXTEND_MERGE_PASSTHRU);
3280
3281 if (useSVEForFixedLengthVectorVT(VT))
3282 return LowerFixedLengthFPExtendToSVE(Op, DAG);
3283
3284 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
3285 return SDValue();
3286}
3287
3288SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
3289 SelectionDAG &DAG) const {
3290 if (Op.getValueType().isScalableVector())
3291 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FP_ROUND_MERGE_PASSTHRU);
3292
3293 bool IsStrict = Op->isStrictFPOpcode();
3294 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3295 EVT SrcVT = SrcVal.getValueType();
3296
3297 if (useSVEForFixedLengthVectorVT(SrcVT))
3298 return LowerFixedLengthFPRoundToSVE(Op, DAG);
3299
3300 if (SrcVT != MVT::f128) {
3301 // Expand cases where the input is a vector bigger than NEON.
3302 if (useSVEForFixedLengthVectorVT(SrcVT))
3303 return SDValue();
3304
3305 // It's legal except when f128 is involved
3306 return Op;
3307 }
3308
3309 return SDValue();
3310}
3311
3312SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
3313 SelectionDAG &DAG) const {
3314 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3315 // Any additional optimization in this function should be recorded
3316 // in the cost tables.
3317 EVT InVT = Op.getOperand(0).getValueType();
3318 EVT VT = Op.getValueType();
3319
3320 if (VT.isScalableVector()) {
3321 unsigned Opcode = Op.getOpcode() == ISD::FP_TO_UINT
3322 ? AArch64ISD::FCVTZU_MERGE_PASSTHRU
3323 : AArch64ISD::FCVTZS_MERGE_PASSTHRU;
3324 return LowerToPredicatedOp(Op, DAG, Opcode);
3325 }
3326
3327 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3328 return LowerFixedLengthFPToIntToSVE(Op, DAG);
3329
3330 unsigned NumElts = InVT.getVectorNumElements();
3331
3332 // f16 conversions are promoted to f32 when full fp16 is not supported.
3333 if (InVT.getVectorElementType() == MVT::f16 &&
3334 !Subtarget->hasFullFP16()) {
3335 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
3336 SDLoc dl(Op);
3337 return DAG.getNode(
3338 Op.getOpcode(), dl, Op.getValueType(),
3339 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
3340 }
3341
3342 uint64_t VTSize = VT.getFixedSizeInBits();
3343 uint64_t InVTSize = InVT.getFixedSizeInBits();
3344 if (VTSize < InVTSize) {
3345 SDLoc dl(Op);
3346 SDValue Cv =
3347 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
3348 Op.getOperand(0));
3349 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
3350 }
3351
3352 if (VTSize > InVTSize) {
3353 SDLoc dl(Op);
3354 MVT ExtVT =
3355 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
3356 VT.getVectorNumElements());
3357 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
3358 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
3359 }
3360
3361 // Type changing conversions are illegal.
3362 return Op;
3363}
3364
3365SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 bool IsStrict = Op->isStrictFPOpcode();
3368 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3369
3370 if (SrcVal.getValueType().isVector())
3371 return LowerVectorFP_TO_INT(Op, DAG);
3372
3373 // f16 conversions are promoted to f32 when full fp16 is not supported.
3374 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
3375 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3376 SDLoc dl(Op);
3377 return DAG.getNode(
3378 Op.getOpcode(), dl, Op.getValueType(),
3379 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
3380 }
3381
3382 if (SrcVal.getValueType() != MVT::f128) {
3383 // It's legal except when f128 is involved
3384 return Op;
3385 }
3386
3387 return SDValue();
3388}
3389
3390SDValue
3391AArch64TargetLowering::LowerVectorFP_TO_INT_SAT(SDValue Op,
3392 SelectionDAG &DAG) const {
3393 // AArch64 FP-to-int conversions saturate to the destination element size, so
3394 // we can lower common saturating conversions to simple instructions.
3395 SDValue SrcVal = Op.getOperand(0);
3396 EVT SrcVT = SrcVal.getValueType();
3397 EVT DstVT = Op.getValueType();
3398 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3399
3400 uint64_t SrcElementWidth = SrcVT.getScalarSizeInBits();
3401 uint64_t DstElementWidth = DstVT.getScalarSizeInBits();
3402 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3403 assert(SatWidth <= DstElementWidth &&
3404 "Saturation width cannot exceed result width");
3405
3406 // TODO: Consider lowering to SVE operations, as in LowerVectorFP_TO_INT.
3407 // Currently, the `llvm.fpto[su]i.sat.*` instrinsics don't accept scalable
3408 // types, so this is hard to reach.
3409 if (DstVT.isScalableVector())
3410 return SDValue();
3411
3412 // TODO: Saturate to SatWidth explicitly.
3413 if (SatWidth != DstElementWidth)
3414 return SDValue();
3415
3416 EVT SrcElementVT = SrcVT.getVectorElementType();
3417
3418 // In the absence of FP16 support, promote f16 to f32, like
3419 // LowerVectorFP_TO_INT().
3420 if (SrcElementVT == MVT::f16 && !Subtarget->hasFullFP16()) {
3421 MVT F32VT = MVT::getVectorVT(MVT::f32, SrcVT.getVectorNumElements());
3422 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3423 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), F32VT, SrcVal),
3424 Op.getOperand(1));
3425 }
3426
3427 // Cases that we can emit directly.
3428 if ((SrcElementWidth == DstElementWidth) &&
3429 (SrcElementVT == MVT::f64 || SrcElementVT == MVT::f32 ||
3430 (SrcElementVT == MVT::f16 && Subtarget->hasFullFP16()))) {
3431 return Op;
3432 }
3433
3434 // For all other cases, fall back on the expanded form.
3435 return SDValue();
3436}
3437
3438SDValue AArch64TargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
3439 SelectionDAG &DAG) const {
3440 // AArch64 FP-to-int conversions saturate to the destination register size, so
3441 // we can lower common saturating conversions to simple instructions.
3442 SDValue SrcVal = Op.getOperand(0);
3443 EVT SrcVT = SrcVal.getValueType();
3444
3445 if (SrcVT.isVector())
3446 return LowerVectorFP_TO_INT_SAT(Op, DAG);
3447
3448 EVT DstVT = Op.getValueType();
3449 EVT SatVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
3450 uint64_t SatWidth = SatVT.getScalarSizeInBits();
3451 uint64_t DstWidth = DstVT.getScalarSizeInBits();
3452 assert(SatWidth <= DstWidth && "Saturation width cannot exceed result width");
3453
3454 // TODO: Saturate to SatWidth explicitly.
3455 if (SatWidth != DstWidth)
3456 return SDValue();
3457
3458 // In the absence of FP16 support, promote f16 to f32, like LowerFP_TO_INT().
3459 if (SrcVT == MVT::f16 && !Subtarget->hasFullFP16())
3460 return DAG.getNode(Op.getOpcode(), SDLoc(Op), DstVT,
3461 DAG.getNode(ISD::FP_EXTEND, SDLoc(Op), MVT::f32, SrcVal),
3462 Op.getOperand(1));
3463
3464 // Cases that we can emit directly.
3465 if ((SrcVT == MVT::f64 || SrcVT == MVT::f32 ||
3466 (SrcVT == MVT::f16 && Subtarget->hasFullFP16())) &&
3467 (DstVT == MVT::i64 || DstVT == MVT::i32))
3468 return Op;
3469
3470 // For all other cases, fall back on the expanded form.
3471 return SDValue();
3472}
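// For instance, a saturating f32 -> i32 conversion (llvm.fptosi.sat.i32.f32)
// falls into the "emit directly" case above: AArch64's FCVTZS already clamps
// out-of-range inputs and maps NaN to 0, so a single
//   fcvtzs w0, s0
// suffices (illustrative; the exact instruction is chosen later by selection).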
3473
3474SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
3475 SelectionDAG &DAG) const {
3476 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
3477 // Any additional optimization in this function should be recorded
3478 // in the cost tables.
3479 EVT VT = Op.getValueType();
3480 SDLoc dl(Op);
3481 SDValue In = Op.getOperand(0);
3482 EVT InVT = In.getValueType();
3483 unsigned Opc = Op.getOpcode();
3484 bool IsSigned = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
3485
3486 if (VT.isScalableVector()) {
3487 if (InVT.getVectorElementType() == MVT::i1) {
3488 // We can't directly extend an SVE predicate; extend it first.
3489 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3490 EVT CastVT = getPromotedVTForPredicate(InVT);
3491 In = DAG.getNode(CastOpc, dl, CastVT, In);
3492 return DAG.getNode(Opc, dl, VT, In);
3493 }
3494
3495 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
3496 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
3497 return LowerToPredicatedOp(Op, DAG, Opcode);
3498 }
3499
3500 if (useSVEForFixedLengthVectorVT(VT) || useSVEForFixedLengthVectorVT(InVT))
3501 return LowerFixedLengthIntToFPToSVE(Op, DAG);
3502
3503 uint64_t VTSize = VT.getFixedSizeInBits();
3504 uint64_t InVTSize = InVT.getFixedSizeInBits();
3505 if (VTSize < InVTSize) {
3506 MVT CastVT =
3507 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
3508 InVT.getVectorNumElements());
3509 In = DAG.getNode(Opc, dl, CastVT, In);
3510 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
3511 }
3512
3513 if (VTSize > InVTSize) {
3514 unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
3515 EVT CastVT = VT.changeVectorElementTypeToInteger();
3516 In = DAG.getNode(CastOpc, dl, CastVT, In);
3517 return DAG.getNode(Opc, dl, VT, In);
3518 }
3519
3520 return Op;
3521}
3522
3523SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
3524 SelectionDAG &DAG) const {
3525 if (Op.getValueType().isVector())
3526 return LowerVectorINT_TO_FP(Op, DAG);
3527
3528 bool IsStrict = Op->isStrictFPOpcode();
3529 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
3530
3531 // f16 conversions are promoted to f32 when full fp16 is not supported.
3532 if (Op.getValueType() == MVT::f16 &&
3533 !Subtarget->hasFullFP16()) {
3534 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
3535 SDLoc dl(Op);
3536 return DAG.getNode(
3537 ISD::FP_ROUND, dl, MVT::f16,
3538 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
3539 DAG.getIntPtrConstant(0, dl));
3540 }
3541
3542 // i128 conversions are libcalls.
3543 if (SrcVal.getValueType() == MVT::i128)
3544 return SDValue();
3545
3546 // Other conversions are legal, unless it's to the completely software-based
3547 // fp128.
3548 if (Op.getValueType() != MVT::f128)
3549 return Op;
3550 return SDValue();
3551}
3552
3553SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
3554 SelectionDAG &DAG) const {
3555 // For iOS, we want to call an alternative entry point: __sincos_stret,
3556 // which returns the values in two S / D registers.
3557 SDLoc dl(Op);
3558 SDValue Arg = Op.getOperand(0);
3559 EVT ArgVT = Arg.getValueType();
3560 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
3561
3562 ArgListTy Args;
3563 ArgListEntry Entry;
3564
3565 Entry.Node = Arg;
3566 Entry.Ty = ArgTy;
3567 Entry.IsSExt = false;
3568 Entry.IsZExt = false;
3569 Args.push_back(Entry);
3570
3571 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
3572 : RTLIB::SINCOS_STRET_F32;
3573 const char *LibcallName = getLibcallName(LC);
3574 SDValue Callee =
3575 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
3576
3577 StructType *RetTy = StructType::get(ArgTy, ArgTy);
3578 TargetLowering::CallLoweringInfo CLI(DAG);
3579 CLI.setDebugLoc(dl)
3580 .setChain(DAG.getEntryNode())
3581 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
3582
3583 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3584 return CallResult.first;
3585}
3586
3587static MVT getSVEContainerType(EVT ContentTy);
3588
3589SDValue AArch64TargetLowering::LowerBITCAST(SDValue Op,
3590 SelectionDAG &DAG) const {
3591 EVT OpVT = Op.getValueType();
3592 EVT ArgVT = Op.getOperand(0).getValueType();
3593
3594 if (useSVEForFixedLengthVectorVT(OpVT))
3595 return LowerFixedLengthBitcastToSVE(Op, DAG);
3596
3597 if (OpVT.isScalableVector()) {
3598 if (isTypeLegal(OpVT) && !isTypeLegal(ArgVT)) {
3599 assert(OpVT.isFloatingPoint() && !ArgVT.isFloatingPoint() &&
3600 "Expected int->fp bitcast!");
3601 SDValue ExtResult =
3602 DAG.getNode(ISD::ANY_EXTEND, SDLoc(Op), getSVEContainerType(ArgVT),
3603 Op.getOperand(0));
3604 return getSVESafeBitCast(OpVT, ExtResult, DAG);
3605 }
3606 return getSVESafeBitCast(OpVT, Op.getOperand(0), DAG);
3607 }
3608
3609 if (OpVT != MVT::f16 && OpVT != MVT::bf16)
3610 return SDValue();
3611
3612 assert(ArgVT == MVT::i16);
3613 SDLoc DL(Op);
3614
3615 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
3616 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
3617 return SDValue(
3618 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, OpVT, Op,
3619 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
3620 0);
3621}
3622
3623static EVT getExtensionTo64Bits(const EVT &OrigVT) {
3624 if (OrigVT.getSizeInBits() >= 64)
3625 return OrigVT;
3626
3627 assert(OrigVT.isSimple() && "Expecting a simple value type");
3628
3629 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
3630 switch (OrigSimpleTy) {
3631 default: llvm_unreachable("Unexpected Vector Type");
3632 case MVT::v2i8:
3633 case MVT::v2i16:
3634 return MVT::v2i32;
3635 case MVT::v4i8:
3636 return MVT::v4i16;
3637 }
3638}
3639
3640static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
3641 const EVT &OrigTy,
3642 const EVT &ExtTy,
3643 unsigned ExtOpcode) {
3644 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
3645 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
3646 // 64-bits we need to insert a new extension so that it will be 64-bits.
3647 assert(ExtTy.is128BitVector() && "Unexpected extension size");
3648 if (OrigTy.getSizeInBits() >= 64)
3649 return N;
3650
3651 // Must extend size to at least 64 bits to be used as an operand for VMULL.
3652 EVT NewVT = getExtensionTo64Bits(OrigTy);
3653
3654 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
3655}
3656
3657static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
3658 bool isSigned) {
3659 EVT VT = N->getValueType(0);
3660
3661 if (N->getOpcode() != ISD::BUILD_VECTOR)
3662 return false;
3663
3664 for (const SDValue &Elt : N->op_values()) {
3665 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
3666 unsigned EltSize = VT.getScalarSizeInBits();
3667 unsigned HalfSize = EltSize / 2;
3668 if (isSigned) {
3669 if (!isIntN(HalfSize, C->getSExtValue()))
3670 return false;
3671 } else {
3672 if (!isUIntN(HalfSize, C->getZExtValue()))
3673 return false;
3674 }
3675 continue;
3676 }
3677 return false;
3678 }
3679
3680 return true;
3681}
3682
3683static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
3684 if (N->getOpcode() == ISD::SIGN_EXTEND ||
3685 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
3686 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
3687 N->getOperand(0)->getValueType(0),
3688 N->getValueType(0),
3689 N->getOpcode());
3690
3691 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
3692 EVT VT = N->getValueType(0);
3693 SDLoc dl(N);
3694 unsigned EltSize = VT.getScalarSizeInBits() / 2;
3695 unsigned NumElts = VT.getVectorNumElements();
3696 MVT TruncVT = MVT::getIntegerVT(EltSize);
3697 SmallVector<SDValue, 8> Ops;
3698 for (unsigned i = 0; i != NumElts; ++i) {
3699 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
3700 const APInt &CInt = C->getAPIntValue();
3701 // Element types smaller than 32 bits are not legal, so use i32 elements.
3702 // The values are implicitly truncated so sext vs. zext doesn't matter.
3703 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
3704 }
3705 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
3706}
3707
3708static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
3709 return N->getOpcode() == ISD::SIGN_EXTEND ||
3710 N->getOpcode() == ISD::ANY_EXTEND ||
3711 isExtendedBUILD_VECTOR(N, DAG, true);
3712}
3713
3714static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
3715 return N->getOpcode() == ISD::ZERO_EXTEND ||
3716 N->getOpcode() == ISD::ANY_EXTEND ||
3717 isExtendedBUILD_VECTOR(N, DAG, false);
3718}
3719
3720static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
3721 unsigned Opcode = N->getOpcode();
3722 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3723 SDNode *N0 = N->getOperand(0).getNode();
3724 SDNode *N1 = N->getOperand(1).getNode();
3725 return N0->hasOneUse() && N1->hasOneUse() &&
3726 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
3727 }
3728 return false;
3729}
3730
3731static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
3732 unsigned Opcode = N->getOpcode();
3733 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
3734 SDNode *N0 = N->getOperand(0).getNode();
3735 SDNode *N1 = N->getOperand(1).getNode();
3736 return N0->hasOneUse() && N1->hasOneUse() &&
3737 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
3738 }
3739 return false;
3740}
3741
3742SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3743 SelectionDAG &DAG) const {
3744 // The rounding mode is in bits 23:22 of the FPCR.
3745 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3746 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3),
3747 // so that the shift and AND get folded into a bitfield extract.
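  // For illustration (values assumed): with FPCR[23:22] = 0b01 (round toward
  // +infinity), adding 1 << 22 makes the field 0b10, and the shift-and-mask
  // yields 2, matching the 1->2 entry of the mapping above.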
3748 SDLoc dl(Op);
3749
3750 SDValue Chain = Op.getOperand(0);
3751 SDValue FPCR_64 = DAG.getNode(
3752 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
3753 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
3754 Chain = FPCR_64.getValue(1);
3755 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
3756 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
3757 DAG.getConstant(1U << 22, dl, MVT::i32));
3758 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3759 DAG.getConstant(22, dl, MVT::i32));
3760 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3761 DAG.getConstant(3, dl, MVT::i32));
3762 return DAG.getMergeValues({AND, Chain}, dl);
3763}
3764
3765SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
3766 SelectionDAG &DAG) const {
3767 SDLoc DL(Op);
3768 SDValue Chain = Op->getOperand(0);
3769 SDValue RMValue = Op->getOperand(1);
3770
3771 // The rounding mode is in bits 23:22 of the FPCR.
3772 // The llvm.set.rounding argument value to the rounding mode in FPCR mapping
3773 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
3774 // (((arg - 1) & 3) << 22).
3775 //
3776 // The argument of llvm.set.rounding must be within the range [0, 3], so
3777 // NearestTiesToAway (4) is not handled here. It is the responsibility of
3778 // the code that generates llvm.set.rounding to ensure this condition.
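  // For illustration (values assumed): llvm.set.rounding(0) (round toward
  // zero) gives ((0 - 1) & 3) == 3, i.e. FPCR[23:22] = 0b11, matching the
  // 0->3 entry of the mapping above.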
3779
3780 // Calculate new value of FPCR[23:22].
3781 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
3782 DAG.getConstant(1, DL, MVT::i32));
3783 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
3784 DAG.getConstant(0x3, DL, MVT::i32));
3785 RMValue =
3786 DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
3787 DAG.getConstant(AArch64::RoundingBitsPos, DL, MVT::i32));
3788 RMValue = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, RMValue);
3789
3790 // Get current value of FPCR.
3791 SDValue Ops[] = {
3792 Chain, DAG.getTargetConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)};
3793 SDValue FPCR =
3794 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other}, Ops);
3795 Chain = FPCR.getValue(1);
3796 FPCR = FPCR.getValue(0);
3797
3798 // Put the new rounding mode into FPCR[23:22].
3799 const int RMMask = ~(AArch64::Rounding::rmMask << AArch64::RoundingBitsPos);
3800 FPCR = DAG.getNode(ISD::AND, DL, MVT::i64, FPCR,
3801 DAG.getConstant(RMMask, DL, MVT::i64));
3802 FPCR = DAG.getNode(ISD::OR, DL, MVT::i64, FPCR, RMValue);
3803 SDValue Ops2[] = {
3804 Chain, DAG.getTargetConstant(Intrinsic::aarch64_set_fpcr, DL, MVT::i64),
3805 FPCR};
3806 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
3807}
3808
3809SDValue AArch64TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
3810 EVT VT = Op.getValueType();
3811
3812 // If SVE is available then i64 vector multiplications can also be made legal.
3813 bool OverrideNEON = VT == MVT::v2i64 || VT == MVT::v1i64;
3814
3815 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT, OverrideNEON))
3816 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MUL_PRED, OverrideNEON);
3817
3818 // Multiplications are only custom-lowered for 128-bit vectors so that
3819 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
3820 assert(VT.is128BitVector() && VT.isInteger() &&
3821        "unexpected type for custom-lowering ISD::MUL");
3822 SDNode *N0 = Op.getOperand(0).getNode();
3823 SDNode *N1 = Op.getOperand(1).getNode();
3824 unsigned NewOpc = 0;
3825 bool isMLA = false;
3826 bool isN0SExt = isSignExtended(N0, DAG);
3827 bool isN1SExt = isSignExtended(N1, DAG);
3828 if (isN0SExt && isN1SExt)
3829 NewOpc = AArch64ISD::SMULL;
3830 else {
3831 bool isN0ZExt = isZeroExtended(N0, DAG);
3832 bool isN1ZExt = isZeroExtended(N1, DAG);
3833 if (isN0ZExt && isN1ZExt)
3834 NewOpc = AArch64ISD::UMULL;
3835 else if (isN1SExt || isN1ZExt) {
3836 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
3837 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
3838 if (isN1SExt && isAddSubSExt(N0, DAG)) {
3839 NewOpc = AArch64ISD::SMULL;
3840 isMLA = true;
3841 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
3842 NewOpc = AArch64ISD::UMULL;
3843 isMLA = true;
3844 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
3845 std::swap(N0, N1);
3846 NewOpc = AArch64ISD::UMULL;
3847 isMLA = true;
3848 }
3849 }
3850
3851 if (!NewOpc) {
3852 if (VT == MVT::v2i64)
3853 // Fall through to expand this. It is not legal.
3854 return SDValue();
3855 else
3856 // Other vector multiplications are legal.
3857 return Op;
3858 }
3859 }
3860
3861 // Legalize to a S/UMULL instruction
3862 SDLoc DL(Op);
3863 SDValue Op0;
3864 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
3865 if (!isMLA) {
3866 Op0 = skipExtensionForVectorMULL(N0, DAG);
3867 assert(Op0.getValueType().is64BitVector() &&
3868        Op1.getValueType().is64BitVector() &&
3869        "unexpected types for extended operands to VMULL");
3870 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
3871 }
3872 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
3873 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
3874 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
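  // For illustration (operands assumed): (zext <8 x i8> %a + zext <8 x i8> %b)
  // * (zext <8 x i8> %c) becomes UMULL(%a, %c) + UMULL(%b, %c), so both
  // multiplies stay in the narrow widening form.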
3875 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
3876 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
3877 EVT Op1VT = Op1.getValueType();
3878 return DAG.getNode(N0->getOpcode(), DL, VT,
3879 DAG.getNode(NewOpc, DL, VT,
3880 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
3881 DAG.getNode(NewOpc, DL, VT,
3882 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
3883}
3884
3885static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
3886 int Pattern) {
3887 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
3888 DAG.getTargetConstant(Pattern, DL, MVT::i32));
3889}
3890
3891static SDValue lowerConvertToSVBool(SDValue Op, SelectionDAG &DAG) {
3892 SDLoc DL(Op);
3893 EVT OutVT = Op.getValueType();
3894 SDValue InOp = Op.getOperand(1);
3895 EVT InVT = InOp.getValueType();
3896
3897 // Return the operand if the cast isn't changing type,
3898 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3899 if (InVT == OutVT)
3900 return InOp;
3901
3902 SDValue Reinterpret =
3903 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, InOp);
3904
3905 // If the argument converted to an svbool is a ptrue or a comparison, the
3906 // lanes introduced by the widening are zero by construction.
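  // For illustration (types assumed): widening <n x 4 x i1> to <n x 16 x i1>
  // leaves three padding bits per original predicate element; a ptrue or
  // compare result already has those bits clear, so no masking is required.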
3907 switch (InOp.getOpcode()) {
3908 case AArch64ISD::SETCC_MERGE_ZERO:
3909 return Reinterpret;
3910 case ISD::INTRINSIC_WO_CHAIN:
3911 if (InOp.getConstantOperandVal(0) == Intrinsic::aarch64_sve_ptrue)
3912 return Reinterpret;
3913 }
3914
3915 // Otherwise, zero the newly introduced lanes.
3916 SDValue Mask = getPTrue(DAG, DL, InVT, AArch64SVEPredPattern::all);
3917 SDValue MaskReinterpret =
3918 DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, OutVT, Mask);
3919 return DAG.getNode(ISD::AND, DL, OutVT, Reinterpret, MaskReinterpret);
3920}
3921
3922SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
3923 SelectionDAG &DAG) const {
3924 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3925 SDLoc dl(Op);
3926 switch (IntNo) {
3927 default: return SDValue(); // Don't custom lower most intrinsics.
3928 case Intrinsic::thread_pointer: {
3929 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3930 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3931 }
3932 case Intrinsic::aarch64_neon_abs: {
3933 EVT Ty = Op.getValueType();
3934 if (Ty == MVT::i64) {
3935 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3936 Op.getOperand(1));
3937 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3938 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3939 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3940 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3941 } else {
4942 report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
3943 }
3944 }
3945 case Intrinsic::aarch64_neon_smax:
3946 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3947 Op.getOperand(1), Op.getOperand(2));
3948 case Intrinsic::aarch64_neon_umax:
3949 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3950 Op.getOperand(1), Op.getOperand(2));
3951 case Intrinsic::aarch64_neon_smin:
3952 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3953 Op.getOperand(1), Op.getOperand(2));
3954 case Intrinsic::aarch64_neon_umin:
3955 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3956 Op.getOperand(1), Op.getOperand(2));
3957
3958 case Intrinsic::aarch64_sve_sunpkhi:
3959 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3960 Op.getOperand(1));
3961 case Intrinsic::aarch64_sve_sunpklo:
3962 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3963 Op.getOperand(1));
3964 case Intrinsic::aarch64_sve_uunpkhi:
3965 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3966 Op.getOperand(1));
3967 case Intrinsic::aarch64_sve_uunpklo:
3968 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3969 Op.getOperand(1));
3970 case Intrinsic::aarch64_sve_clasta_n:
3971 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3972 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3973 case Intrinsic::aarch64_sve_clastb_n:
3974 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3975 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3976 case Intrinsic::aarch64_sve_lasta:
3977 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3978 Op.getOperand(1), Op.getOperand(2));
3979 case Intrinsic::aarch64_sve_lastb:
3980 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3981 Op.getOperand(1), Op.getOperand(2));
3982 case Intrinsic::aarch64_sve_rev:
3983 return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
3984 Op.getOperand(1));
3985 case Intrinsic::aarch64_sve_tbl:
3986 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3987 Op.getOperand(1), Op.getOperand(2));
3988 case Intrinsic::aarch64_sve_trn1:
3989 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3990 Op.getOperand(1), Op.getOperand(2));
3991 case Intrinsic::aarch64_sve_trn2:
3992 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3993 Op.getOperand(1), Op.getOperand(2));
3994 case Intrinsic::aarch64_sve_uzp1:
3995 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3996 Op.getOperand(1), Op.getOperand(2));
3997 case Intrinsic::aarch64_sve_uzp2:
3998 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3999 Op.getOperand(1), Op.getOperand(2));
4000 case Intrinsic::aarch64_sve_zip1:
4001 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
4002 Op.getOperand(1), Op.getOperand(2));
4003 case Intrinsic::aarch64_sve_zip2:
4004 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
4005 Op.getOperand(1), Op.getOperand(2));
4006 case Intrinsic::aarch64_sve_splice:
4007 return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
4008 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4009 case Intrinsic::aarch64_sve_ptrue:
4010 return getPTrue(DAG, dl, Op.getValueType(),
4011 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
4012 case Intrinsic::aarch64_sve_clz:
4013 return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
4014 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4015 case Intrinsic::aarch64_sve_cnt: {
4016 SDValue Data = Op.getOperand(3);
4017 // CTPOP only supports integer operands.
4018 if (Data.getValueType().isFloatingPoint())
4019 Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
4020 return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
4021 Op.getOperand(2), Data, Op.getOperand(1));
4022 }
4023 case Intrinsic::aarch64_sve_dupq_lane:
4024 return LowerDUPQLane(Op, DAG);
4025 case Intrinsic::aarch64_sve_convert_from_svbool:
4026 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
4027 Op.getOperand(1));
4028 case Intrinsic::aarch64_sve_convert_to_svbool:
4029 return lowerConvertToSVBool(Op, DAG);
4030 case Intrinsic::aarch64_sve_fneg:
4031 return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4032 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4033 case Intrinsic::aarch64_sve_frintp:
4034 return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
4035 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4036 case Intrinsic::aarch64_sve_frintm:
4037 return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
4038 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4039 case Intrinsic::aarch64_sve_frinti:
4040 return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4041 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4042 case Intrinsic::aarch64_sve_frintx:
4043 return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
4044 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4045 case Intrinsic::aarch64_sve_frinta:
4046 return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
4047 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4048 case Intrinsic::aarch64_sve_frintn:
4049 return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl, Op.getValueType(),
4050 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4051 case Intrinsic::aarch64_sve_frintz:
4052 return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
4053 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4054 case Intrinsic::aarch64_sve_ucvtf:
4055 return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
4056 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4057 Op.getOperand(1));
4058 case Intrinsic::aarch64_sve_scvtf:
4059 return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
4060 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4061 Op.getOperand(1));
4062 case Intrinsic::aarch64_sve_fcvtzu:
4063 return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl,
4064 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4065 Op.getOperand(1));
4066 case Intrinsic::aarch64_sve_fcvtzs:
4067 return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl,
4068 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4069 Op.getOperand(1));
4070 case Intrinsic::aarch64_sve_fsqrt:
4071 return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
4072 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4073 case Intrinsic::aarch64_sve_frecpx:
4074 return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
4075 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4076 case Intrinsic::aarch64_sve_fabs:
4077 return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4078 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4079 case Intrinsic::aarch64_sve_abs:
4080 return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
4081 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4082 case Intrinsic::aarch64_sve_neg:
4083 return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
4084 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4085 case Intrinsic::aarch64_sve_insr: {
4086 SDValue Scalar = Op.getOperand(2);
4087 EVT ScalarTy = Scalar.getValueType();
4088 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
4089 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
4090
4091 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
4092 Op.getOperand(1), Scalar);
4093 }
4094 case Intrinsic::aarch64_sve_rbit:
4095 return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
4096 Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
4097 Op.getOperand(1));
4098 case Intrinsic::aarch64_sve_revb:
4099 return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
4100 Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
4101 case Intrinsic::aarch64_sve_sxtb:
4102 return DAG.getNode(
4103 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4104 Op.getOperand(2), Op.getOperand(3),
4105 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4106 Op.getOperand(1));
4107 case Intrinsic::aarch64_sve_sxth:
4108 return DAG.getNode(
4109 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4110 Op.getOperand(2), Op.getOperand(3),
4111 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4112 Op.getOperand(1));
4113 case Intrinsic::aarch64_sve_sxtw:
4114 return DAG.getNode(
4115 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4116 Op.getOperand(2), Op.getOperand(3),
4117 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4118 Op.getOperand(1));
4119 case Intrinsic::aarch64_sve_uxtb:
4120 return DAG.getNode(
4121 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4122 Op.getOperand(2), Op.getOperand(3),
4123 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
4124 Op.getOperand(1));
4125 case Intrinsic::aarch64_sve_uxth:
4126 return DAG.getNode(
4127 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4128 Op.getOperand(2), Op.getOperand(3),
4129 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
4130 Op.getOperand(1));
4131 case Intrinsic::aarch64_sve_uxtw:
4132 return DAG.getNode(
4133 AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
4134 Op.getOperand(2), Op.getOperand(3),
4135 DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
4136 Op.getOperand(1));
4137
4138 case Intrinsic::localaddress: {
4139 const auto &MF = DAG.getMachineFunction();
4140 const auto *RegInfo = Subtarget->getRegisterInfo();
4141 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
4142 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
4143 Op.getSimpleValueType());
4144 }
4145
4146 case Intrinsic::eh_recoverfp: {
4147 // FIXME: This needs to be implemented to correctly handle highly aligned
4148 // stack objects. For now we simply return the incoming FP. Refer to D53541
4149 // for more details.
4150 SDValue FnOp = Op.getOperand(1);
4151 SDValue IncomingFPOp = Op.getOperand(2);
4152 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
4153 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
4154 if (!Fn)
4155 report_fatal_error(
4156 "llvm.eh.recoverfp must take a function as the first argument");
4157 return IncomingFPOp;
4158 }
4159
4160 case Intrinsic::aarch64_neon_vsri:
4161 case Intrinsic::aarch64_neon_vsli: {
4162 EVT Ty = Op.getValueType();
4163
4164 if (!Ty.isVector())
4165 report_fatal_error("Unexpected type for aarch64_neon_vsli");
4166
4167 assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());
4168
4169 bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri;
4170 unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
4171 return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
4172 Op.getOperand(3));
4173 }
4174
4175 case Intrinsic::aarch64_neon_srhadd:
4176 case Intrinsic::aarch64_neon_urhadd:
4177 case Intrinsic::aarch64_neon_shadd:
4178 case Intrinsic::aarch64_neon_uhadd: {
4179 bool IsSignedAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4180 IntNo == Intrinsic::aarch64_neon_shadd);
4181 bool IsRoundingAdd = (IntNo == Intrinsic::aarch64_neon_srhadd ||
4182 IntNo == Intrinsic::aarch64_neon_urhadd);
4183 unsigned Opcode =
4184 IsSignedAdd ? (IsRoundingAdd ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
4185 : (IsRoundingAdd ? AArch64ISD::URHADD : AArch64ISD::UHADD);
4186 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4187 Op.getOperand(2));
4188 }
4189 case Intrinsic::aarch64_neon_sabd:
4190 case Intrinsic::aarch64_neon_uabd: {
4191 unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uabd ? ISD::ABDU
4192 : ISD::ABDS;
4193 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4194 Op.getOperand(2));
4195 }
4196 case Intrinsic::aarch64_neon_uaddlp: {
4197 unsigned Opcode = AArch64ISD::UADDLP;
4198 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
4199 }
4200 case Intrinsic::aarch64_neon_sdot:
4201 case Intrinsic::aarch64_neon_udot:
4202 case Intrinsic::aarch64_sve_sdot:
4203 case Intrinsic::aarch64_sve_udot: {
4204 unsigned Opcode = (IntNo == Intrinsic::aarch64_neon_udot ||
4205 IntNo == Intrinsic::aarch64_sve_udot)
4206 ? AArch64ISD::UDOT
4207 : AArch64ISD::SDOT;
4208 return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
4209 Op.getOperand(2), Op.getOperand(3));
4210 }
4211 }
4212}
4213
4214bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
4215 if (VT.getVectorElementType() == MVT::i8 ||
4216 VT.getVectorElementType() == MVT::i16) {
4217 EltTy = MVT::i32;
4218 return true;
4219 }
4220 return false;
4221}
4222
4223bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
4224 if (VT.getVectorElementType() == MVT::i32 &&
4225 VT.getVectorElementCount().getKnownMinValue() >= 4)
4226 return true;
4227
4228 return false;
4229}
4230
4231bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
4232 return ExtVal.getValueType().isScalableVector() ||
4233 useSVEForFixedLengthVectorVT(ExtVal.getValueType(),
4234 /*OverrideNEON=*/true);
4235}
4236
4237unsigned getGatherVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4238 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4239 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4240 AArch64ISD::GLD1_MERGE_ZERO},
4241 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4242 AArch64ISD::GLD1_UXTW_MERGE_ZERO},
4243 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4244 AArch64ISD::GLD1_MERGE_ZERO},
4245 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4246 AArch64ISD::GLD1_SXTW_MERGE_ZERO},
4247 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4248 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4249 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4250 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO},
4251 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4252 AArch64ISD::GLD1_SCALED_MERGE_ZERO},
4253 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4254 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO},
4255 };
4256 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4257 return AddrModes.find(Key)->second;
4258}
4259
4260unsigned getScatterVecOpcode(bool IsScaled, bool IsSigned, bool NeedsExtend) {
4261 std::map<std::tuple<bool, bool, bool>, unsigned> AddrModes = {
4262 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ false),
4263 AArch64ISD::SST1_PRED},
4264 {std::make_tuple(/*Scaled*/ false, /*Signed*/ false, /*Extend*/ true),
4265 AArch64ISD::SST1_UXTW_PRED},
4266 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ false),
4267 AArch64ISD::SST1_PRED},
4268 {std::make_tuple(/*Scaled*/ false, /*Signed*/ true, /*Extend*/ true),
4269 AArch64ISD::SST1_SXTW_PRED},
4270 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ false),
4271 AArch64ISD::SST1_SCALED_PRED},
4272 {std::make_tuple(/*Scaled*/ true, /*Signed*/ false, /*Extend*/ true),
4273 AArch64ISD::SST1_UXTW_SCALED_PRED},
4274 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ false),
4275 AArch64ISD::SST1_SCALED_PRED},
4276 {std::make_tuple(/*Scaled*/ true, /*Signed*/ true, /*Extend*/ true),
4277 AArch64ISD::SST1_SXTW_SCALED_PRED},
4278 };
4279 auto Key = std::make_tuple(IsScaled, IsSigned, NeedsExtend);
4280 return AddrModes.find(Key)->second;
4281}
4282
4283unsigned getSignExtendedGatherOpcode(unsigned Opcode) {
4284 switch (Opcode) {
4285 default:
4286 llvm_unreachable("unimplemented opcode");
4287 return Opcode;
4288 case AArch64ISD::GLD1_MERGE_ZERO:
4289 return AArch64ISD::GLD1S_MERGE_ZERO;
4290 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
4291 return AArch64ISD::GLD1S_IMM_MERGE_ZERO;
4292 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
4293 return AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
4294 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
4295 return AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
4296 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
4297 return AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
4298 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
4299 return AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
4300 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
4301 return AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
4302 }
4303}
4304
4305bool getGatherScatterIndexIsExtended(SDValue Index) {
4306 unsigned Opcode = Index.getOpcode();
4307 if (Opcode == ISD::SIGN_EXTEND_INREG)
4308 return true;
4309
4310 if (Opcode == ISD::AND) {
4311 SDValue Splat = Index.getOperand(1);
4312 if (Splat.getOpcode() != ISD::SPLAT_VECTOR)
4313 return false;
4314 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Splat.getOperand(0));
4315 if (!Mask || Mask->getZExtValue() != 0xFFFFFFFF)
4316 return false;
4317 return true;
4318 }
4319
4320 return false;
4321}
4322
4323// If the base pointer of a masked gather or scatter is null, we
4324// may be able to swap BasePtr & Index and use the vector + register
4325// or vector + immediate addressing mode, e.g.
4326// VECTOR + REGISTER:
4327 // getelementptr nullptr, <vscale x N x T> (splat(%offset) + %indices)
4328 // -> getelementptr %offset, <vscale x N x T> %indices
4329 // VECTOR + IMMEDIATE:
4330 // getelementptr nullptr, <vscale x N x T> (splat(#x) + %indices)
4331// -> getelementptr #x, <vscale x N x T> %indices
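// For illustration (IR assumed): a gather whose index is
// splat(i64 %base) + %indices with a null base pointer is rewritten so that
// %base becomes BasePtr and %indices becomes Index, letting the ADD fold
// into the addressing mode.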
4332void selectGatherScatterAddrMode(SDValue &BasePtr, SDValue &Index, EVT MemVT,
4333 unsigned &Opcode, bool IsGather,
4334 SelectionDAG &DAG) {
4335 if (!isNullConstant(BasePtr))
4336 return;
4337
4338 // FIXME: This will not match for fixed vector type codegen as the nodes in
4339 // question will have fixed<->scalable conversions around them. This should be
4340 // moved to a DAG combine or complex pattern so that it executes after all of
4341 // the fixed vector inserts and extracts have been removed. This deficiency
4342 // will result in a sub-optimal addressing mode being used, i.e. an ADD not
4343 // being folded into the scatter/gather.
4344 ConstantSDNode *Offset = nullptr;
4345 if (Index.getOpcode() == ISD::ADD)
4346 if (auto SplatVal = DAG.getSplatValue(Index.getOperand(1))) {
4347 if (isa<ConstantSDNode>(SplatVal))
4348 Offset = cast<ConstantSDNode>(SplatVal);
4349 else {
4350 BasePtr = SplatVal;
4351 Index = Index->getOperand(0);
4352 return;
4353 }
4354 }
4355
4356 unsigned NewOp =
4357 IsGather ? AArch64ISD::GLD1_IMM_MERGE_ZERO : AArch64ISD::SST1_IMM_PRED;
4358
4359 if (!Offset) {
4360 std::swap(BasePtr, Index);
4361 Opcode = NewOp;
4362 return;
4363 }
4364
4365 uint64_t OffsetVal = Offset->getZExtValue();
4366 unsigned ScalarSizeInBytes = MemVT.getScalarSizeInBits() / 8;
4367 auto ConstOffset = DAG.getConstant(OffsetVal, SDLoc(Index), MVT::i64);
4368
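  // For illustration (values assumed): with 32-bit elements (4 bytes), a
  // splat offset of 64 is in range for the immediate form (64 % 4 == 0 and
  // 64 / 4 == 16 <= 31), whereas an offset of 129 is not and is moved into
  // BasePtr below instead.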
4369 if (OffsetVal % ScalarSizeInBytes || OffsetVal / ScalarSizeInBytes > 31) {
4370 // Index is out of range for the immediate addressing mode
4371 BasePtr = ConstOffset;
4372 Index = Index->getOperand(0);
4373 return;
4374 }
4375
4376 // Immediate is in range
4377 Opcode = NewOp;
4378 BasePtr = Index->getOperand(0);
4379 Index = ConstOffset;
4380}
4381
4382SDValue AArch64TargetLowering::LowerMGATHER(SDValue Op,
4383 SelectionDAG &DAG) const {
4384 SDLoc DL(Op);
4385 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(Op);
4386 assert(MGT && "Can only custom lower gather load nodes");
4387
4388 bool IsFixedLength = MGT->getMemoryVT().isFixedLengthVector();
4389
4390 SDValue Index = MGT->getIndex();
4391 SDValue Chain = MGT->getChain();
4392 SDValue PassThru = MGT->getPassThru();
4393 SDValue Mask = MGT->getMask();
4394 SDValue BasePtr = MGT->getBasePtr();
4395 ISD::LoadExtType ExtTy = MGT->getExtensionType();
4396
4397 ISD::MemIndexType IndexType = MGT->getIndexType();
4398 bool IsScaled =
4399 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4400 bool IsSigned =
4401 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4402 bool IdxNeedsExtend =
4403 getGatherScatterIndexIsExtended(Index) ||
4404 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4405 bool ResNeedsSignExtend = ExtTy == ISD::EXTLOAD || ExtTy == ISD::SEXTLOAD;
4406
4407 EVT VT = PassThru.getSimpleValueType();
4408 EVT IndexVT = Index.getSimpleValueType();
4409 EVT MemVT = MGT->getMemoryVT();
4410 SDValue InputVT = DAG.getValueType(MemVT);
4411
4412 if (VT.getVectorElementType() == MVT::bf16 &&
4413 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4414 return SDValue();
4415
4416 if (IsFixedLength) {
4417 assert(Subtarget->useSVEForFixedLengthVectors() &&
4418        "Cannot lower when not using SVE for fixed vectors");
4419 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4420 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4421 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4422 } else {
4423 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4424 IndexVT = MemVT.changeTypeToInteger();
4425 }
4426 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4427 Mask = DAG.getNode(
4428 ISD::ZERO_EXTEND, DL,
4429 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4430 }
4431
4432 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4433 PassThru = SDValue();
4434
4435 if (VT.isFloatingPoint() && !IsFixedLength) {
4436 // Handle FP data by using an integer gather and casting the result.
4437 if (PassThru) {
4438 EVT PassThruVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4439 PassThru = getSVESafeBitCast(PassThruVT, PassThru, DAG);
4440 }
4441 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4442 }
4443
4444 SDVTList VTs = DAG.getVTList(IndexVT, MVT::Other);
4445
4446 if (getGatherScatterIndexIsExtended(Index))
4447 Index = Index.getOperand(0);
4448
4449 unsigned Opcode = getGatherVecOpcode(IsScaled, IsSigned, IdxNeedsExtend);
4450 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4451 /*isGather=*/true, DAG);
4452
4453 if (ResNeedsSignExtend)
4454 Opcode = getSignExtendedGatherOpcode(Opcode);
4455
4456 if (IsFixedLength) {
4457 if (Index.getSimpleValueType().isFixedLengthVector())
4458 Index = convertToScalableVector(DAG, IndexVT, Index);
4459 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4460 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4461 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4462 }
4463
4464 SDValue Ops[] = {Chain, Mask, BasePtr, Index, InputVT};
4465 SDValue Result = DAG.getNode(Opcode, DL, VTs, Ops);
4466 Chain = Result.getValue(1);
4467
4468 if (IsFixedLength) {
4469 Result = convertFromScalableVector(
4470 DAG, VT.changeVectorElementType(IndexVT.getVectorElementType()),
4471 Result);
4472 Result = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Result);
4473 Result = DAG.getNode(ISD::BITCAST, DL, VT, Result);
4474
4475 if (PassThru)
4476 Result = DAG.getSelect(DL, VT, MGT->getMask(), Result, PassThru);
4477 } else {
4478 if (PassThru)
4479 Result = DAG.getSelect(DL, IndexVT, Mask, Result, PassThru);
4480
4481 if (VT.isFloatingPoint())
4482 Result = getSVESafeBitCast(VT, Result, DAG);
4483 }
4484
4485 return DAG.getMergeValues({Result, Chain}, DL);
4486}
4487
4488SDValue AArch64TargetLowering::LowerMSCATTER(SDValue Op,
4489 SelectionDAG &DAG) const {
4490 SDLoc DL(Op);
4491 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(Op);
4492 assert(MSC && "Can only custom lower scatter store nodes");
4493
4494 bool IsFixedLength = MSC->getMemoryVT().isFixedLengthVector();
4495
4496 SDValue Index = MSC->getIndex();
4497 SDValue Chain = MSC->getChain();
4498 SDValue StoreVal = MSC->getValue();
4499 SDValue Mask = MSC->getMask();
4500 SDValue BasePtr = MSC->getBasePtr();
4501
4502 ISD::MemIndexType IndexType = MSC->getIndexType();
4503 bool IsScaled =
4504 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::UNSIGNED_SCALED;
4505 bool IsSigned =
4506 IndexType == ISD::SIGNED_SCALED || IndexType == ISD::SIGNED_UNSCALED;
4507 bool NeedsExtend =
4508 getGatherScatterIndexIsExtended(Index) ||
4509 Index.getSimpleValueType().getVectorElementType() == MVT::i32;
4510
4511 EVT VT = StoreVal.getSimpleValueType();
4512 EVT IndexVT = Index.getSimpleValueType();
4513 SDVTList VTs = DAG.getVTList(MVT::Other);
4514 EVT MemVT = MSC->getMemoryVT();
4515 SDValue InputVT = DAG.getValueType(MemVT);
4516
4517 if (VT.getVectorElementType() == MVT::bf16 &&
4518 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
4519 return SDValue();
4520
4521 if (IsFixedLength) {
4522 assert(Subtarget->useSVEForFixedLengthVectors() &&
4523        "Cannot lower when not using SVE for fixed vectors");
4524 if (MemVT.getScalarSizeInBits() <= IndexVT.getScalarSizeInBits()) {
4525 IndexVT = getContainerForFixedLengthVector(DAG, IndexVT);
4526 MemVT = IndexVT.changeVectorElementType(MemVT.getVectorElementType());
4527 } else {
4528 MemVT = getContainerForFixedLengthVector(DAG, MemVT);
4529 IndexVT = MemVT.changeTypeToInteger();
4530 }
4531 InputVT = DAG.getValueType(MemVT.changeTypeToInteger());
4532
4533 StoreVal =
4534 DAG.getNode(ISD::BITCAST, DL, VT.changeTypeToInteger(), StoreVal);
4535 StoreVal = DAG.getNode(
4536 ISD::ANY_EXTEND, DL,
4537 VT.changeVectorElementType(IndexVT.getVectorElementType()), StoreVal);
4538 StoreVal = convertToScalableVector(DAG, IndexVT, StoreVal);
4539 Mask = DAG.getNode(
4540 ISD::ZERO_EXTEND, DL,
4541 VT.changeVectorElementType(IndexVT.getVectorElementType()), Mask);
4542 } else if (VT.isFloatingPoint()) {
4543 // Handle FP data by casting the data so an integer scatter can be used.
4544 EVT StoreValVT = getPackedSVEVectorVT(VT.getVectorElementCount());
4545 StoreVal = getSVESafeBitCast(StoreValVT, StoreVal, DAG);
4546 InputVT = DAG.getValueType(MemVT.changeVectorElementTypeToInteger());
4547 }
4548
4549 if (getGatherScatterIndexIsExtended(Index))
4550 Index = Index.getOperand(0);
4551
4552 unsigned Opcode = getScatterVecOpcode(IsScaled, IsSigned, NeedsExtend);
4553 selectGatherScatterAddrMode(BasePtr, Index, MemVT, Opcode,
4554 /*isGather=*/false, DAG);
4555
4556 if (IsFixedLength) {
4557 if (Index.getSimpleValueType().isFixedLengthVector())
4558 Index = convertToScalableVector(DAG, IndexVT, Index);
4559 if (BasePtr.getSimpleValueType().isFixedLengthVector())
4560 BasePtr = convertToScalableVector(DAG, IndexVT, BasePtr);
4561 Mask = convertFixedMaskToScalableVector(Mask, DAG);
4562 }
4563
4564 SDValue Ops[] = {Chain, StoreVal, Mask, BasePtr, Index, InputVT};
4565 return DAG.getNode(Opcode, DL, VTs, Ops);
4566}
4567
4568SDValue AArch64TargetLowering::LowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
4569 SDLoc DL(Op);
4570 MaskedLoadSDNode *LoadNode = cast<MaskedLoadSDNode>(Op);
4571 assert(LoadNode && "Expected custom lowering of a masked load node");
4572 EVT VT = Op->getValueType(0);
4573
4574 if (useSVEForFixedLengthVectorVT(VT, true))
4575 return LowerFixedLengthVectorMLoadToSVE(Op, DAG);
4576
4577 SDValue PassThru = LoadNode->getPassThru();
4578 SDValue Mask = LoadNode->getMask();
4579
4580 if (PassThru->isUndef() || isZerosVector(PassThru.getNode()))
4581 return Op;
4582
4583 SDValue Load = DAG.getMaskedLoad(
4584 VT, DL, LoadNode->getChain(), LoadNode->getBasePtr(),
4585 LoadNode->getOffset(), Mask, DAG.getUNDEF(VT), LoadNode->getMemoryVT(),
4586 LoadNode->getMemOperand(), LoadNode->getAddressingMode(),
4587 LoadNode->getExtensionType());
4588
4589 SDValue Result = DAG.getSelect(DL, VT, Mask, Load, PassThru);
4590
4591 return DAG.getMergeValues({Result, Load.getValue(1)}, DL);
4592}
4593
4594// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
4595static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
4596 EVT VT, EVT MemVT,
4597 SelectionDAG &DAG) {
4598 assert(VT.isVector() && "VT should be a vector type");
4599 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
4600
4601 SDValue Value = ST->getValue();
4602
4603 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
4604 // the word lane which represents the v4i8 subvector. It optimizes the store
4605 // to:
4606 //
4607 // xtn v0.8b, v0.8h
4608 // str s0, [x0]
4609
4610 SDValue Undef = DAG.getUNDEF(MVT::i16);
4611 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
4612 {Undef, Undef, Undef, Undef});
4613
4614 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
4615 Value, UndefVec);
4616 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
4617
4618 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
4619 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
4620 Trunc, DAG.getConstant(0, DL, MVT::i64));
4621
4622 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
4623 ST->getBasePtr(), ST->getMemOperand());
4624}
4625
4626 // Custom lowering for any store, vector or scalar, default or truncating.
4627 // Currently we only custom lower the truncating store from vector v4i16 to
4628 // v4i8 and volatile stores of i128.
4629SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
4630 SelectionDAG &DAG) const {
4631 SDLoc Dl(Op);
4632 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
4633 assert(StoreNode && "Can only custom lower store nodes");
4634
4635 SDValue Value = StoreNode->getValue();
4636
4637 EVT VT = Value.getValueType();
4638 EVT MemVT = StoreNode->getMemoryVT();
4639
4640 if (VT.isVector()) {
4641 if (useSVEForFixedLengthVectorVT(VT, true))
4642 return LowerFixedLengthVectorStoreToSVE(Op, DAG);
4643
4644 unsigned AS = StoreNode->getAddressSpace();
4645 Align Alignment = StoreNode->getAlign();
4646 if (Alignment < MemVT.getStoreSize() &&
4647 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
4648 StoreNode->getMemOperand()->getFlags(),
4649 nullptr)) {
4650 return scalarizeVectorStore(StoreNode, DAG);
4651 }
4652
4653 if (StoreNode->isTruncatingStore() && VT == MVT::v4i16 &&
4654 MemVT == MVT::v4i8) {
4655 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
4656 }
4657 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
4658 // the custom lowering, as there are no un-paired non-temporal stores and
4659 // legalization will break up 256 bit inputs.
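    // For illustration (type assumed): a non-temporal store of v8i32 (256
    // bits) is split into two v4i32 halves, which are emitted together as a
    // single STNP.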
4660 ElementCount EC = MemVT.getVectorElementCount();
4661 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
4662 EC.isKnownEven() &&
4663 ((MemVT.getScalarSizeInBits() == 8u ||
4664 MemVT.getScalarSizeInBits() == 16u ||
4665 MemVT.getScalarSizeInBits() == 32u ||
4666 MemVT.getScalarSizeInBits() == 64u))) {
4667 SDValue Lo =
4668 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4669 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4670 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
4671 SDValue Hi =
4672 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
4673 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
4674 StoreNode->getValue(),
4675 DAG.getConstant(EC.getKnownMinValue() / 2, Dl, MVT::i64));
4676 SDValue Result = DAG.getMemIntrinsicNode(
4677 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
4678 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4679 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4680 return Result;
4681 }
4682 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
4683 assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
4684 SDValue Lo =
4685 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4686 DAG.getConstant(0, Dl, MVT::i64));
4687 SDValue Hi =
4688 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
4689 DAG.getConstant(1, Dl, MVT::i64));
4690 SDValue Result = DAG.getMemIntrinsicNode(
4691 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
4692 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
4693 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
4694 return Result;
4695 } else if (MemVT == MVT::i64x8) {
4696 SDValue Value = StoreNode->getValue();
4697 assert(Value->getValueType(0) == MVT::i64x8);
4698 SDValue Chain = StoreNode->getChain();
4699 SDValue Base = StoreNode->getBasePtr();
4700 EVT PtrVT = Base.getValueType();
4701 for (unsigned i = 0; i < 8; i++) {
4702 SDValue Part = DAG.getNode(AArch64ISD::LS64_EXTRACT, Dl, MVT::i64,
4703 Value, DAG.getConstant(i, Dl, MVT::i32));
4704 SDValue Ptr = DAG.getNode(ISD::ADD, Dl, PtrVT, Base,
4705 DAG.getConstant(i * 8, Dl, PtrVT));
4706 Chain = DAG.getStore(Chain, Dl, Part, Ptr, StoreNode->getPointerInfo(),
4707 StoreNode->getOriginalAlign());
4708 }
4709 return Chain;
4710 }
4711
4712 return SDValue();
4713}
4714
4715SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
4716 SelectionDAG &DAG) const {
4717 SDLoc DL(Op);
4718 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
4719 assert(LoadNode && "Expected custom lowering of a load node");
4720
4721 if (LoadNode->getMemoryVT() == MVT::i64x8) {
4722 SmallVector<SDValue, 8> Ops;
4723 SDValue Base = LoadNode->getBasePtr();
4724 SDValue Chain = LoadNode->getChain();
4725 EVT PtrVT = Base.getValueType();
4726 for (unsigned i = 0; i < 8; i++) {
4727 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Base,
4728 DAG.getConstant(i * 8, DL, PtrVT));
4729 SDValue Part = DAG.getLoad(MVT::i64, DL, Chain, Ptr,
4730 LoadNode->getPointerInfo(),
4731 LoadNode->getOriginalAlign());
4732 Ops.push_back(Part);
4733 Chain = SDValue(Part.getNode(), 1);
4734 }
4735 SDValue Loaded = DAG.getNode(AArch64ISD::LS64_BUILD, DL, MVT::i64x8, Ops);
4736 return DAG.getMergeValues({Loaded, Chain}, DL);
4737 }
4738
4739 // Custom lowering for extending v4i8 vector loads.
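  // For illustration: the four i8 elements are loaded as one 32-bit scalar
  // into lane 0 of a v2f32, reinterpreted as v8i8, extended to v8i16, and the
  // low v4i16 half extracted (with one more extension step for a v4i32
  // result).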
4740 EVT VT = Op->getValueType(0);
4741 assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
4742
4743 if (LoadNode->getMemoryVT() != MVT::v4i8)
4744 return SDValue();
4745
4746 unsigned ExtType;
4747 if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
4748 ExtType = ISD::SIGN_EXTEND;
4749 else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
4750 LoadNode->getExtensionType() == ISD::EXTLOAD)
4751 ExtType = ISD::ZERO_EXTEND;
4752 else
4753 return SDValue();
4754
4755 SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
4756 LoadNode->getBasePtr(), MachinePointerInfo());
4757 SDValue Chain = Load.getValue(1);
4758 SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
4759 SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
4760 SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
4761 Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
4762 DAG.getConstant(0, DL, MVT::i64));
4763 if (VT == MVT::v4i32)
4764 Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
4765 return DAG.getMergeValues({Ext, Chain}, DL);
4766}
4767
4768// Generate SUBS and CSEL for integer abs.
4769SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
4770 MVT VT = Op.getSimpleValueType();
4771
4772 if (VT.isVector())
4773 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ABS_MERGE_PASSTHRU);
4774
4775 SDLoc DL(Op);
4776 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
4777 Op.getOperand(0));
4778 // Generate SUBS & CSEL.
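  // For illustration: SUBS computes x - 0 and sets the flags; CSEL then
  // selects x when PL holds (x >= 0) and 0 - x otherwise, giving |x| without
  // a branch.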
4779 SDValue Cmp =
4780 DAG.getNode(AArch64ISD::SUBS, DL, DAG.getVTList(VT, MVT::i32),
4781 Op.getOperand(0), DAG.getConstant(0, DL, VT));
4782 return DAG.getNode(AArch64ISD::CSEL, DL, VT, Op.getOperand(0), Neg,
4783 DAG.getConstant(AArch64CC::PL, DL, MVT::i32),
4784 Cmp.getValue(1));
4785}
4786
4787SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
4788 SelectionDAG &DAG) const {
4789 LLVM_DEBUG(dbgs() << "Custom lowering: ");
4790 LLVM_DEBUG(Op.dump());
4791
4792 switch (Op.getOpcode()) {
4793 default:
4794 llvm_unreachable("unimplemented operand");
4795 return SDValue();
4796 case ISD::BITCAST:
4797 return LowerBITCAST(Op, DAG);
4798 case ISD::GlobalAddress:
4799 return LowerGlobalAddress(Op, DAG);
4800 case ISD::GlobalTLSAddress:
4801 return LowerGlobalTLSAddress(Op, DAG);
4802 case ISD::SETCC:
4803 case ISD::STRICT_FSETCC:
4804 case ISD::STRICT_FSETCCS:
4805 return LowerSETCC(Op, DAG);
4806 case ISD::BR_CC:
4807 return LowerBR_CC(Op, DAG);
4808 case ISD::SELECT:
4809 return LowerSELECT(Op, DAG);
4810 case ISD::SELECT_CC:
4811 return LowerSELECT_CC(Op, DAG);
4812 case ISD::JumpTable:
4813 return LowerJumpTable(Op, DAG);
4814 case ISD::BR_JT:
4815 return LowerBR_JT(Op, DAG);
4816 case ISD::ConstantPool:
4817 return LowerConstantPool(Op, DAG);
4818 case ISD::BlockAddress:
4819 return LowerBlockAddress(Op, DAG);
4820 case ISD::VASTART:
4821 return LowerVASTART(Op, DAG);
4822 case ISD::VACOPY:
4823 return LowerVACOPY(Op, DAG);
4824 case ISD::VAARG:
4825 return LowerVAARG(Op, DAG);
4826 case ISD::ADDC:
4827 case ISD::ADDE:
4828 case ISD::SUBC:
4829 case ISD::SUBE:
4830 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
4831 case ISD::SADDO:
4832 case ISD::UADDO:
4833 case ISD::SSUBO:
4834 case ISD::USUBO:
4835 case ISD::SMULO:
4836 case ISD::UMULO:
4837 return LowerXALUO(Op, DAG);
4838 case ISD::FADD:
4839 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FADD_PRED);
4840 case ISD::FSUB:
4841 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSUB_PRED);
4842 case ISD::FMUL:
4843 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMUL_PRED);
4844 case ISD::FMA:
4845 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMA_PRED);
4846 case ISD::FDIV:
4847 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
4848 case ISD::FNEG:
4849 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
4850 case ISD::FCEIL:
4851 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
4852 case ISD::FFLOOR:
4853 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FFLOOR_MERGE_PASSTHRU);
4854 case ISD::FNEARBYINT:
4855 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEARBYINT_MERGE_PASSTHRU);
4856 case ISD::FRINT:
4857 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FRINT_MERGE_PASSTHRU);
4858 case ISD::FROUND:
4859 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUND_MERGE_PASSTHRU);
4860 case ISD::FROUNDEVEN:
4861 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU);
4862 case ISD::FTRUNC:
4863 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FTRUNC_MERGE_PASSTHRU);
4864 case ISD::FSQRT:
4865 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FSQRT_MERGE_PASSTHRU);
4866 case ISD::FABS:
4867 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FABS_MERGE_PASSTHRU);
4868 case ISD::FP_ROUND:
4869 case ISD::STRICT_FP_ROUND:
4870 return LowerFP_ROUND(Op, DAG);
4871 case ISD::FP_EXTEND:
4872 return LowerFP_EXTEND(Op, DAG);
4873 case ISD::FRAMEADDR:
4874 return LowerFRAMEADDR(Op, DAG);
4875 case ISD::SPONENTRY:
4876 return LowerSPONENTRY(Op, DAG);
4877 case ISD::RETURNADDR:
4878 return LowerRETURNADDR(Op, DAG);
4879 case ISD::ADDROFRETURNADDR:
4880 return LowerADDROFRETURNADDR(Op, DAG);
4881 case ISD::CONCAT_VECTORS:
4882 return LowerCONCAT_VECTORS(Op, DAG);
4883 case ISD::INSERT_VECTOR_ELT:
4884 return LowerINSERT_VECTOR_ELT(Op, DAG);
4885 case ISD::EXTRACT_VECTOR_ELT:
4886 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
4887 case ISD::BUILD_VECTOR:
4888 return LowerBUILD_VECTOR(Op, DAG);
4889 case ISD::VECTOR_SHUFFLE:
4890 return LowerVECTOR_SHUFFLE(Op, DAG);
4891 case ISD::SPLAT_VECTOR:
4892 return LowerSPLAT_VECTOR(Op, DAG);
4893 case ISD::EXTRACT_SUBVECTOR:
4894 return LowerEXTRACT_SUBVECTOR(Op, DAG);
4895 case ISD::INSERT_SUBVECTOR:
4896 return LowerINSERT_SUBVECTOR(Op, DAG);
4897 case ISD::SDIV:
4898 case ISD::UDIV:
4899 return LowerDIV(Op, DAG);
4900 case ISD::SMIN:
4901 case ISD::UMIN:
4902 case ISD::SMAX:
4903 case ISD::UMAX:
4904 return LowerMinMax(Op, DAG);
4905 case ISD::SRA:
4906 case ISD::SRL:
4907 case ISD::SHL:
4908 return LowerVectorSRA_SRL_SHL(Op, DAG);
4909 case ISD::SHL_PARTS:
4910 case ISD::SRL_PARTS:
4911 case ISD::SRA_PARTS:
4912 return LowerShiftParts(Op, DAG);
4913 case ISD::CTPOP:
4914 return LowerCTPOP(Op, DAG);
4915 case ISD::FCOPYSIGN:
4916 return LowerFCOPYSIGN(Op, DAG);
4917 case ISD::OR:
4918 return LowerVectorOR(Op, DAG);
4919 case ISD::XOR:
4920 return LowerXOR(Op, DAG);
4921 case ISD::PREFETCH:
4922 return LowerPREFETCH(Op, DAG);
4923 case ISD::SINT_TO_FP:
4924 case ISD::UINT_TO_FP:
4925 case ISD::STRICT_SINT_TO_FP:
4926 case ISD::STRICT_UINT_TO_FP:
4927 return LowerINT_TO_FP(Op, DAG);
4928 case ISD::FP_TO_SINT:
4929 case ISD::FP_TO_UINT:
4930 case ISD::STRICT_FP_TO_SINT:
4931 case ISD::STRICT_FP_TO_UINT:
4932 return LowerFP_TO_INT(Op, DAG);
4933 case ISD::FP_TO_SINT_SAT:
4934 case ISD::FP_TO_UINT_SAT:
4935 return LowerFP_TO_INT_SAT(Op, DAG);
4936 case ISD::FSINCOS:
4937 return LowerFSINCOS(Op, DAG);
4938 case ISD::FLT_ROUNDS_:
4939 return LowerFLT_ROUNDS_(Op, DAG);
4940 case ISD::SET_ROUNDING:
4941 return LowerSET_ROUNDING(Op, DAG);
4942 case ISD::MUL:
4943 return LowerMUL(Op, DAG);
4944 case ISD::MULHS:
4945 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHS_PRED,
4946 /*OverrideNEON=*/true);
4947 case ISD::MULHU:
4948 return LowerToPredicatedOp(Op, DAG, AArch64ISD::MULHU_PRED,
4949 /*OverrideNEON=*/true);
4950 case ISD::INTRINSIC_WO_CHAIN:
4951 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
4952 case ISD::STORE:
4953 return LowerSTORE(Op, DAG);
4954 case ISD::MSTORE:
4955 return LowerFixedLengthVectorMStoreToSVE(Op, DAG);
4956 case ISD::MGATHER:
4957 return LowerMGATHER(Op, DAG);
4958 case ISD::MSCATTER:
4959 return LowerMSCATTER(Op, DAG);
4960 case ISD::VECREDUCE_SEQ_FADD:
4961 return LowerVECREDUCE_SEQ_FADD(Op, DAG);
4962 case ISD::VECREDUCE_ADD:
4963 case ISD::VECREDUCE_AND:
4964 case ISD::VECREDUCE_OR:
4965 case ISD::VECREDUCE_XOR:
4966 case ISD::VECREDUCE_SMAX:
4967 case ISD::VECREDUCE_SMIN:
4968 case ISD::VECREDUCE_UMAX:
4969 case ISD::VECREDUCE_UMIN:
4970 case ISD::VECREDUCE_FADD:
4971 case ISD::VECREDUCE_FMAX:
4972 case ISD::VECREDUCE_FMIN:
4973 return LowerVECREDUCE(Op, DAG);
4974 case ISD::ATOMIC_LOAD_SUB:
4975 return LowerATOMIC_LOAD_SUB(Op, DAG);
4976 case ISD::ATOMIC_LOAD_AND:
4977 return LowerATOMIC_LOAD_AND(Op, DAG);
4978 case ISD::DYNAMIC_STACKALLOC:
4979 return LowerDYNAMIC_STACKALLOC(Op, DAG);
4980 case ISD::VSCALE:
4981 return LowerVSCALE(Op, DAG);
4982 case ISD::ANY_EXTEND:
4983 case ISD::SIGN_EXTEND:
4984 case ISD::ZERO_EXTEND:
4985 return LowerFixedLengthVectorIntExtendToSVE(Op, DAG);
4986 case ISD::SIGN_EXTEND_INREG: {
4987 // Only custom lower when ExtraVT has a legal byte based element type.
4988 EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
4989 EVT ExtraEltVT = ExtraVT.getVectorElementType();
4990 if ((ExtraEltVT != MVT::i8) && (ExtraEltVT != MVT::i16) &&
4991 (ExtraEltVT != MVT::i32) && (ExtraEltVT != MVT::i64))
4992 return SDValue();
4993
4994 return LowerToPredicatedOp(Op, DAG,
4995 AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU);
4996 }
4997 case ISD::TRUNCATE:
4998 return LowerTRUNCATE(Op, DAG);
4999 case ISD::MLOAD:
5000 return LowerMLOAD(Op, DAG);
5001 case ISD::LOAD:
5002 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
5003 return LowerFixedLengthVectorLoadToSVE(Op, DAG);
5004 return LowerLOAD(Op, DAG);
5005 case ISD::ADD:
5006 return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
5007 case ISD::AND:
5008 return LowerToScalableOp(Op, DAG);
5009 case ISD::SUB:
5010 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
5011 case ISD::FMAXIMUM:
5012 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
5013 case ISD::FMAXNUM:
5014 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAXNM_PRED);
5015 case ISD::FMINIMUM:
5016 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMIN_PRED);
5017 case ISD::FMINNUM:
5018 return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMINNM_PRED);
5019 case ISD::VSELECT:
5020 return LowerFixedLengthVectorSelectToSVE(Op, DAG);
5021 case ISD::ABS:
5022 return LowerABS(Op, DAG);
5023 case ISD::BITREVERSE:
5024 return LowerBitreverse(Op, DAG);
5025 case ISD::BSWAP:
5026 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BSWAP_MERGE_PASSTHRU);
5027 case ISD::CTLZ:
5028 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTLZ_MERGE_PASSTHRU,
5029 /*OverrideNEON=*/true);
5030 case ISD::CTTZ:
5031 return LowerCTTZ(Op, DAG);
5032 case ISD::VECTOR_SPLICE:
5033 return LowerVECTOR_SPLICE(Op, DAG);
5034 }
5035}
5036
5037bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
5038 return !Subtarget->useSVEForFixedLengthVectors();
5039}
5040
5041bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
5042 EVT VT, bool OverrideNEON) const {
5043 if (!Subtarget->useSVEForFixedLengthVectors())
5044 return false;
5045
5046 if (!VT.isFixedLengthVector())
5047 return false;
5048
5049 // Don't use SVE for vectors we cannot scalarize if required.
5050 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
5051 // Fixed length predicates should be promoted to i8.
5052 // NOTE: This is consistent with how NEON (and thus 64/128bit vectors) work.
5053 case MVT::i1:
5054 default:
5055 return false;
5056 case MVT::i8:
5057 case MVT::i16:
5058 case MVT::i32:
5059 case MVT::i64:
5060 case MVT::f16:
5061 case MVT::f32:
5062 case MVT::f64:
5063 break;
5064 }
5065
5066 // All SVE implementations support NEON sized vectors.
5067 if (OverrideNEON && (VT.is128BitVector() || VT.is64BitVector()))
5068 return true;
5069
5070 // Ensure NEON MVTs only belong to a single register class.
5071 if (VT.getFixedSizeInBits() <= 128)
5072 return false;
5073
5074 // Don't use SVE for types that don't fit.
5075 if (VT.getFixedSizeInBits() > Subtarget->getMinSVEVectorSizeInBits())
5076 return false;
5077
5078 // TODO: Perhaps an artificial restriction, but worth having whilst getting
5079 // the base fixed length SVE support in place.
5080 if (!VT.isPow2VectorType())
5081 return false;
5082
5083 return true;
5084}
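
For reference, the decision above can be restated as a small standalone predicate. Everything below is an illustrative sketch: the 256-bit minimum SVE width is an assumed subtarget configuration and the function and parameter names are stand-ins, not LLVM APIs.

// Sketch of useSVEForFixedLengthVectorVT for an assumed subtarget with
// fixed-length SVE enabled and a guaranteed minimum vector size of 256 bits.
bool sketchUseSVEForFixedLengthVT(bool EltTypeSupported, unsigned TotalBits,
                                  bool IsPow2NumElements, bool OverrideNEON) {
  const unsigned MinSVEBits = 256;        // assumed subtarget floor
  if (!EltTypeSupported)                  // e.g. i1 predicates bail out
    return false;
  if (OverrideNEON && (TotalBits == 64 || TotalBits == 128))
    return true;                          // NEON-sized vectors always fit SVE
  if (TotalBits <= 128)                   // keep NEON types in NEON classes
    return false;
  if (TotalBits > MinSVEBits)             // wider than the guaranteed width
    return false;
  return IsPow2NumElements;               // current power-of-two restriction
}
// With MinSVEBits == 256: v4i32 (128 bits) -> false, v8i32 (256 bits) -> true,
// v16i32 (512 bits) -> false.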
5085
5086//===----------------------------------------------------------------------===//
5087// Calling Convention Implementation
5088//===----------------------------------------------------------------------===//
5089
5090/// Selects the correct CCAssignFn for a given CallingConvention value.
5091CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
5092 bool IsVarArg) const {
5093 switch (CC) {
5094 default:
5095 report_fatal_error("Unsupported calling convention.");
5096 case CallingConv::WebKit_JS:
5097 return CC_AArch64_WebKit_JS;
5098 case CallingConv::GHC:
5099 return CC_AArch64_GHC;
5100 case CallingConv::C:
5101 case CallingConv::Fast:
5102 case CallingConv::PreserveMost:
5103 case CallingConv::CXX_FAST_TLS:
5104 case CallingConv::Swift:
5105 case CallingConv::SwiftTail:
5106 case CallingConv::Tail:
5107 if (Subtarget->isTargetWindows() && IsVarArg)
5108 return CC_AArch64_Win64_VarArg;
5109 if (!Subtarget->isTargetDarwin())
5110 return CC_AArch64_AAPCS;
5111 if (!IsVarArg)
5112 return CC_AArch64_DarwinPCS;
5113 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
5114 : CC_AArch64_DarwinPCS_VarArg;
5115 case CallingConv::Win64:
5116 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
5117 case CallingConv::CFGuard_Check:
5118 return CC_AArch64_Win64_CFGuard_Check;
5119 case CallingConv::AArch64_VectorCall:
5120 case CallingConv::AArch64_SVE_VectorCall:
5121 return CC_AArch64_AAPCS;
5122 }
5123}
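
The switch above effectively encodes a small decision tree for the default C-like conventions. A hedged restatement for CallingConv::C (the enum and function below are stand-ins for illustration, not LLVM types):

enum class SketchCC {
  AAPCS, Win64VarArg, DarwinPCS, DarwinPCS_VarArg, DarwinPCS_ILP32_VarArg
};

// Mirrors the branch order above for the C calling convention.
SketchCC sketchCCForC(bool IsWindows, bool IsDarwin, bool IsILP32,
                      bool IsVarArg) {
  if (IsWindows && IsVarArg) return SketchCC::Win64VarArg;
  if (!IsDarwin)             return SketchCC::AAPCS;
  if (!IsVarArg)             return SketchCC::DarwinPCS;
  return IsILP32 ? SketchCC::DarwinPCS_ILP32_VarArg
                 : SketchCC::DarwinPCS_VarArg;
}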
5124
5125CCAssignFn *
5126AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
5127 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
5128 : RetCC_AArch64_AAPCS;
5129}
5130
5131SDValue AArch64TargetLowering::LowerFormalArguments(
5132 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
5133 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5134 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5135 MachineFunction &MF = DAG.getMachineFunction();
5136 MachineFrameInfo &MFI = MF.getFrameInfo();
5137 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5138
5139 // Assign locations to all of the incoming arguments.
5140 SmallVector<CCValAssign, 16> ArgLocs;
5141 DenseMap<unsigned, SDValue> CopiedRegs;
5142 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
5143 *DAG.getContext());
5144
5145 // At this point, Ins[].VT may already be promoted to i32. To correctly
5146 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5147 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5148 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
5149 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
5150 // LocVT.
5151 unsigned NumArgs = Ins.size();
5152 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
5153 unsigned CurArgIdx = 0;
5154 for (unsigned i = 0; i != NumArgs; ++i) {
5155 MVT ValVT = Ins[i].VT;
5156 if (Ins[i].isOrigArg()) {
5157 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
5158 CurArgIdx = Ins[i].getOrigArgIndex();
5159
5160 // Get type of the original argument.
5161 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
5162 /*AllowUnknown*/ true);
5163 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
5164 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5165 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5166 ValVT = MVT::i8;
5167 else if (ActualMVT == MVT::i16)
5168 ValVT = MVT::i16;
5169 }
5170 bool UseVarArgCC = false;
5171 if (IsWin64)
5172 UseVarArgCC = isVarArg;
5173 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5174 bool Res =
5175 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
5176 assert(!Res && "Call operand has unhandled type");
5177 (void)Res;
5178 }
5179 SmallVector<SDValue, 16> ArgValues;
5180 unsigned ExtraArgLocs = 0;
5181 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
5182 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5183
5184 if (Ins[i].Flags.isByVal()) {
5185 // Byval is used for HFAs in the PCS, but the system should work in a
5186 // non-compliant manner for larger structs.
5187 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5188 int Size = Ins[i].Flags.getByValSize();
5189 unsigned NumRegs = (Size + 7) / 8;
5190
5191 // FIXME: This works on big-endian for composite byvals, which are the common
5192 // case. It should also work for fundamental types.
5193 unsigned FrameIdx =
5194 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
5195 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
5196 InVals.push_back(FrameIdxN);
5197
5198 continue;
5199 }
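
The byval path above sizes its fixed frame object in whole 8-byte GPR slots. The rounding is easy to check in isolation (a standalone sketch; byvalFrameBytes is not an LLVM helper):

// Round a byval aggregate up to whole 8-byte slots, as the code above does.
constexpr unsigned byvalFrameBytes(unsigned Size) {
  unsigned NumRegs = (Size + 7) / 8;
  return 8 * NumRegs;
}
static_assert(byvalFrameBytes(20) == 24, "20-byte struct -> three GPR slots");
static_assert(byvalFrameBytes(8) == 8, "exactly one slot");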
5200
5201 if (Ins[i].Flags.isSwiftAsync())
5202 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
5203
5204 SDValue ArgValue;
5205 if (VA.isRegLoc()) {
5206 // Arguments stored in registers.
5207 EVT RegVT = VA.getLocVT();
5208 const TargetRegisterClass *RC;
5209
5210 if (RegVT == MVT::i32)
5211 RC = &AArch64::GPR32RegClass;
5212 else if (RegVT == MVT::i64)
5213 RC = &AArch64::GPR64RegClass;
5214 else if (RegVT == MVT::f16 || RegVT == MVT::bf16)
5215 RC = &AArch64::FPR16RegClass;
5216 else if (RegVT == MVT::f32)
5217 RC = &AArch64::FPR32RegClass;
5218 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
5219 RC = &AArch64::FPR64RegClass;
5220 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
5221 RC = &AArch64::FPR128RegClass;
5222 else if (RegVT.isScalableVector() &&
5223 RegVT.getVectorElementType() == MVT::i1)
5224 RC = &AArch64::PPRRegClass;
5225 else if (RegVT.isScalableVector())
5226 RC = &AArch64::ZPRRegClass;
5227 else
5228 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
5229
5230 // Transform the arguments in physical registers into virtual ones.
5231 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
5232 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
5233
5234 // If this is an 8, 16 or 32-bit value, it is really passed promoted
5235 // to 64 bits. Insert an assert[sz]ext to capture this, then
5236 // truncate to the right size.
5237 switch (VA.getLocInfo()) {
5238 default:
5239 llvm_unreachable("Unknown loc info!");
5240 case CCValAssign::Full:
5241 break;
5242 case CCValAssign::Indirect:
5243 assert(VA.getValVT().isScalableVector() &&
5244        "Only scalable vectors can be passed indirectly");
5245 break;
5246 case CCValAssign::BCvt:
5247 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
5248 break;
5249 case CCValAssign::AExt:
5250 case CCValAssign::SExt:
5251 case CCValAssign::ZExt:
5252 break;
5253 case CCValAssign::AExtUpper:
5254 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
5255 DAG.getConstant(32, DL, RegVT));
5256 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
5257 break;
5258 }
5259 } else { // VA.isRegLoc()
5260 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
5261 unsigned ArgOffset = VA.getLocMemOffset();
5262 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
5263 ? VA.getLocVT().getSizeInBits()
5264 : VA.getValVT().getSizeInBits()) / 8;
5265
5266 uint32_t BEAlign = 0;
5267 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
5268 !Ins[i].Flags.isInConsecutiveRegs())
5269 BEAlign = 8 - ArgSize;
5270
5271 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
5272
5273 // Create load nodes to retrieve arguments from the stack.
5274 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
5275
5276 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
5277 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
5278 MVT MemVT = VA.getValVT();
5279
5280 switch (VA.getLocInfo()) {
5281 default:
5282 break;
5283 case CCValAssign::Trunc:
5284 case CCValAssign::BCvt:
5285 MemVT = VA.getLocVT();
5286 break;
5287 case CCValAssign::Indirect:
5288 assert(VA.getValVT().isScalableVector() &&
5289        "Only scalable vectors can be passed indirectly");
5290 MemVT = VA.getLocVT();
5291 break;
5292 case CCValAssign::SExt:
5293 ExtType = ISD::SEXTLOAD;
5294 break;
5295 case CCValAssign::ZExt:
5296 ExtType = ISD::ZEXTLOAD;
5297 break;
5298 case CCValAssign::AExt:
5299 ExtType = ISD::EXTLOAD;
5300 break;
5301 }
5302
5303 ArgValue = DAG.getExtLoad(
5304 ExtType, DL, VA.getLocVT(), Chain, FIN,
5305 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
5306 MemVT);
5307 }
5308
5309 if (VA.getLocInfo() == CCValAssign::Indirect) {
5310 assert(VA.getValVT().isScalableVector() &&
5311        "Only scalable vectors can be passed indirectly");
5312
5313 uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
5314 unsigned NumParts = 1;
5315 if (Ins[i].Flags.isInConsecutiveRegs()) {
5316 assert(!Ins[i].Flags.isInConsecutiveRegsLast());
5317 while (!Ins[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
5318 ++NumParts;
5319 }
5320
5321 MVT PartLoad = VA.getValVT();
5322 SDValue Ptr = ArgValue;
5323
5324 // Ensure we generate all loads for each tuple part, whilst updating the
5325 // pointer after each load correctly using vscale.
5326 while (NumParts > 0) {
5327 ArgValue = DAG.getLoad(PartLoad, DL, Chain, Ptr, MachinePointerInfo());
5328 InVals.push_back(ArgValue);
5329 NumParts--;
5330 if (NumParts > 0) {
5331 SDValue BytesIncrement = DAG.getVScale(
5332 DL, Ptr.getValueType(),
5333 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
5334 SDNodeFlags Flags;
5335 Flags.setNoUnsignedWrap(true);
5336 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
5337 BytesIncrement, Flags);
5338 ExtraArgLocs++;
5339 i++;
5340 }
5341 }
5342 } else {
5343 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
5344 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
5345 ArgValue, DAG.getValueType(MVT::i32));
5346 InVals.push_back(ArgValue);
5347 }
5348 }
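
For arguments passed indirectly, the loop above loads one scalable register per tuple part and bumps the pointer by a vscale-scaled amount between loads. Stripped of SDValues, the address arithmetic amounts to the following sketch (nextPartAddress and its parameters are illustrative stand-ins):

#include <cstdint>

// Byte address of the next tuple part: the minimum store size of the value
// type, scaled by the runtime vscale, matching DAG.getVScale(..., PartSize).
uint64_t nextPartAddress(uint64_t Ptr, uint64_t PartMinStoreSize,
                         uint64_t VScale) {
  return Ptr + VScale * PartMinStoreSize;
}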
5349 assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
5350
5351 // varargs
5352 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5353 if (isVarArg) {
5354 if (!Subtarget->isTargetDarwin() || IsWin64) {
5355 // The AAPCS variadic function ABI is identical to the non-variadic
5356 // one. As a result there may be more arguments in registers and we should
5357 // save them for future reference.
5358 // Win64 variadic functions also pass arguments in registers, but all float
5359 // arguments are passed in integer registers.
5360 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
5361 }
5362
5363 // This will point to the next argument passed via stack.
5364 unsigned StackOffset = CCInfo.getNextStackOffset();
5365 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
5366 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
5367 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
5368
5369 if (MFI.hasMustTailInVarArgFunc()) {
5370 SmallVector<MVT, 2> RegParmTypes;
5371 RegParmTypes.push_back(MVT::i64);
5372 RegParmTypes.push_back(MVT::f128);
5373 // Compute the set of forwarded registers. The rest are scratch.
5374 SmallVectorImpl<ForwardedRegister> &Forwards =
5375 FuncInfo->getForwardedMustTailRegParms();
5376 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
5377 CC_AArch64_AAPCS);
5378
5379 // Conservatively forward X8, since it might be used for aggregate return.
5380 if (!CCInfo.isAllocated(AArch64::X8)) {
5381 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
5382 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
5383 }
5384 }
5385 }
5386
5387 // On Windows, InReg pointers must be returned, so record the pointer in a
5388 // virtual register at the start of the function so it can be returned in the
5389 // epilogue.
5390 if (IsWin64) {
5391 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
5392 if (Ins[I].Flags.isInReg()) {
5393 assert(!FuncInfo->getSRetReturnReg());
5394
5395 MVT PtrTy = getPointerTy(DAG.getDataLayout());
5396 Register Reg =
5397 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
5398 FuncInfo->setSRetReturnReg(Reg);
5399
5400 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
5401 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
5402 break;
5403 }
5404 }
5405 }
5406
5407 unsigned StackArgSize = CCInfo.getNextStackOffset();
5408 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5409 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
5410 // This is a non-standard ABI so by fiat I say we're allowed to make full
5411 // use of the stack area to be popped, which must be aligned to 16 bytes in
5412 // any case:
5413 StackArgSize = alignTo(StackArgSize, 16);
5414
5415 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
5416 // a multiple of 16.
5417 FuncInfo->setArgumentStackToRestore(StackArgSize);
5418
5419 // This realignment carries over to the available bytes below. Our own
5420 // callers will guarantee the space is free by giving an aligned value to
5421 // CALLSEQ_START.
5422 }
5423 // Even if we're not expected to free up the space, it's useful to know how
5424 // much is there while considering tail calls (because we can reuse it).
5425 FuncInfo->setBytesInStackArgArea(StackArgSize);
5426
5427 if (Subtarget->hasCustomCallingConv())
5428 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
5429
5430 return Chain;
5431}
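
Two pieces of arithmetic near the end of LowerFormalArguments are easy to misread: the varargs stack index is rounded up to 8 bytes (4 under ILP32), and when the callee restores the stack the popped area is padded to a 16-byte multiple. A standalone sketch of that rounding (alignToSketch is illustrative, not the LLVM alignTo):

// The round-up used for the varargs stack index and the popped-area size.
constexpr unsigned alignToSketch(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;
}
static_assert(alignToSketch(20, 8) == 24, "varargs offset under LP64");
static_assert(alignToSketch(20, 4) == 20, "varargs offset under ILP32");
static_assert(alignToSketch(40, 16) == 48, "callee-popped area padding");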
5432
5433void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
5434 SelectionDAG &DAG,
5435 const SDLoc &DL,
5436 SDValue &Chain) const {
5437 MachineFunction &MF = DAG.getMachineFunction();
5438 MachineFrameInfo &MFI = MF.getFrameInfo();
5439 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5440 auto PtrVT = getPointerTy(DAG.getDataLayout());
5441 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
5442
5443 SmallVector<SDValue, 8> MemOps;
5444
5445 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
5446 AArch64::X3, AArch64::X4, AArch64::X5,
5447 AArch64::X6, AArch64::X7 };
5448 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
5449 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
5450
5451 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
5452 int GPRIdx = 0;
5453 if (GPRSaveSize != 0) {
5454 if (IsWin64) {
5455 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
5456 if (GPRSaveSize & 15)
5457 // The extra size here, if triggered, will always be 8.
5458 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
5459 } else
5460 GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
5461
5462 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
5463
5464 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
5465 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
5466 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
5467 SDValue Store = DAG.getStore(
5468 Val.getValue(1), DL, Val, FIN,
5469 IsWin64
5470 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
5471 GPRIdx,
5472 (i - FirstVariadicGPR) * 8)
5473 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
5474 MemOps.push_back(Store);
5475 FIN =
5476 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
5477 }
5478 }
5479 FuncInfo->setVarArgsGPRIndex(GPRIdx);
5480 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
5481
5482 if (Subtarget->hasFPARMv8() && !IsWin64) {
5483 static const MCPhysReg FPRArgRegs[] = {
5484 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
5485 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
5486 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
5487 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
5488
5489 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
5490 int FPRIdx = 0;
5491 if (FPRSaveSize != 0) {
5492 FPRIdx = MFI.CreateStackObject(FPRSaveSize, Align(16), false);
5493
5494 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
5495
5496 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
5497 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
5498 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
5499
5500 SDValue Store = DAG.getStore(
5501 Val.getValue(1), DL, Val, FIN,
5502 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
5503 MemOps.push_back(Store);
5504 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
5505 DAG.getConstant(16, DL, PtrVT));
5506 }
5507 }
5508 FuncInfo->setVarArgsFPRIndex(FPRIdx);
5509 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
5510 }
5511
5512 if (!MemOps.empty()) {
5513 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5514 }
5515}
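
The save areas built above cover whatever remains of the eight integer argument registers (X0-X7, 8 bytes each) and, off Windows, the eight vector argument registers (Q0-Q7, 16 bytes each). Their sizes reduce to simple arithmetic (sketch only; the helpers below are not LLVM code):

// Bytes needed to spill the unnamed-argument portion of each register file.
constexpr unsigned gprSaveSizeSketch(unsigned FirstVariadicGPR) {
  return 8 * (8 - FirstVariadicGPR);
}
constexpr unsigned fprSaveSizeSketch(unsigned FirstVariadicFPR) {
  return 16 * (8 - FirstVariadicFPR);
}
static_assert(gprSaveSizeSketch(2) == 48, "x2..x7 still need saving");
static_assert(fprSaveSizeSketch(8) == 0, "no FPR area when all are named");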
5516
5517/// LowerCallResult - Lower the result values of a call into the
5518/// appropriate copies out of appropriate physical registers.
5519SDValue AArch64TargetLowering::LowerCallResult(
5520 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
5521 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
5522 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
5523 SDValue ThisVal) const {
5524 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
5525 // Assign locations to each value returned by this call.
5526 SmallVector<CCValAssign, 16> RVLocs;
5527 DenseMap<unsigned, SDValue> CopiedRegs;
5528 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5529 *DAG.getContext());
5530 CCInfo.AnalyzeCallResult(Ins, RetCC);
5531
5532 // Copy all of the result registers out of their specified physreg.
5533 for (unsigned i = 0; i != RVLocs.size(); ++i) {
5534 CCValAssign VA = RVLocs[i];
5535
5536 // Pass 'this' value directly from the argument to return value, to avoid
5537 // reg unit interference
5538 if (i == 0 && isThisReturn) {
5539 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
5540        "unexpected return calling convention register assignment");
5541 InVals.push_back(ThisVal);
5542 continue;
5543 }
5544
5545 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
5546 // allows one use of a physreg per block.
5547 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
5548 if (!Val) {
5549 Val =
5550 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
5551 Chain = Val.getValue(1);
5552 InFlag = Val.getValue(2);
5553 CopiedRegs[VA.getLocReg()] = Val;
5554 }
5555
5556 switch (VA.getLocInfo()) {
5557 default:
5558 llvm_unreachable("Unknown loc info!");
5559 case CCValAssign::Full:
5560 break;
5561 case CCValAssign::BCvt:
5562 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
5563 break;
5564 case CCValAssign::AExtUpper:
5565 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
5566 DAG.getConstant(32, DL, VA.getLocVT()));
5567 LLVM_FALLTHROUGH;
5568 case CCValAssign::AExt:
5569 LLVM_FALLTHROUGH;
5570 case CCValAssign::ZExt:
5571 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
5572 break;
5573 }
5574
5575 InVals.push_back(Val);
5576 }
5577
5578 return Chain;
5579}
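
The AExtUpper case above recovers a 32-bit result that the callee returned in the upper half of a 64-bit location register: shift right by 32, then truncate. In plain integer arithmetic (a sketch, not the DAG form):

#include <cstdint>

// Undo the AExtUpper packing: the value lives in bits [63:32] of the register.
uint32_t unpackUpperHalfSketch(uint64_t LocReg) {
  return static_cast<uint32_t>(LocReg >> 32);
}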
5580
5581/// Return true if the calling convention is one that we can guarantee TCO for.
5582static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
5583 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
5584 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
5585}
5586
5587/// Return true if we might ever do TCO for calls with this calling convention.
5588static bool mayTailCallThisCC(CallingConv::ID CC) {
5589 switch (CC) {
5590 case CallingConv::C:
5591 case CallingConv::AArch64_SVE_VectorCall:
5592 case CallingConv::PreserveMost:
5593 case CallingConv::Swift:
5594 case CallingConv::SwiftTail:
5595 case CallingConv::Tail:
5596 case CallingConv::Fast:
5597 return true;
5598 default:
5599 return false;
5600 }
5601}
5602
5603bool AArch64TargetLowering::isEligibleForTailCallOptimization(
5604 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
5605 const SmallVectorImpl<ISD::OutputArg> &Outs,
5606 const SmallVectorImpl<SDValue> &OutVals,
5607 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
5608 if (!mayTailCallThisCC(CalleeCC))
5609 return false;
5610
5611 MachineFunction &MF = DAG.getMachineFunction();
5612 const Function &CallerF = MF.getFunction();
5613 CallingConv::ID CallerCC = CallerF.getCallingConv();
5614
5615 // Functions using the C or Fast calling convention that have an SVE signature
5616 // preserve more registers and should assume the SVE_VectorCall CC.
5617 // The check for matching callee-saved regs will determine whether it is
5618 // eligible for TCO.
5619 if ((CallerCC == CallingConv::C || CallerCC == CallingConv::Fast) &&
5620 AArch64RegisterInfo::hasSVEArgsOrReturn(&MF))
5621 CallerCC = CallingConv::AArch64_SVE_VectorCall;
5622
5623 bool CCMatch = CallerCC == CalleeCC;
5624
5625 // When using the Windows calling convention on a non-windows OS, we want
5626 // to back up and restore X18 in such functions; we can't do a tail call
5627 // from those functions.
5628 if (CallerCC == CallingConv::Win64 && !Subtarget->isTargetWindows() &&
5629 CalleeCC != CallingConv::Win64)
5630 return false;
5631
5632 // Byval parameters hand the function a pointer directly into the stack area
5633 // we want to reuse during a tail call. Working around this *is* possible (see
5634 // X86) but less efficient and uglier in LowerCall.
5635 for (Function::const_arg_iterator i = CallerF.arg_begin(),
5636 e = CallerF.arg_end();
5637 i != e; ++i) {
5638 if (i->hasByValAttr())
5639 return false;
5640
5641 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
5642 // In this case, it is necessary to save/restore X0 in the callee. Tail
5643 // call opt interferes with this. So we disable tail call opt when the
5644 // caller has an argument with "inreg" attribute.
5645
5646 // FIXME: Check whether the callee also has an "inreg" argument.
5647 if (i->hasInRegAttr())
5648 return false;
5649 }
5650
5651 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
5652 return CCMatch;
5653
5654 // Externally-defined functions with weak linkage should not be
5655 // tail-called on AArch64 when the OS does not support dynamic
5656 // pre-emption of symbols, as the AAELF spec requires normal calls
5657 // to undefined weak functions to be replaced with a NOP or jump to the
5658 // next instruction. The behaviour of branch instructions in this
5659 // situation (as used for tail calls) is implementation-defined, so we
5660 // cannot rely on the linker replacing the tail call with a return.
5661 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5662 const GlobalValue *GV = G->getGlobal();
5663 const Triple &TT = getTargetMachine().getTargetTriple();
5664 if (GV->hasExternalWeakLinkage() &&
5665 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
5666 return false;
5667 }
5668
5669 // Now we search for cases where we can use a tail call without changing the
5670 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
5671 // concept.
5672
5673 // I want anyone implementing a new calling convention to think long and hard
5674 // about this assert.
5675 assert((!isVarArg || CalleeCC == CallingConv::C) &&
5676        "Unexpected variadic calling convention");
5677
5678 LLVMContext &C = *DAG.getContext();
5679 if (isVarArg && !Outs.empty()) {
5680 // At least two cases here: if caller is fastcc then we can't have any
5681 // memory arguments (we'd be expected to clean up the stack afterwards). If
5682 // caller is C then we could potentially use its argument area.
5683
5684 // FIXME: for now we take the most conservative of these in both cases:
5685 // disallow all variadic memory operands.
5686 SmallVector<CCValAssign, 16> ArgLocs;
5687 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5688
5689 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
5690 for (const CCValAssign &ArgLoc : ArgLocs)
5691 if (!ArgLoc.isRegLoc())
5692 return false;
5693 }
5694
5695 // Check that the call results are passed in the same way.
5696 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
5697 CCAssignFnForCall(CalleeCC, isVarArg),
5698 CCAssignFnForCall(CallerCC, isVarArg)))
5699 return false;
5700 // The callee has to preserve all registers the caller needs to preserve.
5701 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
5702 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
5703 if (!CCMatch) {
5704 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
5705 if (Subtarget->hasCustomCallingConv()) {
5706 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
5707 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
5708 }
5709 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
5710 return false;
5711 }
5712
5713 // Nothing more to check if the callee is taking no arguments
5714 if (Outs.empty())
5715 return true;
5716
5717 SmallVector<CCValAssign, 16> ArgLocs;
5718 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
5719
5720 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
5721
5722 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5723
5724 // If any of the arguments is passed indirectly, it must be SVE, so the
5725 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
5726 // allocate space on the stack. That is why we determine explicitly here
5727 // that such a call cannot be a tailcall.
5728 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
5729 assert((A.getLocInfo() != CCValAssign::Indirect ||
5730         A.getValVT().isScalableVector()) &&
5731        "Expected value to be scalable");
5732 return A.getLocInfo() == CCValAssign::Indirect;
5733 }))
5734 return false;
5735
5736 // If the stack arguments for this call do not fit into our own save area then
5737 // the call cannot be made tail.
5738 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
5739 return false;
5740
5741 const MachineRegisterInfo &MRI = MF.getRegInfo();
5742 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
5743 return false;
5744
5745 return true;
5746}
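
The eligibility test above layers several independent conditions. A condensed restatement may help when auditing it; the booleans below are stand-ins for the real DAG and subtarget queries, and a few fast paths (such as the early return for calls with no outgoing arguments) are folded away for brevity:

// Rough shape of isEligibleForTailCallOptimization (illustrative only).
bool sketchTailCallEligible(bool CCMayTailCall, bool Win64CallerOnOtherOS,
                            bool CallerHasByValOrInRegArg, bool GuaranteedTCO,
                            bool CCMatch, bool CalleeIsExternalWeak,
                            bool ResultsCompatible, bool CalleeKeepsCallerCSRs,
                            bool AnyIndirectSVEArg, bool StackArgsFit) {
  if (!CCMayTailCall || Win64CallerOnOtherOS || CallerHasByValOrInRegArg)
    return false;
  if (GuaranteedTCO)                       // fastcc/tail/swifttail fast path
    return CCMatch;
  if (CalleeIsExternalWeak || !ResultsCompatible || !CalleeKeepsCallerCSRs)
    return false;
  return !AnyIndirectSVEArg && StackArgsFit;
}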
5747
5748SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
5749 SelectionDAG &DAG,
5750 MachineFrameInfo &MFI,
5751 int ClobberedFI) const {
5752 SmallVector<SDValue, 8> ArgChains;
5753 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
5754 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
5755
5756 // Include the original chain at the beginning of the list. When this is
5757 // used by target LowerCall hooks, this helps legalize find the
5758 // CALLSEQ_BEGIN node.
5759 ArgChains.push_back(Chain);
5760
5761 // Add a chain value for each incoming stack-argument load that overlaps
5762 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
5763 UE = DAG.getEntryNode().getNode()->use_end();
5764 U != UE; ++U)
5765 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
5766 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
5767 if (FI->getIndex() < 0) {
5768 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
5769 int64_t InLastByte = InFirstByte;
5770 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
5771
5772 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
5773 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
5774 ArgChains.push_back(SDValue(L, 1));
5775 }
5776
5777 // Build a tokenfactor for all the chains.
5778 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
5779}
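
The loop above keeps only those incoming stack-argument loads whose frame objects overlap the byte range being clobbered. The overlap test on closed intervals is worth seeing on its own (framesOverlap is an illustrative stand-in):

// True when [FirstByte, LastByte] and [InFirstByte, InLastByte] intersect.
bool framesOverlap(long long FirstByte, long long LastByte,
                   long long InFirstByte, long long InLastByte) {
  return (InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
         (FirstByte <= InFirstByte && InFirstByte <= LastByte);
}
// framesOverlap(8, 15, 12, 19) is true; framesOverlap(8, 15, 16, 23) is false.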
5780
5781bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
5782 bool TailCallOpt) const {
5783 return (CallCC == CallingConv::Fast && TailCallOpt) ||
5784 CallCC == CallingConv::Tail || CallCC == CallingConv::SwiftTail;
5785}
5786
5787/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
5788/// and add input and output parameter nodes.
5789SDValue
5790AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
5791 SmallVectorImpl<SDValue> &InVals) const {
5792 SelectionDAG &DAG = CLI.DAG;
5793 SDLoc &DL = CLI.DL;
5794 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
5795 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
5796 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
5797 SDValue Chain = CLI.Chain;
5798 SDValue Callee = CLI.Callee;
5799 bool &IsTailCall = CLI.IsTailCall;
5800 CallingConv::ID CallConv = CLI.CallConv;
5801 bool IsVarArg = CLI.IsVarArg;
5802
5803 MachineFunction &MF = DAG.getMachineFunction();
5804 MachineFunction::CallSiteInfo CSInfo;
5805 bool IsThisReturn = false;
5806
5807 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5808 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
5809 bool IsSibCall = false;
5810 bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv);
5811
5812 // Check callee args/returns for SVE registers and set calling convention
5813 // accordingly.
5814 if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) {
5815 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
5816 return Out.VT.isScalableVector();
5817 });
5818 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
5819 return In.VT.isScalableVector();
5820 });
5821
5822 if (CalleeInSVE || CalleeOutSVE)
5823 CallConv = CallingConv::AArch64_SVE_VectorCall;
5824 }
5825
5826 if (IsTailCall) {
5827 // Check if it's really possible to do a tail call.
5828 IsTailCall = isEligibleForTailCallOptimization(
5829 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
5830
5831 // A sibling call is one where we're under the usual C ABI and not planning
5832 // to change that but can still do a tail call:
5833 if (!TailCallOpt && IsTailCall && CallConv != CallingConv::Tail &&
5834 CallConv != CallingConv::SwiftTail)
5835 IsSibCall = true;
5836
5837 if (IsTailCall)
5838 ++NumTailCalls;
5839 }
5840
5841 if (!IsTailCall && CLI.CB && CLI.CB->isMustTailCall())
5842 report_fatal_error("failed to perform tail call elimination on a call "
5843 "site marked musttail");
5844
5845 // Analyze operands of the call, assigning locations to each operand.
5846 SmallVector<CCValAssign, 16> ArgLocs;
5847 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
5848 *DAG.getContext());
5849
5850 if (IsVarArg) {
5851 // Handle fixed and variable vector arguments differently.
5852 // Variable vector arguments always go into memory.
5853 unsigned NumArgs = Outs.size();
5854
5855 for (unsigned i = 0; i != NumArgs; ++i) {
5856 MVT ArgVT = Outs[i].VT;
5857 if (!Outs[i].IsFixed && ArgVT.isScalableVector())
5858 report_fatal_error("Passing SVE types to variadic functions is "
5859 "currently not supported");
5860
5861 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5862 bool UseVarArgCC = !Outs[i].IsFixed;
5863 // On Windows, the fixed arguments in a vararg call are passed in GPRs
5864 // too, so use the vararg CC to force them to integer registers.
5865 if (IsCalleeWin64)
5866 UseVarArgCC = true;
5867 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
5868 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
5869 assert(!Res && "Call operand has unhandled type");
5870 (void)Res;
5871 }
5872 } else {
5873 // At this point, Outs[].VT may already be promoted to i32. To correctly
5874 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
5875 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
5876 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
5877 // we use a special version of AnalyzeCallOperands to pass in ValVT and
5878 // LocVT.
5879 unsigned NumArgs = Outs.size();
5880 for (unsigned i = 0; i != NumArgs; ++i) {
5881 MVT ValVT = Outs[i].VT;
5882 // Get type of the original argument.
5883 EVT ActualVT = getValueType(DAG.getDataLayout(),
5884 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
5885 /*AllowUnknown*/ true);
5886 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
5887 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5888 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
5889 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
5890 ValVT = MVT::i8;
5891 else if (ActualMVT == MVT::i16)
5892 ValVT = MVT::i16;
5893
5894 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
5895 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
5896 assert(!Res && "Call operand has unhandled type");
5897 (void)Res;
5898 }
5899 }
5900
5901 // Get a count of how many bytes are to be pushed on the stack.
5902 unsigned NumBytes = CCInfo.getNextStackOffset();
5903
5904 if (IsSibCall) {
5905 // Since we're not changing the ABI to make this a tail call, the memory
5906 // operands are already available in the caller's incoming argument space.
5907 NumBytes = 0;
5908 }
5909
5910 // FPDiff is the byte offset of the call's argument area from the callee's.
5911 // Stores to callee stack arguments will be placed in FixedStackSlots offset
5912 // by this amount for a tail call. In a sibling call it must be 0 because the
5913 // caller will deallocate the entire stack and the callee still expects its
5914 // arguments to begin at SP+0. Completely unused for non-tail calls.
5915 int FPDiff = 0;
5916
5917 if (IsTailCall && !IsSibCall) {
5918 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
5919
5920 // Since callee will pop argument stack as a tail call, we must keep the
5921 // popped size 16-byte aligned.
5922 NumBytes = alignTo(NumBytes, 16);
5923
5924 // FPDiff will be negative if this tail call requires more space than we
5925 // would automatically have in our incoming argument space. Positive if we
5926 // can actually shrink the stack.
5927 FPDiff = NumReusableBytes - NumBytes;
5928
5929 // Update the required reserved area if this is the tail call requiring the
5930 // most argument stack space.
5931 if (FPDiff < 0 && FuncInfo->getTailCallReservedStack() < (unsigned)-FPDiff)
5932 FuncInfo->setTailCallReservedStack(-FPDiff);
5933
5934 // The stack pointer must be 16-byte aligned at all times it's used for a
5935 // memory operation, which in practice means at *all* times and in
5936 // particular across call boundaries. Therefore our own arguments started at
5937 // a 16-byte aligned SP and the delta applied for the tail call should
5938 // satisfy the same constraint.
5939 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
5940 }
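
FPDiff above is the signed gap between the caller's reusable argument area and the (16-byte aligned) space this tail call needs; a negative value grows the reserved area. As arithmetic (fpDiffSketch is illustrative, not LLVM code):

// NumBytes is first padded to a 16-byte multiple, then compared against the
// caller's own incoming argument area.
constexpr long long fpDiffSketch(unsigned NumReusableBytes, unsigned NumBytes) {
  unsigned Aligned = (NumBytes + 15) / 16 * 16;
  return static_cast<long long>(NumReusableBytes) -
         static_cast<long long>(Aligned);
}
static_assert(fpDiffSketch(32, 48) == -16, "needs 16 extra reserved bytes");
static_assert(fpDiffSketch(64, 16) == 48, "caller area can shrink by 48 bytes");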
5941
5942 // Adjust the stack pointer for the new arguments...
5943 // These operations are automatically eliminated by the prolog/epilog pass
5944 if (!IsSibCall)
5945 Chain = DAG.getCALLSEQ_START(Chain, IsTailCall ? 0 : NumBytes, 0, DL);
5946
5947 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
5948 getPointerTy(DAG.getDataLayout()));
5949
5950 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5951 SmallSet<unsigned, 8> RegsUsed;
5952 SmallVector<SDValue, 8> MemOpChains;
5953 auto PtrVT = getPointerTy(DAG.getDataLayout());
5954
5955 if (IsVarArg && CLI.CB && CLI.CB->isMustTailCall()) {
5956 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
5957 for (const auto &F : Forwards) {
5958 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
5959 RegsToPass.emplace_back(F.PReg, Val);
5960 }
5961 }
5962
5963 // Walk the register/memloc assignments, inserting copies/loads.
5964 unsigned ExtraArgLocs = 0;
5965 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
5966 CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
5967 SDValue Arg = OutVals[i];
5968 ISD::ArgFlagsTy Flags = Outs[i].Flags;
5969
5970 // Promote the value if needed.
5971 switch (VA.getLocInfo()) {
5972 default:
5973 llvm_unreachable("Unknown loc info!");
5974 case CCValAssign::Full:
5975 break;
5976 case CCValAssign::SExt:
5977 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
5978 break;
5979 case CCValAssign::ZExt:
5980 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
5981 break;
5982 case CCValAssign::AExt:
5983 if (Outs[i].ArgVT == MVT::i1) {
5984 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
5985 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
5986 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
5987 }
5988 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5989 break;
5990 case CCValAssign::AExtUpper:
5991 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
5992 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
5993 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
5994 DAG.getConstant(32, DL, VA.getLocVT()));
5995 break;
5996 case CCValAssign::BCvt:
5997 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
5998 break;
5999 case CCValAssign::Trunc:
6000 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6001 break;
6002 case CCValAssign::FPExt:
6003 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
6004 break;
6005 case CCValAssign::Indirect:
6006 assert(VA.getValVT().isScalableVector() &&
6007        "Only scalable vectors can be passed indirectly");
6008
6009 uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
6010 uint64_t PartSize = StoreSize;
6011 unsigned NumParts = 1;
6012 if (Outs[i].Flags.isInConsecutiveRegs()) {
6013 assert(!Outs[i].Flags.isInConsecutiveRegsLast());
6014 while (!Outs[i + NumParts - 1].Flags.isInConsecutiveRegsLast())
6015 ++NumParts;
6016 StoreSize *= NumParts;
6017 }
6018
6019 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6020 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
6021 Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
6022 int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
6023 MFI.setStackID(FI, TargetStackID::ScalableVector);
6024
6025 MachinePointerInfo MPI =
6026 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6027 SDValue Ptr = DAG.getFrameIndex(
6028 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
6029 SDValue SpillSlot = Ptr;
6030
6031 // Ensure we generate all stores for each tuple part, whilst updating the
6032 // pointer after each store correctly using vscale.
6033 while (NumParts) {
6034 Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
6035 NumParts--;
6036 if (NumParts > 0) {
6037 SDValue BytesIncrement = DAG.getVScale(
6038 DL, Ptr.getValueType(),
6039 APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
6040 SDNodeFlags Flags;
6041 Flags.setNoUnsignedWrap(true);
6042
6043 MPI = MachinePointerInfo(MPI.getAddrSpace());
6044 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6045 BytesIncrement, Flags);
6046 ExtraArgLocs++;
6047 i++;
6048 }
6049 }
6050
6051 Arg = SpillSlot;
6052 break;
6053 }
6054
6055 if (VA.isRegLoc()) {
6056 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
6057 Outs[0].VT == MVT::i64) {
6058 assert(VA.getLocVT() == MVT::i64 &&
6059 "unexpected calling convention register assignment");
6060 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
6061 "unexpected use of 'returned'");
6062 IsThisReturn = true;
6063 }
6064 if (RegsUsed.count(VA.getLocReg())) {
6065 // If this register has already been used then we're trying to pack
6066 // parts of an [N x i32] into an X-register. The extension type will
6067 // take care of putting the two halves in the right place but we have to
6068 // combine them.
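// For example, when two i32 pieces of an [N x i32] share one X-register, the
// piece assigned to the low half occupies bits [31:0] and the AExtUpper piece
// has already been shifted into bits [63:32], so the OR below reassembles the
// full 64-bit register value.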
6069 SDValue &Bits =
6070 llvm::find_if(RegsToPass,
6071 [=](const std::pair<unsigned, SDValue> &Elt) {
6072 return Elt.first == VA.getLocReg();
6073 })
6074 ->second;
6075 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6076 // Call site info is used for a function's parameter entry value
6077 // tracking. For now we only track simple cases where the parameter
6078 // is transferred through a whole register.
6079 llvm::erase_if(CSInfo, [&VA](MachineFunction::ArgRegPair ArgReg) {
6080 return ArgReg.Reg == VA.getLocReg();
6081 });
6082 } else {
6083 RegsToPass.emplace_back(VA.getLocReg(), Arg);
6084 RegsUsed.insert(VA.getLocReg());
6085 const TargetOptions &Options = DAG.getTarget().Options;
6086 if (Options.EmitCallSiteInfo)
6087 CSInfo.emplace_back(VA.getLocReg(), i);
6088 }
6089 } else {
6090 assert(VA.isMemLoc());
6091
6092 SDValue DstAddr;
6093 MachinePointerInfo DstInfo;
6094
6095 // FIXME: This works on big-endian for composite byvals, which are the
6096 // common case. It should work for fundamental types too.
6097 uint32_t BEAlign = 0;
6098 unsigned OpSize;
6099 if (VA.getLocInfo() == CCValAssign::Indirect ||
6100 VA.getLocInfo() == CCValAssign::Trunc)
6101 OpSize = VA.getLocVT().getFixedSizeInBits();
6102 else
6103 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
6104 : VA.getValVT().getSizeInBits();
6105 OpSize = (OpSize + 7) / 8;
6106 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
6107 !Flags.isInConsecutiveRegs()) {
6108 if (OpSize < 8)
6109 BEAlign = 8 - OpSize;
6110 }
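// Worked example of the adjustment above: an i16 argument has OpSize == 2, so
// on a big-endian target BEAlign == 6 and the value is stored at
// LocMemOffset + 6, i.e. in the most significant bytes of its 8-byte slot.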
6111 unsigned LocMemOffset = VA.getLocMemOffset();
6112 int32_t Offset = LocMemOffset + BEAlign;
6113 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6114 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6115
6116 if (IsTailCall) {
6117 Offset = Offset + FPDiff;
6118 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
6119
6120 DstAddr = DAG.getFrameIndex(FI, PtrVT);
6121 DstInfo =
6122 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
6123
6124 // Make sure any stack arguments overlapping with where we're storing
6125 // are loaded before this eventual operation. Otherwise they'll be
6126 // clobbered.
6127 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
6128 } else {
6129 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
6130
6131 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
6132 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
6133 LocMemOffset);
6134 }
6135
6136 if (Outs[i].Flags.isByVal()) {
6137 SDValue SizeNode =
6138 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
6139 SDValue Cpy = DAG.getMemcpy(
6140 Chain, DL, DstAddr, Arg, SizeNode,
6141 Outs[i].Flags.getNonZeroByValAlign(),
6142 /*isVol = */ false, /*AlwaysInline = */ false,
6143 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
6144
6145 MemOpChains.push_back(Cpy);
6146 } else {
6147 // Since we pass i1/i8/i16 as i1/i8/i16 on the stack and Arg is already
6148 // promoted to a legal register type i32, we should truncate Arg back to
6149 // i1/i8/i16.
6150 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
6151 VA.getValVT() == MVT::i16)
6152 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
6153
6154 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
6155 MemOpChains.push_back(Store);
6156 }
6157 }
6158 }
6159
6160 if (!MemOpChains.empty())
6161 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
6162
6163 // Build a sequence of copy-to-reg nodes chained together with token chain
6164 // and flag operands which copy the outgoing args into the appropriate regs.
6165 SDValue InFlag;
6166 for (auto &RegToPass : RegsToPass) {
6167 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
6168 RegToPass.second, InFlag);
6169 InFlag = Chain.getValue(1);
6170 }
6171
6172 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
6173 // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
6174 // node so that legalize doesn't hack it.
6175 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
6176 auto GV = G->getGlobal();
6177 unsigned OpFlags =
6178 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
6179 if (OpFlags & AArch64II::MO_GOT) {
6180 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
6181 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6182 } else {
6183 const GlobalValue *GV = G->getGlobal();
6184 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
6185 }
6186 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
6187 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6188 Subtarget->isTargetMachO()) {
6189 const char *Sym = S->getSymbol();
6190 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
6191 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
6192 } else {
6193 const char *Sym = S->getSymbol();
6194 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
6195 }
6196 }
6197
6198 // We don't usually want to end the call-sequence here because we would tidy
6199 // the frame up *after* the call. However, in the ABI-changing tail-call case
6200 // we've carefully laid out the parameters so that when sp is reset they'll be
6201 // in the correct location.
6202 if (IsTailCall && !IsSibCall) {
6203 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
6204 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
6205 InFlag = Chain.getValue(1);
6206 }
6207
6208 std::vector<SDValue> Ops;
6209 Ops.push_back(Chain);
6210 Ops.push_back(Callee);
6211
6212 if (IsTailCall) {
6213 // Each tail call may have to adjust the stack by a different amount, so
6214 // this information must travel along with the operation for eventual
6215 // consumption by emitEpilogue.
6216 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
6217 }
6218
6219 // Add argument registers to the end of the list so that they are known live
6220 // into the call.
6221 for (auto &RegToPass : RegsToPass)
6222 Ops.push_back(DAG.getRegister(RegToPass.first,
6223 RegToPass.second.getValueType()));
6224
6225 // Add a register mask operand representing the call-preserved registers.
6226 const uint32_t *Mask;
6227 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6228 if (IsThisReturn) {
6229 // For 'this' returns, use the X0-preserving mask if applicable
6230 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
6231 if (!Mask) {
6232 IsThisReturn = false;
6233 Mask = TRI->getCallPreservedMask(MF, CallConv);
6234 }
6235 } else
6236 Mask = TRI->getCallPreservedMask(MF, CallConv);
6237
6238 if (Subtarget->hasCustomCallingConv())
6239 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
6240
6241 if (TRI->isAnyArgRegReserved(MF))
6242 TRI->emitReservedArgRegCallError(MF);
6243
6244 assert(Mask && "Missing call preserved mask for calling convention");
6245 Ops.push_back(DAG.getRegisterMask(Mask));
6246
6247 if (InFlag.getNode())
6248 Ops.push_back(InFlag);
6249
6250 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6251
6252 // If we're doing a tail call, use a TC_RETURN here rather than an
6253 // actual call instruction.
6254 if (IsTailCall) {
6255 MF.getFrameInfo().setHasTailCall();
6256 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
6257 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
6258 return Ret;
6259 }
6260
6261 unsigned CallOpc = AArch64ISD::CALL;
6262 // Calls with operand bundle "clang.arc.attachedcall" are special. They should
6263 // be expanded to the call, directly followed by a special marker sequence.
6264 // Use the CALL_RVMARKER to do that.
6265 if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
6266 assert(!IsTailCall &&
6267 "tail calls cannot be marked with clang.arc.attachedcall");
6268 CallOpc = AArch64ISD::CALL_RVMARKER;
6269 }
6270
6271 // Returns a chain and a flag for retval copy to use.
6272 Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops);
6273 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
6274 InFlag = Chain.getValue(1);
6275 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
6276
6277 uint64_t CalleePopBytes =
6278 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
6279
6280 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
6281 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
6282 InFlag, DL);
6283 if (!Ins.empty())
6284 InFlag = Chain.getValue(1);
6285
6286 // Handle result values, copying them out of physregs into vregs that we
6287 // return.
6288 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
6289 InVals, IsThisReturn,
6290 IsThisReturn ? OutVals[0] : SDValue());
6291}
6292
6293bool AArch64TargetLowering::CanLowerReturn(
6294 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
6295 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
6296 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6297 SmallVector<CCValAssign, 16> RVLocs;
6298 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
6299 return CCInfo.CheckReturn(Outs, RetCC);
6300}
6301
6302SDValue
6303AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
6304 bool isVarArg,
6305 const SmallVectorImpl<ISD::OutputArg> &Outs,
6306 const SmallVectorImpl<SDValue> &OutVals,
6307 const SDLoc &DL, SelectionDAG &DAG) const {
6308 auto &MF = DAG.getMachineFunction();
6309 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
6310
6311 CCAssignFn *RetCC = CCAssignFnForReturn(CallConv);
6312 SmallVector<CCValAssign, 16> RVLocs;
6313 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
6314 *DAG.getContext());
6315 CCInfo.AnalyzeReturn(Outs, RetCC);
6316
6317 // Copy the result values into the output registers.
6318 SDValue Flag;
6319 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
6320 SmallSet<unsigned, 4> RegsUsed;
6321 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
6322 ++i, ++realRVLocIdx) {
6323 CCValAssign &VA = RVLocs[i];
6324 assert(VA.isRegLoc() && "Can only return in registers!");
6325 SDValue Arg = OutVals[realRVLocIdx];
6326
6327 switch (VA.getLocInfo()) {
6328 default:
6329 llvm_unreachable("Unknown loc info!");
6330 case CCValAssign::Full:
6331 if (Outs[i].ArgVT == MVT::i1) {
6332 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
6333 // value. This is strictly redundant on Darwin (which uses "zeroext
6334 // i1"), but will be optimised out before ISel.
6335 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
6336 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
6337 }
6338 break;
6339 case CCValAssign::BCvt:
6340 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
6341 break;
6342 case CCValAssign::AExt:
6343 case CCValAssign::ZExt:
6344 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6345 break;
6346 case CCValAssign::AExtUpper:
6347 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
6348 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
6349 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
6350 DAG.getConstant(32, DL, VA.getLocVT()));
6351 break;
6352 }
6353
6354 if (RegsUsed.count(VA.getLocReg())) {
6355 SDValue &Bits =
6356 llvm::find_if(RetVals, [=](const std::pair<unsigned, SDValue> &Elt) {
6357 return Elt.first == VA.getLocReg();
6358 })->second;
6359 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
6360 } else {
6361 RetVals.emplace_back(VA.getLocReg(), Arg);
6362 RegsUsed.insert(VA.getLocReg());
6363 }
6364 }
6365
6366 SmallVector<SDValue, 4> RetOps(1, Chain);
6367 for (auto &RetVal : RetVals) {
6368 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
6369 Flag = Chain.getValue(1);
6370 RetOps.push_back(
6371 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
6372 }
6373
6374 // Windows AArch64 ABIs require that for returning structs by value we copy
6375 // the sret argument into X0 for the return.
6376 // We saved the argument into a virtual register in the entry block,
6377 // so now we copy the value out and into X0.
6378 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
6379 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
6380 getPointerTy(MF.getDataLayout()));
6381
6382 unsigned RetValReg = AArch64::X0;
6383 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
6384 Flag = Chain.getValue(1);
6385
6386 RetOps.push_back(
6387 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
6388 }
6389
6390 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6391 const MCPhysReg *I =
6392 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
6393 if (I) {
6394 for (; *I; ++I) {
6395 if (AArch64::GPR64RegClass.contains(*I))
6396 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
6397 else if (AArch64::FPR64RegClass.contains(*I))
6398 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
6399 else
6400 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6401 }
6402 }
6403
6404 RetOps[0] = Chain; // Update chain.
6405
6406 // Add the flag if we have it.
6407 if (Flag.getNode())
6408 RetOps.push_back(Flag);
6409
6410 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
6411}
6412
6413//===----------------------------------------------------------------------===//
6414// Other Lowering Code
6415//===----------------------------------------------------------------------===//
6416
6417SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
6418 SelectionDAG &DAG,
6419 unsigned Flag) const {
6420 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
6421 N->getOffset(), Flag);
6422}
6423
6424SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
6425 SelectionDAG &DAG,
6426 unsigned Flag) const {
6427 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
6428}
6429
6430SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
6431 SelectionDAG &DAG,
6432 unsigned Flag) const {
6433 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
6434 N->getOffset(), Flag);
6435}
6436
6437SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
6438 SelectionDAG &DAG,
6439 unsigned Flag) const {
6440 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
6441}
6442
6443// (loadGOT sym)
6444template <class NodeTy>
6445SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
6446 unsigned Flags) const {
6447 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
6448 SDLoc DL(N);
6449 EVT Ty = getPointerTy(DAG.getDataLayout());
6450 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
6451 // FIXME: Once remat is capable of dealing with instructions with register
6452 // operands, expand this into two nodes instead of using a wrapper node.
6453 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
6454}
6455
6456// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
6457template <class NodeTy>
6458SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
6459 unsigned Flags) const {
6460 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
6461 SDLoc DL(N);
6462 EVT Ty = getPointerTy(DAG.getDataLayout());
6463 const unsigned char MO_NC = AArch64II::MO_NC;
6464 return DAG.getNode(
6465 AArch64ISD::WrapperLarge, DL, Ty,
6466 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
6467 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
6468 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
6469 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
6470}
6471
6472// (addlow (adrp %hi(sym)) %lo(sym))
6473template <class NodeTy>
6474SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
6475 unsigned Flags) const {
6476 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
6477 SDLoc DL(N);
6478 EVT Ty = getPointerTy(DAG.getDataLayout());
6479 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
6480 SDValue Lo = getTargetNode(N, Ty, DAG,
6481 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
6482 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
6483 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
6484}
6485
6486// (adr sym)
6487template <class NodeTy>
6488SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
6489 unsigned Flags) const {
6490 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
6491 SDLoc DL(N);
6492 EVT Ty = getPointerTy(DAG.getDataLayout());
6493 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
6494 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
6495}
6496
6497SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
6498 SelectionDAG &DAG) const {
6499 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
6500 const GlobalValue *GV = GN->getGlobal();
6501 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
6502
6503 if (OpFlags != AArch64II::MO_NO_FLAG)
6504 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
6505 "unexpected offset in global node");
6506
6507 // This also catches the large code model case for Darwin, and tiny code
6508 // model with got relocations.
6509 if ((OpFlags & AArch64II::MO_GOT) != 0) {
6510 return getGOT(GN, DAG, OpFlags);
6511 }
6512
6513 SDValue Result;
6514 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
6515 Result = getAddrLarge(GN, DAG, OpFlags);
6516 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
6517 Result = getAddrTiny(GN, DAG, OpFlags);
6518 } else {
6519 Result = getAddr(GN, DAG, OpFlags);
6520 }
6521 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6522 SDLoc DL(GN);
6523 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
6524 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
6525 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
6526 return Result;
6527}
6528
6529/// Convert a TLS address reference into the correct sequence of loads
6530/// and calls to compute the variable's address (for Darwin, currently) and
6531/// return an SDValue containing the final node.
6532
6533/// Darwin only has one TLS scheme which must be capable of dealing with the
6534/// fully general situation, in the worst case. This means:
6535/// + "extern __thread" declaration.
6536/// + Defined in a possibly unknown dynamic library.
6537///
6538/// The general system is that each __thread variable has a [3 x i64] descriptor
6539/// which contains information used by the runtime to calculate the address. The
6540/// only part of this the compiler needs to know about is the first xword, which
6541/// contains a function pointer that must be called with the address of the
6542/// entire descriptor in "x0".
6543///
6544/// Since this descriptor may be in a different unit, in general even the
6545/// descriptor must be accessed via an indirect load. The "ideal" code sequence
6546/// is:
6547/// adrp x0, _var@TLVPPAGE
6548/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
6549/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
6550/// ; the function pointer
6551/// blr x1 ; Uses descriptor address in x0
6552/// ; Address of _var is now in x0.
6553///
6554/// If the address of _var's descriptor *is* known to the linker, then it can
6555/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
6556/// a slight efficiency gain.
6557SDValue
6558AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
6559 SelectionDAG &DAG) const {
6560 assert(Subtarget->isTargetDarwin() &&
6561 "This function expects a Darwin target");
6562
6563 SDLoc DL(Op);
6564 MVT PtrVT = getPointerTy(DAG.getDataLayout());
6565 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
6566 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
6567
6568 SDValue TLVPAddr =
6569 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6570 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
6571
6572 // The first entry in the descriptor is a function pointer that we must call
6573 // to obtain the address of the variable.
6574 SDValue Chain = DAG.getEntryNode();
6575 SDValue FuncTLVGet = DAG.getLoad(
6576 PtrMemVT, DL, Chain, DescAddr,
6577 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
6578 Align(PtrMemVT.getSizeInBits() / 8),
6579 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
6580 Chain = FuncTLVGet.getValue(1);
6581
6582 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
6583 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
6584
6585 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6586 MFI.setAdjustsStack(true);
6587
6588 // TLS calls preserve all registers except those that absolutely must be
6589 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
6590 // silly).
6591 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
6592 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
6593 if (Subtarget->hasCustomCallingConv())
6594 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
6595
6596 // Finally, we can make the call. This is just a degenerate version of a
6597 // normal AArch64 call node: x0 takes the address of the descriptor, and
6598 // returns the address of the variable in this thread.
6599 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
6600 Chain =
6601 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
6602 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
6603 DAG.getRegisterMask(Mask), Chain.getValue(1));
6604 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
6605}
6606
6607/// Convert a thread-local variable reference into a sequence of instructions to
6608/// compute the variable's address for the local exec TLS model of ELF targets.
6609/// The sequence depends on the maximum TLS area size.
6610SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
6611 SDValue ThreadBase,
6612 const SDLoc &DL,
6613 SelectionDAG &DAG) const {
6614 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6615 SDValue TPOff, Addr;
6616
6617 switch (DAG.getTarget().Options.TLSSize) {
6618 default:
6619 llvm_unreachable("Unexpected TLS size")::llvm::llvm_unreachable_internal("Unexpected TLS size", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 6619)
;
6620
6621 case 12: {
6622 // mrs x0, TPIDR_EL0
6623 // add x0, x0, :tprel_lo12:a
6624 SDValue Var = DAG.getTargetGlobalAddress(
6625 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
6626 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6627 Var,
6628 DAG.getTargetConstant(0, DL, MVT::i32)),
6629 0);
6630 }
6631
6632 case 24: {
6633 // mrs x0, TPIDR_EL0
6634 // add x0, x0, :tprel_hi12:a
6635 // add x0, x0, :tprel_lo12_nc:a
6636 SDValue HiVar = DAG.getTargetGlobalAddress(
6637 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6638 SDValue LoVar = DAG.getTargetGlobalAddress(
6639 GV, DL, PtrVT, 0,
6640 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6641 Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
6642 HiVar,
6643 DAG.getTargetConstant(0, DL, MVT::i32)),
6644 0);
6645 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
6646 LoVar,
6647 DAG.getTargetConstant(0, DL, MVT::i32)),
6648 0);
6649 }
6650
6651 case 32: {
6652 // mrs x1, TPIDR_EL0
6653 // movz x0, #:tprel_g1:a
6654 // movk x0, #:tprel_g0_nc:a
6655 // add x0, x1, x0
6656 SDValue HiVar = DAG.getTargetGlobalAddress(
6657 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
6658 SDValue LoVar = DAG.getTargetGlobalAddress(
6659 GV, DL, PtrVT, 0,
6660 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6661 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6662 DAG.getTargetConstant(16, DL, MVT::i32)),
6663 0);
6664 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6665 DAG.getTargetConstant(0, DL, MVT::i32)),
6666 0);
6667 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6668 }
6669
6670 case 48: {
6671 // mrs x1, TPIDR_EL0
6672 // movz x0, #:tprel_g2:a
6673 // movk x0, #:tprel_g1_nc:a
6674 // movk x0, #:tprel_g0_nc:a
6675 // add x0, x1, x0
6676 SDValue HiVar = DAG.getTargetGlobalAddress(
6677 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
6678 SDValue MiVar = DAG.getTargetGlobalAddress(
6679 GV, DL, PtrVT, 0,
6680 AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
6681 SDValue LoVar = DAG.getTargetGlobalAddress(
6682 GV, DL, PtrVT, 0,
6683 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
6684 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
6685 DAG.getTargetConstant(32, DL, MVT::i32)),
6686 0);
6687 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
6688 DAG.getTargetConstant(16, DL, MVT::i32)),
6689 0);
6690 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
6691 DAG.getTargetConstant(0, DL, MVT::i32)),
6692 0);
6693 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6694 }
6695 }
6696}
6697
6698/// When accessing thread-local variables under either the general-dynamic or
6699/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
6700/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
6701/// is a function pointer to carry out the resolution.
6702///
6703/// The sequence is:
6704/// adrp x0, :tlsdesc:var
6705/// ldr x1, [x0, #:tlsdesc_lo12:var]
6706/// add x0, x0, #:tlsdesc_lo12:var
6707/// .tlsdesccall var
6708/// blr x1
6709/// (TPIDR_EL0 offset now in x0)
6710///
6711/// The above sequence must be produced unscheduled, to enable the linker to
6712/// optimize/relax this sequence.
6713/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
6714/// above sequence, and expanded really late in the compilation flow, to ensure
6715/// the sequence is produced as per above.
6716SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
6717 const SDLoc &DL,
6718 SelectionDAG &DAG) const {
6719 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6720
6721 SDValue Chain = DAG.getEntryNode();
6722 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
6723
6724 Chain =
6725 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
6726 SDValue Glue = Chain.getValue(1);
6727
6728 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
6729}
6730
6731SDValue
6732AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
6733 SelectionDAG &DAG) const {
6734 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
6735
6736 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6737
6738 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
6739
6740 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
6741 if (Model == TLSModel::LocalDynamic)
6742 Model = TLSModel::GeneralDynamic;
6743 }
6744
6745 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
6746 Model != TLSModel::LocalExec)
6747 report_fatal_error("ELF TLS only supported in small memory model or "
6748 "in local exec TLS model");
6749 // Different choices can be made for the maximum size of the TLS area for a
6750 // module. For the small address model, the default TLS size is 16MiB and the
6751 // maximum TLS size is 4GiB.
6752 // FIXME: add tiny and large code model support for TLS access models other
6753 // than local exec. We currently generate the same code as small for tiny,
6754 // which may be larger than needed.
6755
6756 SDValue TPOff;
6757 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6758 SDLoc DL(Op);
6759 const GlobalValue *GV = GA->getGlobal();
6760
6761 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
6762
6763 if (Model == TLSModel::LocalExec) {
6764 return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
6765 } else if (Model == TLSModel::InitialExec) {
6766 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6767 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
6768 } else if (Model == TLSModel::LocalDynamic) {
6769 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
6770 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
6771 // the beginning of the module's TLS region, followed by a DTPREL offset
6772 // calculation.
6773
6774 // These accesses will need deduplicating if there's more than one.
6775 AArch64FunctionInfo *MFI =
6776 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
6777 MFI->incNumLocalDynamicTLSAccesses();
6778
6779 // The call needs a relocation too for linker relaxation. It doesn't make
6780 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6781 // the address.
6782 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
6783 AArch64II::MO_TLS);
6784
6785 // Now we can calculate the offset from TPIDR_EL0 to this module's
6786 // thread-local area.
6787 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6788
6789 // Now use :dtprel_whatever: operations to calculate this variable's offset
6790 // in its thread-storage area.
6791 SDValue HiVar = DAG.getTargetGlobalAddress(
6792 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6793 SDValue LoVar = DAG.getTargetGlobalAddress(
6794 GV, DL, MVT::i64, 0,
6795 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6796
6797 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
6798 DAG.getTargetConstant(0, DL, MVT::i32)),
6799 0);
6800 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
6801 DAG.getTargetConstant(0, DL, MVT::i32)),
6802 0);
6803 } else if (Model == TLSModel::GeneralDynamic) {
6804 // The call needs a relocation too for linker relaxation. It doesn't make
6805 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
6806 // the address.
6807 SDValue SymAddr =
6808 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
6809
6810 // Finally we can make a call to calculate the offset from tpidr_el0.
6811 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
6812 } else
6813 llvm_unreachable("Unsupported ELF TLS access model");
6814
6815 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
6816}
6817
6818SDValue
6819AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
6820 SelectionDAG &DAG) const {
6821 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
6822
6823 SDValue Chain = DAG.getEntryNode();
6824 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6825 SDLoc DL(Op);
6826
6827 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
6828
6829 // Load the ThreadLocalStoragePointer from the TEB
6830 // A pointer to the TLS array is located at offset 0x58 from the TEB.
6831 SDValue TLSArray =
6832 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
6833 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
6834 Chain = TLSArray.getValue(1);
6835
6836 // Load the TLS index from the C runtime;
6837 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
6838 // This also does the same as LOADgot, but using a generic i32 load,
6839 // while LOADgot only loads i64.
6840 SDValue TLSIndexHi =
6841 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
6842 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
6843 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6844 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
6845 SDValue TLSIndex =
6846 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
6847 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
6848 Chain = TLSIndex.getValue(1);
6849
6850 // The pointer to the thread's TLS data area is stored in the TLS array at
6851 // an offset of TLS index * 8.
6852 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
6853 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
6854 DAG.getConstant(3, DL, PtrVT));
6855 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
6856 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
6857 MachinePointerInfo());
6858 Chain = TLS.getValue(1);
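// Putting the loads above together, the thread's TLS block is found at
// roughly *(*(TEB + 0x58) + _tls_index * 8); the variable itself then lives
// at that base plus its offset within the .tls section, added below.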
6859
6860 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6861 const GlobalValue *GV = GA->getGlobal();
6862 SDValue TGAHi = DAG.getTargetGlobalAddress(
6863 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
6864 SDValue TGALo = DAG.getTargetGlobalAddress(
6865 GV, DL, PtrVT, 0,
6866 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
6867
6868 // Add the offset from the start of the .tls section (section base).
6869 SDValue Addr =
6870 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
6871 DAG.getTargetConstant(0, DL, MVT::i32)),
6872 0);
6873 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
6874 return Addr;
6875}
6876
6877SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
6878 SelectionDAG &DAG) const {
6879 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
6880 if (DAG.getTarget().useEmulatedTLS())
6881 return LowerToTLSEmulatedModel(GA, DAG);
6882
6883 if (Subtarget->isTargetDarwin())
6884 return LowerDarwinGlobalTLSAddress(Op, DAG);
6885 if (Subtarget->isTargetELF())
6886 return LowerELFGlobalTLSAddress(Op, DAG);
6887 if (Subtarget->isTargetWindows())
6888 return LowerWindowsGlobalTLSAddress(Op, DAG);
6889
6890 llvm_unreachable("Unexpected platform trying to use TLS");
6891}
6892
6893// Looks through \param Val to determine the bit that can be used to
6894// check the sign of the value. It returns the unextended value and
6895// the sign bit position.
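// For instance, for a value sign-extended from i8 this returns the unextended
// operand together with bit position 7; for a plain i32 value it returns the
// value itself and bit position 31.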
6896std::pair<SDValue, uint64_t> lookThroughSignExtension(SDValue Val) {
6897 if (Val.getOpcode() == ISD::SIGN_EXTEND_INREG)
6898 return {Val.getOperand(0),
6899 cast<VTSDNode>(Val.getOperand(1))->getVT().getFixedSizeInBits() -
6900 1};
6901
6902 if (Val.getOpcode() == ISD::SIGN_EXTEND)
6903 return {Val.getOperand(0),
6904 Val.getOperand(0)->getValueType(0).getFixedSizeInBits() - 1};
6905
6906 return {Val, Val.getValueSizeInBits() - 1};
6907}
6908
6909SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
6910 SDValue Chain = Op.getOperand(0);
6911 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
6912 SDValue LHS = Op.getOperand(2);
6913 SDValue RHS = Op.getOperand(3);
6914 SDValue Dest = Op.getOperand(4);
6915 SDLoc dl(Op);
6916
6917 MachineFunction &MF = DAG.getMachineFunction();
6918 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
6919 // will not be produced, as they are conditional branch instructions that do
6920 // not set flags.
6921 bool ProduceNonFlagSettingCondBr =
6922 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
6923
6924 // Handle f128 first, since lowering it will result in comparing the return
6925 // value of a libcall against zero, which is just what the rest of LowerBR_CC
6926 // is expecting to deal with.
6927 if (LHS.getValueType() == MVT::f128) {
6928 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
6929
6930 // If softenSetCCOperands returned a scalar, we need to compare the result
6931 // against zero to select between true and false values.
6932 if (!RHS.getNode()) {
6933 RHS = DAG.getConstant(0, dl, LHS.getValueType());
6934 CC = ISD::SETNE;
6935 }
6936 }
6937
6938 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
6939 // instruction.
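// For example, a branch on the overflow result of llvm.sadd.with.overflow is
// lowered to the flag-setting operation computed by getAArch64XALUOOp followed
// by a single conditional branch on the overflow condition, instead of
// materialising the i1 and comparing it against 1.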
6940 if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
6941 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
6942 // Only lower legal XALUO ops.
6943 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
6944 return SDValue();
6945
6946 // The actual operation with overflow check.
6947 AArch64CC::CondCode OFCC;
6948 SDValue Value, Overflow;
6949 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
6950
6951 if (CC == ISD::SETNE)
6952 OFCC = getInvertedCondCode(OFCC);
6953 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
6954
6955 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
6956 Overflow);
6957 }
6958
6959 if (LHS.getValueType().isInteger()) {
6960 assert((LHS.getValueType() == RHS.getValueType()) &&
6961 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
6962
6963 // If the RHS of the comparison is zero, we can potentially fold this
6964 // to a specialized branch.
6965 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
6966 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
6967 if (CC == ISD::SETEQ) {
6968 // See if we can use a TBZ to fold in an AND as well.
6969 // TBZ has a smaller branch displacement than CBZ. If the offset is
6970 // out of bounds, a late MI-layer pass rewrites branches.
6971 // 403.gcc is an example that hits this case.
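// E.g. a branch on (X & 8) == 0 becomes "tbz X, #3, dest" rather than a
// separate AND followed by CBZ.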
6972 if (LHS.getOpcode() == ISD::AND &&
6973 isa<ConstantSDNode>(LHS.getOperand(1)) &&
6974 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6975 SDValue Test = LHS.getOperand(0);
6976 uint64_t Mask = LHS.getConstantOperandVal(1);
6977 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
6978 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6979 Dest);
6980 }
6981
6982 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
6983 } else if (CC == ISD::SETNE) {
6984 // See if we can use a TBZ to fold in an AND as well.
6985 // TBZ has a smaller branch displacement than CBZ. If the offset is
6986 // out of bounds, a late MI-layer pass rewrites branches.
6987 // 403.gcc is an example that hits this case.
6988 if (LHS.getOpcode() == ISD::AND &&
6989 isa<ConstantSDNode>(LHS.getOperand(1)) &&
6990 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
6991 SDValue Test = LHS.getOperand(0);
6992 uint64_t Mask = LHS.getConstantOperandVal(1);
6993 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
6994 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
6995 Dest);
6996 }
6997
6998 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
6999 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
7000 // Don't combine AND since emitComparison converts the AND to an ANDS
7001 // (a.k.a. TST) and the test in the test bit and branch instruction
7002 // becomes redundant. This would also increase register pressure.
7003 uint64_t SignBitPos;
7004 std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
7005 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
7006 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
7007 }
7008 }
7009 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
7010 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
7011 // Don't combine AND since emitComparison converts the AND to an ANDS
7012 // (a.k.a. TST) and the test in the test bit and branch instruction
7013 // becomes redundant. This would also increase register pressure.
7014 uint64_t SignBitPos;
7015 std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
7016 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
7017 DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
7018 }
7019
7020 SDValue CCVal;
7021 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
7022 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
7023 Cmp);
7024 }
7025
7026 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::bf16 ||
7027 LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
7028
7029 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7030 // clean. Some of them require two branches to implement.
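// For example, an ordered-and-unequal (SETONE) comparison has no single
// AArch64 condition code, so it is emitted as two conditional branches off the
// same comparison (the CC2 != AArch64CC::AL path below).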
7031 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7032 AArch64CC::CondCode CC1, CC2;
7033 changeFPCCToAArch64CC(CC, CC1, CC2);
7034 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7035 SDValue BR1 =
7036 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
7037 if (CC2 != AArch64CC::AL) {
7038 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7039 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
7040 Cmp);
7041 }
7042
7043 return BR1;
7044}
7045
7046SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
7047 SelectionDAG &DAG) const {
7048 EVT VT = Op.getValueType();
7049 SDLoc DL(Op);
7050
7051 SDValue In1 = Op.getOperand(0);
7052 SDValue In2 = Op.getOperand(1);
7053 EVT SrcVT = In2.getValueType();
7054
7055 if (VT.isScalableVector()) {
7056 if (VT != SrcVT)
7057 return SDValue();
7058
7059 // copysign(x,y) -> (y & SIGN_MASK) | (x & ~SIGN_MASK)
7060 //
7061 // A possible alternative sequence involves using FNEG_MERGE_PASSTHRU;
7062 // maybe useful for copysign operations with mismatched VTs.
7063 //
7064 // IntVT here is chosen so it's a legal type with the same element width
7065 // as the input.
7066 EVT IntVT =
7067 getPackedSVEVectorVT(VT.getVectorElementType().changeTypeToInteger());
7068 unsigned NumBits = VT.getScalarSizeInBits();
7069 SDValue SignMask = DAG.getConstant(APInt::getSignMask(NumBits), DL, IntVT);
7070 SDValue InvSignMask = DAG.getNOT(DL, SignMask, IntVT);
7071 SDValue Sign = DAG.getNode(ISD::AND, DL, IntVT, SignMask,
7072 getSVESafeBitCast(IntVT, In2, DAG));
7073 SDValue Magnitude = DAG.getNode(ISD::AND, DL, IntVT, InvSignMask,
7074 getSVESafeBitCast(IntVT, In1, DAG));
7075 SDValue IntResult = DAG.getNode(ISD::OR, DL, IntVT, Sign, Magnitude);
7076 return getSVESafeBitCast(VT, IntResult, DAG);
7077 }
7078
7079 if (SrcVT.bitsLT(VT))
7080 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
7081 else if (SrcVT.bitsGT(VT))
7082 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
7083
7084 EVT VecVT;
7085 uint64_t EltMask;
7086 SDValue VecVal1, VecVal2;
7087
7088 auto setVecVal = [&] (int Idx) {
7089 if (!VT.isVector()) {
7090 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7091 DAG.getUNDEF(VecVT), In1);
7092 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
7093 DAG.getUNDEF(VecVT), In2);
7094 } else {
7095 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
7096 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
7097 }
7098 };
7099
7100 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
7101 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
7102 EltMask = 0x80000000ULL;
7103 setVecVal(AArch64::ssub);
7104 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
7105 VecVT = MVT::v2i64;
7106
7107 // We want to materialize a mask with the high bit set, but the AdvSIMD
7108 // immediate moves cannot materialize that in a single instruction for
7109 // 64-bit elements. Instead, materialize zero and then negate it.
7110 EltMask = 0;
7111
7112 setVecVal(AArch64::dsub);
7113 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
7114 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
7115 EltMask = 0x8000ULL;
7116 setVecVal(AArch64::hsub);
7117 } else {
7118 llvm_unreachable("Invalid type for copysign!");
7119 }
7120
7121 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
7122
7123 // If we couldn't materialize the mask above, then the mask vector will be
7124 // the zero vector, and we need to negate it here.
7125 if (VT == MVT::f64 || VT == MVT::v2f64) {
7126 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
7127 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
7128 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
7129 }
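// This works because FNEG of +0.0 produces -0.0, whose only set bit is the
// sign bit, yielding the 0x8000000000000000 mask in each 64-bit lane.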
7130
7131 SDValue Sel =
7132 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
7133
7134 if (VT == MVT::f16)
7135 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
7136 if (VT == MVT::f32)
7137 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
7138 else if (VT == MVT::f64)
7139 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
7140 else
7141 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
7142}
7143
7144SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
7145 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
7146 Attribute::NoImplicitFloat))
7147 return SDValue();
7148
7149 if (!Subtarget->hasNEON())
7150 return SDValue();
7151
7152 // While there is no integer popcount instruction, CTPOP can
7153 // be more efficiently lowered to the following sequence that uses
7154 // AdvSIMD registers/instructions as long as the copies to/from
7155 // the AdvSIMD registers are cheap.
7156 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
7157 // CNT V0.8B, V0.8B // 8xbyte pop-counts
7158 // ADDV B0, V0.8B // sum 8xbyte pop-counts
7159 // UMOV X0, V0.B[0] // copy byte result back to integer reg
7160 SDValue Val = Op.getOperand(0);
7161 SDLoc DL(Op);
7162 EVT VT = Op.getValueType();
7163
7164 if (VT == MVT::i32 || VT == MVT::i64) {
7165 if (VT == MVT::i32)
7166 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
7167 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
7168
7169 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
7170 SDValue UaddLV = DAG.getNode(
7171 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7172 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7173
7174 if (VT == MVT::i64)
7175 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
7176 return UaddLV;
7177 } else if (VT == MVT::i128) {
7178 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Val);
7179
7180 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v16i8, Val);
7181 SDValue UaddLV = DAG.getNode(
7182 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
7183 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
7184
7185 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, UaddLV);
7186 }
7187
7188 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
7189 return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
7190
7191 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
7192 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
7193 "Unexpected type for custom ctpop lowering");
7194
7195 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
7196 Val = DAG.getBitcast(VT8Bit, Val);
7197 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
7198
7199 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
7200 unsigned EltSize = 8;
7201 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
7202 while (EltSize != VT.getScalarSizeInBits()) {
7203 EltSize *= 2;
7204 NumElts /= 2;
7205 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
7206 Val = DAG.getNode(
7207 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
7208 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
7209 }
7210
7211 return Val;
7212}
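// Sketch of the widening loop above for a v2i64 ctpop: starting from the
// v16i8 per-byte counts, each uaddlp halves the element count and doubles
// the element width, i.e. v16i8 -> v8i16 -> v4i32 -> v2i64, so each lane of
// the final vector holds the popcount of the corresponding 64-bit input lane.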
7213
7214SDValue AArch64TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) const {
7215 EVT VT = Op.getValueType();
7216 assert(VT.isScalableVector() ||
7217 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true));
7218
7219 SDLoc DL(Op);
7220 SDValue RBIT = DAG.getNode(ISD::BITREVERSE, DL, VT, Op.getOperand(0));
7221 return DAG.getNode(ISD::CTLZ, DL, VT, RBIT);
7222}
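// The lowering above uses the identity cttz(x) == ctlz(bitreverse(x)):
// reversing the bits moves the least-significant set bit to the
// most-significant position, so counting leading zeros of the reversed
// value counts the trailing zeros of x. For example, if x has its lowest
// set bit at position 3 (cttz == 3), the reversed value has its highest set
// bit 3 positions below the top (ctlz == 3).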
7223
7224SDValue AArch64TargetLowering::LowerMinMax(SDValue Op,
7225 SelectionDAG &DAG) const {
7226
7227 EVT VT = Op.getValueType();
7228 SDLoc DL(Op);
7229 unsigned Opcode = Op.getOpcode();
7230 ISD::CondCode CC;
7231 switch (Opcode) {
7232 default:
7233 llvm_unreachable("Wrong instruction");
7234 case ISD::SMAX:
7235 CC = ISD::SETGT;
7236 break;
7237 case ISD::SMIN:
7238 CC = ISD::SETLT;
7239 break;
7240 case ISD::UMAX:
7241 CC = ISD::SETUGT;
7242 break;
7243 case ISD::UMIN:
7244 CC = ISD::SETULT;
7245 break;
7246 }
7247
7248 if (VT.isScalableVector() ||
7249 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true)) {
7250 switch (Opcode) {
7251 default:
7252 llvm_unreachable("Wrong instruction");
7253 case ISD::SMAX:
7254 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED,
7255 /*OverrideNEON=*/true);
7256 case ISD::SMIN:
7257 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED,
7258 /*OverrideNEON=*/true);
7259 case ISD::UMAX:
7260 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED,
7261 /*OverrideNEON=*/true);
7262 case ISD::UMIN:
7263 return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED,
7264 /*OverrideNEON=*/true);
7265 }
7266 }
7267
7268 SDValue Op0 = Op.getOperand(0);
7269 SDValue Op1 = Op.getOperand(1);
7270 SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
7271 return DAG.getSelect(DL, VT, Cond, Op0, Op1);
7272}
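// Sketch of the generic fallback above for smax on two i32 values:
//   %cond = setcc setgt %a, %b
//   %res  = select %cond, %a, %b
// which is expected to match a compare-and-CSEL style sequence during
// instruction selection.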
7273
7274SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
7275 SelectionDAG &DAG) const {
7276 EVT VT = Op.getValueType();
7277
7278 if (VT.isScalableVector() ||
7279 useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
7280 return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU,
7281 true);
7282
7283 SDLoc DL(Op);
7284 SDValue REVB;
7285 MVT VST;
7286
7287 switch (VT.getSimpleVT().SimpleTy) {
7288 default:
7289 llvm_unreachable("Invalid type for bitreverse!");
7290
7291 case MVT::v2i32: {
7292 VST = MVT::v8i8;
7293 REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7294
7295 break;
7296 }
7297
7298 case MVT::v4i32: {
7299 VST = MVT::v16i8;
7300 REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));
7301
7302 break;
7303 }
7304
7305 case MVT::v1i64: {
7306 VST = MVT::v8i8;
7307 REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7308
7309 break;
7310 }
7311
7312 case MVT::v2i64: {
7313 VST = MVT::v16i8;
7314 REVB = DAG.getNode(AArch64ISD::REV64, DL, VST, Op.getOperand(0));
7315
7316 break;
7317 }
7318 }
7319
7320 return DAG.getNode(AArch64ISD::NVCAST, DL, VT,
7321 DAG.getNode(ISD::BITREVERSE, DL, VST, REVB));
7322}
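// Sketch of the v4i32 case above: the input is viewed as v16i8, REV32
// reverses the byte order within each 32-bit element, and the byte-level
// ISD::BITREVERSE then reverses the bits within each byte. Composing the
// two reverses every bit of each 32-bit lane, and the NVCAST merely
// reinterprets the result back as the original vector type.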
7323
7324SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
7325
7326 if (Op.getValueType().isVector())
7327 return LowerVSETCC(Op, DAG);
7328
7329 bool IsStrict = Op->isStrictFPOpcode();
7330 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
7331 unsigned OpNo = IsStrict ? 1 : 0;
7332 SDValue Chain;
7333 if (IsStrict)
7334 Chain = Op.getOperand(0);
7335 SDValue LHS = Op.getOperand(OpNo + 0);
7336 SDValue RHS = Op.getOperand(OpNo + 1);
7337 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
7338 SDLoc dl(Op);
7339
7340 // We chose ZeroOrOneBooleanContents, so use zero and one.
7341 EVT VT = Op.getValueType();
7342 SDValue TVal = DAG.getConstant(1, dl, VT);
7343 SDValue FVal = DAG.getConstant(0, dl, VT);
7344
7345 // Handle f128 first, since one possible outcome is a normal integer
7346 // comparison which gets picked up by the next if statement.
7347 if (LHS.getValueType() == MVT::f128) {
7348 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
7349 IsSignaling);
7350
7351 // If softenSetCCOperands returned a scalar, use it.
7352 if (!RHS.getNode()) {
7353 assert(LHS.getValueType() == Op.getValueType() &&
7354 "Unexpected setcc expansion!");
7355 return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
7356 }
7357 }
7358
7359 if (LHS.getValueType().isInteger()) {
7360 SDValue CCVal;
7361 SDValue Cmp = getAArch64Cmp(
7362 LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
7363
7364 // Note that we inverted the condition above, so we reverse the order of
7365 // the true and false operands here. This will allow the setcc to be
7366 // matched to a single CSINC instruction.
7367 SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
7368 return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
7369 }
7370
7371 // Now we know we're dealing with FP values.
7372 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7373 LHS.getValueType() == MVT::f64);
7374
7375 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
7376 // and do the comparison.
7377 SDValue Cmp;
7378 if (IsStrict)
7379 Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
7380 else
7381 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7382
7383 AArch64CC::CondCode CC1, CC2;
7384 changeFPCCToAArch64CC(CC, CC1, CC2);
7385 SDValue Res;
7386 if (CC2 == AArch64CC::AL) {
7387 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
7388 CC2);
7389 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7390
7391 // Note that we inverted the condition above, so we reverse the order of
7392 // the true and false operands here. This will allow the setcc to be
7393 // matched to a single CSINC instruction.
7394 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
7395 } else {
7396 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
7397 // totally clean. Some of them require two CSELs to implement. As is in
7398 // this case, we emit the first CSEL and then emit a second using the output
7399 // of the first as the RHS. We're effectively OR'ing the two CC's together.
7400
7401 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
7402 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7403 SDValue CS1 =
7404 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7405
7406 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7407 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7408 }
7409 return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
7410}
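// Sketch for an integer setcc such as "i32 a == b" given the code above:
// the condition is inverted to NE and the true/false operands of the CSEL
// are reversed, so the node matches the single-instruction CSINC form,
// roughly:
//   cmp  w0, w1
//   cset w8, eq        // alias of csinc w8, wzr, wzr, ne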
7411
7412SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
7413 SDValue RHS, SDValue TVal,
7414 SDValue FVal, const SDLoc &dl,
7415 SelectionDAG &DAG) const {
7416 // Handle f128 first, because it will result in a comparison of some RTLIB
7417 // call result against zero.
7418 if (LHS.getValueType() == MVT::f128) {
7419 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
7420
7421 // If softenSetCCOperands returned a scalar, we need to compare the result
7422 // against zero to select between true and false values.
7423 if (!RHS.getNode()) {
7424 RHS = DAG.getConstant(0, dl, LHS.getValueType());
7425 CC = ISD::SETNE;
7426 }
7427 }
7428
7429 // Also handle f16, for which we need to do an f32 comparison.
7430 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
7431 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
7432 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
7433 }
7434
7435 // Next, handle integers.
7436 if (LHS.getValueType().isInteger()) {
7437 assert((LHS.getValueType() == RHS.getValueType()) &&
7438 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
7439
7440 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
7441 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
7442 ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
7443 // Check for sign pattern (SELECT_CC setgt, iN lhs, -1, 1, -1) and transform
7444 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
7445 // supported types.
7446 if (CC == ISD::SETGT && RHSC && RHSC->isAllOnesValue() && CTVal && CFVal &&
7447 CTVal->isOne() && CFVal->isAllOnesValue() &&
7448 LHS.getValueType() == TVal.getValueType()) {
7449 EVT VT = LHS.getValueType();
7450 SDValue Shift =
7451 DAG.getNode(ISD::SRA, dl, VT, LHS,
7452 DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
7453 return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
7454 }
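// Worked example of the transform above for i32: "x > -1 ? 1 : -1" becomes
// (x >>s 31) | 1. The arithmetic shift produces 0 for non-negative x and
// all-ones for negative x, and OR-ing in 1 then yields 1 or -1 respectively.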
7455
7456 unsigned Opcode = AArch64ISD::CSEL;
7457
7458 // If both the TVal and the FVal are constants, see if we can swap them in
7459 // order to form a CSINV or CSINC out of them.
7460 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
7461 std::swap(TVal, FVal);
7462 std::swap(CTVal, CFVal);
7463 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7464 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
7465 std::swap(TVal, FVal);
7466 std::swap(CTVal, CFVal);
7467 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7468 } else if (TVal.getOpcode() == ISD::XOR) {
7469 // If TVal is a NOT we want to swap TVal and FVal so that we can match
7470 // with a CSINV rather than a CSEL.
7471 if (isAllOnesConstant(TVal.getOperand(1))) {
7472 std::swap(TVal, FVal);
7473 std::swap(CTVal, CFVal);
7474 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7475 }
7476 } else if (TVal.getOpcode() == ISD::SUB) {
7477 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
7478 // that we can match with a CSNEG rather than a CSEL.
7479 if (isNullConstant(TVal.getOperand(0))) {
7480 std::swap(TVal, FVal);
7481 std::swap(CTVal, CFVal);
7482 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7483 }
7484 } else if (CTVal && CFVal) {
7485 const int64_t TrueVal = CTVal->getSExtValue();
7486 const int64_t FalseVal = CFVal->getSExtValue();
7487 bool Swap = false;
7488
7489 // If both TVal and FVal are constants, see if FVal is the
7490 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
7491 // instead of a CSEL in that case.
7492 if (TrueVal == ~FalseVal) {
7493 Opcode = AArch64ISD::CSINV;
7494 } else if (FalseVal > std::numeric_limits<int64_t>::min() &&
7495 TrueVal == -FalseVal) {
7496 Opcode = AArch64ISD::CSNEG;
7497 } else if (TVal.getValueType() == MVT::i32) {
7498 // If our operands are only 32-bit wide, make sure we use 32-bit
7499 // arithmetic for the check whether we can use CSINC. This ensures that
7500 // the addition in the check will wrap around properly in case there is
7501 // an overflow (which would not be the case if we do the check with
7502 // 64-bit arithmetic).
7503 const uint32_t TrueVal32 = CTVal->getZExtValue();
7504 const uint32_t FalseVal32 = CFVal->getZExtValue();
7505
7506 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
7507 Opcode = AArch64ISD::CSINC;
7508
7509 if (TrueVal32 > FalseVal32) {
7510 Swap = true;
7511 }
7512 }
7513 // 64-bit check whether we can use CSINC.
7514 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
7515 Opcode = AArch64ISD::CSINC;
7516
7517 if (TrueVal > FalseVal) {
7518 Swap = true;
7519 }
7520 }
7521
7522 // Swap TVal and FVal if necessary.
7523 if (Swap) {
7524 std::swap(TVal, FVal);
7525 std::swap(CTVal, CFVal);
7526 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
7527 }
7528
7529 if (Opcode != AArch64ISD::CSEL) {
7530 // Drop FVal since we can get its value by simply inverting/negating
7531 // TVal.
7532 FVal = TVal;
7533 }
7534 }
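// Summary of the constant TVal/FVal cases handled above:
//   FVal == ~TVal       -> CSINV
//   FVal == -TVal       -> CSNEG
//   |TVal - FVal| == 1  -> CSINC, swapping the operands and inverting the
//                          condition if needed so the incremented value is
//                          on the false side
// The separate 32-bit check deliberately relies on wrapping: e.g. for i32,
// TVal == INT32_MIN and FVal == INT32_MAX still qualify for CSINC because
// INT32_MAX + 1 wraps to INT32_MIN in 32-bit arithmetic, which the 64-bit
// comparison would miss.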
7535
7536 // Avoid materializing a constant when possible by reusing a known value in
7537 // a register. However, don't perform this optimization if the known value
7538 // is one, zero or negative one in the case of a CSEL. We can always
7539 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
7540 // FVal, respectively.
7541 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
7542 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
7543 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
7544 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7545 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
7546 // "a != C ? x : a" to avoid materializing C.
7547 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
7548 TVal = LHS;
7549 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
7550 FVal = LHS;
7551 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
7552 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
7553 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
7554 // avoid materializing C.
7555 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7556 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
7557 Opcode = AArch64ISD::CSINV;
7558 TVal = LHS;
7559 FVal = DAG.getConstant(0, dl, FVal.getValueType());
7560 }
7561 }
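// Example of the constant reuse above: for "a == 7 ? 7 : x" the CSEL picks
// the register holding a rather than a materialized 7, since a is known to
// equal 7 whenever the condition holds; likewise "a != 7 ? x : 7" reuses a
// on the false side.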
7562
7563 SDValue CCVal;
7564 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
7565 EVT VT = TVal.getValueType();
7566 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
7567 }
7568
7569 // Now we know we're dealing with FP values.
7570 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
7571 LHS.getValueType() == MVT::f64);
7572 assert(LHS.getValueType() == RHS.getValueType());
7573 EVT VT = TVal.getValueType();
7574 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
7575
7576 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7577 // clean. Some of them require two CSELs to implement.
7578 AArch64CC::CondCode CC1, CC2;
7579 changeFPCCToAArch64CC(CC, CC1, CC2);
7580
7581 if (DAG.getTarget().Options.UnsafeFPMath) {
7582 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
7583 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
7584 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
7585 if (RHSVal && RHSVal->isZero()) {
7586 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
7587 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
7588
7589 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
7590 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
7591 TVal = LHS;
7592 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
7593 CFVal && CFVal->isZero() &&
7594 FVal.getValueType() == LHS.getValueType())
7595 FVal = LHS;
7596 }
7597 }
7598
7599 // Emit first, and possibly only, CSEL.
7600 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
7601 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
7602
7603 // If we need a second CSEL, emit it, using the output of the first as the
7604 // RHS. We're effectively OR'ing the two CC's together.
7605 if (CC2 != AArch64CC::AL) {
7606 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
7607 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
7608 }
7609
7610 // Otherwise, return the output of the first CSEL.
7611 return CS1;
7612}
7613
7614SDValue AArch64TargetLowering::LowerVECTOR_SPLICE(SDValue Op,
7615 SelectionDAG &DAG) const {
7616
7617 EVT Ty = Op.getValueType();
7618 auto Idx = Op.getConstantOperandAPInt(2);
7619 if (Idx.sge(-1) && Idx.slt(Ty.getVectorMinNumElements()))
7620 return Op;
7621 return SDValue();
7622}
7623
7624SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
7625 SelectionDAG &DAG) const {
7626 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7627 SDValue LHS = Op.getOperand(0);
7628 SDValue RHS = Op.getOperand(1);
7629 SDValue TVal = Op.getOperand(2);
7630 SDValue FVal = Op.getOperand(3);
7631 SDLoc DL(Op);
7632 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7633}
7634
7635SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
7636 SelectionDAG &DAG) const {
7637 SDValue CCVal = Op->getOperand(0);
7638 SDValue TVal = Op->getOperand(1);
7639 SDValue FVal = Op->getOperand(2);
7640 SDLoc DL(Op);
7641
7642 EVT Ty = Op.getValueType();
7643 if (Ty.isScalableVector()) {
7644 SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal);
7645 MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount());
7646 SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, TruncCC);
7647 return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7648 }
7649
7650 if (useSVEForFixedLengthVectorVT(Ty)) {
7651 // FIXME: Ideally this would be the same as above using i1 types, however
7652 // for the moment we can't deal with fixed i1 vector types properly, so
7653 // instead extend the predicate to a result type sized integer vector.
7654 MVT SplatValVT = MVT::getIntegerVT(Ty.getScalarSizeInBits());
7655 MVT PredVT = MVT::getVectorVT(SplatValVT, Ty.getVectorElementCount());
7656 SDValue SplatVal = DAG.getSExtOrTrunc(CCVal, DL, SplatValVT);
7657 SDValue SplatPred = DAG.getNode(ISD::SPLAT_VECTOR, DL, PredVT, SplatVal);
7658 return DAG.getNode(ISD::VSELECT, DL, Ty, SplatPred, TVal, FVal);
7659 }
7660
7661 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
7662 // instruction.
7663 if (ISD::isOverflowIntrOpRes(CCVal)) {
7664 // Only lower legal XALUO ops.
7665 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
7666 return SDValue();
7667
7668 AArch64CC::CondCode OFCC;
7669 SDValue Value, Overflow;
7670 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
7671 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
7672
7673 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
7674 CCVal, Overflow);
7675 }
7676
7677 // Lower it the same way as we would lower a SELECT_CC node.
7678 ISD::CondCode CC;
7679 SDValue LHS, RHS;
7680 if (CCVal.getOpcode() == ISD::SETCC) {
7681 LHS = CCVal.getOperand(0);
7682 RHS = CCVal.getOperand(1);
7683 CC = cast<CondCodeSDNode>(CCVal.getOperand(2))->get();
7684 } else {
7685 LHS = CCVal;
7686 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
7687 CC = ISD::SETNE;
7688 }
7689 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
7690}
7691
7692SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
7693 SelectionDAG &DAG) const {
7694 // Jump table entries are PC-relative offsets. No additional tweaking
7695 // is necessary here. Just get the address of the jump table.
7696 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
7697
7698 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7699 !Subtarget->isTargetMachO()) {
7700 return getAddrLarge(JT, DAG);
7701 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7702 return getAddrTiny(JT, DAG);
7703 }
7704 return getAddr(JT, DAG);
7705}
7706
7707SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
7708 SelectionDAG &DAG) const {
7709 // Jump table entries are PC-relative offsets. No additional tweaking
7710 // is necessary here. Just get the address of the jump table.
7711 SDLoc DL(Op);
7712 SDValue JT = Op.getOperand(1);
7713 SDValue Entry = Op.getOperand(2);
7714 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
7715
7716 auto *AFI = DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7717 AFI->setJumpTableEntryInfo(JTI, 4, nullptr);
7718
7719 SDNode *Dest =
7720 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
7721 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
7722 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
7723 SDValue(Dest, 0));
7724}
7725
7726SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
7727 SelectionDAG &DAG) const {
7728 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
7729
7730 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
7731 // Use the GOT for the large code model on iOS.
7732 if (Subtarget->isTargetMachO()) {
7733 return getGOT(CP, DAG);
7734 }
7735 return getAddrLarge(CP, DAG);
7736 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7737 return getAddrTiny(CP, DAG);
7738 } else {
7739 return getAddr(CP, DAG);
7740 }
7741}
7742
7743SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
7744 SelectionDAG &DAG) const {
7745 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
7746 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
7747 !Subtarget->isTargetMachO()) {
7748 return getAddrLarge(BA, DAG);
7749 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
7750 return getAddrTiny(BA, DAG);
7751 }
7752 return getAddr(BA, DAG);
7753}
7754
7755SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
7756 SelectionDAG &DAG) const {
7757 AArch64FunctionInfo *FuncInfo =
7758 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7759
7760 SDLoc DL(Op);
7761 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
7762 getPointerTy(DAG.getDataLayout()));
7763 FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
7764 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7765 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7766 MachinePointerInfo(SV));
7767}
7768
7769SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
7770 SelectionDAG &DAG) const {
7771 AArch64FunctionInfo *FuncInfo =
7772 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
7773
7774 SDLoc DL(Op);
7775 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
7776 ? FuncInfo->getVarArgsGPRIndex()
7777 : FuncInfo->getVarArgsStackIndex(),
7778 getPointerTy(DAG.getDataLayout()));
7779 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7780 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
7781 MachinePointerInfo(SV));
7782}
7783
7784SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
7785 SelectionDAG &DAG) const {
7786 // The layout of the va_list struct is specified in the AArch64 Procedure Call
7787 // Standard, section B.3.
7788 MachineFunction &MF = DAG.getMachineFunction();
7789 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
7790 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7791 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7792 auto PtrVT = getPointerTy(DAG.getDataLayout());
7793 SDLoc DL(Op);
7794
7795 SDValue Chain = Op.getOperand(0);
7796 SDValue VAList = Op.getOperand(1);
7797 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7798 SmallVector<SDValue, 4> MemOps;
7799
7800 // void *__stack at offset 0
7801 unsigned Offset = 0;
7802 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
7803 Stack = DAG.getZExtOrTrunc(Stack, DL, PtrMemVT);
7804 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
7805 MachinePointerInfo(SV), Align(PtrSize)));
7806
7807 // void *__gr_top at offset 8 (4 on ILP32)
7808 Offset += PtrSize;
7809 int GPRSize = FuncInfo->getVarArgsGPRSize();
7810 if (GPRSize > 0) {
7811 SDValue GRTop, GRTopAddr;
7812
7813 GRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7814 DAG.getConstant(Offset, DL, PtrVT));
7815
7816 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
7817 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
7818 DAG.getConstant(GPRSize, DL, PtrVT));
7819 GRTop = DAG.getZExtOrTrunc(GRTop, DL, PtrMemVT);
7820
7821 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
7822 MachinePointerInfo(SV, Offset),
7823 Align(PtrSize)));
7824 }
7825
7826 // void *__vr_top at offset 16 (8 on ILP32)
7827 Offset += PtrSize;
7828 int FPRSize = FuncInfo->getVarArgsFPRSize();
7829 if (FPRSize > 0) {
7830 SDValue VRTop, VRTopAddr;
7831 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7832 DAG.getConstant(Offset, DL, PtrVT));
7833
7834 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
7835 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
7836 DAG.getConstant(FPRSize, DL, PtrVT));
7837 VRTop = DAG.getZExtOrTrunc(VRTop, DL, PtrMemVT);
7838
7839 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
7840 MachinePointerInfo(SV, Offset),
7841 Align(PtrSize)));
7842 }
7843
7844 // int __gr_offs at offset 24 (12 on ILP32)
7845 Offset += PtrSize;
7846 SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7847 DAG.getConstant(Offset, DL, PtrVT));
7848 MemOps.push_back(
7849 DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32),
7850 GROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7851
7852 // int __vr_offs at offset 28 (16 on ILP32)
7853 Offset += 4;
7854 SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7855 DAG.getConstant(Offset, DL, PtrVT));
7856 MemOps.push_back(
7857 DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32),
7858 VROffsAddr, MachinePointerInfo(SV, Offset), Align(4)));
7859
7860 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
7861}
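// For reference, the stores above populate the AAPCS64 va_list structure
// (offsets shown for LP64; ILP32 uses 4-byte pointers):
//   struct va_list {
//     void *__stack;    // offset 0
//     void *__gr_top;   // offset 8
//     void *__vr_top;   // offset 16
//     int   __gr_offs;  // offset 24
//     int   __vr_offs;  // offset 28
//   };
// __gr_offs and __vr_offs are negative byte offsets from __gr_top/__vr_top
// to the next unconsumed register save slot, which is why -GPRSize and
// -FPRSize are stored.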
7862
7863SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
7864 SelectionDAG &DAG) const {
7865 MachineFunction &MF = DAG.getMachineFunction();
7866
7867 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
7868 return LowerWin64_VASTART(Op, DAG);
7869 else if (Subtarget->isTargetDarwin())
7870 return LowerDarwin_VASTART(Op, DAG);
7871 else
7872 return LowerAAPCS_VASTART(Op, DAG);
7873}
7874
7875SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
7876 SelectionDAG &DAG) const {
7877 // AAPCS has three pointers and two ints (= 32 bytes); Darwin has a single
7878 // pointer.
7879 SDLoc DL(Op);
7880 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
7881 unsigned VaListSize =
7882 (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
7883 ? PtrSize
7884 : Subtarget->isTargetILP32() ? 20 : 32;
7885 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
7886 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
7887
7888 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
7889 DAG.getConstant(VaListSize, DL, MVT::i32),
7890 Align(PtrSize), false, false, false,
7891 MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
7892}
7893
7894SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
7895 assert(Subtarget->isTargetDarwin() &&
7896 "automatic va_arg instruction only works on Darwin");
7897
7898 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
7899 EVT VT = Op.getValueType();
7900 SDLoc DL(Op);
7901 SDValue Chain = Op.getOperand(0);
7902 SDValue Addr = Op.getOperand(1);
7903 MaybeAlign Align(Op.getConstantOperandVal(3));
7904 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
7905 auto PtrVT = getPointerTy(DAG.getDataLayout());
7906 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
7907 SDValue VAList =
7908 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
7909 Chain = VAList.getValue(1);
7910 VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
7911
7912 if (VT.isScalableVector())
7913 report_fatal_error("Passing SVE types to variadic functions is "
7914 "currently not supported");
7915
7916 if (Align && *Align > MinSlotSize) {
7917 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7918 DAG.getConstant(Align->value() - 1, DL, PtrVT));
7919 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
7920 DAG.getConstant(-(int64_t)Align->value(), DL, PtrVT));
7921 }
7922
7923 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
7924 unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
7925
7926 // Scalar integer and FP values smaller than 64 bits are implicitly extended
7927 // up to 64 bits. At the very least, we have to increase the striding of the
7928 // vaargs list to match this, and for FP values we need to introduce
7929 // FP_ROUND nodes as well.
7930 if (VT.isInteger() && !VT.isVector())
7931 ArgSize = std::max(ArgSize, MinSlotSize);
7932 bool NeedFPTrunc = false;
7933 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
7934 ArgSize = 8;
7935 NeedFPTrunc = true;
7936 }
7937
7938 // Increment the pointer, VAList, to the next vaarg
7939 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
7940 DAG.getConstant(ArgSize, DL, PtrVT));
7941 VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
7942
7943 // Store the incremented VAList to the legalized pointer
7944 SDValue APStore =
7945 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
7946
7947 // Load the actual argument out of the pointer VAList
7948 if (NeedFPTrunc) {
7949 // Load the value as an f64.
7950 SDValue WideFP =
7951 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
7952 // Round the value down to an f32.
7953 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
7954 DAG.getIntPtrConstant(1, DL));
7955 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
7956 // Merge the rounded value with the chain output of the load.
7957 return DAG.getMergeValues(Ops, DL);
7958 }
7959
7960 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
7961}
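// Sketch of the NeedFPTrunc path above for a va_arg of float: the 8-byte
// slot is loaded as an f64 (scalar FP varargs are promoted to double) and
// an FP_ROUND narrows it back to f32 before merging with the load's chain.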
7962
7963SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
7964 SelectionDAG &DAG) const {
7965 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7966 MFI.setFrameAddressIsTaken(true);
7967
7968 EVT VT = Op.getValueType();
7969 SDLoc DL(Op);
7970 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7971 SDValue FrameAddr =
7972 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
7973 while (Depth--)
7974 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
7975 MachinePointerInfo());
7976
7977 if (Subtarget->isTargetILP32())
7978 FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
7979 DAG.getValueType(VT));
7980
7981 return FrameAddr;
7982}
7983
7984SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
7985 SelectionDAG &DAG) const {
7986 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7987
7988 EVT VT = getPointerTy(DAG.getDataLayout());
7989 SDLoc DL(Op);
7990 int FI = MFI.CreateFixedObject(4, 0, false);
7991 return DAG.getFrameIndex(FI, VT);
7992}
7993
7994#define GET_REGISTER_MATCHER
7995#include "AArch64GenAsmMatcher.inc"
7996
7997// FIXME? Maybe this could be a TableGen attribute on some registers and
7998// this table could be generated automatically from RegInfo.
7999Register AArch64TargetLowering::
8000getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
8001 Register Reg = MatchRegisterName(RegName);
8002 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
8003 const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
8004 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
8005 if (!Subtarget->isXRegisterReserved(DwarfRegNum))
8006 Reg = 0;
8007 }
8008 if (Reg)
8009 return Reg;
8010 report_fatal_error(Twine("Invalid register name \""
8011 + StringRef(RegName) + "\"."));
8012}
8013
8014SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
8015 SelectionDAG &DAG) const {
8016 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
8017
8018 EVT VT = Op.getValueType();
8019 SDLoc DL(Op);
8020
8021 SDValue FrameAddr =
8022 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
8023 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
8024
8025 return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
8026}
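// The +8 above relies on the AArch64 frame record layout: each record is a
// {previous FP, LR} pair with the frame pointer pointing at the pair, so
// the saved return address lives at [FP + 8].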
8027
8028SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
8029 SelectionDAG &DAG) const {
8030 MachineFunction &MF = DAG.getMachineFunction();
8031 MachineFrameInfo &MFI = MF.getFrameInfo();
8032 MFI.setReturnAddressIsTaken(true);
8033
8034 EVT VT = Op.getValueType();
8035 SDLoc DL(Op);
8036 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
8037 SDValue ReturnAddress;
8038 if (Depth) {
8039 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
8040 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
8041 ReturnAddress = DAG.getLoad(
8042 VT, DL, DAG.getEntryNode(),
8043 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset), MachinePointerInfo());
8044 } else {
8045 // Return LR, which contains the return address. Mark it an implicit
8046 // live-in.
8047 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
8048 ReturnAddress = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
8049 }
8050
8051 // The XPACLRI instruction assembles to a hint-space instruction before
8052 // Armv8.3-A, so it can be used safely on any pre-Armv8.3-A
8053 // architecture. On Armv8.3-A and onwards, XPACI is available, so use
8054 // that instead.
8055 SDNode *St;
8056 if (Subtarget->hasPAuth()) {
8057 St = DAG.getMachineNode(AArch64::XPACI, DL, VT, ReturnAddress);
8058 } else {
8059 // XPACLRI operates on LR therefore we must move the operand accordingly.
8060 SDValue Chain =
8061 DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::LR, ReturnAddress);
8062 St = DAG.getMachineNode(AArch64::XPACLRI, DL, VT, Chain);
8063 }
8064 return SDValue(St, 0);
8065}
8066
8067 /// LowerShiftParts - Lower SHL_PARTS/SRA_PARTS/SRL_PARTS, which return two
8068 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
8069SDValue AArch64TargetLowering::LowerShiftParts(SDValue Op,
8070 SelectionDAG &DAG) const {
8071 SDValue Lo, Hi;
8072 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
8073 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
8074}
8075
8076bool AArch64TargetLowering::isOffsetFoldingLegal(
8077 const GlobalAddressSDNode *GA) const {
8078 // Offsets are folded in the DAG combine rather than here so that we can
8079 // intelligently choose an offset based on the uses.
8080 return false;
8081}
8082
8083bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
8084 bool OptForSize) const {
8085 bool IsLegal = false;
8086 // We can materialize #0.0 as fmov $Rd, XZR for the 64-bit and 32-bit cases,
8087 // and for the 16-bit case when the target has full fp16 support.
8088 // FIXME: We should be able to handle f128 as well with a clever lowering.
8089 const APInt ImmInt = Imm.bitcastToAPInt();
8090 if (VT == MVT::f64)
8091 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
8092 else if (VT == MVT::f32)
8093 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
8094 else if (VT == MVT::f16 && Subtarget->hasFullFP16())
8095 IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
8096 // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
8097 // generate that fmov.
8098
8099 // If we cannot materialize the immediate for fmov, check if the
8100 // value can be encoded as the immediate operand of a logical instruction.
8101 // The immediate value will be created with either MOVZ, MOVN, or ORR.
8102 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
8103 // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
8104 // however the mov+fmov sequence is always better because of the reduced
8105 // cache pressure. The timings are still the same if you consider
8106 // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
8107 // movw+movk is fused). So we limit to at most 2 instructions.
8108 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8109 AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
8110 Insn);
8111 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
8112 IsLegal = Insn.size() <= Limit;
8113 }
8114
8115 LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
8116 << " imm value: "; Imm.dump(););
8117 return IsLegal;
8118}
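// Rough intuition for the getFP*Imm checks above: FMOV (immediate) encodes
// an 8-bit value covering numbers of the form +/- (n/16) * 2^e with n in
// [16, 31] and e in [-3, 4], so constants such as 1.0, 0.5, 2.0 or 0.25
// qualify, while a value like 0.1 does not and falls through to the
// MOV-immediate expansion check.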
8119
8120//===----------------------------------------------------------------------===//
8121// AArch64 Optimization Hooks
8122//===----------------------------------------------------------------------===//
8123
8124static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
8125 SDValue Operand, SelectionDAG &DAG,
8126 int &ExtraSteps) {
8127 EVT VT = Operand.getValueType();
8128 if (ST->hasNEON() &&
8129 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
8130 VT == MVT::f32 || VT == MVT::v1f32 ||
8131 VT == MVT::v2f32 || VT == MVT::v4f32)) {
8132 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
8133 // For the reciprocal estimates, convergence is quadratic, so the number
8134 // of digits is doubled after each iteration. In ARMv8, the accuracy of
8135 // the initial estimate is 2^-8. Thus the number of extra steps to refine
8136 // the result for float (23 mantissa bits) is 2 and for double (52
8137 // mantissa bits) is 3.
8138 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
8139
8140 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
8141 }
8142
8143 return SDValue();
8144}
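// The ExtraSteps values above follow from quadratic convergence: starting
// from the 2^-8 hardware estimate, one refinement step gives roughly 2^-16
// accuracy, two give 2^-32 (enough for the 23-bit float mantissa), and
// three give 2^-64 (enough for the 52-bit double mantissa).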
8145
8146SDValue
8147AArch64TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
8148 const DenormalMode &Mode) const {
8149 SDLoc DL(Op);
8150 EVT VT = Op.getValueType();
8151 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
8152 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
8153 return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
8154}
8155
8156SDValue
8157AArch64TargetLowering::getSqrtResultForDenormInput(SDValue Op,
8158 SelectionDAG &DAG) const {
8159 return Op;
8160}
8161
8162SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
8163 SelectionDAG &DAG, int Enabled,
8164 int &ExtraSteps,
8165 bool &UseOneConst,
8166 bool Reciprocal) const {
8167 if (Enabled == ReciprocalEstimate::Enabled ||
8168 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
8169 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
8170 DAG, ExtraSteps)) {
8171 SDLoc DL(Operand);
8172 EVT VT = Operand.getValueType();
8173
8174 SDNodeFlags Flags;
8175 Flags.setAllowReassociation(true);
8176
8177 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
8178 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
8179 for (int i = ExtraSteps; i > 0; --i) {
8180 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
8181 Flags);
8182 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
8183 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8184 }
8185 if (!Reciprocal)
8186 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
8187
8188 ExtraSteps = 0;
8189 return Estimate;
8190 }
8191
8192 return SDValue();
8193}
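// One iteration of the loop above, with E the current estimate of
// 1/sqrt(X):
//   Step     = E * E                 (ISD::FMUL)
//   Step     = 0.5 * (3 - X * Step)  (AArch64ISD::FRSQRTS)
//   Estimate = E * Step              (ISD::FMUL)
// When the full square root rather than its reciprocal is wanted, the final
// estimate is multiplied by X once more, since sqrt(X) == X * (1/sqrt(X)).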
8194
8195SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
8196 SelectionDAG &DAG, int Enabled,
8197 int &ExtraSteps) const {
8198 if (Enabled == ReciprocalEstimate::Enabled)
8199 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
8200 DAG, ExtraSteps)) {
8201 SDLoc DL(Operand);
8202 EVT VT = Operand.getValueType();
8203
8204 SDNodeFlags Flags;
8205 Flags.setAllowReassociation(true);
8206
8207 // Newton reciprocal iteration: E * (2 - X * E)
8208 // AArch64 reciprocal iteration instruction: (2 - M * N)
8209 for (int i = ExtraSteps; i > 0; --i) {
8210 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
8211 Estimate, Flags);
8212 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
8213 }
8214
8215 ExtraSteps = 0;
8216 return Estimate;
8217 }
8218
8219 return SDValue();
8220}
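// The analogous Newton step here: FRECPS computes (2 - X * E), and the
// following FMUL refines the estimate E towards 1/X.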
8221
8222//===----------------------------------------------------------------------===//
8223// AArch64 Inline Assembly Support
8224//===----------------------------------------------------------------------===//
8225
8226// Table of Constraints
8227// TODO: This is the current set of constraints supported by ARM for the
8228 // compiler; not all of them may make sense.
8229//
8230// r - A general register
8231// w - An FP/SIMD register of some size in the range v0-v31
8232// x - An FP/SIMD register of some size in the range v0-v15
8233// I - Constant that can be used with an ADD instruction
8234// J - Constant that can be used with a SUB instruction
8235// K - Constant that can be used with a 32-bit logical instruction
8236// L - Constant that can be used with a 64-bit logical instruction
8237// M - Constant that can be used as a 32-bit MOV immediate
8238// N - Constant that can be used as a 64-bit MOV immediate
8239// Q - A memory reference with base register and no offset
8240// S - A symbolic address
8241// Y - Floating point constant zero
8242// Z - Integer constant zero
8243//
8244// Note that general register operands will be output using their 64-bit x
8245// register name, whatever the size of the variable, unless the asm operand
8246// is prefixed by the %w modifier. Floating-point and SIMD register operands
8247// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
8248// %q modifier.
8249const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
8250 // At this point, we have to lower this constraint to something else, so we
8251 // lower it to an "r" or "w". However, by doing this we will force the result
8252 // to be in register, while the X constraint is much more permissive.
8253 //
8254 // Although we are correct (we are free to emit anything, without
8255 // constraints), we might break use cases that would expect us to be more
8256 // efficient and emit something else.
8257 if (!Subtarget->hasFPARMv8())
8258 return "r";
8259
8260 if (ConstraintVT.isFloatingPoint())
8261 return "w";
8262
8263 if (ConstraintVT.isVector() &&
8264 (ConstraintVT.getSizeInBits() == 64 ||
8265 ConstraintVT.getSizeInBits() == 128))
8266 return "w";
8267
8268 return "r";
8269}
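// Hypothetical inline-asm snippets using the constraints documented above
// (operand names are illustrative only):
//   asm("add %0, %1, %2"     : "=r"(res) : "r"(a), "I"(4095));   // ADD imm
//   asm("and %0, %1, %2"     : "=r"(res) : "r"(a), "K"(0xff00)); // bimm32
//   asm("fadd %s0, %s1, %s2" : "=w"(f)   : "w"(x), "w"(y));      // FP reg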
8270
8271enum PredicateConstraint {
8272 Upl,
8273 Upa,
8274 Invalid
8275};
8276
8277static PredicateConstraint parsePredicateConstraint(StringRef Constraint) {
8278 PredicateConstraint P = PredicateConstraint::Invalid;
8279 if (Constraint == "Upa")
8280 P = PredicateConstraint::Upa;
8281 if (Constraint == "Upl")
8282 P = PredicateConstraint::Upl;
8283 return P;
8284}
8285
8286/// getConstraintType - Given a constraint letter, return the type of
8287/// constraint it is for this target.
8288AArch64TargetLowering::ConstraintType
8289AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
8290 if (Constraint.size() == 1) {
8291 switch (Constraint[0]) {
8292 default:
8293 break;
8294 case 'x':
8295 case 'w':
8296 case 'y':
8297 return C_RegisterClass;
8298 // An address with a single base register. Due to the way we
8299 // currently handle addresses it is the same as 'r'.
8300 case 'Q':
8301 return C_Memory;
8302 case 'I':
8303 case 'J':
8304 case 'K':
8305 case 'L':
8306 case 'M':
8307 case 'N':
8308 case 'Y':
8309 case 'Z':
8310 return C_Immediate;
8311 case 'z':
8312 case 'S': // A symbolic address
8313 return C_Other;
8314 }
8315 } else if (parsePredicateConstraint(Constraint) !=
8316 PredicateConstraint::Invalid)
8317 return C_RegisterClass;
8318 return TargetLowering::getConstraintType(Constraint);
8319}
8320
8321/// Examine constraint type and operand type and determine a weight value.
8322/// This object must already have been set up with the operand type
8323/// and the current alternative constraint selected.
8324TargetLowering::ConstraintWeight
8325AArch64TargetLowering::getSingleConstraintMatchWeight(
8326 AsmOperandInfo &info, const char *constraint) const {
8327 ConstraintWeight weight = CW_Invalid;
8328 Value *CallOperandVal = info.CallOperandVal;
8329 // If we don't have a value, we can't do a match,
8330 // but allow it at the lowest weight.
8331 if (!CallOperandVal)
8332 return CW_Default;
8333 Type *type = CallOperandVal->getType();
8334 // Look at the constraint type.
8335 switch (*constraint) {
8336 default:
8337 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
8338 break;
8339 case 'x':
8340 case 'w':
8341 case 'y':
8342 if (type->isFloatingPointTy() || type->isVectorTy())
8343 weight = CW_Register;
8344 break;
8345 case 'z':
8346 weight = CW_Constant;
8347 break;
8348 case 'U':
8349 if (parsePredicateConstraint(constraint) != PredicateConstraint::Invalid)
8350 weight = CW_Register;
8351 break;
8352 }
8353 return weight;
8354}
8355
8356std::pair<unsigned, const TargetRegisterClass *>
8357AArch64TargetLowering::getRegForInlineAsmConstraint(
8358 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
8359 if (Constraint.size() == 1) {
8360 switch (Constraint[0]) {
8361 case 'r':
8362 if (VT.isScalableVector())
8363 return std::make_pair(0U, nullptr);
8364 if (Subtarget->hasLS64() && VT.getSizeInBits() == 512)
8365 return std::make_pair(0U, &AArch64::GPR64x8ClassRegClass);
8366 if (VT.getFixedSizeInBits() == 64)
8367 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
8368 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
8369 case 'w': {
8370 if (!Subtarget->hasFPARMv8())
8371 break;
8372 if (VT.isScalableVector()) {
8373 if (VT.getVectorElementType() != MVT::i1)
8374 return std::make_pair(0U, &AArch64::ZPRRegClass);
8375 return std::make_pair(0U, nullptr);
8376 }
8377 uint64_t VTSize = VT.getFixedSizeInBits();
8378 if (VTSize == 16)
8379 return std::make_pair(0U, &AArch64::FPR16RegClass);
8380 if (VTSize == 32)
8381 return std::make_pair(0U, &AArch64::FPR32RegClass);
8382 if (VTSize == 64)
8383 return std::make_pair(0U, &AArch64::FPR64RegClass);
8384 if (VTSize == 128)
8385 return std::make_pair(0U, &AArch64::FPR128RegClass);
8386 break;
8387 }
8388 // The instructions that this constraint is designed for can
8389 // only take 128-bit registers so just use that regclass.
8390 case 'x':
8391 if (!Subtarget->hasFPARMv8())
8392 break;
8393 if (VT.isScalableVector())
8394 return std::make_pair(0U, &AArch64::ZPR_4bRegClass);
8395 if (VT.getSizeInBits() == 128)
8396 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
8397 break;
8398 case 'y':
8399 if (!Subtarget->hasFPARMv8())
8400 break;
8401 if (VT.isScalableVector())
8402 return std::make_pair(0U, &AArch64::ZPR_3bRegClass);
8403 break;
8404 }
8405 } else {
8406 PredicateConstraint PC = parsePredicateConstraint(Constraint);
8407 if (PC != PredicateConstraint::Invalid) {
8408 if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
8409 return std::make_pair(0U, nullptr);
8410 bool restricted = (PC == PredicateConstraint::Upl);
8411 return restricted ? std::make_pair(0U, &AArch64::PPR_3bRegClass)
8412 : std::make_pair(0U, &AArch64::PPRRegClass);
8413 }
8414 }
8415 if (StringRef("{cc}").equals_insensitive(Constraint))
8416 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
8417
8418 // Use the default implementation in TargetLowering to convert the register
8419 // constraint into a member of a register class.
8420 std::pair<unsigned, const TargetRegisterClass *> Res;
8421 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
8422
8423 // Not found as a standard register?
8424 if (!Res.second) {
8425 unsigned Size = Constraint.size();
8426 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
8427 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
8428 int RegNo;
8429 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
8430 if (!Failed && RegNo >= 0 && RegNo <= 31) {
8431 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
8432 // By default we'll emit v0-v31 for this unless there's a modifier where
8433 // we'll emit the correct register as well.
8434 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
8435 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
8436 Res.second = &AArch64::FPR64RegClass;
8437 } else {
8438 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
8439 Res.second = &AArch64::FPR128RegClass;
8440 }
8441 }
8442 }
8443 }
8444
8445 if (Res.second && !Subtarget->hasFPARMv8() &&
8446 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
8447 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
8448 return std::make_pair(0U, nullptr);
8449
8450 return Res;
8451}
8452
8453EVT AArch64TargetLowering::getAsmOperandValueType(const DataLayout &DL,
8454 llvm::Type *Ty,
8455 bool AllowUnknown) const {
8456 if (Subtarget->hasLS64() && Ty->isIntegerTy(512))
8457 return EVT(MVT::i64x8);
8458
8459 return TargetLowering::getAsmOperandValueType(DL, Ty, AllowUnknown);
8460}
8461
8462/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
8463/// vector. If it is invalid, don't add anything to Ops.
8464void AArch64TargetLowering::LowerAsmOperandForConstraint(
8465 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
8466 SelectionDAG &DAG) const {
8467 SDValue Result;
8468
8469 // Currently only support length 1 constraints.
8470 if (Constraint.length() != 1)
8471 return;
8472
8473 char ConstraintLetter = Constraint[0];
8474 switch (ConstraintLetter) {
8475 default:
8476 break;
8477
8478 // This set of constraints deal with valid constants for various instructions.
8479 // Validate and return a target constant for them if we can.
8480 case 'z': {
8481 // 'z' maps to xzr or wzr so it needs an input of 0.
8482 if (!isNullConstant(Op))
8483 return;
8484
8485 if (Op.getValueType() == MVT::i64)
8486 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
8487 else
8488 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
8489 break;
8490 }
8491 case 'S': {
8492 // An absolute symbolic address or label reference.
8493 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
8494 Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
8495 GA->getValueType(0));
8496 } else if (const BlockAddressSDNode *BA =
8497 dyn_cast<BlockAddressSDNode>(Op)) {
8498 Result =
8499 DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
8500 } else
8501 return;
8502 break;
8503 }
8504
8505 case 'I':
8506 case 'J':
8507 case 'K':
8508 case 'L':
8509 case 'M':
8510 case 'N':
8511 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
8512 if (!C)
8513 return;
8514
8515 // Grab the value and do some validation.
8516 uint64_t CVal = C->getZExtValue();
8517 switch (ConstraintLetter) {
8518 // The I constraint applies only to simple ADD or SUB immediate operands:
8519 // i.e. 0 to 4095 with optional shift by 12
8520 // The J constraint applies only to ADD or SUB immediates that would be
8521 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
8522 // instruction [or vice versa], in other words -1 to -4095 with optional
8523 // left shift by 12.
8524 case 'I':
8525 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
8526 break;
8527 return;
8528 case 'J': {
8529 uint64_t NVal = -C->getSExtValue();
8530 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
8531 CVal = C->getSExtValue();
8532 break;
8533 }
8534 return;
8535 }
8536 // The K and L constraints apply *only* to logical immediates, including
8537 // what used to be the MOVI alias for ORR (though the MOVI alias has now
8538 // been removed and MOV should be used). So these constraints have to
8539 // distinguish between bit patterns that are valid 32-bit or 64-bit
8540 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
8541 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
8542 // versa.
8543 case 'K':
8544 if (AArch64_AM::isLogicalImmediate(CVal, 32))
8545 break;
8546 return;
8547 case 'L':
8548 if (AArch64_AM::isLogicalImmediate(CVal, 64))
8549 break;
8550 return;
8551 // The M and N constraints are a superset of K and L respectively, for use
8552 // with the MOV (immediate) alias. As well as the logical immediates they
8553 // also match 32 or 64-bit immediates that can be loaded either using a
8554 // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
8555 // (M) or 64-bit 0x1234000000000000 (N) etc.
8556 // As a note some of this code is liberally stolen from the asm parser.
8557 case 'M': {
8558 if (!isUInt<32>(CVal))
8559 return;
8560 if (AArch64_AM::isLogicalImmediate(CVal, 32))
8561 break;
8562 if ((CVal & 0xFFFF) == CVal)
8563 break;
8564 if ((CVal & 0xFFFF0000ULL) == CVal)
8565 break;
8566 uint64_t NCVal = ~(uint32_t)CVal;
8567 if ((NCVal & 0xFFFFULL) == NCVal)
8568 break;
8569 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8570 break;
8571 return;
8572 }
8573 case 'N': {
8574 if (AArch64_AM::isLogicalImmediate(CVal, 64))
8575 break;
8576 if ((CVal & 0xFFFFULL) == CVal)
8577 break;
8578 if ((CVal & 0xFFFF0000ULL) == CVal)
8579 break;
8580 if ((CVal & 0xFFFF00000000ULL) == CVal)
8581 break;
8582 if ((CVal & 0xFFFF000000000000ULL) == CVal)
8583 break;
8584 uint64_t NCVal = ~CVal;
8585 if ((NCVal & 0xFFFFULL) == NCVal)
8586 break;
8587 if ((NCVal & 0xFFFF0000ULL) == NCVal)
8588 break;
8589 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
8590 break;
8591 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
8592 break;
8593 return;
8594 }
8595 default:
8596 return;
8597 }
8598
8599 // All assembler immediates are 64-bit integers.
8600 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
8601 break;
8602 }
8603
8604 if (Result.getNode()) {
8605 Ops.push_back(Result);
8606 return;
8607 }
8608
8609 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
8610}
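// Illustrative aside (not part of the LLVM sources): a minimal standalone
// sketch of two of the immediate checks above, restated with plain integers.
// The helper names are made up for this example; the real code uses LLVM's
// isUInt/isShiftedUInt and AArch64_AM helpers.
#include <cassert>
#include <cstdint>

// 'I': an ADD/SUB immediate, i.e. a 12-bit value optionally shifted left by 12.
static bool isAddSubImm(uint64_t V) {
  return (V & ~0xFFFULL) == 0 || (V & ~(0xFFFULL << 12)) == 0;
}

// One leg of 'N': a 64-bit value loadable with a single MOVZ, i.e. a 16-bit
// chunk placed at one of the four 16-bit positions.
static bool isSingleMovz64(uint64_t V) {
  for (unsigned Shift = 0; Shift < 64; Shift += 16)
    if ((V & (0xFFFFULL << Shift)) == V)
      return true;
  return false;
}

int main() {
  assert(isAddSubImm(0xFFF) && isAddSubImm(0xFFFULL << 12));
  assert(!isAddSubImm(0x1001000));
  assert(isSingleMovz64(0x1234000000000000ULL));
  assert(!isSingleMovz64(0x12340001ULL));
  return 0;
}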
8611
8612//===----------------------------------------------------------------------===//
8613// AArch64 Advanced SIMD Support
8614//===----------------------------------------------------------------------===//
8615
8616/// WidenVector - Given a value in the V64 register class, produce the
8617/// equivalent value in the V128 register class.
8618static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
8619 EVT VT = V64Reg.getValueType();
8620 unsigned NarrowSize = VT.getVectorNumElements();
8621 MVT EltTy = VT.getVectorElementType().getSimpleVT();
8622 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
8623 SDLoc DL(V64Reg);
8624
8625 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
8626 V64Reg, DAG.getConstant(0, DL, MVT::i64));
8627}
8628
8629/// getExtFactor - Determine the adjustment factor for the position when
8630/// generating an "extract from vector registers" instruction.
8631static unsigned getExtFactor(SDValue &V) {
8632 EVT EltType = V.getValueType().getVectorElementType();
8633 return EltType.getSizeInBits() / 8;
8634}
8635
8636/// NarrowVector - Given a value in the V128 register class, produce the
8637/// equivalent value in the V64 register class.
8638static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
8639 EVT VT = V128Reg.getValueType();
8640 unsigned WideSize = VT.getVectorNumElements();
8641 MVT EltTy = VT.getVectorElementType().getSimpleVT();
8642 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
8643 SDLoc DL(V128Reg);
8644
8645 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
8646}
8647
8648// Gather data to see if the operation can be modelled as a
8649// shuffle in combination with VEXTs.
8650SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
8651 SelectionDAG &DAG) const {
8652 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8653 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
8654 SDLoc dl(Op);
8655 EVT VT = Op.getValueType();
8656 assert(!VT.isScalableVector() &&
8657 "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
8658 unsigned NumElts = VT.getVectorNumElements();
8659
8660 struct ShuffleSourceInfo {
8661 SDValue Vec;
8662 unsigned MinElt;
8663 unsigned MaxElt;
8664
8665 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8666 // be compatible with the shuffle we intend to construct. As a result
8667 // ShuffleVec will be some sliding window into the original Vec.
8668 SDValue ShuffleVec;
8669
8670 // Code should guarantee that element i in Vec starts at element
8671 // "WindowBase + i * WindowScale" in ShuffleVec.
8672 int WindowBase;
8673 int WindowScale;
8674
8675 ShuffleSourceInfo(SDValue Vec)
8676 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
8677 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
8678
8679 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8680 };
8681
8682 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8683 // node.
8684 SmallVector<ShuffleSourceInfo, 2> Sources;
8685 for (unsigned i = 0; i < NumElts; ++i) {
8686 SDValue V = Op.getOperand(i);
8687 if (V.isUndef())
8688 continue;
8689 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
8690 !isa<ConstantSDNode>(V.getOperand(1))) {
8691 LLVM_DEBUG(
8692 dbgs() << "Reshuffle failed: "
8693 "a shuffle can only come from building a vector from "
8694 "various elements of other vectors, provided their "
8695 "indices are constant\n");
8696 return SDValue();
8697 }
8698
8699 // Add this element source to the list if it's not already there.
8700 SDValue SourceVec = V.getOperand(0);
8701 auto Source = find(Sources, SourceVec);
8702 if (Source == Sources.end())
8703 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8704
8705 // Update the minimum and maximum lane number seen.
8706 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
8707 Source->MinElt = std::min(Source->MinElt, EltNo);
8708 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8709 }
8710
8711 if (Sources.size() > 2) {
8712 LLVM_DEBUG(
8713 dbgs() << "Reshuffle failed: currently only do something sane when at "
8714 "most two source vectors are involved\n");
8715 return SDValue();
8716 }
8717
8718 // Find out the smallest element size among result and two sources, and use
8719 // it as element size to build the shuffle_vector.
8720 EVT SmallestEltTy = VT.getVectorElementType();
8721 for (auto &Source : Sources) {
8722 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8723 if (SrcEltTy.bitsLT(SmallestEltTy)) {
8724 SmallestEltTy = SrcEltTy;
8725 }
8726 }
8727 unsigned ResMultiplier =
8728 VT.getScalarSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8729 uint64_t VTSize = VT.getFixedSizeInBits();
8730 NumElts = VTSize / SmallestEltTy.getFixedSizeInBits();
8731 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8732
8733 // If the source vector is too wide or too narrow, we may nevertheless be able
8734 // to construct a compatible shuffle either by concatenating it with UNDEF or
8735 // extracting a suitable range of elements.
8736 for (auto &Src : Sources) {
8737 EVT SrcVT = Src.ShuffleVec.getValueType();
8738
8739 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8740 if (SrcVTSize == VTSize)
8741 continue;
8742
8743 // This stage of the search produces a source with the same element type as
8744 // the original, but with a total width matching the BUILD_VECTOR output.
8745 EVT EltVT = SrcVT.getVectorElementType();
8746 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8747 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8748
8749 if (SrcVTSize < VTSize) {
8750 assert(2 * SrcVTSize == VTSize);
8751 // We can pad out the smaller vector for free, so if it's part of a
8752 // shuffle...
8753 Src.ShuffleVec =
8754 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8755 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8756 continue;
8757 }
8758
8759 if (SrcVTSize != 2 * VTSize) {
8760 LLVM_DEBUG(
8761 dbgs() << "Reshuffle failed: result vector too small to extract\n");
8762 return SDValue();
8763 }
8764
8765 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8766 LLVM_DEBUG(
8767 dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
8768 return SDValue();
8769 }
8770
8771 if (Src.MinElt >= NumSrcElts) {
8772 // The extraction can just take the second half
8773 Src.ShuffleVec =
8774 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8775 DAG.getConstant(NumSrcElts, dl, MVT::i64));
8776 Src.WindowBase = -NumSrcElts;
8777 } else if (Src.MaxElt < NumSrcElts) {
8778 // The extraction can just take the first half
8779 Src.ShuffleVec =
8780 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8781 DAG.getConstant(0, dl, MVT::i64));
8782 } else {
8783 // An actual VEXT is needed
8784 SDValue VEXTSrc1 =
8785 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8786 DAG.getConstant(0, dl, MVT::i64));
8787 SDValue VEXTSrc2 =
8788 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8789 DAG.getConstant(NumSrcElts, dl, MVT::i64));
8790 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
8791
8792 if (!SrcVT.is64BitVector()) {
8793 LLVM_DEBUG(
8794 dbgs() << "Reshuffle failed: don't know how to lower AArch64ISD::EXT "
8795 "for SVE vectors.");
8796 return SDValue();
8797 }
8798
8799 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
8800 VEXTSrc2,
8801 DAG.getConstant(Imm, dl, MVT::i32));
8802 Src.WindowBase = -Src.MinElt;
8803 }
8804 }
8805
8806 // Another possible incompatibility occurs from the vector element types. We
8807 // can fix this by bitcasting the source vectors to the same type we intend
8808 // for the shuffle.
8809 for (auto &Src : Sources) {
8810 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8811 if (SrcEltTy == SmallestEltTy)
8812 continue;
8813 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8814 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
8815 Src.WindowScale =
8816 SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
8817 Src.WindowBase *= Src.WindowScale;
8818 }
8819
8820 // Final sanity check before we try to actually produce a shuffle.
8821 LLVM_DEBUG(for (auto Src
8822 : Sources)
8823 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8824
8825 // The stars all align, our next step is to produce the mask for the shuffle.
8826 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8827 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8828 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8829 SDValue Entry = Op.getOperand(i);
8830 if (Entry.isUndef())
8831 continue;
8832
8833 auto Src = find(Sources, Entry.getOperand(0));
8834 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8835
8836 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8837 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8838 // segment.
8839 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8840 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8841 VT.getScalarSizeInBits());
8842 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8843
8844 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8845 // starting at the appropriate offset.
8846 int *LaneMask = &Mask[i * ResMultiplier];
8847
8848 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8849 ExtractBase += NumElts * (Src - Sources.begin());
8850 for (int j = 0; j < LanesDefined; ++j)
8851 LaneMask[j] = ExtractBase + j;
8852 }
8853
8854 // Final check before we try to produce nonsense...
8855 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
8856 LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
8857 return SDValue();
8858 }
8859
8860 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8861 for (unsigned i = 0; i < Sources.size(); ++i)
8862 ShuffleOps[i] = Sources[i].ShuffleVec;
8863
8864 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8865 ShuffleOps[1], Mask);
8866 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
8867
8868 LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
8869 dbgs() << "Reshuffle, creating node: "; V.dump(););
8870
8871 return V;
8872}
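// Illustrative aside (not part of the LLVM sources): a standalone sketch of
// how the final mask above is assembled. Each BUILD_VECTOR operand that reads
// element EltNo of source SrcIdx contributes ResMultiplier consecutive lanes
// starting at EltNo * WindowScale + WindowBase + NumElts * SrcIdx. The
// concrete numbers below are assumptions chosen for the example.
#include <cstdio>
#include <vector>

int main() {
  // A v4i32 result built from lanes 2,3 of source 0 and lanes 0,1 of source 1,
  // both v4i32 and no bitcasts: WindowScale = 1, WindowBase = 0, NumElts = 4,
  // ResMultiplier = 1.
  const int NumElts = 4, WindowScale = 1, WindowBase = 0, ResMultiplier = 1;
  struct Pick { int SrcIdx, EltNo; };
  std::vector<Pick> Picks = {{0, 2}, {0, 3}, {1, 0}, {1, 1}};

  std::vector<int> Mask(4, -1);
  for (size_t i = 0; i < Picks.size(); ++i) {
    int ExtractBase =
        Picks[i].EltNo * WindowScale + WindowBase + NumElts * Picks[i].SrcIdx;
    for (int j = 0; j < ResMultiplier; ++j)
      Mask[i * ResMultiplier + j] = ExtractBase + j;
  }
  // Prints "2 3 4 5": a classic EXT-style shuffle of the two sources.
  for (int M : Mask)
    std::printf("%d ", M);
  std::printf("\n");
  return 0;
}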
8873
8874// check if an EXT instruction can handle the shuffle mask when the
8875// vector sources of the shuffle are the same.
8876static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
8877 unsigned NumElts = VT.getVectorNumElements();
8878
8879 // Assume that the first shuffle index is not UNDEF. Fail if it is.
8880 if (M[0] < 0)
8881 return false;
8882
8883 Imm = M[0];
8884
8885 // If this is a VEXT shuffle, the immediate value is the index of the first
8886 // element. The other shuffle indices must be the successive elements after
8887 // the first one.
8888 unsigned ExpectedElt = Imm;
8889 for (unsigned i = 1; i < NumElts; ++i) {
8890 // Increment the expected index. If it wraps around, just follow it
8891 // back to index zero and keep going.
8892 ++ExpectedElt;
8893 if (ExpectedElt == NumElts)
8894 ExpectedElt = 0;
8895
8896 if (M[i] < 0)
8897 continue; // ignore UNDEF indices
8898 if (ExpectedElt != static_cast<unsigned>(M[i]))
8899 return false;
8900 }
8901
8902 return true;
8903}
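// Illustrative aside (not part of the LLVM sources): the same wrap-around walk
// as isSingletonEXTMask, restated over a plain std::vector<int> mask.
#include <cassert>
#include <vector>

static bool singletonExt(const std::vector<int> &M, unsigned NumElts,
                         unsigned &Imm) {
  if (M[0] < 0)
    return false;
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++Expected == NumElts)
      Expected = 0; // wrap back to lane 0
    if (M[i] >= 0 && (unsigned)M[i] != Expected)
      return false;
  }
  return true;
}

int main() {
  unsigned Imm;
  // <2,3,0,1> rotates a 4-element vector by two lanes: EXT with Imm = 2.
  assert(singletonExt({2, 3, 0, 1}, 4, Imm) && Imm == 2);
  // <2,3,1,0> is not a rotation, so no single-source EXT exists.
  assert(!singletonExt({2, 3, 1, 0}, 4, Imm));
  return 0;
}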
8904
8905 /// Check if a vector shuffle corresponds to a DUP instruction with a larger
8906 /// element width than the vector lane type. If that is the case the function
8907 /// returns true and writes the value of the DUP instruction lane operand into
8908 /// DupLaneOp.
8909static bool isWideDUPMask(ArrayRef<int> M, EVT VT, unsigned BlockSize,
8910 unsigned &DupLaneOp) {
8911 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
8912 "Only possible block sizes for wide DUP are: 16, 32, 64");
8913
8914 if (BlockSize <= VT.getScalarSizeInBits())
8915 return false;
8916 if (BlockSize % VT.getScalarSizeInBits() != 0)
8917 return false;
8918 if (VT.getSizeInBits() % BlockSize != 0)
8919 return false;
8920
8921 size_t SingleVecNumElements = VT.getVectorNumElements();
8922 size_t NumEltsPerBlock = BlockSize / VT.getScalarSizeInBits();
8923 size_t NumBlocks = VT.getSizeInBits() / BlockSize;
8924
8925 // We are looking for masks like
8926 // [0, 1, 0, 1] or [2, 3, 2, 3] or [4, 5, 6, 7, 4, 5, 6, 7] where any element
8927 // might be replaced by 'undefined'. BlockIndices will eventually contain
8928 // lane indices of the duplicated block (i.e. [0, 1], [2, 3] and [4, 5, 6, 7]
8929 // for the above examples)
8930 SmallVector<int, 8> BlockElts(NumEltsPerBlock, -1);
8931 for (size_t BlockIndex = 0; BlockIndex < NumBlocks; BlockIndex++)
8932 for (size_t I = 0; I < NumEltsPerBlock; I++) {
8933 int Elt = M[BlockIndex * NumEltsPerBlock + I];
8934 if (Elt < 0)
8935 continue;
8936 // For now we don't support shuffles that use the second operand
8937 if ((unsigned)Elt >= SingleVecNumElements)
8938 return false;
8939 if (BlockElts[I] < 0)
8940 BlockElts[I] = Elt;
8941 else if (BlockElts[I] != Elt)
8942 return false;
8943 }
8944
8945 // We found a candidate block (possibly with some undefs). It must be a
8946 // sequence of consecutive integers starting with a value divisible by
8947 // NumEltsPerBlock with some values possibly replaced by undef-s.
8948
8949 // Find first non-undef element
8950 auto FirstRealEltIter = find_if(BlockElts, [](int Elt) { return Elt >= 0; });
8951 assert(FirstRealEltIter != BlockElts.end() &&
8952 "Shuffle with all-undefs must have been caught by previous cases, "
8953 "e.g. isSplat()");
8954 if (FirstRealEltIter == BlockElts.end()) {
8955 DupLaneOp = 0;
8956 return true;
8957 }
8958
8959 // Index of FirstRealElt in BlockElts
8960 size_t FirstRealIndex = FirstRealEltIter - BlockElts.begin();
8961
8962 if ((unsigned)*FirstRealEltIter < FirstRealIndex)
8963 return false;
8964 // BlockElts[0] must have the following value if it isn't undef:
8965 size_t Elt0 = *FirstRealEltIter - FirstRealIndex;
8966
8967 // Check the first element
8968 if (Elt0 % NumEltsPerBlock != 0)
8969 return false;
8970 // Check that the sequence indeed consists of consecutive integers (modulo
8971 // undefs)
8972 for (size_t I = 0; I < NumEltsPerBlock; I++)
8973 if (BlockElts[I] >= 0 && (unsigned)BlockElts[I] != Elt0 + I)
8974 return false;
8975
8976 DupLaneOp = Elt0 / NumEltsPerBlock;
8977 return true;
8978}
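// Illustrative aside (not part of the LLVM sources): a simplified standalone
// restatement of the block check above. It assumes a single source vector and
// omits the rejection of indices that reach into the second shuffle operand.
#include <cassert>
#include <cstddef>
#include <vector>

static bool wideDup(const std::vector<int> &M, size_t NumEltsPerBlock,
                    unsigned &Lane) {
  size_t NumBlocks = M.size() / NumEltsPerBlock;
  std::vector<int> Block(NumEltsPerBlock, -1);
  // All blocks of the mask must agree, modulo undefs (-1).
  for (size_t B = 0; B < NumBlocks; ++B)
    for (size_t I = 0; I < NumEltsPerBlock; ++I) {
      int Elt = M[B * NumEltsPerBlock + I];
      if (Elt < 0)
        continue;
      if (Block[I] < 0)
        Block[I] = Elt;
      else if (Block[I] != Elt)
        return false;
    }
  // The surviving block must be K*N, K*N+1, ... for some K, modulo undefs.
  size_t First = 0;
  while (First < NumEltsPerBlock && Block[First] < 0)
    ++First;
  if (First == NumEltsPerBlock) { // all lanes undef
    Lane = 0;
    return true;
  }
  if ((size_t)Block[First] < First)
    return false;
  size_t Elt0 = Block[First] - First;
  if (Elt0 % NumEltsPerBlock != 0)
    return false;
  for (size_t I = 0; I < NumEltsPerBlock; ++I)
    if (Block[I] >= 0 && (size_t)Block[I] != Elt0 + I)
      return false;
  Lane = static_cast<unsigned>(Elt0 / NumEltsPerBlock);
  return true;
}

int main() {
  unsigned Lane;
  // v4i32, BlockSize 64 (two i32 lanes per block).
  assert(wideDup({2, 3, 2, 3}, 2, Lane) && Lane == 1);  // 64-bit DUP of lane 1
  assert(wideDup({0, -1, 0, 1}, 2, Lane) && Lane == 0); // undefs tolerated
  assert(!wideDup({1, 2, 1, 2}, 2, Lane));              // not block-aligned
  return 0;
}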
8979
8980// check if an EXT instruction can handle the shuffle mask when the
8981// vector sources of the shuffle are different.
8982static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
8983 unsigned &Imm) {
8984 // Look for the first non-undef element.
8985 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
8986
8987 // Benefit from APInt to handle overflow when calculating the expected element.
8988 unsigned NumElts = VT.getVectorNumElements();
8989 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
8990 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
8991 // The following shuffle indices must be the successive elements after the
8992 // first real element.
8993 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
8994 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
8995 if (FirstWrongElt != M.end())
8996 return false;
8997
8998 // The index of an EXT is the first element if it is not UNDEF.
8999 // Watch out for the beginning UNDEFs. The EXT index should be the expected
9000 // value of the first element. E.g.
9001 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
9002 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
9003 // ExpectedElt is the last mask index plus 1.
9004 Imm = ExpectedElt.getZExtValue();
9005
9006 // There are two different cases that require reversing the input vectors.
9007 // For example, for vector <4 x i32> we have the following cases:
9008 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
9009 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
9010 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
9011 // reversing the two input vectors.
9012 if (Imm < NumElts)
9013 ReverseEXT = true;
9014 else
9015 Imm -= NumElts;
9016
9017 return true;
9018}
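// Illustrative aside (not part of the LLVM sources): the two-operand EXT check
// above, with the APInt wrap-around replaced by explicit arithmetic modulo
// 2 * NumElts.
#include <cassert>
#include <cstddef>
#include <vector>

static bool extMask(const std::vector<int> &M, unsigned NumElts, bool &Reverse,
                    unsigned &Imm) {
  unsigned Wrap = 2 * NumElts; // shuffle indices live in [0, 2*NumElts)
  size_t First = 0;
  while (First < M.size() && M[First] < 0)
    ++First;
  if (First == M.size())
    return false;
  // Every defined index after the first must be its predecessor plus one,
  // modulo 2*NumElts.
  unsigned Expected = (M[First] + 1) % Wrap;
  for (size_t i = First + 1; i < M.size(); ++i) {
    if (M[i] >= 0 && (unsigned)M[i] != Expected)
      return false;
    Expected = (Expected + 1) % Wrap;
  }
  // Expected is now "last mask index plus 1", i.e. the first lane's index plus
  // NumElts (mod 2*NumElts), exactly as in the code above.
  Imm = Expected;
  Reverse = Imm < NumElts;
  if (!Reverse)
    Imm -= NumElts;
  return true;
}

int main() {
  bool Rev;
  unsigned Imm;
  // <-1,-1,7,0> on two v4 inputs becomes EXT(V2, V1, #1): operands reversed.
  assert(extMask({-1, -1, 7, 0}, 4, Rev, Imm) && Rev && Imm == 1);
  // <2,3,4,5> becomes EXT(V1, V2, #2): no reversal.
  assert(extMask({2, 3, 4, 5}, 4, Rev, Imm) && !Rev && Imm == 2);
  return 0;
}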
9019
9020/// isREVMask - Check if a vector shuffle corresponds to a REV
9021/// instruction with the specified blocksize. (The order of the elements
9022/// within each block of the vector is reversed.)
9023static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
9024 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
9025 "Only possible block sizes for REV are: 16, 32, 64");
9026
9027 unsigned EltSz = VT.getScalarSizeInBits();
9028 if (EltSz == 64)
9029 return false;
9030
9031 unsigned NumElts = VT.getVectorNumElements();
9032 unsigned BlockElts = M[0] + 1;
9033 // If the first shuffle index is UNDEF, be optimistic.
9034 if (M[0] < 0)
9035 BlockElts = BlockSize / EltSz;
9036
9037 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
9038 return false;
9039
9040 for (unsigned i = 0; i < NumElts; ++i) {
9041 if (M[i] < 0)
9042 continue; // ignore UNDEF indices
9043 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
9044 return false;
9045 }
9046
9047 return true;
9048}
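// Illustrative aside (not part of the LLVM sources): the per-lane formula used
// by isREVMask. Within each block of BlockElts lanes the order is mirrored:
// expected(i) = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts).
#include <cassert>
#include <vector>

static bool revMask(const std::vector<int> &M, unsigned BlockElts) {
  for (unsigned i = 0; i < M.size(); ++i)
    if (M[i] >= 0 &&
        (unsigned)M[i] !=
            (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  return true;
}

int main() {
  // v8i16 REV32: swap the two i16 lanes inside every 32-bit block.
  assert(revMask({1, 0, 3, 2, 5, 4, 7, 6}, 2));
  // v8i16 REV64: reverse the four i16 lanes inside every 64-bit block.
  assert(revMask({3, 2, 1, 0, 7, 6, 5, 4}, 4));
  // The identity mask is not a REV of any block size larger than the element.
  assert(!revMask({0, 1, 2, 3, 4, 5, 6, 7}, 2));
  return 0;
}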
9049
9050static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9051 unsigned NumElts = VT.getVectorNumElements();
9052 if (NumElts % 2 != 0)
9053 return false;
9054 WhichResult = (M[0] == 0 ? 0 : 1);
9055 unsigned Idx = WhichResult * NumElts / 2;
9056 for (unsigned i = 0; i != NumElts; i += 2) {
9057 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
9058 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
9059 return false;
9060 Idx += 1;
9061 }
9062
9063 return true;
9064}
9065
9066static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9067 unsigned NumElts = VT.getVectorNumElements();
9068 WhichResult = (M[0] == 0 ? 0 : 1);
9069 for (unsigned i = 0; i != NumElts; ++i) {
9070 if (M[i] < 0)
9071 continue; // ignore UNDEF indices
9072 if ((unsigned)M[i] != 2 * i + WhichResult)
9073 return false;
9074 }
9075
9076 return true;
9077}
9078
9079static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9080 unsigned NumElts = VT.getVectorNumElements();
9081 if (NumElts % 2 != 0)
9082 return false;
9083 WhichResult = (M[0] == 0 ? 0 : 1);
9084 for (unsigned i = 0; i < NumElts; i += 2) {
9085 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
9086 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
9087 return false;
9088 }
9089 return true;
9090}
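// Illustrative aside (not part of the LLVM sources): the masks the three
// predicates above accept for a v4i32 shuffle of V1 (lanes 0-3) and V2
// (lanes 4-7). WhichResult = 0 selects ZIP1/UZP1/TRN1, 1 selects ZIP2/UZP2/TRN2.
#include <cassert>
#include <vector>

static std::vector<int> zipMask(unsigned N, unsigned Which) {
  std::vector<int> M;
  for (unsigned i = 0, Idx = Which * N / 2; i < N; i += 2, ++Idx) {
    M.push_back(Idx);     // element from V1's half
    M.push_back(Idx + N); // interleaved element from V2's half
  }
  return M;
}

int main() {
  // ZIP1 interleaves the low halves, ZIP2 the high halves.
  assert(zipMask(4, 0) == (std::vector<int>{0, 4, 1, 5}));
  assert(zipMask(4, 1) == (std::vector<int>{2, 6, 3, 7}));
  // For reference, the other predicates accept:
  //   UZP1 <0, 2, 4, 6>, UZP2 <1, 3, 5, 7>, TRN1 <0, 4, 2, 6>, TRN2 <1, 5, 3, 7>.
  return 0;
}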
9091
9092/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
9093/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9094/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
9095static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9096 unsigned NumElts = VT.getVectorNumElements();
9097 if (NumElts % 2 != 0)
9098 return false;
9099 WhichResult = (M[0] == 0 ? 0 : 1);
9100 unsigned Idx = WhichResult * NumElts / 2;
9101 for (unsigned i = 0; i != NumElts; i += 2) {
9102 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
9103 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
9104 return false;
9105 Idx += 1;
9106 }
9107
9108 return true;
9109}
9110
9111/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
9112/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9113 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
9114static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9115 unsigned Half = VT.getVectorNumElements() / 2;
9116 WhichResult = (M[0] == 0 ? 0 : 1);
9117 for (unsigned j = 0; j != 2; ++j) {
9118 unsigned Idx = WhichResult;
9119 for (unsigned i = 0; i != Half; ++i) {
9120 int MIdx = M[i + j * Half];
9121 if (MIdx >= 0 && (unsigned)MIdx != Idx)
9122 return false;
9123 Idx += 2;
9124 }
9125 }
9126
9127 return true;
9128}
9129
9130/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
9131/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
9132/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
9133static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
9134 unsigned NumElts = VT.getVectorNumElements();
9135 if (NumElts % 2 != 0)
9136 return false;
9137 WhichResult = (M[0] == 0 ? 0 : 1);
9138 for (unsigned i = 0; i < NumElts; i += 2) {
9139 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
9140 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
9141 return false;
9142 }
9143 return true;
9144}
9145
9146static bool isINSMask(ArrayRef<int> M, int NumInputElements,
9147 bool &DstIsLeft, int &Anomaly) {
9148 if (M.size() != static_cast<size_t>(NumInputElements))
9149 return false;
9150
9151 int NumLHSMatch = 0, NumRHSMatch = 0;
9152 int LastLHSMismatch = -1, LastRHSMismatch = -1;
9153
9154 for (int i = 0; i < NumInputElements; ++i) {
9155 if (M[i] == -1) {
9156 ++NumLHSMatch;
9157 ++NumRHSMatch;
9158 continue;
9159 }
9160
9161 if (M[i] == i)
9162 ++NumLHSMatch;
9163 else
9164 LastLHSMismatch = i;
9165
9166 if (M[i] == i + NumInputElements)
9167 ++NumRHSMatch;
9168 else
9169 LastRHSMismatch = i;
9170 }
9171
9172 if (NumLHSMatch == NumInputElements - 1) {
9173 DstIsLeft = true;
9174 Anomaly = LastLHSMismatch;
9175 return true;
9176 } else if (NumRHSMatch == NumInputElements - 1) {
9177 DstIsLeft = false;
9178 Anomaly = LastRHSMismatch;
9179 return true;
9180 }
9181
9182 return false;
9183}
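// Illustrative aside (not part of the LLVM sources): the kind of mask isINSMask
// recognises. For a v4i32 shuffle of V1 (lanes 0-3) and V2 (lanes 4-7), the
// mask <0, 5, 2, 3> matches V1 everywhere except lane 1 (DstIsLeft = true,
// Anomaly = 1), so the whole shuffle is a single INS of V2's lane 1 into V1.
#include <array>
#include <cassert>

int main() {
  std::array<int, 4> V1 = {10, 11, 12, 13}, V2 = {20, 21, 22, 23};
  std::array<int, 4> Mask = {0, 5, 2, 3};

  // Apply the shuffle the generic way...
  std::array<int, 4> Shuffled{};
  for (int i = 0; i < 4; ++i)
    Shuffled[i] = Mask[i] < 4 ? V1[Mask[i]] : V2[Mask[i] - 4];

  // ...and as the single insert the mask describes.
  std::array<int, 4> Ins = V1;
  Ins[1] = V2[Mask[1] - 4]; // source lane = ShuffleMask[Anomaly] - NumElts

  assert(Shuffled == Ins);
  return 0;
}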
9184
9185static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
9186 if (VT.getSizeInBits() != 128)
9187 return false;
9188
9189 unsigned NumElts = VT.getVectorNumElements();
9190
9191 for (int I = 0, E = NumElts / 2; I != E; I++) {
9192 if (Mask[I] != I)
9193 return false;
9194 }
9195
9196 int Offset = NumElts / 2;
9197 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
9198 if (Mask[I] != I + SplitLHS * Offset)
9199 return false;
9200 }
9201
9202 return true;
9203}
9204
9205static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
9206 SDLoc DL(Op);
9207 EVT VT = Op.getValueType();
9208 SDValue V0 = Op.getOperand(0);
9209 SDValue V1 = Op.getOperand(1);
9210 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
9211
9212 if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
9213 VT.getVectorElementType() != V1.getValueType().getVectorElementType())
9214 return SDValue();
9215
9216 bool SplitV0 = V0.getValueSizeInBits() == 128;
9217
9218 if (!isConcatMask(Mask, VT, SplitV0))
9219 return SDValue();
9220
9221 EVT CastVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
9222 if (SplitV0) {
9223 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
9224 DAG.getConstant(0, DL, MVT::i64));
9225 }
9226 if (V1.getValueSizeInBits() == 128) {
9227 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
9228 DAG.getConstant(0, DL, MVT::i64));
9229 }
9230 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
9231}
9232
9233/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9234/// the specified operations to build the shuffle.
9235static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9236 SDValue RHS, SelectionDAG &DAG,
9237 const SDLoc &dl) {
9238 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9239 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
9240 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
9241
9242 enum {
9243 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9244 OP_VREV,
9245 OP_VDUP0,
9246 OP_VDUP1,
9247 OP_VDUP2,
9248 OP_VDUP3,
9249 OP_VEXT1,
9250 OP_VEXT2,
9251 OP_VEXT3,
9252 OP_VUZPL, // VUZP, left result
9253 OP_VUZPR, // VUZP, right result
9254 OP_VZIPL, // VZIP, left result
9255 OP_VZIPR, // VZIP, right result
9256 OP_VTRNL, // VTRN, left result
9257 OP_VTRNR // VTRN, right result
9258 };
9259
9260 if (OpNum == OP_COPY) {
9261 if (LHSID == (1 * 9 + 2) * 9 + 3)
9262 return LHS;
9263 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
9264 return RHS;
9265 }
9266
9267 SDValue OpLHS, OpRHS;
9268 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9269 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9270 EVT VT = OpLHS.getValueType();
9271
9272 switch (OpNum) {
9273 default:
9274 llvm_unreachable("Unknown shuffle opcode!")::llvm::llvm_unreachable_internal("Unknown shuffle opcode!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 9274)
;
9275 case OP_VREV:
9276 // VREV divides the vector in half and swaps within the half.
9277 if (VT.getVectorElementType() == MVT::i32 ||
9278 VT.getVectorElementType() == MVT::f32)
9279 return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
9280 // vrev <4 x i16> -> REV32
9281 if (VT.getVectorElementType() == MVT::i16 ||
9282 VT.getVectorElementType() == MVT::f16 ||
9283 VT.getVectorElementType() == MVT::bf16)
9284 return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
9285 // vrev <4 x i8> -> REV16
9286 assert(VT.getVectorElementType() == MVT::i8);
9287 return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
9288 case OP_VDUP0:
9289 case OP_VDUP1:
9290 case OP_VDUP2:
9291 case OP_VDUP3: {
9292 EVT EltTy = VT.getVectorElementType();
9293 unsigned Opcode;
9294 if (EltTy == MVT::i8)
9295 Opcode = AArch64ISD::DUPLANE8;
9296 else if (EltTy == MVT::i16 || EltTy == MVT::f16 || EltTy == MVT::bf16)
9297 Opcode = AArch64ISD::DUPLANE16;
9298 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
9299 Opcode = AArch64ISD::DUPLANE32;
9300 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
9301 Opcode = AArch64ISD::DUPLANE64;
9302 else
9303 llvm_unreachable("Invalid vector element type?")::llvm::llvm_unreachable_internal("Invalid vector element type?"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 9303)
;
9304
9305 if (VT.getSizeInBits() == 64)
9306 OpLHS = WidenVector(OpLHS, DAG);
9307 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
9308 return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
9309 }
9310 case OP_VEXT1:
9311 case OP_VEXT2:
9312 case OP_VEXT3: {
9313 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
9314 return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
9315 DAG.getConstant(Imm, dl, MVT::i32));
9316 }
9317 case OP_VUZPL:
9318 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
9319 OpRHS);
9320 case OP_VUZPR:
9321 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
9322 OpRHS);
9323 case OP_VZIPL:
9324 return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
9325 OpRHS);
9326 case OP_VZIPR:
9327 return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
9328 OpRHS);
9329 case OP_VTRNL:
9330 return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
9331 OpRHS);
9332 case OP_VTRNR:
9333 return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
9334 OpRHS);
9335 }
9336}
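// Illustrative aside (not part of the LLVM sources): how a perfect-shuffle
// table entry is unpacked above. Bits [31:30] hold the cost, [29:26] the
// operation, [25:13] the LHS id and [12:0] the RHS id, where an id encodes
// four mask elements in base 9 (0-7 are lanes, 8 means undef). The entry value
// below is made up purely to exercise the bit layout.
#include <cassert>
#include <cstdio>

int main() {
  unsigned PFEntry = (2u << 30) | (5u << 26) | (1234u << 13) | 567u;
  unsigned Cost = PFEntry >> 30;
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  unsigned RHSID = PFEntry & ((1 << 13) - 1);
  assert(Cost == 2 && OpNum == 5 && LHSID == 1234 && RHSID == 567);

  // (1*9+2)*9+3 spells <0,1,2,3> and ((4*9+5)*9+6)*9+7 spells <4,5,6,7>, which
  // is why OP_COPY compares against those constants to pick LHS or RHS.
  unsigned Id = ((4 * 9 + 5) * 9 + 6) * 9 + 7;
  int Elts[4];
  for (int i = 3; i >= 0; --i) {
    Elts[i] = Id % 9;
    Id /= 9;
  }
  std::printf("<%d,%d,%d,%d>\n", Elts[0], Elts[1], Elts[2], Elts[3]); // <4,5,6,7>
  return 0;
}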
9337
9338static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
9339 SelectionDAG &DAG) {
9340 // Check to see if we can use the TBL instruction.
9341 SDValue V1 = Op.getOperand(0);
9342 SDValue V2 = Op.getOperand(1);
9343 SDLoc DL(Op);
9344
9345 EVT EltVT = Op.getValueType().getVectorElementType();
9346 unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
9347
9348 SmallVector<SDValue, 8> TBLMask;
9349 for (int Val : ShuffleMask) {
9350 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
9351 unsigned Offset = Byte + Val * BytesPerElt;
9352 TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
9353 }
9354 }
9355
9356 MVT IndexVT = MVT::v8i8;
9357 unsigned IndexLen = 8;
9358 if (Op.getValueSizeInBits() == 128) {
9359 IndexVT = MVT::v16i8;
9360 IndexLen = 16;
9361 }
9362
9363 SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
9364 SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
9365
9366 SDValue Shuffle;
9367 if (V2.getNode()->isUndef()) {
9368 if (IndexLen == 8)
9369 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
9370 Shuffle = DAG.getNode(
9371 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9372 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9373 DAG.getBuildVector(IndexVT, DL,
9374 makeArrayRef(TBLMask.data(), IndexLen)));
9375 } else {
9376 if (IndexLen == 8) {
9377 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
9378 Shuffle = DAG.getNode(
9379 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9380 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
9381 DAG.getBuildVector(IndexVT, DL,
9382 makeArrayRef(TBLMask.data(), IndexLen)));
9383 } else {
9384 // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
9385 // cannot currently represent the register constraints on the input
9386 // table registers.
9387 // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
9388 // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
9389 // IndexLen));
9390 Shuffle = DAG.getNode(
9391 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
9392 DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
9393 V2Cst, DAG.getBuildVector(IndexVT, DL,
9394 makeArrayRef(TBLMask.data(), IndexLen)));
9395 }
9396 }
9397 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
9398}
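// Illustrative aside (not part of the LLVM sources): how the byte-wise TBL
// mask above is derived from an element-level shuffle mask. For a v4i16
// shuffle <2, 0, 3, 1>, each element index expands to its two byte offsets.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const unsigned BytesPerElt = 2; // i16 elements
  std::vector<int> ShuffleMask = {2, 0, 3, 1};

  std::vector<uint8_t> TBLMask;
  for (int Val : ShuffleMask)
    for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte)
      TBLMask.push_back(Byte + Val * BytesPerElt);

  // Bytes 4-5 select element 2, bytes 0-1 element 0, and so on.
  assert((TBLMask == std::vector<uint8_t>{4, 5, 0, 1, 6, 7, 2, 3}));
  return 0;
}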
9399
9400static unsigned getDUPLANEOp(EVT EltType) {
9401 if (EltType == MVT::i8)
9402 return AArch64ISD::DUPLANE8;
9403 if (EltType == MVT::i16 || EltType == MVT::f16 || EltType == MVT::bf16)
9404 return AArch64ISD::DUPLANE16;
9405 if (EltType == MVT::i32 || EltType == MVT::f32)
9406 return AArch64ISD::DUPLANE32;
9407 if (EltType == MVT::i64 || EltType == MVT::f64)
9408 return AArch64ISD::DUPLANE64;
9409
9410 llvm_unreachable("Invalid vector element type?")::llvm::llvm_unreachable_internal("Invalid vector element type?"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 9410)
;
9411}
9412
9413static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
9414 unsigned Opcode, SelectionDAG &DAG) {
9415 // Try to eliminate a bitcasted extract subvector before a DUPLANE.
9416 auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
9417 // Match: dup (bitcast (extract_subv X, C)), LaneC
9418 if (BitCast.getOpcode() != ISD::BITCAST ||
9419 BitCast.getOperand(0).getOpcode() != ISD::EXTRACT_SUBVECTOR)
9420 return false;
9421
9422 // The extract index must align in the destination type. That may not
9423 // happen if the bitcast is from narrow to wide type.
9424 SDValue Extract = BitCast.getOperand(0);
9425 unsigned ExtIdx = Extract.getConstantOperandVal(1);
9426 unsigned SrcEltBitWidth = Extract.getScalarValueSizeInBits();
9427 unsigned ExtIdxInBits = ExtIdx * SrcEltBitWidth;
9428 unsigned CastedEltBitWidth = BitCast.getScalarValueSizeInBits();
9429 if (ExtIdxInBits % CastedEltBitWidth != 0)
9430 return false;
9431
9432 // Update the lane value by offsetting with the scaled extract index.
9433 LaneC += ExtIdxInBits / CastedEltBitWidth;
9434
9435 // Determine the casted vector type of the wide vector input.
9436 // dup (bitcast (extract_subv X, C)), LaneC --> dup (bitcast X), LaneC'
9437 // Examples:
9438 // dup (bitcast (extract_subv v2f64 X, 1) to v2f32), 1 --> dup v4f32 X, 3
9439 // dup (bitcast (extract_subv v16i8 X, 8) to v4i16), 1 --> dup v8i16 X, 5
9440 unsigned SrcVecNumElts =
9441 Extract.getOperand(0).getValueSizeInBits() / CastedEltBitWidth;
9442 CastVT = MVT::getVectorVT(BitCast.getSimpleValueType().getScalarType(),
9443 SrcVecNumElts);
9444 return true;
9445 };
9446 MVT CastVT;
9447 if (getScaledOffsetDup(V, Lane, CastVT)) {
9448 V = DAG.getBitcast(CastVT, V.getOperand(0).getOperand(0));
9449 } else if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
9450 // The lane is incremented by the index of the extract.
9451 // Example: dup v2f32 (extract v4f32 X, 2), 1 --> dup v4f32 X, 3
9452 Lane += V.getConstantOperandVal(1);
9453 V = V.getOperand(0);
9454 } else if (V.getOpcode() == ISD::CONCAT_VECTORS) {
9455 // The lane is decremented if we are splatting from the 2nd operand.
9456 // Example: dup v4i32 (concat v2i32 X, v2i32 Y), 3 --> dup v4i32 Y, 1
9457 unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
9458 Lane -= Idx * VT.getVectorNumElements() / 2;
9459 V = WidenVector(V.getOperand(Idx), DAG);
9460 } else if (VT.getSizeInBits() == 64) {
9461 // Widen the operand to 128-bit register with undef.
9462 V = WidenVector(V, DAG);
9463 }
9464 return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
9465}
9466
9467SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9468 SelectionDAG &DAG) const {
9469 SDLoc dl(Op);
9470 EVT VT = Op.getValueType();
9471
9472 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9473
9474 if (useSVEForFixedLengthVectorVT(VT))
9475 return LowerFixedLengthVECTOR_SHUFFLEToSVE(Op, DAG);
9476
9477 // Convert shuffles that are directly supported on NEON to target-specific
9478 // DAG nodes, instead of keeping them as shuffles and matching them again
9479 // during code selection. This is more efficient and avoids the possibility
9480 // of inconsistencies between legalization and selection.
9481 ArrayRef<int> ShuffleMask = SVN->getMask();
9482
9483 SDValue V1 = Op.getOperand(0);
9484 SDValue V2 = Op.getOperand(1);
9485
9486 assert(V1.getValueType() == VT && "Unexpected VECTOR_SHUFFLE type!");
9487 assert(ShuffleMask.size() == VT.getVectorNumElements() &&
9488 "Unexpected VECTOR_SHUFFLE mask size!");
9489
9490 if (SVN->isSplat()) {
9491 int Lane = SVN->getSplatIndex();
9492 // If this is an undef splat, generate it via "just" vdup, if possible.
9493 if (Lane == -1)
9494 Lane = 0;
9495
9496 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
9497 return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
9498 V1.getOperand(0));
9499 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
9500 // constant. If so, we can just reference the lane's definition directly.
9501 if (V1.getOpcode() == ISD::BUILD_VECTOR &&
9502 !isa<ConstantSDNode>(V1.getOperand(Lane)))
9503 return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
9504
9505 // Otherwise, duplicate from the lane of the input vector.
9506 unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
9507 return constructDup(V1, Lane, dl, VT, Opcode, DAG);
9508 }
9509
9510 // Check if the mask matches a DUP for a wider element
9511 for (unsigned LaneSize : {64U, 32U, 16U}) {
9512 unsigned Lane = 0;
9513 if (isWideDUPMask(ShuffleMask, VT, LaneSize, Lane)) {
9514 unsigned Opcode = LaneSize == 64 ? AArch64ISD::DUPLANE64
9515 : LaneSize == 32 ? AArch64ISD::DUPLANE32
9516 : AArch64ISD::DUPLANE16;
9517 // Cast V1 to an integer vector with required lane size
9518 MVT NewEltTy = MVT::getIntegerVT(LaneSize);
9519 unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
9520 MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
9521 V1 = DAG.getBitcast(NewVecTy, V1);
9522 // Construct the DUP instruction
9523 V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
9524 // Cast back to the original type
9525 return DAG.getBitcast(VT, V1);
9526 }
9527 }
9528
9529 if (isREVMask(ShuffleMask, VT, 64))
9530 return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
9531 if (isREVMask(ShuffleMask, VT, 32))
9532 return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
9533 if (isREVMask(ShuffleMask, VT, 16))
9534 return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
9535
9536 if (((VT.getVectorNumElements() == 8 && VT.getScalarSizeInBits() == 16) ||
9537 (VT.getVectorNumElements() == 16 && VT.getScalarSizeInBits() == 8)) &&
9538 ShuffleVectorInst::isReverseMask(ShuffleMask)) {
9539 SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
9540 return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
9541 DAG.getConstant(8, dl, MVT::i32));
9542 }
9543
9544 bool ReverseEXT = false;
9545 unsigned Imm;
9546 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
9547 if (ReverseEXT)
9548 std::swap(V1, V2);
9549 Imm *= getExtFactor(V1);
9550 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
9551 DAG.getConstant(Imm, dl, MVT::i32));
9552 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
9553 Imm *= getExtFactor(V1);
9554 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
9555 DAG.getConstant(Imm, dl, MVT::i32));
9556 }
9557
9558 unsigned WhichResult;
9559 if (isZIPMask(ShuffleMask, VT, WhichResult)) {
9560 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9561 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9562 }
9563 if (isUZPMask(ShuffleMask, VT, WhichResult)) {
9564 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9565 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9566 }
9567 if (isTRNMask(ShuffleMask, VT, WhichResult)) {
9568 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9569 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
9570 }
9571
9572 if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9573 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
9574 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9575 }
9576 if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9577 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
9578 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9579 }
9580 if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
9581 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
9582 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
9583 }
9584
9585 if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
9586 return Concat;
9587
9588 bool DstIsLeft;
9589 int Anomaly;
9590 int NumInputElements = V1.getValueType().getVectorNumElements();
9591 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
9592 SDValue DstVec = DstIsLeft ? V1 : V2;
9593 SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
9594
9595 SDValue SrcVec = V1;
9596 int SrcLane = ShuffleMask[Anomaly];
9597 if (SrcLane >= NumInputElements) {
9598 SrcVec = V2;
9599 SrcLane -= VT.getVectorNumElements();
9600 }
9601 SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
9602
9603 EVT ScalarVT = VT.getVectorElementType();
9604
9605 if (ScalarVT.getFixedSizeInBits() < 32 && ScalarVT.isInteger())
9606 ScalarVT = MVT::i32;
9607
9608 return DAG.getNode(
9609 ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
9610 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
9611 DstLaneV);
9612 }
9613
9614 // If the shuffle is not directly supported and it has 4 elements, use
9615 // the PerfectShuffle-generated table to synthesize it from other shuffles.
9616 unsigned NumElts = VT.getVectorNumElements();
9617 if (NumElts == 4) {
9618 unsigned PFIndexes[4];
9619 for (unsigned i = 0; i != 4; ++i) {
9620 if (ShuffleMask[i] < 0)
9621 PFIndexes[i] = 8;
9622 else
9623 PFIndexes[i] = ShuffleMask[i];
9624 }
9625
9626 // Compute the index in the perfect shuffle table.
9627 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
9628 PFIndexes[2] * 9 + PFIndexes[3];
9629 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
9630 unsigned Cost = (PFEntry >> 30);
9631
9632 if (Cost <= 4)
9633 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
9634 }
9635
9636 return GenerateTBL(Op, ShuffleMask, DAG);
9637}
9638
9639SDValue AArch64TargetLowering::LowerSPLAT_VECTOR(SDValue Op,
9640 SelectionDAG &DAG) const {
9641 SDLoc dl(Op);
9642 EVT VT = Op.getValueType();
9643 EVT ElemVT = VT.getScalarType();
9644 SDValue SplatVal = Op.getOperand(0);
9645
9646 if (useSVEForFixedLengthVectorVT(VT))
9647 return LowerToScalableOp(Op, DAG);
9648
9649 // Extend input splat value where needed to fit into a GPR (32b or 64b only)
9650 // FPRs don't have this restriction.
9651 switch (ElemVT.getSimpleVT().SimpleTy) {
9652 case MVT::i1: {
9653 // The only legal i1 vectors are SVE vectors, so we can use SVE-specific
9654 // lowering code.
9655 if (auto *ConstVal = dyn_cast<ConstantSDNode>(SplatVal)) {
9656 if (ConstVal->isOne())
9657 return getPTrue(DAG, dl, VT, AArch64SVEPredPattern::all);
9658 // TODO: Add special case for constant false
9659 }
9660 // The general case of i1. There isn't any natural way to do this,
9661 // so we use some trickery with whilelo.
9662 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9663 SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i64, SplatVal,
9664 DAG.getValueType(MVT::i1));
9665 SDValue ID = DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, dl,
9666 MVT::i64);
9667 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID,
9668 DAG.getConstant(0, dl, MVT::i64), SplatVal);
9669 }
9670 case MVT::i8:
9671 case MVT::i16:
9672 case MVT::i32:
9673 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i32);
9674 break;
9675 case MVT::i64:
9676 SplatVal = DAG.getAnyExtOrTrunc(SplatVal, dl, MVT::i64);
9677 break;
9678 case MVT::f16:
9679 case MVT::bf16:
9680 case MVT::f32:
9681 case MVT::f64:
9682 // Fine as is
9683 break;
9684 default:
9685 report_fatal_error("Unsupported SPLAT_VECTOR input operand type");
9686 }
9687
9688 return DAG.getNode(AArch64ISD::DUP, dl, VT, SplatVal);
9689}
9690
9691SDValue AArch64TargetLowering::LowerDUPQLane(SDValue Op,
9692 SelectionDAG &DAG) const {
9693 SDLoc DL(Op);
9694
9695 EVT VT = Op.getValueType();
9696 if (!isTypeLegal(VT) || !VT.isScalableVector())
9697 return SDValue();
9698
9699 // Current lowering only supports the SVE-ACLE types.
9700 if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
9701 return SDValue();
9702
9703 // The DUPQ operation is independent of element type, so normalise to i64s.
9704 SDValue V = DAG.getNode(ISD::BITCAST, DL, MVT::nxv2i64, Op.getOperand(1));
9705 SDValue Idx128 = Op.getOperand(2);
9706
9707 // DUPQ can be used when idx is in range.
9708 auto *CIdx = dyn_cast<ConstantSDNode>(Idx128);
9709 if (CIdx && (CIdx->getZExtValue() <= 3)) {
9710 SDValue CI = DAG.getTargetConstant(CIdx->getZExtValue(), DL, MVT::i64);
9711 SDNode *DUPQ =
9712 DAG.getMachineNode(AArch64::DUP_ZZI_Q, DL, MVT::nxv2i64, V, CI);
9713 return DAG.getNode(ISD::BITCAST, DL, VT, SDValue(DUPQ, 0));
9714 }
9715
9716 // The ACLE says this must produce the same result as:
9717 // svtbl(data, svadd_x(svptrue_b64(),
9718 // svand_x(svptrue_b64(), svindex_u64(0, 1), 1),
9719 // index * 2))
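// For illustration: on nxv2i64 the shuffle mask built below is
//   <2*idx, 2*idx+1, 2*idx, 2*idx+1, ...>
// (e.g. idx == 4 gives <8,9,8,9,...>), so the TBL replicates the idx-th
// 128-bit quadword of the source across the whole vector.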
9720 SDValue One = DAG.getConstant(1, DL, MVT::i64);
9721 SDValue SplatOne = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, One);
9722
9723 // create the vector 0,1,0,1,...
9724 SDValue SV = DAG.getStepVector(DL, MVT::nxv2i64);
9725 SV = DAG.getNode(ISD::AND, DL, MVT::nxv2i64, SV, SplatOne);
9726
9727 // create the vector idx64,idx64+1,idx64,idx64+1,...
9728 SDValue Idx64 = DAG.getNode(ISD::ADD, DL, MVT::i64, Idx128, Idx128);
9729 SDValue SplatIdx64 = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Idx64);
9730 SDValue ShuffleMask = DAG.getNode(ISD::ADD, DL, MVT::nxv2i64, SV, SplatIdx64);
9731
9732 // create the vector Val[idx64],Val[idx64+1],Val[idx64],Val[idx64+1],...
9733 SDValue TBL = DAG.getNode(AArch64ISD::TBL, DL, MVT::nxv2i64, V, ShuffleMask);
9734 return DAG.getNode(ISD::BITCAST, DL, VT, TBL);
9735}
9736
9737
9738static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
9739 APInt &UndefBits) {
9740 EVT VT = BVN->getValueType(0);
9741 APInt SplatBits, SplatUndef;
9742 unsigned SplatBitSize;
9743 bool HasAnyUndefs;
9744 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
9745 unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
9746
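// For illustration (assuming a 32-bit splat of a v4i32 build vector): the
// loop below runs four times, each pass shifting the accumulated bits left by
// SplatBitSize and OR-ing the splat value back in, so CnstBits ends up with
// the constant replicated across the full 128-bit width.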
9747 for (unsigned i = 0; i < NumSplats; ++i) {
9748 CnstBits <<= SplatBitSize;
9749 UndefBits <<= SplatBitSize;
9750 CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
9751 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
9752 }
9753
9754 return true;
9755 }
9756
9757 return false;
9758}
9759
9760// Try 64-bit splatted SIMD immediate.
9761static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9762 const APInt &Bits) {
9763 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9764 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9765 EVT VT = Op.getValueType();
9766 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v2i64 : MVT::f64;
9767
9768 if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
9769 Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
9770
9771 SDLoc dl(Op);
9772 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9773 DAG.getConstant(Value, dl, MVT::i32));
9774 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9775 }
9776 }
9777
9778 return SDValue();
9779}
9780
9781// Try 32-bit splatted SIMD immediate.
9782static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9783 const APInt &Bits,
9784 const SDValue *LHS = nullptr) {
9785 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9786 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9787 EVT VT = Op.getValueType();
9788 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9789 bool isAdvSIMDModImm = false;
9790 uint64_t Shift;
9791
9792 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType1(Value))) {
9793 Value = AArch64_AM::encodeAdvSIMDModImmType1(Value);
9794 Shift = 0;
9795 }
9796 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType2(Value))) {
9797 Value = AArch64_AM::encodeAdvSIMDModImmType2(Value);
9798 Shift = 8;
9799 }
9800 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType3(Value))) {
9801 Value = AArch64_AM::encodeAdvSIMDModImmType3(Value);
9802 Shift = 16;
9803 }
9804 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType4(Value))) {
9805 Value = AArch64_AM::encodeAdvSIMDModImmType4(Value);
9806 Shift = 24;
9807 }
9808
9809 if (isAdvSIMDModImm) {
9810 SDLoc dl(Op);
9811 SDValue Mov;
9812
9813 if (LHS)
9814 Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9815 DAG.getConstant(Value, dl, MVT::i32),
9816 DAG.getConstant(Shift, dl, MVT::i32));
9817 else
9818 Mov = DAG.getNode(NewOp, dl, MovTy,
9819 DAG.getConstant(Value, dl, MVT::i32),
9820 DAG.getConstant(Shift, dl, MVT::i32));
9821
9822 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9823 }
9824 }
9825
9826 return SDValue();
9827}
9828
9829// Try 16-bit splatted SIMD immediate.
9830static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9831 const APInt &Bits,
9832 const SDValue *LHS = nullptr) {
9833 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9834 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9835 EVT VT = Op.getValueType();
9836 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
9837 bool isAdvSIMDModImm = false;
9838 uint64_t Shift;
9839
9840 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType5(Value))) {
9841 Value = AArch64_AM::encodeAdvSIMDModImmType5(Value);
9842 Shift = 0;
9843 }
9844 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType6(Value))) {
9845 Value = AArch64_AM::encodeAdvSIMDModImmType6(Value);
9846 Shift = 8;
9847 }
9848
9849 if (isAdvSIMDModImm) {
9850 SDLoc dl(Op);
9851 SDValue Mov;
9852
9853 if (LHS)
9854 Mov = DAG.getNode(NewOp, dl, MovTy, *LHS,
9855 DAG.getConstant(Value, dl, MVT::i32),
9856 DAG.getConstant(Shift, dl, MVT::i32));
9857 else
9858 Mov = DAG.getNode(NewOp, dl, MovTy,
9859 DAG.getConstant(Value, dl, MVT::i32),
9860 DAG.getConstant(Shift, dl, MVT::i32));
9861
9862 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9863 }
9864 }
9865
9866 return SDValue();
9867}
9868
9869// Try 32-bit splatted SIMD immediate with shifted ones.
9870static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
9871 SelectionDAG &DAG, const APInt &Bits) {
9872 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9873 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9874 EVT VT = Op.getValueType();
9875 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
9876 bool isAdvSIMDModImm = false;
9877 uint64_t Shift;
9878
9879 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType7(Value))) {
9880 Value = AArch64_AM::encodeAdvSIMDModImmType7(Value);
9881 Shift = 264;
9882 }
9883 else if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType8(Value))) {
9884 Value = AArch64_AM::encodeAdvSIMDModImmType8(Value);
9885 Shift = 272;
9886 }
9887
9888 if (isAdvSIMDModImm) {
9889 SDLoc dl(Op);
9890 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9891 DAG.getConstant(Value, dl, MVT::i32),
9892 DAG.getConstant(Shift, dl, MVT::i32));
9893 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9894 }
9895 }
9896
9897 return SDValue();
9898}
9899
9900// Try 8-bit splatted SIMD immediate.
9901static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9902 const APInt &Bits) {
9903 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9904 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9905 EVT VT = Op.getValueType();
9906 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
9907
9908 if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
9909 Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
9910
9911 SDLoc dl(Op);
9912 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9913 DAG.getConstant(Value, dl, MVT::i32));
9914 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9915 }
9916 }
9917
9918 return SDValue();
9919}
9920
9921// Try FP splatted SIMD immediate.
9922static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
9923 const APInt &Bits) {
9924 if (Bits.getHiBits(64) == Bits.getLoBits(64)) {
9925 uint64_t Value = Bits.zextOrTrunc(64).getZExtValue();
9926 EVT VT = Op.getValueType();
9927 bool isWide = (VT.getSizeInBits() == 128);
9928 MVT MovTy;
9929 bool isAdvSIMDModImm = false;
9930
9931 if ((isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType11(Value))) {
9932 Value = AArch64_AM::encodeAdvSIMDModImmType11(Value);
9933 MovTy = isWide ? MVT::v4f32 : MVT::v2f32;
9934 }
9935 else if (isWide &&
9936 (isAdvSIMDModImm = AArch64_AM::isAdvSIMDModImmType12(Value))) {
9937 Value = AArch64_AM::encodeAdvSIMDModImmType12(Value);
9938 MovTy = MVT::v2f64;
9939 }
9940
9941 if (isAdvSIMDModImm) {
9942 SDLoc dl(Op);
9943 SDValue Mov = DAG.getNode(NewOp, dl, MovTy,
9944 DAG.getConstant(Value, dl, MVT::i32));
9945 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
9946 }
9947 }
9948
9949 return SDValue();
9950}
9951
9952// Specialized code to quickly find if PotentialBVec is a BuildVector whose
9953// lanes all hold the same constant int value; that value is returned in the
9954// reference arg ConstVal.
9955static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
9956 uint64_t &ConstVal) {
9957 BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
9958 if (!Bvec)
9959 return false;
9960 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
9961 if (!FirstElt)
9962 return false;
9963 EVT VT = Bvec->getValueType(0);
9964 unsigned NumElts = VT.getVectorNumElements();
9965 for (unsigned i = 1; i < NumElts; ++i)
9966 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
9967 return false;
9968 ConstVal = FirstElt->getZExtValue();
9969 return true;
9970}
9971
9972static unsigned getIntrinsicID(const SDNode *N) {
9973 unsigned Opcode = N->getOpcode();
9974 switch (Opcode) {
9975 default:
9976 return Intrinsic::not_intrinsic;
9977 case ISD::INTRINSIC_WO_CHAIN: {
9978 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
9979 if (IID < Intrinsic::num_intrinsics)
9980 return IID;
9981 return Intrinsic::not_intrinsic;
9982 }
9983 }
9984}
9985
9986// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
9987// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
9988// BUILD_VECTOR with constant element C1, C2 is a constant, and:
9989// - for the SLI case: C1 == ~(Ones(ElemSizeInBits) << C2)
9990// - for the SRI case: C1 == ~(Ones(ElemSizeInBits) >> C2)
9991// The (or (lsl Y, C2), (and X, BvecC1)) case is also handled.
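// Worked example for 8-bit elements and C2 == 3:
//   SLI: C1 must be ~(0xff << 3) == 0x07 (the low 3 bits), so
//        (or (and X, splat(0x07)), (VSHL Y, #3)) --> (VSLI X, Y, #3)
//   SRI: C1 must be ~(0xff >> 3) == 0xe0 (the high 3 bits), so
//        (or (and X, splat(0xe0)), (VLSHR Y, #3)) --> (VSRI X, Y, #3)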
9992static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
9993 EVT VT = N->getValueType(0);
9994
9995 if (!VT.isVector())
9996 return SDValue();
9997
9998 SDLoc DL(N);
9999
10000 SDValue And;
10001 SDValue Shift;
10002
10003 SDValue FirstOp = N->getOperand(0);
10004 unsigned FirstOpc = FirstOp.getOpcode();
10005 SDValue SecondOp = N->getOperand(1);
10006 unsigned SecondOpc = SecondOp.getOpcode();
10007
10008 // Is one of the operands an AND or a BICi? The AND may have been optimised to
10009 // a BICi in order to use an immediate instead of a register.
10010 // Is the other operand an shl or lshr? This will have been turned into:
10011 // AArch64ISD::VSHL vector, #shift or AArch64ISD::VLSHR vector, #shift.
10012 if ((FirstOpc == ISD::AND || FirstOpc == AArch64ISD::BICi) &&
10013 (SecondOpc == AArch64ISD::VSHL || SecondOpc == AArch64ISD::VLSHR)) {
10014 And = FirstOp;
10015 Shift = SecondOp;
10016
10017 } else if ((SecondOpc == ISD::AND || SecondOpc == AArch64ISD::BICi) &&
10018 (FirstOpc == AArch64ISD::VSHL || FirstOpc == AArch64ISD::VLSHR)) {
10019 And = SecondOp;
10020 Shift = FirstOp;
10021 } else
10022 return SDValue();
10023
10024 bool IsAnd = And.getOpcode() == ISD::AND;
10025 bool IsShiftRight = Shift.getOpcode() == AArch64ISD::VLSHR;
10026
10027 // Is the shift amount constant?
10028 ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
10029 if (!C2node)
10030 return SDValue();
10031
10032 uint64_t C1;
10033 if (IsAnd) {
10034 // Is the and mask vector all constant?
10035 if (!isAllConstantBuildVector(And.getOperand(1), C1))
10036 return SDValue();
10037 } else {
10038 // Reconstruct the corresponding AND immediate from the two BICi immediates.
10039 ConstantSDNode *C1nodeImm = dyn_cast<ConstantSDNode>(And.getOperand(1));
10040 ConstantSDNode *C1nodeShift = dyn_cast<ConstantSDNode>(And.getOperand(2));
10041 assert(C1nodeImm && C1nodeShift);
10042 C1 = ~(C1nodeImm->getZExtValue() << C1nodeShift->getZExtValue());
10043 }
10044
10045 // Is C1 == ~(Ones(ElemSizeInBits) << C2) or
10046 // C1 == ~(Ones(ElemSizeInBits) >> C2), taking into account
10047 // how much one can shift elements of a particular size?
10048 uint64_t C2 = C2node->getZExtValue();
10049 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
10050 if (C2 > ElemSizeInBits)
10051 return SDValue();
10052
10053 APInt C1AsAPInt(ElemSizeInBits, C1);
10054 APInt RequiredC1 = IsShiftRight ? APInt::getHighBitsSet(ElemSizeInBits, C2)
10055 : APInt::getLowBitsSet(ElemSizeInBits, C2);
10056 if (C1AsAPInt != RequiredC1)
10057 return SDValue();
10058
10059 SDValue X = And.getOperand(0);
10060 SDValue Y = Shift.getOperand(0);
10061
10062 unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
10063 SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1));
10064
10065 LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n");
10066 LLVM_DEBUG(N->dump(&DAG));
10067 LLVM_DEBUG(dbgs() << "into: \n");
10068 LLVM_DEBUG(ResultSLI->dump(&DAG));
10069
10070 ++NumShiftInserts;
10071 return ResultSLI;
10072}
10073
10074SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
10075 SelectionDAG &DAG) const {
10076 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10077 return LowerToScalableOp(Op, DAG);
10078
10079 // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
10080 if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
10081 return Res;
10082
10083 EVT VT = Op.getValueType();
10084
10085 SDValue LHS = Op.getOperand(0);
10086 BuildVectorSDNode *BVN =
10087 dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
10088 if (!BVN) {
10089 // OR commutes, so try swapping the operands.
10090 LHS = Op.getOperand(1);
10091 BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
10092 }
10093 if (!BVN)
10094 return Op;
10095
10096 APInt DefBits(VT.getSizeInBits(), 0);
10097 APInt UndefBits(VT.getSizeInBits(), 0);
10098 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
10099 SDValue NewOp;
10100
10101 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
10102 DefBits, &LHS)) ||
10103 (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
10104 DefBits, &LHS)))
10105 return NewOp;
10106
10107 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::ORRi, Op, DAG,
10108 UndefBits, &LHS)) ||
10109 (NewOp = tryAdvSIMDModImm16(AArch64ISD::ORRi, Op, DAG,
10110 UndefBits, &LHS)))
10111 return NewOp;
10112 }
10113
10114 // We can always fall back to a non-immediate OR.
10115 return Op;
10116}
10117
10118// Normalize the operands of BUILD_VECTOR. The value of constant operands will
10119// be truncated to fit element width.
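// For example, a v8i8 lane holding the constant 0x1ff is rebuilt below as the
// i32 constant 0xff: only the low 8 bits survive.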
10120static SDValue NormalizeBuildVector(SDValue Op,
10121 SelectionDAG &DAG) {
10122 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
10123 SDLoc dl(Op);
10124 EVT VT = Op.getValueType();
10125 EVT EltTy = VT.getVectorElementType();
10126
10127 if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
10128 return Op;
10129
10130 SmallVector<SDValue, 16> Ops;
10131 for (SDValue Lane : Op->ops()) {
10132 // For integer vectors, type legalization would have promoted the
10133 // operands already. Otherwise, if Op is a floating-point splat
10134 // (with operands cast to integers), then the only possibilities
10135 // are constants and UNDEFs.
10136 if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
10137 APInt LowBits(EltTy.getSizeInBits(),
10138 CstLane->getZExtValue());
10139 Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
10140 } else if (Lane.getNode()->isUndef()) {
10141 Lane = DAG.getUNDEF(MVT::i32);
10142 } else {
10143 assert(Lane.getValueType() == MVT::i32 &&
10144 "Unexpected BUILD_VECTOR operand type");
10145 }
10146 Ops.push_back(Lane);
10147 }
10148 return DAG.getBuildVector(VT, dl, Ops);
10149}
10150
10151static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG) {
10152 EVT VT = Op.getValueType();
10153
10154 APInt DefBits(VT.getSizeInBits(), 0);
10155 APInt UndefBits(VT.getSizeInBits(), 0);
10156 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10157 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
10158 SDValue NewOp;
10159 if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10160 (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10161 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10162 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10163 (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10164 (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10165 return NewOp;
10166
10167 DefBits = ~DefBits;
10168 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10169 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10170 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10171 return NewOp;
10172
10173 DefBits = UndefBits;
10174 if ((NewOp = tryAdvSIMDModImm64(AArch64ISD::MOVIedit, Op, DAG, DefBits)) ||
10175 (NewOp = tryAdvSIMDModImm32(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10176 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MOVImsl, Op, DAG, DefBits)) ||
10177 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MOVIshift, Op, DAG, DefBits)) ||
10178 (NewOp = tryAdvSIMDModImm8(AArch64ISD::MOVI, Op, DAG, DefBits)) ||
10179 (NewOp = tryAdvSIMDModImmFP(AArch64ISD::FMOV, Op, DAG, DefBits)))
10180 return NewOp;
10181
10182 DefBits = ~UndefBits;
10183 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::MVNIshift, Op, DAG, DefBits)) ||
10184 (NewOp = tryAdvSIMDModImm321s(AArch64ISD::MVNImsl, Op, DAG, DefBits)) ||
10185 (NewOp = tryAdvSIMDModImm16(AArch64ISD::MVNIshift, Op, DAG, DefBits)))
10186 return NewOp;
10187 }
10188
10189 return SDValue();
10190}
10191
10192SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
10193 SelectionDAG &DAG) const {
10194 EVT VT = Op.getValueType();
10195
10196 // Try to build a simple constant vector.
10197 Op = NormalizeBuildVector(Op, DAG);
10198 if (VT.isInteger()) {
10199 // Certain vector constants, used to express things like logical NOT and
10200 // arithmetic NEG, are passed through unmodified. This allows special
10201 // patterns for these operations to match, which will lower these constants
10202 // to whatever is proven necessary.
10203 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
10204 if (BVN->isConstant())
10205 if (ConstantSDNode *Const = BVN->getConstantSplatNode()) {
10206 unsigned BitSize = VT.getVectorElementType().getSizeInBits();
10207 APInt Val(BitSize,
10208 Const->getAPIntValue().zextOrTrunc(BitSize).getZExtValue());
10209 if (Val.isNullValue() || Val.isAllOnesValue())
10210 return Op;
10211 }
10212 }
10213
10214 if (SDValue V = ConstantBuildVector(Op, DAG))
10215 return V;
10216
10217 // Scan through the operands to find some interesting properties we can
10218 // exploit:
10219 // 1) If only one value is used, we can use a DUP, or
10220 // 2) if only the low element is not undef, we can just insert that, or
10221 // 3) if only one constant value is used (w/ some non-constant lanes),
10222 // we can splat the constant value into the whole vector then fill
10223 // in the non-constant lanes.
10224 // 4) FIXME: If different constant values are used, but we can intelligently
10225 // select the values we'll be overwriting for the non-constant
10226 // lanes such that we can directly materialize the vector
10227 // some other way (MOVI, e.g.), we can be sneaky.
10228 // 5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
10229 SDLoc dl(Op);
10230 unsigned NumElts = VT.getVectorNumElements();
10231 bool isOnlyLowElement = true;
10232 bool usesOnlyOneValue = true;
10233 bool usesOnlyOneConstantValue = true;
10234 bool isConstant = true;
10235 bool AllLanesExtractElt = true;
10236 unsigned NumConstantLanes = 0;
10237 unsigned NumDifferentLanes = 0;
10238 unsigned NumUndefLanes = 0;
10239 SDValue Value;
10240 SDValue ConstantValue;
10241 for (unsigned i = 0; i < NumElts; ++i) {
10242 SDValue V = Op.getOperand(i);
10243 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
10244 AllLanesExtractElt = false;
10245 if (V.isUndef()) {
10246 ++NumUndefLanes;
10247 continue;
10248 }
10249 if (i > 0)
10250 isOnlyLowElement = false;
10251 if (!isIntOrFPConstant(V))
10252 isConstant = false;
10253
10254 if (isIntOrFPConstant(V)) {
10255 ++NumConstantLanes;
10256 if (!ConstantValue.getNode())
10257 ConstantValue = V;
10258 else if (ConstantValue != V)
10259 usesOnlyOneConstantValue = false;
10260 }
10261
10262 if (!Value.getNode())
10263 Value = V;
10264 else if (V != Value) {
10265 usesOnlyOneValue = false;
10266 ++NumDifferentLanes;
10267 }
10268 }
10269
10270 if (!Value.getNode()) {
10271 LLVM_DEBUG(
10272 dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
10273 return DAG.getUNDEF(VT);
10274 }
10275
10276 // Convert BUILD_VECTOR where all elements but the lowest are undef into
10277 // SCALAR_TO_VECTOR, except for when we have a single-element constant vector
10278 // as SimplifyDemandedBits will just turn that back into BUILD_VECTOR.
10279 if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
10280 LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
10281 "SCALAR_TO_VECTOR node\n");
10282 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
10283 }
10284
10285 if (AllLanesExtractElt) {
10286 SDNode *Vector = nullptr;
10287 bool Even = false;
10288 bool Odd = false;
10289 // Check whether the extract elements match the Even pattern <0,2,4,...> or
10290 // the Odd pattern <1,3,5,...>.
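// For illustration: a v4i16 BUILD_VECTOR whose lanes are extract_vector_elt
// of one v8i16 source at indices <0,2,4,6> becomes UZP1 of the two halves of
// that source, and indices <1,3,5,7> become UZP2.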
10291 for (unsigned i = 0; i < NumElts; ++i) {
10292 SDValue V = Op.getOperand(i);
10293 const SDNode *N = V.getNode();
10294 if (!isa<ConstantSDNode>(N->getOperand(1)))
10295 break;
10296 SDValue N0 = N->getOperand(0);
10297
10298 // All elements are extracted from the same vector.
10299 if (!Vector) {
10300 Vector = N0.getNode();
10301 // Check that the type of EXTRACT_VECTOR_ELT matches the type of
10302 // BUILD_VECTOR.
10303 if (VT.getVectorElementType() !=
10304 N0.getValueType().getVectorElementType())
10305 break;
10306 } else if (Vector != N0.getNode()) {
10307 Odd = false;
10308 Even = false;
10309 break;
10310 }
10311
10312 // Extracted values are either at Even indices <0,2,4,...> or at Odd
10313 // indices <1,3,5,...>.
10314 uint64_t Val = N->getConstantOperandVal(1);
10315 if (Val == 2 * i) {
10316 Even = true;
10317 continue;
10318 }
10319 if (Val - 1 == 2 * i) {
10320 Odd = true;
10321 continue;
10322 }
10323
10324 // Something does not match: abort.
10325 Odd = false;
10326 Even = false;
10327 break;
10328 }
10329 if (Even || Odd) {
10330 SDValue LHS =
10331 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10332 DAG.getConstant(0, dl, MVT::i64));
10333 SDValue RHS =
10334 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
10335 DAG.getConstant(NumElts, dl, MVT::i64));
10336
10337 if (Even && !Odd)
10338 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), LHS,
10339 RHS);
10340 if (Odd && !Even)
10341 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), LHS,
10342 RHS);
10343 }
10344 }
10345
10346 // Use DUP for non-constant splats. For f32 constant splats, reduce to
10347 // i32 and try again.
10348 if (usesOnlyOneValue) {
10349 if (!isConstant) {
10350 if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
10351 Value.getValueType() != VT) {
10352 LLVM_DEBUG(
10353 dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
10354 return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
10355 }
10356
10357 // This is actually a DUPLANExx operation, which keeps everything vectory.
10358
10359 SDValue Lane = Value.getOperand(1);
10360 Value = Value.getOperand(0);
10361 if (Value.getValueSizeInBits() == 64) {
10362 LLVM_DEBUG(
10363 dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
10364 "widening it\n");
10365 Value = WidenVector(Value, DAG);
10366 }
10367
10368 unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
10369 return DAG.getNode(Opcode, dl, VT, Value, Lane);
10370 }
10371
10372 if (VT.getVectorElementType().isFloatingPoint()) {
10373 SmallVector<SDValue, 8> Ops;
10374 EVT EltTy = VT.getVectorElementType();
10375 assert ((EltTy == MVT::f16 || EltTy == MVT::bf16 || EltTy == MVT::f32 ||
10376 EltTy == MVT::f64) && "Unsupported floating-point vector type");
10377 LLVM_DEBUG(
10378 dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
10379 "BITCASTS, and try again\n");
10380 MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
10381 for (unsigned i = 0; i < NumElts; ++i)
10382 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
10383 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
10384 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
10385 LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
10386 Val.dump(););
10387 Val = LowerBUILD_VECTOR(Val, DAG);
10388 if (Val.getNode())
10389 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
10390 }
10391 }
10392
10393 // If we need to insert a small number of different non-constant elements and
10394 // the vector width is sufficiently large, prefer using DUP with the common
10395 // value and INSERT_VECTOR_ELT for the different lanes. If DUP is preferred,
10396 // skip the constant lane handling below.
10397 bool PreferDUPAndInsert =
10398 !isConstant && NumDifferentLanes >= 1 &&
10399 NumDifferentLanes < ((NumElts - NumUndefLanes) / 2) &&
10400 NumDifferentLanes >= NumConstantLanes;
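// For illustration: a v8i16 with seven lanes equal to one non-constant value,
// one differing lane and no constant or undef lanes gives NumDifferentLanes
// == 1, which passes every check above, so a single DUP plus one
// INSERT_VECTOR_ELT is preferred over inserting each lane individually.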
10401
10402 // If there was only one constant value used and for more than one lane,
10403 // start by splatting that value, then replace the non-constant lanes. This
10404 // is better than the default, which will perform a separate initialization
10405 // for each lane.
10406 if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
10407 // Firstly, try to materialize the splat constant.
10408 SDValue Vec = DAG.getSplatBuildVector(VT, dl, ConstantValue),
10409 Val = ConstantBuildVector(Vec, DAG);
10410 if (!Val) {
10411 // Otherwise, materialize the constant and splat it.
10412 Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
10413 DAG.ReplaceAllUsesWith(Vec.getNode(), &Val);
10414 }
10415
10416 // Now insert the non-constant lanes.
10417 for (unsigned i = 0; i < NumElts; ++i) {
10418 SDValue V = Op.getOperand(i);
10419 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10420 if (!isIntOrFPConstant(V))
10421 // Note that type legalization likely mucked about with the VT of the
10422 // source operand, so we may have to convert it here before inserting.
10423 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
10424 }
10425 return Val;
10426 }
10427
10428 // This will generate a load from the constant pool.
10429 if (isConstant) {
10430 LLVM_DEBUG(
10431 dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
10432 "expansion\n");
10433 return SDValue();
10434 }
10435
10436 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
10437 if (NumElts >= 4) {
10438 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
10439 return shuffle;
10440 }
10441
10442 if (PreferDUPAndInsert) {
10443 // First, build a constant vector with the common element.
10444 SmallVector<SDValue, 8> Ops(NumElts, Value);
10445 SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
10446 // Next, insert the elements that do not match the common value.
10447 for (unsigned I = 0; I < NumElts; ++I)
10448 if (Op.getOperand(I) != Value)
10449 NewVector =
10450 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
10451 Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
10452
10453 return NewVector;
10454 }
10455
10456 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
10457 // know the default expansion would otherwise fall back on something even
10458 // worse. For a vector with one or two non-undef values, that's
10459 // scalar_to_vector for the elements followed by a shuffle (provided the
10460 // shuffle is valid for the target) and materialization element by element
10461 // on the stack followed by a load for everything else.
10462 if (!isConstant && !usesOnlyOneValue) {
10463 LLVM_DEBUG(
10464 dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
10465 "of INSERT_VECTOR_ELT\n");
10466
10467 SDValue Vec = DAG.getUNDEF(VT);
10468 SDValue Op0 = Op.getOperand(0);
10469 unsigned i = 0;
10470
10471 // Use SCALAR_TO_VECTOR for lane zero to
10472 // a) Avoid a RMW dependency on the full vector register, and
10473 // b) Allow the register coalescer to fold away the copy if the
10474 // value is already in an S or D register, and we're forced to emit an
10475 // INSERT_SUBREG that we can't fold anywhere.
10476 //
10477 // We also allow types like i8 and i16 which are illegal scalar but legal
10478 // vector element types. After type-legalization the inserted value is
10479 // extended (i32) and it is safe to cast them to the vector type by ignoring
10480 // the upper bits of the lowest lane (e.g. v8i8, v4i16).
10481 if (!Op0.isUndef()) {
10482 LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
10483 Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
10484 ++i;
10485 }
10486 LLVM_DEBUG(if (i < NumElts) dbgs()
10487 << "Creating nodes for the other vector elements:\n";);
10488 for (; i < NumElts; ++i) {
10489 SDValue V = Op.getOperand(i);
10490 if (V.isUndef())
10491 continue;
10492 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
10493 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
10494 }
10495 return Vec;
10496 }
10497
10498 LLVM_DEBUG(
10499 dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
10500 "better alternative\n");
10501 return SDValue();
10502}
10503
10504SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
10505 SelectionDAG &DAG) const {
10506 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10507 return LowerFixedLengthConcatVectorsToSVE(Op, DAG);
10508
10509 assert(Op.getValueType().isScalableVector() &&
10510 isTypeLegal(Op.getValueType()) &&
10511 "Expected legal scalable vector type!");
10512
10513 if (isTypeLegal(Op.getOperand(0).getValueType())) {
10514 unsigned NumOperands = Op->getNumOperands();
10515 assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
10516 "Unexpected number of operands in CONCAT_VECTORS");
10517
10518 if (NumOperands == 2)
10519 return Op;
10520
10521 // Concat each pair of subvectors and pack into the lower half of the array.
10522 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
10523 while (ConcatOps.size() > 1) {
10524 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
10525 SDValue V1 = ConcatOps[I];
10526 SDValue V2 = ConcatOps[I + 1];
10527 EVT SubVT = V1.getValueType();
10528 EVT PairVT = SubVT.getDoubleNumVectorElementsVT(*DAG.getContext());
10529 ConcatOps[I / 2] =
10530 DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Op), PairVT, V1, V2);
10531 }
10532 ConcatOps.resize(ConcatOps.size() / 2);
10533 }
10534 return ConcatOps[0];
10535 }
10536
10537 return SDValue();
10538}
10539
10540SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10541 SelectionDAG &DAG) const {
10542 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
10543
10544 if (useSVEForFixedLengthVectorVT(Op.getValueType()))
10545 return LowerFixedLengthInsertVectorElt(Op, DAG);
10546
10547 // Check for non-constant or out of range lane.
10548 EVT VT = Op.getOperand(0).getValueType();
10549
10550 if (VT.getScalarType() == MVT::i1) {
10551 EVT VectorVT = getPromotedVTForPredicate(VT);
10552 SDLoc DL(Op);
10553 SDValue ExtendedVector =
10554 DAG.getAnyExtOrTrunc(Op.getOperand(0), DL, VectorVT);
10555 SDValue ExtendedValue =
10556 DAG.getAnyExtOrTrunc(Op.getOperand(1), DL,
10557 VectorVT.getScalarType().getSizeInBits() < 32
10558 ? MVT::i32
10559 : VectorVT.getScalarType());
10560 ExtendedVector =
10561 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VectorVT, ExtendedVector,
10562 ExtendedValue, Op.getOperand(2));
10563 return DAG.getAnyExtOrTrunc(ExtendedVector, DL, VT);
10564 }
10565
10566 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10567 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10568 return SDValue();
10569
10570 // Insertion/extraction are legal for V128 types.
10571 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10572 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10573 VT == MVT::v8f16 || VT == MVT::v8bf16)
10574 return Op;
10575
10576 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10577 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10578 VT != MVT::v4bf16)
10579 return SDValue();
10580
10581 // For V64 types, we perform insertion by expanding the value
10582 // to a V128 type and perform the insertion on that.
10583 SDLoc DL(Op);
10584 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10585 EVT WideTy = WideVec.getValueType();
10586
10587 SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
10588 Op.getOperand(1), Op.getOperand(2));
10589 // Re-narrow the resultant vector.
10590 return NarrowVector(Node, DAG);
10591}
10592
10593SDValue
10594AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
10595 SelectionDAG &DAG) const {
10596 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
10597 EVT VT = Op.getOperand(0).getValueType();
10598
10599 if (VT.getScalarType() == MVT::i1) {
10600 // We can't directly extract from an SVE predicate; extend it first.
10601 // (This isn't the only possible lowering, but it's straightforward.)
10602 EVT VectorVT = getPromotedVTForPredicate(VT);
10603 SDLoc DL(Op);
10604 SDValue Extend =
10605 DAG.getNode(ISD::ANY_EXTEND, DL, VectorVT, Op.getOperand(0));
10606 MVT ExtractTy = VectorVT == MVT::nxv2i64 ? MVT::i64 : MVT::i32;
10607 SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractTy,
10608 Extend, Op.getOperand(1));
10609 return DAG.getAnyExtOrTrunc(Extract, DL, Op.getValueType());
10610 }
10611
10612 if (useSVEForFixedLengthVectorVT(VT))
10613 return LowerFixedLengthExtractVectorElt(Op, DAG);
10614
10615 // Check for non-constant or out of range lane.
10616 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
10617 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
10618 return SDValue();
10619
10620 // Insertion/extraction are legal for V128 types.
10621 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10622 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
10623 VT == MVT::v8f16 || VT == MVT::v8bf16)
10624 return Op;
10625
10626 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
10627 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16 &&
10628 VT != MVT::v4bf16)
10629 return SDValue();
10630
10631 // For V64 types, we perform extraction by expanding the value
10632 // to a V128 type and perform the extraction on that.
10633 SDLoc DL(Op);
10634 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
10635 EVT WideTy = WideVec.getValueType();
10636
10637 EVT ExtrTy = WideTy.getVectorElementType();
10638 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
10639 ExtrTy = MVT::i32;
10640
10641 // For extractions, we just return the result directly.
10642 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
10643 Op.getOperand(1));
10644}
10645
10646SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
10647 SelectionDAG &DAG) const {
10648 assert(Op.getValueType().isFixedLengthVector() &&
10649 "Only cases that extract a fixed length vector are supported!");
10650
10651 EVT InVT = Op.getOperand(0).getValueType();
10652 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
10653 unsigned Size = Op.getValueSizeInBits();
10654
10655 if (InVT.isScalableVector()) {
10656 // This will be matched by custom code during ISelDAGToDAG.
10657 if (Idx == 0 && isPackedVectorType(InVT, DAG))
10658 return Op;
10659
10660 return SDValue();
10661 }
10662
10663 // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
10664 if (Idx == 0 && InVT.getSizeInBits() <= 128)
10665 return Op;
10666
10667 // If this is extracting the upper 64-bits of a 128-bit vector, we match
10668 // that directly.
10669 if (Size == 64 && Idx * InVT.getScalarSizeInBits() == 64 &&
10670 InVT.getSizeInBits() == 128)
10671 return Op;
10672
10673 return SDValue();
10674}
10675
10676SDValue AArch64TargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
10677 SelectionDAG &DAG) const {
10678 assert(Op.getValueType().isScalableVector() &&
10679 "Only expect to lower inserts into scalable vectors!");
10680
10681 EVT InVT = Op.getOperand(1).getValueType();
10682 unsigned Idx = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
10683
10684 if (InVT.isScalableVector()) {
10685 SDLoc DL(Op);
10686 EVT VT = Op.getValueType();
10687
10688 if (!isTypeLegal(VT) || !VT.isInteger())
10689 return SDValue();
10690
10691 SDValue Vec0 = Op.getOperand(0);
10692 SDValue Vec1 = Op.getOperand(1);
10693
10694 // Ensure the subvector is half the size of the main vector.
10695 if (VT.getVectorElementCount() != (InVT.getVectorElementCount() * 2))
10696 return SDValue();
10697
10698 // Extend elements of smaller vector...
10699 EVT WideVT = InVT.widenIntegerVectorElementType(*(DAG.getContext()));
10700 SDValue ExtVec = DAG.getNode(ISD::ANY_EXTEND, DL, WideVT, Vec1);
10701
10702 if (Idx == 0) {
10703 SDValue HiVec0 = DAG.getNode(AArch64ISD::UUNPKHI, DL, WideVT, Vec0);
10704 return DAG.getNode(AArch64ISD::UZP1, DL, VT, ExtVec, HiVec0);
10705 } else if (Idx == InVT.getVectorMinNumElements()) {
10706 SDValue LoVec0 = DAG.getNode(AArch64ISD::UUNPKLO, DL, WideVT, Vec0);
10707 return DAG.getNode(AArch64ISD::UZP1, DL, VT, LoVec0, ExtVec);
10708 }
10709
10710 return SDValue();
10711 }
10712
10713 // This will be matched by custom code during ISelDAGToDAG.
10714 if (Idx == 0 && isPackedVectorType(InVT, DAG) && Op.getOperand(0).isUndef())
10715 return Op;
10716
10717 return SDValue();
10718}
10719
10720SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
10721 EVT VT = Op.getValueType();
10722
10723 if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
10724 return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
10725
10726 assert(VT.isScalableVector() && "Expected a scalable vector.");
10727
10728 bool Signed = Op.getOpcode() == ISD::SDIV;
10729 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
10730
10731 if (VT == MVT::nxv4i32 || VT == MVT::nxv2i64)
10732 return LowerToPredicatedOp(Op, DAG, PredOpcode);
10733
10734 // SVE doesn't have i8 and i16 DIV operations; widen them to 32-bit
10735 // operations, and truncate the result.
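// For illustration: an nxv16i8 divide is unpacked below into two nxv8i16
// halves; those divides are still illegal and get widened the same way to
// nxv4i32, and the narrowed results are packed back together with UZP1.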
10736 EVT WidenedVT;
10737 if (VT == MVT::nxv16i8)
10738 WidenedVT = MVT::nxv8i16;
10739 else if (VT == MVT::nxv8i16)
10740 WidenedVT = MVT::nxv4i32;
10741 else
10742 llvm_unreachable("Unexpected Custom DIV operation");
10743
10744 SDLoc dl(Op);
10745 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
10746 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
10747 SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
10748 SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
10749 SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
10750 SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
10751 SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
10752 SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
10753 return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLo, ResultHi);
10754}
10755
10756bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
10757 // Currently no fixed length shuffles that require SVE are legal.
10758 if (useSVEForFixedLengthVectorVT(VT))
10759 return false;
10760
10761 if (VT.getVectorNumElements() == 4 &&
10762 (VT.is128BitVector() || VT.is64BitVector())) {
10763 unsigned PFIndexes[4];
10764 for (unsigned i = 0; i != 4; ++i) {
10765 if (M[i] < 0)
10766 PFIndexes[i] = 8;
10767 else
10768 PFIndexes[i] = M[i];
10769 }
10770
10771 // Compute the index in the perfect shuffle table.
10772 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10773 PFIndexes[2] * 9 + PFIndexes[3];
10774 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10775 unsigned Cost = (PFEntry >> 30);
10776
10777 if (Cost <= 4)
10778 return true;
10779 }
10780
10781 bool DummyBool;
10782 int DummyInt;
10783 unsigned DummyUnsigned;
10784
10785 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
10786 isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
10787 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
10788 // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
10789 isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
10790 isZIPMask(M, VT, DummyUnsigned) ||
10791 isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
10792 isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
10793 isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
10794 isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
10795 isConcatMask(M, VT, VT.getSizeInBits() == 128));
10796}
10797
10798/// getVShiftImm - Check if this is a valid build_vector for the immediate
10799/// operand of a vector shift operation, where all the elements of the
10800/// build_vector must have the same constant integer value.
10801static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
10802 // Ignore bit_converts.
10803 while (Op.getOpcode() == ISD::BITCAST)
10804 Op = Op.getOperand(0);
10805 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
10806 APInt SplatBits, SplatUndef;
10807 unsigned SplatBitSize;
10808 bool HasAnyUndefs;
10809 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
10810 HasAnyUndefs, ElementBits) ||
10811 SplatBitSize > ElementBits)
10812 return false;
10813 Cnt = SplatBits.getSExtValue();
10814 return true;
10815}
10816
10817/// isVShiftLImm - Check if this is a valid build_vector for the immediate
10818/// operand of a vector shift left operation. That value must be in the range:
10819/// 0 <= Value < ElementBits for a left shift; or
10820/// 0 <= Value <= ElementBits for a long left shift.
10821static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
10822 assert(VT.isVector() && "vector shift count is not a vector type");
10823 int64_t ElementBits = VT.getScalarSizeInBits();
10824 if (!getVShiftImm(Op, ElementBits, Cnt))
10825 return false;
10826 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
10827}
10828
10829/// isVShiftRImm - Check if this is a valid build_vector for the immediate
10830/// operand of a vector shift right operation. The value must be in the range:
10831/// 1 <= Value <= ElementBits for a right shift; or
10832static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
10833 assert(VT.isVector() && "vector shift count is not a vector type");
10834 int64_t ElementBits = VT.getScalarSizeInBits();
10835 if (!getVShiftImm(Op, ElementBits, Cnt))
10836 return false;
10837 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
10838}
10839
10840SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
10841 SelectionDAG &DAG) const {
10842 EVT VT = Op.getValueType();
10843
10844 if (VT.getScalarType() == MVT::i1) {
10845 // Lower i1 truncate to `(x & 1) != 0`.
10846 SDLoc dl(Op);
10847 EVT OpVT = Op.getOperand(0).getValueType();
10848 SDValue Zero = DAG.getConstant(0, dl, OpVT);
10849 SDValue One = DAG.getConstant(1, dl, OpVT);
10850 SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
10851 return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
10852 }
10853
10854 if (!VT.isVector() || VT.isScalableVector())
10855 return SDValue();
10856
10857 if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
10858 return LowerFixedLengthVectorTruncateToSVE(Op, DAG);
10859
10860 return SDValue();
10861}
10862
10863SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
10864 SelectionDAG &DAG) const {
10865 EVT VT = Op.getValueType();
10866 SDLoc DL(Op);
10867 int64_t Cnt;
10868
10869 if (!Op.getOperand(1).getValueType().isVector())
10870 return Op;
10871 unsigned EltSize = VT.getScalarSizeInBits();
10872
10873 switch (Op.getOpcode()) {
10874 default:
10875 llvm_unreachable("unexpected shift opcode")::llvm::llvm_unreachable_internal("unexpected shift opcode", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 10875)
;
10876
10877 case ISD::SHL:
10878 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT))
10879 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
10880
10881 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
10882 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
10883 DAG.getConstant(Cnt, DL, MVT::i32));
10884 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
10885 DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
10886 MVT::i32),
10887 Op.getOperand(0), Op.getOperand(1));
10888 case ISD::SRA:
10889 case ISD::SRL:
10890 if (VT.isScalableVector() || useSVEForFixedLengthVectorVT(VT)) {
10891 unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
10892 : AArch64ISD::SRL_PRED;
10893 return LowerToPredicatedOp(Op, DAG, Opc);
10894 }
10895
10896 // Right shift immediate
10897 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
10898 unsigned Opc =
10899 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
10900 return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
10901 DAG.getConstant(Cnt, DL, MVT::i32));
10902 }
10903
10904 // Right shift register. Note, there is not a shift right register
10905 // instruction, but the shift left register instruction takes a signed
10906 // value, where negative numbers specify a right shift.
10907 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
10908 : Intrinsic::aarch64_neon_ushl;
10909 // Negate the shift amount.
10910 SDValue NegShift = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
10911 Op.getOperand(1));
10912 SDValue NegShiftLeft =
10913 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
10914 DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
10915 NegShift);
10916 return NegShiftLeft;
10917 }
10918
10919 return SDValue();
10920}
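Editorial sketch (not part of AArch64ISelLowering.cpp): the register-shift fallback above relies on the NEON SSHL/USHL convention that a negative per-lane shift amount means a right shift, which is why the amount is negated with `0 - Op1`. A scalar model of that convention, with illustrative names:

#include <cstdint>

constexpr uint64_t ushl_model(uint64_t X, int64_t Amount) {
  // Positive amount shifts left, negative amount shifts right (USHL-style).
  return Amount >= 0 ? X << Amount : X >> -Amount;
}
static_assert(ushl_model(0x10, 3) == 0x80, "positive amount: left shift");
static_assert(ushl_model(0x80, -3) == 0x10, "negated amount: right shift");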
10921
10922static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
10923 AArch64CC::CondCode CC, bool NoNans, EVT VT,
10924 const SDLoc &dl, SelectionDAG &DAG) {
10925 EVT SrcVT = LHS.getValueType();
10926 assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
10927        "function only supposed to emit natural comparisons");
10928
10929 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10930 APInt CnstBits(VT.getSizeInBits(), 0);
10931 APInt UndefBits(VT.getSizeInBits(), 0);
10932 bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
10933 bool IsZero = IsCnst && (CnstBits == 0);
10934
10935 if (SrcVT.getVectorElementType().isFloatingPoint()) {
10936 switch (CC) {
10937 default:
10938 return SDValue();
10939 case AArch64CC::NE: {
10940 SDValue Fcmeq;
10941 if (IsZero)
10942 Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10943 else
10944 Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10945 return DAG.getNOT(dl, Fcmeq, VT);
10946 }
10947 case AArch64CC::EQ:
10948 if (IsZero)
10949 return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
10950 return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
10951 case AArch64CC::GE:
10952 if (IsZero)
10953 return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
10954 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
10955 case AArch64CC::GT:
10956 if (IsZero)
10957 return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
10958 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
10959 case AArch64CC::LS:
10960 if (IsZero)
10961 return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
10962 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
10963 case AArch64CC::LT:
10964 if (!NoNans)
10965 return SDValue();
10966 // If we ignore NaNs then we can use the MI implementation.
10967 LLVM_FALLTHROUGH;
10968 case AArch64CC::MI:
10969 if (IsZero)
10970 return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
10971 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
10972 }
10973 }
10974
10975 switch (CC) {
10976 default:
10977 return SDValue();
10978 case AArch64CC::NE: {
10979 SDValue Cmeq;
10980 if (IsZero)
10981 Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10982 else
10983 Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10984 return DAG.getNOT(dl, Cmeq, VT);
10985 }
10986 case AArch64CC::EQ:
10987 if (IsZero)
10988 return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
10989 return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
10990 case AArch64CC::GE:
10991 if (IsZero)
10992 return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
10993 return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
10994 case AArch64CC::GT:
10995 if (IsZero)
10996 return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
10997 return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
10998 case AArch64CC::LE:
10999 if (IsZero)
11000 return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
11001 return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
11002 case AArch64CC::LS:
11003 return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
11004 case AArch64CC::LO:
11005 return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
11006 case AArch64CC::LT:
11007 if (IsZero)
11008 return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
11009 return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
11010 case AArch64CC::HI:
11011 return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
11012 case AArch64CC::HS:
11013 return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
11014 }
11015}
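Editorial sketch (not part of AArch64ISelLowering.cpp): several of the cases above have no dedicated instruction, so they are emitted with the operands swapped (LT as CMGT(RHS, LHS), LS as CMHS(RHS, LHS), and so on). A scalar model of that identity:

constexpr bool cmgt(int A, int B) { return A > B; }
constexpr bool cmhs(unsigned A, unsigned B) { return A >= B; }
static_assert(cmgt(7, 3) == (3 < 7), "signed LT lowered as swapped GT");
static_assert(cmhs(7u, 7u) == (7u <= 7u), "unsigned LS lowered as swapped HS");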
11016
11017SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
11018 SelectionDAG &DAG) const {
11019 if (Op.getValueType().isScalableVector())
11020 return LowerToPredicatedOp(Op, DAG, AArch64ISD::SETCC_MERGE_ZERO);
11021
11022 if (useSVEForFixedLengthVectorVT(Op.getOperand(0).getValueType()))
11023 return LowerFixedLengthVectorSetccToSVE(Op, DAG);
11024
11025 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
11026 SDValue LHS = Op.getOperand(0);
11027 SDValue RHS = Op.getOperand(1);
11028 EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
11029 SDLoc dl(Op);
11030
11031 if (LHS.getValueType().getVectorElementType().isInteger()) {
11032 assert(LHS.getValueType() == RHS.getValueType());
11033 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
11034 SDValue Cmp =
11035 EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
11036 return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
11037 }
11038
11039 const bool FullFP16 =
11040 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
11041
11042 // Make v4f16 (only) fcmp operations utilise vector instructions
11043 // v8f16 support will be a little more complicated.
11044 if (!FullFP16 && LHS.getValueType().getVectorElementType() == MVT::f16) {
11045 if (LHS.getValueType().getVectorNumElements() == 4) {
11046 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, LHS);
11047 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v4f32, RHS);
11048 SDValue NewSetcc = DAG.getSetCC(dl, MVT::v4i16, LHS, RHS, CC);
11049 DAG.ReplaceAllUsesWith(Op, NewSetcc);
11050 CmpVT = MVT::v4i32;
11051 } else
11052 return SDValue();
11053 }
11054
11055 assert((!FullFP16 && LHS.getValueType().getVectorElementType() != MVT::f16) ||
11056        LHS.getValueType().getVectorElementType() != MVT::f128);
11057
11058 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
11059 // clean. Some of them require two branches to implement.
11060 AArch64CC::CondCode CC1, CC2;
11061 bool ShouldInvert;
11062 changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
11063
11064 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
11065 SDValue Cmp =
11066 EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
11067 if (!Cmp.getNode())
11068 return SDValue();
11069
11070 if (CC2 != AArch64CC::AL) {
11071 SDValue Cmp2 =
11072 EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
11073 if (!Cmp2.getNode())
11074 return SDValue();
11075
11076 Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
11077 }
11078
11079 Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
11080
11081 if (ShouldInvert)
11082 Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
11083
11084 return Cmp;
11085}
11086
11087static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
11088 SelectionDAG &DAG) {
11089 SDValue VecOp = ScalarOp.getOperand(0);
11090 auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
11091 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
11092 DAG.getConstant(0, DL, MVT::i64));
11093}
11094
11095SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
11096 SelectionDAG &DAG) const {
11097 SDValue Src = Op.getOperand(0);
11098
11099 // Try to lower fixed length reductions to SVE.
11100 EVT SrcVT = Src.getValueType();
11101 bool OverrideNEON = Op.getOpcode() == ISD::VECREDUCE_AND ||
11102 Op.getOpcode() == ISD::VECREDUCE_OR ||
11103 Op.getOpcode() == ISD::VECREDUCE_XOR ||
11104 Op.getOpcode() == ISD::VECREDUCE_FADD ||
11105 (Op.getOpcode() != ISD::VECREDUCE_ADD &&
11106 SrcVT.getVectorElementType() == MVT::i64);
11107 if (SrcVT.isScalableVector() ||
11108 useSVEForFixedLengthVectorVT(SrcVT, OverrideNEON)) {
11109
11110 if (SrcVT.getVectorElementType() == MVT::i1)
11111 return LowerPredReductionToSVE(Op, DAG);
11112
11113 switch (Op.getOpcode()) {
11114 case ISD::VECREDUCE_ADD:
11115 return LowerReductionToSVE(AArch64ISD::UADDV_PRED, Op, DAG);
11116 case ISD::VECREDUCE_AND:
11117 return LowerReductionToSVE(AArch64ISD::ANDV_PRED, Op, DAG);
11118 case ISD::VECREDUCE_OR:
11119 return LowerReductionToSVE(AArch64ISD::ORV_PRED, Op, DAG);
11120 case ISD::VECREDUCE_SMAX:
11121 return LowerReductionToSVE(AArch64ISD::SMAXV_PRED, Op, DAG);
11122 case ISD::VECREDUCE_SMIN:
11123 return LowerReductionToSVE(AArch64ISD::SMINV_PRED, Op, DAG);
11124 case ISD::VECREDUCE_UMAX:
11125 return LowerReductionToSVE(AArch64ISD::UMAXV_PRED, Op, DAG);
11126 case ISD::VECREDUCE_UMIN:
11127 return LowerReductionToSVE(AArch64ISD::UMINV_PRED, Op, DAG);
11128 case ISD::VECREDUCE_XOR:
11129 return LowerReductionToSVE(AArch64ISD::EORV_PRED, Op, DAG);
11130 case ISD::VECREDUCE_FADD:
11131 return LowerReductionToSVE(AArch64ISD::FADDV_PRED, Op, DAG);
11132 case ISD::VECREDUCE_FMAX:
11133 return LowerReductionToSVE(AArch64ISD::FMAXNMV_PRED, Op, DAG);
11134 case ISD::VECREDUCE_FMIN:
11135 return LowerReductionToSVE(AArch64ISD::FMINNMV_PRED, Op, DAG);
11136 default:
11137 llvm_unreachable("Unhandled fixed length reduction")::llvm::llvm_unreachable_internal("Unhandled fixed length reduction"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11137)
;
11138 }
11139 }
11140
11141 // Lower NEON reductions.
11142 SDLoc dl(Op);
11143 switch (Op.getOpcode()) {
11144 case ISD::VECREDUCE_ADD:
11145 return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
11146 case ISD::VECREDUCE_SMAX:
11147 return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
11148 case ISD::VECREDUCE_SMIN:
11149 return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
11150 case ISD::VECREDUCE_UMAX:
11151 return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
11152 case ISD::VECREDUCE_UMIN:
11153 return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
11154 case ISD::VECREDUCE_FMAX: {
11155 return DAG.getNode(
11156 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11157 DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
11158 Src);
11159 }
11160 case ISD::VECREDUCE_FMIN: {
11161 return DAG.getNode(
11162 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
11163 DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
11164 Src);
11165 }
11166 default:
11167 llvm_unreachable("Unhandled reduction")::llvm::llvm_unreachable_internal("Unhandled reduction", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 11167)
;
11168 }
11169}
11170
11171SDValue AArch64TargetLowering::LowerATOMIC_LOAD_SUB(SDValue Op,
11172 SelectionDAG &DAG) const {
11173 auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11174 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11175 return SDValue();
11176
11177 // LSE has an atomic load-add instruction, but not a load-sub.
11178 SDLoc dl(Op);
11179 MVT VT = Op.getSimpleValueType();
11180 SDValue RHS = Op.getOperand(2);
11181 AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11182 RHS = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), RHS);
11183 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, dl, AN->getMemoryVT(),
11184 Op.getOperand(0), Op.getOperand(1), RHS,
11185 AN->getMemOperand());
11186}
11187
11188SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
11189 SelectionDAG &DAG) const {
11190 auto &Subtarget = static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
11191 if (!Subtarget.hasLSE() && !Subtarget.outlineAtomics())
11192 return SDValue();
11193
11194 // LSE has an atomic load-clear instruction, but not a load-and.
11195 SDLoc dl(Op);
11196 MVT VT = Op.getSimpleValueType();
11197 SDValue RHS = Op.getOperand(2);
11198 AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
11199 RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getConstant(-1ULL, dl, VT), RHS);
11200 return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
11201 Op.getOperand(0), Op.getOperand(1), RHS,
11202 AN->getMemOperand());
11203}
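Editorial sketch (not part of AArch64ISelLowering.cpp): both lowerings above lean on simple identities so that LSE's LDADD and LDCLR can stand in for the missing load-sub and load-and forms. A scalar model:

#include <cstdint>

constexpr uint32_t sub_via_add(uint32_t X, uint32_t Y) { return X + (0u - Y); } // x - y == x + (-y)
constexpr uint32_t and_via_clr(uint32_t X, uint32_t Y) { return X & ~(~Y); }    // x & y == clear the ~y bits
static_assert(sub_via_add(10u, 3u) == 7u, "atomic sub as LDADD of the negation");
static_assert(and_via_clr(0xF0Fu, 0x0FFu) == 0x00Fu, "atomic and as LDCLR of the complement");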
11204
11205SDValue AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(
11206 SDValue Op, SDValue Chain, SDValue &Size, SelectionDAG &DAG) const {
11207 SDLoc dl(Op);
11208 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11209 SDValue Callee = DAG.getTargetExternalSymbol("__chkstk", PtrVT, 0);
11210
11211 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
11212 const uint32_t *Mask = TRI->getWindowsStackProbePreservedMask();
11213 if (Subtarget->hasCustomCallingConv())
11214 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
11215
11216 Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
11217 DAG.getConstant(4, dl, MVT::i64));
11218 Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
11219 Chain =
11220 DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
11221 Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
11222 DAG.getRegisterMask(Mask), Chain.getValue(1));
11223 // To match the actual intent better, we should read the output from X15 here
11224 // again (instead of potentially spilling it to the stack), but rereading Size
11225 // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
11226 // here.
11227
11228 Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
11229 DAG.getConstant(4, dl, MVT::i64));
11230 return Chain;
11231}
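Editorial sketch (not part of AArch64ISelLowering.cpp): the SRL/SHL pair above passes the allocation size to __chkstk in 16-byte units via X15 and then rebuilds the byte count. The rebuild clears the low four bits, so the size is assumed to already be a multiple of 16. A scalar model of that round trip:

#include <cstdint>

constexpr uint64_t chkstkUnits(uint64_t Bytes) { return Bytes >> 4; }               // bytes -> 16-byte units
constexpr uint64_t rebuiltBytes(uint64_t Bytes) { return chkstkUnits(Bytes) << 4; } // units -> bytes
static_assert(chkstkUnits(4096) == 256, "4 KiB is 256 units of 16 bytes");
static_assert(rebuiltBytes(4096) == 4096, "round trip is lossless for 16-byte multiples");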
11232
11233SDValue
11234AArch64TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
11235 SelectionDAG &DAG) const {
11236 assert(Subtarget->isTargetWindows() &&
11237        "Only Windows alloca probing supported");
11238 SDLoc dl(Op);
11239 // Get the inputs.
11240 SDNode *Node = Op.getNode();
11241 SDValue Chain = Op.getOperand(0);
11242 SDValue Size = Op.getOperand(1);
11243 MaybeAlign Align =
11244 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
11245 EVT VT = Node->getValueType(0);
11246
11247 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
11248 "no-stack-arg-probe")) {
11249 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11250 Chain = SP.getValue(1);
11251 SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11252 if (Align)
11253 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11254 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11255 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11256 SDValue Ops[2] = {SP, Chain};
11257 return DAG.getMergeValues(Ops, dl);
11258 }
11259
11260 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
11261
11262 Chain = LowerWindowsDYNAMIC_STACKALLOC(Op, Chain, Size, DAG);
11263
11264 SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
11265 Chain = SP.getValue(1);
11266 SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
11267 if (Align)
11268 SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
11269 DAG.getConstant(-(uint64_t)Align->value(), dl, VT));
11270 Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
11271
11272 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
11273 DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);
11274
11275 SDValue Ops[2] = {SP, Chain};
11276 return DAG.getMergeValues(Ops, dl);
11277}
11278
11279SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op,
11280 SelectionDAG &DAG) const {
11281 EVT VT = Op.getValueType();
11282 assert(VT != MVT::i64 && "Expected illegal VSCALE node");
11283
11284 SDLoc DL(Op);
11285 APInt MulImm = cast<ConstantSDNode>(Op.getOperand(0))->getAPIntValue();
11286 return DAG.getZExtOrTrunc(DAG.getVScale(DL, MVT::i64, MulImm.sextOrSelf(64)),
11287 DL, VT);
11288}
11289
11290/// Set the IntrinsicInfo for the `aarch64_sve_st<N>` intrinsics.
11291template <unsigned NumVecs>
11292static bool
11293setInfoSVEStN(const AArch64TargetLowering &TLI, const DataLayout &DL,
11294 AArch64TargetLowering::IntrinsicInfo &Info, const CallInst &CI) {
11295 Info.opc = ISD::INTRINSIC_VOID;
11296 // Retrieve EC from first vector argument.
11297 const EVT VT = TLI.getMemValueType(DL, CI.getArgOperand(0)->getType());
11298 ElementCount EC = VT.getVectorElementCount();
11299#ifndef NDEBUG
11300 // Check the assumption that all input vectors are the same type.
11301 for (unsigned I = 0; I < NumVecs; ++I)
11302 assert(VT == TLI.getMemValueType(DL, CI.getArgOperand(I)->getType()) &&
11303        "Invalid type.");
11304#endif
11305 // memVT is `NumVecs * VT`.
11306 Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(),
11307 EC * NumVecs);
11308 Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1);
11309 Info.offset = 0;
11310 Info.align.reset();
11311 Info.flags = MachineMemOperand::MOStore;
11312 return true;
11313}
11314
11315/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
11316/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
11317/// specified in the intrinsic calls.
11318bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
11319 const CallInst &I,
11320 MachineFunction &MF,
11321 unsigned Intrinsic) const {
11322 auto &DL = I.getModule()->getDataLayout();
11323 switch (Intrinsic) {
11324 case Intrinsic::aarch64_sve_st2:
11325 return setInfoSVEStN<2>(*this, DL, Info, I);
11326 case Intrinsic::aarch64_sve_st3:
11327 return setInfoSVEStN<3>(*this, DL, Info, I);
11328 case Intrinsic::aarch64_sve_st4:
11329 return setInfoSVEStN<4>(*this, DL, Info, I);
11330 case Intrinsic::aarch64_neon_ld2:
11331 case Intrinsic::aarch64_neon_ld3:
11332 case Intrinsic::aarch64_neon_ld4:
11333 case Intrinsic::aarch64_neon_ld1x2:
11334 case Intrinsic::aarch64_neon_ld1x3:
11335 case Intrinsic::aarch64_neon_ld1x4:
11336 case Intrinsic::aarch64_neon_ld2lane:
11337 case Intrinsic::aarch64_neon_ld3lane:
11338 case Intrinsic::aarch64_neon_ld4lane:
11339 case Intrinsic::aarch64_neon_ld2r:
11340 case Intrinsic::aarch64_neon_ld3r:
11341 case Intrinsic::aarch64_neon_ld4r: {
11342 Info.opc = ISD::INTRINSIC_W_CHAIN;
11343 // Conservatively set memVT to the entire set of vectors loaded.
11344 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
11345 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11346 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
11347 Info.offset = 0;
11348 Info.align.reset();
11349 // volatile loads with NEON intrinsics not supported
11350 Info.flags = MachineMemOperand::MOLoad;
11351 return true;
11352 }
11353 case Intrinsic::aarch64_neon_st2:
11354 case Intrinsic::aarch64_neon_st3:
11355 case Intrinsic::aarch64_neon_st4:
11356 case Intrinsic::aarch64_neon_st1x2:
11357 case Intrinsic::aarch64_neon_st1x3:
11358 case Intrinsic::aarch64_neon_st1x4:
11359 case Intrinsic::aarch64_neon_st2lane:
11360 case Intrinsic::aarch64_neon_st3lane:
11361 case Intrinsic::aarch64_neon_st4lane: {
11362 Info.opc = ISD::INTRINSIC_VOID;
11363 // Conservatively set memVT to the entire set of vectors stored.
11364 unsigned NumElts = 0;
11365 for (unsigned ArgI = 0, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
11366 Type *ArgTy = I.getArgOperand(ArgI)->getType();
11367 if (!ArgTy->isVectorTy())
11368 break;
11369 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
11370 }
11371 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
11372 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
11373 Info.offset = 0;
11374 Info.align.reset();
11375 // volatile stores with NEON intrinsics not supported
11376 Info.flags = MachineMemOperand::MOStore;
11377 return true;
11378 }
11379 case Intrinsic::aarch64_ldaxr:
11380 case Intrinsic::aarch64_ldxr: {
11381 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
11382 Info.opc = ISD::INTRINSIC_W_CHAIN;
11383 Info.memVT = MVT::getVT(PtrTy->getElementType());
11384 Info.ptrVal = I.getArgOperand(0);
11385 Info.offset = 0;
11386 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11387 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11388 return true;
11389 }
11390 case Intrinsic::aarch64_stlxr:
11391 case Intrinsic::aarch64_stxr: {
11392 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11393 Info.opc = ISD::INTRINSIC_W_CHAIN;
11394 Info.memVT = MVT::getVT(PtrTy->getElementType());
11395 Info.ptrVal = I.getArgOperand(1);
11396 Info.offset = 0;
11397 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11398 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11399 return true;
11400 }
11401 case Intrinsic::aarch64_ldaxp:
11402 case Intrinsic::aarch64_ldxp:
11403 Info.opc = ISD::INTRINSIC_W_CHAIN;
11404 Info.memVT = MVT::i128;
11405 Info.ptrVal = I.getArgOperand(0);
11406 Info.offset = 0;
11407 Info.align = Align(16);
11408 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
11409 return true;
11410 case Intrinsic::aarch64_stlxp:
11411 case Intrinsic::aarch64_stxp:
11412 Info.opc = ISD::INTRINSIC_W_CHAIN;
11413 Info.memVT = MVT::i128;
11414 Info.ptrVal = I.getArgOperand(2);
11415 Info.offset = 0;
11416 Info.align = Align(16);
11417 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
11418 return true;
11419 case Intrinsic::aarch64_sve_ldnt1: {
11420 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
11421 Info.opc = ISD::INTRINSIC_W_CHAIN;
11422 Info.memVT = MVT::getVT(I.getType());
11423 Info.ptrVal = I.getArgOperand(1);
11424 Info.offset = 0;
11425 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11426 Info.flags = MachineMemOperand::MOLoad;
11427 if (Intrinsic == Intrinsic::aarch64_sve_ldnt1)
11428 Info.flags |= MachineMemOperand::MONonTemporal;
11429 return true;
11430 }
11431 case Intrinsic::aarch64_sve_stnt1: {
11432 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(2)->getType());
11433 Info.opc = ISD::INTRINSIC_W_CHAIN;
11434 Info.memVT = MVT::getVT(I.getOperand(0)->getType());
11435 Info.ptrVal = I.getArgOperand(2);
11436 Info.offset = 0;
11437 Info.align = DL.getABITypeAlign(PtrTy->getElementType());
11438 Info.flags = MachineMemOperand::MOStore;
11439 if (Intrinsic == Intrinsic::aarch64_sve_stnt1)
11440 Info.flags |= MachineMemOperand::MONonTemporal;
11441 return true;
11442 }
11443 default:
11444 break;
11445 }
11446
11447 return false;
11448}
11449
11450bool AArch64TargetLowering::shouldReduceLoadWidth(SDNode *Load,
11451 ISD::LoadExtType ExtTy,
11452 EVT NewVT) const {
11453 // TODO: This may be worth removing. Check regression tests for diffs.
11454 if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
11455 return false;
11456
11457 // If we're reducing the load width in order to avoid having to use an extra
11458 // instruction to do extension then it's probably a good idea.
11459 if (ExtTy != ISD::NON_EXTLOAD)
11460 return true;
11461 // Don't reduce load width if it would prevent us from combining a shift into
11462 // the offset.
11463 MemSDNode *Mem = dyn_cast<MemSDNode>(Load);
11464 assert(Mem);
11465 const SDValue &Base = Mem->getBasePtr();
11466 if (Base.getOpcode() == ISD::ADD &&
11467 Base.getOperand(1).getOpcode() == ISD::SHL &&
11468 Base.getOperand(1).hasOneUse() &&
11469 Base.getOperand(1).getOperand(1).getOpcode() == ISD::Constant) {
11470 // The shift can be combined if it matches the size of the value being
11471 // loaded (and so reducing the width would make it not match).
11472 uint64_t ShiftAmount = Base.getOperand(1).getConstantOperandVal(1);
11473 uint64_t LoadBytes = Mem->getMemoryVT().getSizeInBits()/8;
11474 if (ShiftAmount == Log2_32(LoadBytes))
11475 return false;
11476 }
11477 // We have no reason to disallow reducing the load width, so allow it.
11478 return true;
11479}
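Editorial sketch (not part of AArch64ISelLowering.cpp): the ShiftAmount == Log2_32(LoadBytes) test above asks whether the index shift matches the access size, since only then can it be folded into the scaled register addressing mode. An equivalent standalone check, with illustrative names:

#include <cstdint>

constexpr bool shiftFoldsIntoAddressing(uint64_t ShiftAmount, uint64_t LoadBytes) {
  // Equivalent to ShiftAmount == Log2_32(LoadBytes) for power-of-two sizes.
  return (1ull << ShiftAmount) == LoadBytes;
}
static_assert(shiftFoldsIntoAddressing(3, 8), "e.g. ldr x0, [x1, x2, lsl #3]");
static_assert(!shiftFoldsIntoAddressing(3, 4), "narrowing the load to 4 bytes breaks the fold");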
11480
11481// Truncations from 64-bit GPR to 32-bit GPR is free.
11482bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
11483 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11484 return false;
11485 uint64_t NumBits1 = Ty1->getPrimitiveSizeInBits().getFixedSize();
11486 uint64_t NumBits2 = Ty2->getPrimitiveSizeInBits().getFixedSize();
11487 return NumBits1 > NumBits2;
11488}
11489bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
11490 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11491 return false;
11492 uint64_t NumBits1 = VT1.getFixedSizeInBits();
11493 uint64_t NumBits2 = VT2.getFixedSizeInBits();
11494 return NumBits1 > NumBits2;
11495}
11496
11497/// Check if it is profitable to hoist instruction in then/else to if.
11498 /// Not profitable if I and its user can form an FMA instruction
11499/// because we prefer FMSUB/FMADD.
11500bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
11501 if (I->getOpcode() != Instruction::FMul)
11502 return true;
11503
11504 if (!I->hasOneUse())
11505 return true;
11506
11507 Instruction *User = I->user_back();
11508
11509 if (User &&
11510 !(User->getOpcode() == Instruction::FSub ||
11511 User->getOpcode() == Instruction::FAdd))
11512 return true;
11513
11514 const TargetOptions &Options = getTargetMachine().Options;
11515 const Function *F = I->getFunction();
11516 const DataLayout &DL = F->getParent()->getDataLayout();
11517 Type *Ty = User->getOperand(0)->getType();
11518
11519 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
11520 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
11521 (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11522 Options.UnsafeFPMath));
11523}
11524
11525// All 32-bit GPR operations implicitly zero the high-half of the corresponding
11526// 64-bit GPR.
11527bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
11528 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
11529 return false;
11530 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
11531 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
11532 return NumBits1 == 32 && NumBits2 == 64;
11533}
11534bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
11535 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
11536 return false;
11537 unsigned NumBits1 = VT1.getSizeInBits();
11538 unsigned NumBits2 = VT2.getSizeInBits();
11539 return NumBits1 == 32 && NumBits2 == 64;
11540}
11541
11542bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
11543 EVT VT1 = Val.getValueType();
11544 if (isZExtFree(VT1, VT2)) {
11545 return true;
11546 }
11547
11548 if (Val.getOpcode() != ISD::LOAD)
11549 return false;
11550
11551 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
11552 return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
11553 VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
11554 VT1.getSizeInBits() <= 32);
11555}
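Editorial illustration (not part of AArch64ISelLowering.cpp): these hooks return true because a write to a 32-bit W register clears bits [63:32] of the corresponding X register, and the 8/16/32-bit loads zero-extend as well, so a widening cast like the one below typically costs no extra instruction.

#include <cstdint>

uint64_t widenAfter32BitOp(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;              // 32-bit GPR operation
  return static_cast<uint64_t>(Sum); // zero-extension expected to be free
}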
11556
11557bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
11558 if (isa<FPExtInst>(Ext))
11559 return false;
11560
11561 // Vector types are not free.
11562 if (Ext->getType()->isVectorTy())
11563 return false;
11564
11565 for (const Use &U : Ext->uses()) {
11566 // The extension is free if we can fold it with a left shift in an
11567 // addressing mode or an arithmetic operation: add, sub, and cmp.
11568
11569 // Is there a shift?
11570 const Instruction *Instr = cast<Instruction>(U.getUser());
11571
11572 // Is this a constant shift?
11573 switch (Instr->getOpcode()) {
11574 case Instruction::Shl:
11575 if (!isa<ConstantInt>(Instr->getOperand(1)))
11576 return false;
11577 break;
11578 case Instruction::GetElementPtr: {
11579 gep_type_iterator GTI = gep_type_begin(Instr);
11580 auto &DL = Ext->getModule()->getDataLayout();
11581 std::advance(GTI, U.getOperandNo()-1);
11582 Type *IdxTy = GTI.getIndexedType();
11583 // This extension will end up with a shift because of the scaling factor.
11584 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
11585 // Get the shift amount based on the scaling factor:
11586 // log2(sizeof(IdxTy)) - log2(8).
11587 uint64_t ShiftAmt =
11588 countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy).getFixedSize()) - 3;
11589 // Is the constant foldable in the shift of the addressing mode?
11590 // I.e., shift amount is between 1 and 4 inclusive.
11591 if (ShiftAmt == 0 || ShiftAmt > 4)
11592 return false;
11593 break;
11594 }
11595 case Instruction::Trunc:
11596 // Check if this is a noop.
11597 // trunc(sext ty1 to ty2) to ty1.
11598 if (Instr->getType() == Ext->getOperand(0)->getType())
11599 continue;
11600 LLVM_FALLTHROUGH;
11601 default:
11602 return false;
11603 }
11604
11605 // At this point we can use the bfm family, so this extension is free
11606 // for that use.
11607 }
11608 return true;
11609}
11610
11611/// Check if both Op1 and Op2 are shufflevector extracts of either the lower
11612/// or upper half of the vector elements.
11613static bool areExtractShuffleVectors(Value *Op1, Value *Op2) {
11614 auto areTypesHalfed = [](Value *FullV, Value *HalfV) {
11615 auto *FullTy = FullV->getType();
11616 auto *HalfTy = HalfV->getType();
11617 return FullTy->getPrimitiveSizeInBits().getFixedSize() ==
11618 2 * HalfTy->getPrimitiveSizeInBits().getFixedSize();
11619 };
11620
11621 auto extractHalf = [](Value *FullV, Value *HalfV) {
11622 auto *FullVT = cast<FixedVectorType>(FullV->getType());
11623 auto *HalfVT = cast<FixedVectorType>(HalfV->getType());
11624 return FullVT->getNumElements() == 2 * HalfVT->getNumElements();
11625 };
11626
11627 ArrayRef<int> M1, M2;
11628 Value *S1Op1, *S2Op1;
11629 if (!match(Op1, m_Shuffle(m_Value(S1Op1), m_Undef(), m_Mask(M1))) ||
11630 !match(Op2, m_Shuffle(m_Value(S2Op1), m_Undef(), m_Mask(M2))))
11631 return false;
11632
11633 // Check that the operands are half as wide as the result and we extract
11634 // half of the elements of the input vectors.
11635 if (!areTypesHalfed(S1Op1, Op1) || !areTypesHalfed(S2Op1, Op2) ||
11636 !extractHalf(S1Op1, Op1) || !extractHalf(S2Op1, Op2))
11637 return false;
11638
11639 // Check the mask extracts either the lower or upper half of vector
11640 // elements.
11641 int M1Start = -1;
11642 int M2Start = -1;
11643 int NumElements = cast<FixedVectorType>(Op1->getType())->getNumElements() * 2;
11644 if (!ShuffleVectorInst::isExtractSubvectorMask(M1, NumElements, M1Start) ||
11645 !ShuffleVectorInst::isExtractSubvectorMask(M2, NumElements, M2Start) ||
11646 M1Start != M2Start || (M1Start != 0 && M2Start != (NumElements / 2)))
11647 return false;
11648
11649 return true;
11650}
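Editorial sketch (not part of AArch64ISelLowering.cpp): for an <8 x T> input, the masks accepted above are exactly the low-half and high-half extracts, and both shuffles must start at the same offset. Illustrative values:

#include <array>

constexpr std::array<int, 4> LowHalfMask  = {0, 1, 2, 3};  // M1Start == 0
constexpr std::array<int, 4> HighHalfMask = {4, 5, 6, 7};  // M1Start == NumElements / 2
static_assert(LowHalfMask[0] == 0 && HighHalfMask[0] == 8 / 2,
              "extract either the lower or the upper half");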
11651
11652/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
11653/// of the vector elements.
11654static bool areExtractExts(Value *Ext1, Value *Ext2) {
11655 auto areExtDoubled = [](Instruction *Ext) {
11656 return Ext->getType()->getScalarSizeInBits() ==
11657 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
11658 };
11659
11660 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
11661 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
11662 !areExtDoubled(cast<Instruction>(Ext1)) ||
11663 !areExtDoubled(cast<Instruction>(Ext2)))
11664 return false;
11665
11666 return true;
11667}
11668
11669/// Check if Op could be used with vmull_high_p64 intrinsic.
11670static bool isOperandOfVmullHighP64(Value *Op) {
11671 Value *VectorOperand = nullptr;
11672 ConstantInt *ElementIndex = nullptr;
11673 return match(Op, m_ExtractElt(m_Value(VectorOperand),
11674 m_ConstantInt(ElementIndex))) &&
11675 ElementIndex->getValue() == 1 &&
11676 isa<FixedVectorType>(VectorOperand->getType()) &&
11677 cast<FixedVectorType>(VectorOperand->getType())->getNumElements() == 2;
11678}
11679
11680/// Check if Op1 and Op2 could be used with vmull_high_p64 intrinsic.
11681static bool areOperandsOfVmullHighP64(Value *Op1, Value *Op2) {
11682 return isOperandOfVmullHighP64(Op1) && isOperandOfVmullHighP64(Op2);
11683}
11684
11685/// Check if sinking \p I's operands to I's basic block is profitable, because
11686/// the operands can be folded into a target instruction, e.g.
11687/// shufflevectors extracts and/or sext/zext can be folded into (u,s)subl(2).
11688bool AArch64TargetLowering::shouldSinkOperands(
11689 Instruction *I, SmallVectorImpl<Use *> &Ops) const {
11690 if (!I->getType()->isVectorTy())
11691 return false;
11692
11693 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
11694 switch (II->getIntrinsicID()) {
11695 case Intrinsic::aarch64_neon_umull:
11696 if (!areExtractShuffleVectors(II->getOperand(0), II->getOperand(1)))
11697 return false;
11698 Ops.push_back(&II->getOperandUse(0));
11699 Ops.push_back(&II->getOperandUse(1));
11700 return true;
11701
11702 case Intrinsic::aarch64_neon_pmull64:
11703 if (!areOperandsOfVmullHighP64(II->getArgOperand(0),
11704 II->getArgOperand(1)))
11705 return false;
11706 Ops.push_back(&II->getArgOperandUse(0));
11707 Ops.push_back(&II->getArgOperandUse(1));
11708 return true;
11709
11710 default:
11711 return false;
11712 }
11713 }
11714
11715 switch (I->getOpcode()) {
11716 case Instruction::Sub:
11717 case Instruction::Add: {
11718 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
11719 return false;
11720
11721 // If the exts' operands extract either the lower or upper elements, we
11722 // can sink them too.
11723 auto Ext1 = cast<Instruction>(I->getOperand(0));
11724 auto Ext2 = cast<Instruction>(I->getOperand(1));
11725 if (areExtractShuffleVectors(Ext1->getOperand(0), Ext2->getOperand(0))) {
11726 Ops.push_back(&Ext1->getOperandUse(0));
11727 Ops.push_back(&Ext2->getOperandUse(0));
11728 }
11729
11730 Ops.push_back(&I->getOperandUse(0));
11731 Ops.push_back(&I->getOperandUse(1));
11732
11733 return true;
11734 }
11735 case Instruction::Mul: {
11736 bool IsProfitable = false;
11737 for (auto &Op : I->operands()) {
11738 // Make sure we are not already sinking this operand
11739 if (any_of(Ops, [&](Use *U) { return U->get() == Op; }))
11740 continue;
11741
11742 ShuffleVectorInst *Shuffle = dyn_cast<ShuffleVectorInst>(Op);
11743 if (!Shuffle || !Shuffle->isZeroEltSplat())
11744 continue;
11745
11746 Value *ShuffleOperand = Shuffle->getOperand(0);
11747 InsertElementInst *Insert = dyn_cast<InsertElementInst>(ShuffleOperand);
11748 if (!Insert)
11749 continue;
11750
11751 Instruction *OperandInstr = dyn_cast<Instruction>(Insert->getOperand(1));
11752 if (!OperandInstr)
11753 continue;
11754
11755 ConstantInt *ElementConstant =
11756 dyn_cast<ConstantInt>(Insert->getOperand(2));
11757 // Check that the insertelement is inserting into element 0
11758 if (!ElementConstant || ElementConstant->getZExtValue() != 0)
11759 continue;
11760
11761 unsigned Opcode = OperandInstr->getOpcode();
11762 if (Opcode != Instruction::SExt && Opcode != Instruction::ZExt)
11763 continue;
11764
11765 Ops.push_back(&Shuffle->getOperandUse(0));
11766 Ops.push_back(&Op);
11767 IsProfitable = true;
11768 }
11769
11770 return IsProfitable;
11771 }
11772 default:
11773 return false;
11774 }
11775 return false;
11776}
11777
11778bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
11779 Align &RequiredAligment) const {
11780 if (!LoadedType.isSimple() ||
11781 (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
11782 return false;
11783 // Cyclone supports unaligned accesses.
11784 RequiredAligment = Align(1);
11785 unsigned NumBits = LoadedType.getSizeInBits();
11786 return NumBits == 32 || NumBits == 64;
11787}
11788
11789/// A helper function for determining the number of interleaved accesses we
11790/// will generate when lowering accesses of the given type.
11791unsigned
11792AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
11793 const DataLayout &DL) const {
11794 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
11795}
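Editorial sketch (not part of AArch64ISelLowering.cpp): the expression above is a ceiling division by the 128-bit NEON register width, i.e. the number of ldN/stN operations a wide vector type is split into.

#include <cstdint>

constexpr uint64_t numAccesses(uint64_t VecSizeInBits) {
  return (VecSizeInBits + 127) / 128;
}
static_assert(numAccesses(64) == 1, "a 64-bit vector needs one access");
static_assert(numAccesses(128) == 1, "so does a full 128-bit vector");
static_assert(numAccesses(256) == 2, "a 256-bit vector is split into two");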
11796
11797MachineMemOperand::Flags
11798AArch64TargetLowering::getTargetMMOFlags(const Instruction &I) const {
11799 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
11800 I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
11801 return MOStridedAccess;
11802 return MachineMemOperand::MONone;
11803}
11804
11805bool AArch64TargetLowering::isLegalInterleavedAccessType(
11806 VectorType *VecTy, const DataLayout &DL) const {
11807
11808 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
11809 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
11810
11811 // Ensure the number of vector elements is greater than 1.
11812 if (cast<FixedVectorType>(VecTy)->getNumElements() < 2)
11813 return false;
11814
11815 // Ensure the element type is legal.
11816 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
11817 return false;
11818
11819 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
11820 // 128 will be split into multiple interleaved accesses.
11821 return VecSize == 64 || VecSize % 128 == 0;
11822}
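Editorial sketch (not part of AArch64ISelLowering.cpp): the same rule restated as a standalone predicate over element count and element size, assuming fixed-width vectors; names are illustrative.

constexpr bool interleavedTypeLegal(unsigned NumElts, unsigned ElSizeInBits) {
  const unsigned VecSize = NumElts * ElSizeInBits;
  if (NumElts < 2)
    return false;
  if (ElSizeInBits != 8 && ElSizeInBits != 16 && ElSizeInBits != 32 &&
      ElSizeInBits != 64)
    return false;
  return VecSize == 64 || VecSize % 128 == 0;
}
static_assert(interleavedTypeLegal(4, 32), "v4i32 is legal");
static_assert(!interleavedTypeLegal(3, 32), "96 bits is neither 64 nor a multiple of 128");
static_assert(!interleavedTypeLegal(1, 64), "a single element is not interleaved");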
11823
11824/// Lower an interleaved load into a ldN intrinsic.
11825///
11826/// E.g. Lower an interleaved load (Factor = 2):
11827/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
11828/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
11829/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
11830///
11831/// Into:
11832/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
11833/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
11834/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
11835bool AArch64TargetLowering::lowerInterleavedLoad(
11836 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
11837 ArrayRef<unsigned> Indices, unsigned Factor) const {
11838 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11839        "Invalid interleave factor");
11840 assert(!Shuffles.empty() && "Empty shufflevector input");
11841 assert(Shuffles.size() == Indices.size() &&
11842        "Unmatched number of shufflevectors and indices");
11843
11844 const DataLayout &DL = LI->getModule()->getDataLayout();
11845
11846 VectorType *VTy = Shuffles[0]->getType();
11847
11848 // Skip if we do not have NEON and skip illegal vector types. We can
11849 // "legalize" wide vector types into multiple interleaved accesses as long as
11850 // the vector types are divisible by 128.
11851 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VTy, DL))
11852 return false;
11853
11854 unsigned NumLoads = getNumInterleavedAccesses(VTy, DL);
11855
11856 auto *FVTy = cast<FixedVectorType>(VTy);
11857
11858 // A pointer vector can not be the return type of the ldN intrinsics. Need to
11859 // load integer vectors first and then convert to pointer vectors.
11860 Type *EltTy = FVTy->getElementType();
11861 if (EltTy->isPointerTy())
11862 FVTy =
11863 FixedVectorType::get(DL.getIntPtrType(EltTy), FVTy->getNumElements());
11864
11865 IRBuilder<> Builder(LI);
11866
11867 // The base address of the load.
11868 Value *BaseAddr = LI->getPointerOperand();
11869
11870 if (NumLoads > 1) {
11871 // If we're going to generate more than one load, reset the sub-vector type
11872 // to something legal.
11873 FVTy = FixedVectorType::get(FVTy->getElementType(),
11874 FVTy->getNumElements() / NumLoads);
11875
11876 // We will compute the pointer operand of each load from the original base
11877 // address using GEPs. Cast the base address to a pointer to the scalar
11878 // element type.
11879 BaseAddr = Builder.CreateBitCast(
11880 BaseAddr,
11881 FVTy->getElementType()->getPointerTo(LI->getPointerAddressSpace()));
11882 }
11883
11884 Type *PtrTy = FVTy->getPointerTo(LI->getPointerAddressSpace());
11885 Type *Tys[2] = {FVTy, PtrTy};
11886 static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
11887 Intrinsic::aarch64_neon_ld3,
11888 Intrinsic::aarch64_neon_ld4};
11889 Function *LdNFunc =
11890 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
11891
11892 // Holds sub-vectors extracted from the load intrinsic return values. The
11893 // sub-vectors are associated with the shufflevector instructions they will
11894 // replace.
11895 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
11896
11897 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
11898
11899 // If we're generating more than one load, compute the base address of
11900 // subsequent loads as an offset from the previous.
11901 if (LoadCount > 0)
11902 BaseAddr = Builder.CreateConstGEP1_32(FVTy->getElementType(), BaseAddr,
11903 FVTy->getNumElements() * Factor);
11904
11905 CallInst *LdN = Builder.CreateCall(
11906 LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
11907
11908 // Extract and store the sub-vectors returned by the load intrinsic.
11909 for (unsigned i = 0; i < Shuffles.size(); i++) {
11910 ShuffleVectorInst *SVI = Shuffles[i];
11911 unsigned Index = Indices[i];
11912
11913 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
11914
11915 // Convert the integer vector to a pointer vector if the element type is a pointer.
11916 if (EltTy->isPointerTy())
11917 SubVec = Builder.CreateIntToPtr(
11918 SubVec, FixedVectorType::get(SVI->getType()->getElementType(),
11919 FVTy->getNumElements()));
11920 SubVecs[SVI].push_back(SubVec);
11921 }
11922 }
11923
11924 // Replace uses of the shufflevector instructions with the sub-vectors
11925 // returned by the load intrinsic. If a shufflevector instruction is
11926 // associated with more than one sub-vector, those sub-vectors will be
11927 // concatenated into a single wide vector.
11928 for (ShuffleVectorInst *SVI : Shuffles) {
11929 auto &SubVec = SubVecs[SVI];
11930 auto *WideVec =
11931 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
11932 SVI->replaceAllUsesWith(WideVec);
11933 }
11934
11935 return true;
11936}
11937
11938/// Lower an interleaved store into a stN intrinsic.
11939///
11940/// E.g. Lower an interleaved store (Factor = 3):
11941/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
11942/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
11943/// store <12 x i32> %i.vec, <12 x i32>* %ptr
11944///
11945/// Into:
11946/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
11947/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
11948/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
11949/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11950///
11951/// Note that the new shufflevectors will be removed and we'll only generate one
11952/// st3 instruction in CodeGen.
11953///
11954/// Example for a more general valid mask (Factor 3). Lower:
11955/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
11956/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
11957/// store <12 x i32> %i.vec, <12 x i32>* %ptr
11958///
11959/// Into:
11960/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
11961/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
11962/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
11963/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
11964bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
11965 ShuffleVectorInst *SVI,
11966 unsigned Factor) const {
11967 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
11968        "Invalid interleave factor");
11969
11970 auto *VecTy = cast<FixedVectorType>(SVI->getType());
11971 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
11972
11973 unsigned LaneLen = VecTy->getNumElements() / Factor;
11974 Type *EltTy = VecTy->getElementType();
11975 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
11976
11977 const DataLayout &DL = SI->getModule()->getDataLayout();
11978
11979 // Skip if we do not have NEON and skip illegal vector types. We can
11980 // "legalize" wide vector types into multiple interleaved accesses as long as
11981 // the vector types are divisible by 128.
11982 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(SubVecTy, DL))
11983 return false;
11984
11985 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
11986
11987 Value *Op0 = SVI->getOperand(0);
11988 Value *Op1 = SVI->getOperand(1);
11989 IRBuilder<> Builder(SI);
11990
11991 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
11992 // vectors to integer vectors.
11993 if (EltTy->isPointerTy()) {
11994 Type *IntTy = DL.getIntPtrType(EltTy);
11995 unsigned NumOpElts =
11996 cast<FixedVectorType>(Op0->getType())->getNumElements();
11997
11998 // Convert to the corresponding integer vector.
11999 auto *IntVecTy = FixedVectorType::get(IntTy, NumOpElts);
12000 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
12001 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
12002
12003 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
12004 }
12005
12006 // The base address of the store.
12007 Value *BaseAddr = SI->getPointerOperand();
12008
12009 if (NumStores > 1) {
12010 // If we're going to generate more than one store, reset the lane length
12011 // and sub-vector type to something legal.
12012 LaneLen /= NumStores;
12013 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
12014
12015 // We will compute the pointer operand of each store from the original base
12016 // address using GEPs. Cast the base address to a pointer to the scalar
12017 // element type.
12018 BaseAddr = Builder.CreateBitCast(
12019 BaseAddr,
12020 SubVecTy->getElementType()->getPointerTo(SI->getPointerAddressSpace()));
12021 }
12022
12023 auto Mask = SVI->getShuffleMask();
12024
12025 Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
12026 Type *Tys[2] = {SubVecTy, PtrTy};
12027 static const Intrinsic::ID StoreInts[3] = {Intrinsic::aarch64_neon_st2,
12028 Intrinsic::aarch64_neon_st3,
12029 Intrinsic::aarch64_neon_st4};
12030 Function *StNFunc =
12031 Intrinsic::getDeclaration(SI->getModule(), StoreInts[Factor - 2], Tys);
12032
12033 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
12034
12035 SmallVector<Value *, 5> Ops;
12036
12037 // Split the shufflevector operands into sub vectors for the new stN call.
12038 for (unsigned i = 0; i < Factor; i++) {
12039 unsigned IdxI = StoreCount * LaneLen * Factor + i;
12040 if (Mask[IdxI] >= 0) {
12041 Ops.push_back(Builder.CreateShuffleVector(
12042 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
12043 } else {
12044 unsigned StartMask = 0;
12045 for (unsigned j = 1; j < LaneLen; j++) {
12046 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
12047 if (Mask[IdxJ * Factor + IdxI] >= 0) {
12048 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
12049 break;
12050 }
12051 }
12052 // Note: Filling undef gaps with arbitrary elements is OK, since
12053 // those elements were being written anyway (with undefs).
12054 // If the mask is all undefs, we default to using elements from 0.
12055 // Note: StartMask cannot be negative; that is checked in
12056 // isReInterleaveMask.
12057 Ops.push_back(Builder.CreateShuffleVector(
12058 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
12059 }
12060 }
12061
12062 // If we're generating more than one store, compute the base address of
12063 // subsequent stores as an offset from the previous.
12064 if (StoreCount > 0)
12065 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
12066 BaseAddr, LaneLen * Factor);
12067
12068 Ops.push_back(Builder.CreateBitCast(BaseAddr, PtrTy));
12069 Builder.CreateCall(StNFunc, Ops);
12070 }
12071 return true;
12072}
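Editorial sketch (not part of AArch64ISelLowering.cpp): the per-operand shuffles built above use consecutive indices starting at Mask[IdxI], mirroring what createSequentialMask produces. For the Factor = 3 example in the comment, the three st3 operands are selected with the masks {0..3}, {4..7} and {8..11}. A small model of that mask construction:

#include <vector>

inline std::vector<int> sequentialMask(int Start, int NumElts) {
  std::vector<int> Mask;
  for (int I = 0; I < NumElts; ++I)
    Mask.push_back(Start + I); // consecutive lane indices, no undef padding
  return Mask;
}
// sequentialMask(0, 4) == {0,1,2,3}; sequentialMask(4, 4) == {4,5,6,7}; ...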
12073
12074// Lower an SVE structured load intrinsic returning a tuple type to target
12075// specific intrinsic taking the same input but returning a multi-result value
12076// of the split tuple type.
12077//
12078// E.g. Lowering an LD3:
12079//
12080// call <vscale x 12 x i32> @llvm.aarch64.sve.ld3.nxv12i32(
12081// <vscale x 4 x i1> %pred,
12082// <vscale x 4 x i32>* %addr)
12083//
12084// Output DAG:
12085//
12086// t0: ch = EntryToken
12087// t2: nxv4i1,ch = CopyFromReg t0, Register:nxv4i1 %0
12088// t4: i64,ch = CopyFromReg t0, Register:i64 %1
12089// t5: nxv4i32,nxv4i32,nxv4i32,ch = AArch64ISD::SVE_LD3 t0, t2, t4
12090// t6: nxv12i32 = concat_vectors t5, t5:1, t5:2
12091//
12092// This is called pre-legalization to avoid widening/splitting issues with
12093// non-power-of-2 tuple types used for LD3, such as nxv12i32.
12094SDValue AArch64TargetLowering::LowerSVEStructLoad(unsigned Intrinsic,
12095 ArrayRef<SDValue> LoadOps,
12096 EVT VT, SelectionDAG &DAG,
12097 const SDLoc &DL) const {
12098 assert(VT.isScalableVector() && "Can only lower scalable vectors");
12099
12100 unsigned N, Opcode;
12101 static std::map<unsigned, std::pair<unsigned, unsigned>> IntrinsicMap = {
12102 {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}},
12103 {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}},
12104 {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}};
12105
12106 std::tie(N, Opcode) = IntrinsicMap[Intrinsic];
12107 assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 &&
12108 "invalid tuple vector type!");
12109
12110 EVT SplitVT =
12111 EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
12112 VT.getVectorElementCount().divideCoefficientBy(N));
12113 assert(isTypeLegal(SplitVT));
12114
12115 SmallVector<EVT, 5> VTs(N, SplitVT);
12116 VTs.push_back(MVT::Other); // Chain
12117 SDVTList NodeTys = DAG.getVTList(VTs);
12118
12119 SDValue PseudoLoad = DAG.getNode(Opcode, DL, NodeTys, LoadOps);
12120 SmallVector<SDValue, 4> PseudoLoadOps;
12121 for (unsigned I = 0; I < N; ++I)
12122 PseudoLoadOps.push_back(SDValue(PseudoLoad.getNode(), I));
12123 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, PseudoLoadOps);
12124}
12125
12126EVT AArch64TargetLowering::getOptimalMemOpType(
12127 const MemOp &Op, const AttributeList &FuncAttributes) const {
12128 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
12129 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
12130 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
12131 // Only use AdvSIMD to implement memsets of 32 bytes and above: below that, it
12132 // would take one instruction to materialize the v2i64 zero plus one store
12133 // (with a restrictive addressing mode), so just do i64 stores.
12134 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
12135 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
12136 if (Op.isAligned(AlignCheck))
12137 return true;
12138 bool Fast;
12139 return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
12140 MachineMemOperand::MONone, &Fast) &&
12141 Fast;
12142 };
12143
12144 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
12145 AlignmentIsAcceptable(MVT::v16i8, Align(16)))
12146 return MVT::v16i8;
12147 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
12148 return MVT::f128;
12149 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12150 return MVT::i64;
12151 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12152 return MVT::i32;
12153 return MVT::Other;
12154}
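// Editor's note (illustrative, not part of the original source): under the
// policy above, a 64-byte memset with 16-byte alignment (and no
// noimplicitfloat attribute) is expanded with MVT::v16i8 stores, while a
// 16-byte memset counts as "small" (size < 32), skips the vector and f128
// cases, and falls through to MVT::i64 as long as 8-byte accesses are
// acceptable.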
12155
12156LLT AArch64TargetLowering::getOptimalMemOpLLT(
12157 const MemOp &Op, const AttributeList &FuncAttributes) const {
12158 bool CanImplicitFloat = !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat);
12159 bool CanUseNEON = Subtarget->hasNEON() && CanImplicitFloat;
12160 bool CanUseFP = Subtarget->hasFPARMv8() && CanImplicitFloat;
12161 // Only use AdvSIMD to implement memsets of 32 bytes and above: below that, it
12162 // would take one instruction to materialize the v2i64 zero plus one store
12163 // (with a restrictive addressing mode), so just do i64 stores.
12164 bool IsSmallMemset = Op.isMemset() && Op.size() < 32;
12165 auto AlignmentIsAcceptable = [&](EVT VT, Align AlignCheck) {
12166 if (Op.isAligned(AlignCheck))
12167 return true;
12168 bool Fast;
12169 return allowsMisalignedMemoryAccesses(VT, 0, Align(1),
12170 MachineMemOperand::MONone, &Fast) &&
12171 Fast;
12172 };
12173
12174 if (CanUseNEON && Op.isMemset() && !IsSmallMemset &&
12175 AlignmentIsAcceptable(MVT::v2i64, Align(16)))
12176 return LLT::fixed_vector(2, 64);
12177 if (CanUseFP && !IsSmallMemset && AlignmentIsAcceptable(MVT::f128, Align(16)))
12178 return LLT::scalar(128);
12179 if (Op.size() >= 8 && AlignmentIsAcceptable(MVT::i64, Align(8)))
12180 return LLT::scalar(64);
12181 if (Op.size() >= 4 && AlignmentIsAcceptable(MVT::i32, Align(4)))
12182 return LLT::scalar(32);
12183 return LLT();
12184}
12185
12186// 12-bit optionally shifted immediates are legal for adds.
12187bool AArch64TargetLowering::isLegalAddImmediate(int64_t Immed) const {
12188 if (Immed == std::numeric_limits<int64_t>::min()) {
12189 LLVM_DEBUG(dbgs() << "Illegal add imm " << Immed
12190 << ": avoid UB for INT64_MIN\n");
12191 return false;
12192 }
12193 // Same encoding for add/sub, just flip the sign.
12194 Immed = std::abs(Immed);
12195 bool IsLegal = ((Immed >> 12) == 0 ||
12196 ((Immed & 0xfff) == 0 && Immed >> 24 == 0));
12197 LLVM_DEBUG(dbgs() << "Is " << Immed
12198 << " legal add imm: " << (IsLegal ? "yes" : "no") << "\n");
12199 return IsLegal;
12200}
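// Editor's note (worked examples, not part of the original source): with the
// checks above, 4095 (0xfff) and 0xfff000 (0xfff << 12) are both legal add
// immediates, whereas 4097 (0x1001) is rejected because it neither fits in
// 12 bits nor is a 12-bit value shifted left by 12.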
12201
12202// Integer comparisons are implemented with ADDS/SUBS, so the range of valid
12203// immediates is the same as for an add or a sub.
12204bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
12205 return isLegalAddImmediate(Immed);
12206}
12207
12208/// isLegalAddressingMode - Return true if the addressing mode represented
12209/// by AM is legal for this target, for a load/store of the specified type.
12210bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
12211 const AddrMode &AM, Type *Ty,
12212 unsigned AS, Instruction *I) const {
12213 // AArch64 has five basic addressing modes:
12214 // reg
12215 // reg + 9-bit signed offset
12216 // reg + SIZE_IN_BYTES * 12-bit unsigned offset
12217 // reg1 + reg2
12218 // reg + SIZE_IN_BYTES * reg
12219
12220 // No global is ever allowed as a base.
12221 if (AM.BaseGV)
12222 return false;
12223
12224 // No reg+reg+imm addressing.
12225 if (AM.HasBaseReg && AM.BaseOffs && AM.Scale)
12226 return false;
12227
12228 // FIXME: Update this method to support scalable addressing modes.
12229 if (isa<ScalableVectorType>(Ty)) {
12230 uint64_t VecElemNumBytes =
12231 DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
12232 return AM.HasBaseReg && !AM.BaseOffs &&
12233 (AM.Scale == 0 || (uint64_t)AM.Scale == VecElemNumBytes);
12234 }
12235
12236 // check reg + imm case:
12237 // i.e., reg + 0, reg + imm9, reg + SIZE_IN_BYTES * uimm12
12238 uint64_t NumBytes = 0;
12239 if (Ty->isSized()) {
12240 uint64_t NumBits = DL.getTypeSizeInBits(Ty);
12241 NumBytes = NumBits / 8;
12242 if (!isPowerOf2_64(NumBits))
12243 NumBytes = 0;
12244 }
12245
12246 if (!AM.Scale) {
12247 int64_t Offset = AM.BaseOffs;
12248
12249 // 9-bit signed offset
12250 if (isInt<9>(Offset))
12251 return true;
12252
12253 // 12-bit unsigned offset
12254 unsigned shift = Log2_64(NumBytes);
12255 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
12256 // Must be a multiple of NumBytes (NumBytes is a power of 2)
12257 (Offset >> shift) << shift == Offset)
12258 return true;
12259 return false;
12260 }
12261
12262 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
12263
12264 return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
12265}
12266
12267bool AArch64TargetLowering::shouldConsiderGEPOffsetSplit() const {
12268 // Consider splitting large offset of struct or array.
12269 return true;
12270}
12271
12272InstructionCost AArch64TargetLowering::getScalingFactorCost(
12273 const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const {
12274 // Scaling factors are not free at all.
12275 // Operands | Rt Latency
12276 // -------------------------------------------
12277 // Rt, [Xn, Xm] | 4
12278 // -------------------------------------------
12279 // Rt, [Xn, Xm, lsl #imm] | Rn: 4 Rm: 5
12280 // Rt, [Xn, Wm, <extend> #imm] |
12281 if (isLegalAddressingMode(DL, AM, Ty, AS))
12282 // Scale represents reg2 * scale, thus account for 1 if
12283 // it is not equal to 0 or 1.
12284 return AM.Scale != 0 && AM.Scale != 1;
12285 return -1;
12286}
12287
12288bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
12289 const MachineFunction &MF, EVT VT) const {
12290 VT = VT.getScalarType();
12291
12292 if (!VT.isSimple())
12293 return false;
12294
12295 switch (VT.getSimpleVT().SimpleTy) {
12296 case MVT::f16:
12297 return Subtarget->hasFullFP16();
12298 case MVT::f32:
12299 case MVT::f64:
12300 return true;
12301 default:
12302 break;
12303 }
12304
12305 return false;
12306}
12307
12308bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
12309 Type *Ty) const {
12310 switch (Ty->getScalarType()->getTypeID()) {
12311 case Type::FloatTyID:
12312 case Type::DoubleTyID:
12313 return true;
12314 default:
12315 return false;
12316 }
12317}
12318
12319bool AArch64TargetLowering::generateFMAsInMachineCombiner(
12320 EVT VT, CodeGenOpt::Level OptLevel) const {
12321 return (OptLevel >= CodeGenOpt::Aggressive) && !VT.isScalableVector();
12322}
12323
12324const MCPhysReg *
12325AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
12326 // LR is a callee-save register, but we must treat it as clobbered by any call
12327 // site. Hence we include LR in the scratch registers, which are in turn added
12328 // as implicit-defs for stackmaps and patchpoints.
12329 static const MCPhysReg ScratchRegs[] = {
12330 AArch64::X16, AArch64::X17, AArch64::LR, 0
12331 };
12332 return ScratchRegs;
12333}
12334
12335bool
12336AArch64TargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
12337 CombineLevel Level) const {
12338 N = N->getOperand(0).getNode();
12339 EVT VT = N->getValueType(0);
12340 // If N is unsigned bit extraction: ((x >> C) & mask), then do not combine
12341 // it with shift to let it be lowered to UBFX.
12342 if (N->getOpcode() == ISD::AND && (VT == MVT::i32 || VT == MVT::i64) &&
12343 isa<ConstantSDNode>(N->getOperand(1))) {
12344 uint64_t TruncMask = N->getConstantOperandVal(1);
12345 if (isMask_64(TruncMask) &&
12346 N->getOperand(0).getOpcode() == ISD::SRL &&
12347 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
12348 return false;
12349 }
12350 return true;
12351}
12352
12353bool AArch64TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
12354 Type *Ty) const {
12355 assert(Ty->isIntegerTy());
12356
12357 unsigned BitSize = Ty->getPrimitiveSizeInBits();
12358 if (BitSize == 0)
12359 return false;
12360
12361 int64_t Val = Imm.getSExtValue();
12362 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, BitSize))
12363 return true;
12364
12365 if ((int64_t)Val < 0)
12366 Val = ~Val;
12367 if (BitSize == 32)
12368 Val &= (1LL << 32) - 1;
12369
12370 unsigned LZ = countLeadingZeros((uint64_t)Val);
12371 unsigned Shift = (63 - LZ) / 16;
12372 // MOVZ is free so return true for one or fewer MOVK.
12373 return Shift < 3;
12374}
12375
12376bool AArch64TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
12377 unsigned Index) const {
12378 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
12379 return false;
12380
12381 return (Index == 0 || Index == ResVT.getVectorNumElements());
12382}
12383
12384/// Turn vector tests of the signbit in the form of:
12385/// xor (sra X, elt_size(X)-1), -1
12386/// into:
12387/// cmge X, X, #0
12388static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
12389 const AArch64Subtarget *Subtarget) {
12390 EVT VT = N->getValueType(0);
12391 if (!Subtarget->hasNEON() || !VT.isVector())
12392 return SDValue();
12393
12394 // There must be a shift right algebraic before the xor, and the xor must be a
12395 // 'not' operation.
12396 SDValue Shift = N->getOperand(0);
12397 SDValue Ones = N->getOperand(1);
12398 if (Shift.getOpcode() != AArch64ISD::VASHR || !Shift.hasOneUse() ||
12399 !ISD::isBuildVectorAllOnes(Ones.getNode()))
12400 return SDValue();
12401
12402 // The shift should be smearing the sign bit across each vector element.
12403 auto *ShiftAmt = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
12404 EVT ShiftEltTy = Shift.getValueType().getVectorElementType();
12405 if (!ShiftAmt || ShiftAmt->getZExtValue() != ShiftEltTy.getSizeInBits() - 1)
12406 return SDValue();
12407
12408 return DAG.getNode(AArch64ISD::CMGEz, SDLoc(N), VT, Shift.getOperand(0));
12409}
12410
12411// Given a vecreduce_add node, detect the below pattern and convert it to the
12412 // node sequence with UABDL, [S|U]ABD and UADDLP.
12413//
12414// i32 vecreduce_add(
12415// v16i32 abs(
12416// v16i32 sub(
12417// v16i32 [sign|zero]_extend(v16i8 a), v16i32 [sign|zero]_extend(v16i8 b))))
12418// =================>
12419// i32 vecreduce_add(
12420// v4i32 UADDLP(
12421// v8i16 add(
12422// v8i16 zext(
12423// v8i8 [S|U]ABD low8:v16i8 a, low8:v16i8 b
12424// v8i16 zext(
12425// v8i8 [S|U]ABD high8:v16i8 a, high8:v16i8 b
12426static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
12427 SelectionDAG &DAG) {
12428 // Assumed i32 vecreduce_add
12429 if (N->getValueType(0) != MVT::i32)
12430 return SDValue();
12431
12432 SDValue VecReduceOp0 = N->getOperand(0);
12433 unsigned Opcode = VecReduceOp0.getOpcode();
12434 // Assumed v16i32 abs
12435 if (Opcode != ISD::ABS || VecReduceOp0->getValueType(0) != MVT::v16i32)
12436 return SDValue();
12437
12438 SDValue ABS = VecReduceOp0;
12439 // Assumed v16i32 sub
12440 if (ABS->getOperand(0)->getOpcode() != ISD::SUB ||
12441 ABS->getOperand(0)->getValueType(0) != MVT::v16i32)
12442 return SDValue();
12443
12444 SDValue SUB = ABS->getOperand(0);
12445 unsigned Opcode0 = SUB->getOperand(0).getOpcode();
12446 unsigned Opcode1 = SUB->getOperand(1).getOpcode();
12447 // Assumed v16i32 type
12448 if (SUB->getOperand(0)->getValueType(0) != MVT::v16i32 ||
12449 SUB->getOperand(1)->getValueType(0) != MVT::v16i32)
12450 return SDValue();
12451
12452 // Assumed zext or sext
12453 bool IsZExt = false;
12454 if (Opcode0 == ISD::ZERO_EXTEND && Opcode1 == ISD::ZERO_EXTEND) {
12455 IsZExt = true;
12456 } else if (Opcode0 == ISD::SIGN_EXTEND && Opcode1 == ISD::SIGN_EXTEND) {
12457 IsZExt = false;
12458 } else
12459 return SDValue();
12460
12461 SDValue EXT0 = SUB->getOperand(0);
12462 SDValue EXT1 = SUB->getOperand(1);
12463 // Assumed zext's operand has v16i8 type
12464 if (EXT0->getOperand(0)->getValueType(0) != MVT::v16i8 ||
12465 EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
12466 return SDValue();
12467
12468 // Pattern is detected. Let's convert it to a sequence of nodes.
12469 SDLoc DL(N);
12470
12471 // First, create the node pattern of UABD/SABD.
12472 SDValue UABDHigh8Op0 =
12473 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12474 DAG.getConstant(8, DL, MVT::i64));
12475 SDValue UABDHigh8Op1 =
12476 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12477 DAG.getConstant(8, DL, MVT::i64));
12478 SDValue UABDHigh8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12479 UABDHigh8Op0, UABDHigh8Op1);
12480 SDValue UABDL = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDHigh8);
12481
12482 // Second, create the node pattern of UABAL.
12483 SDValue UABDLo8Op0 =
12484 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT0->getOperand(0),
12485 DAG.getConstant(0, DL, MVT::i64));
12486 SDValue UABDLo8Op1 =
12487 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, EXT1->getOperand(0),
12488 DAG.getConstant(0, DL, MVT::i64));
12489 SDValue UABDLo8 = DAG.getNode(IsZExt ? ISD::ABDU : ISD::ABDS, DL, MVT::v8i8,
12490 UABDLo8Op0, UABDLo8Op1);
12491 SDValue ZExtUABD = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, UABDLo8);
12492 SDValue UABAL = DAG.getNode(ISD::ADD, DL, MVT::v8i16, UABDL, ZExtUABD);
12493
12494 // Third, create the node of UADDLP.
12495 SDValue UADDLP = DAG.getNode(AArch64ISD::UADDLP, DL, MVT::v4i32, UABAL);
12496
12497 // Fourth, create the node of VECREDUCE_ADD.
12498 return DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, UADDLP);
12499}
12500
12501// Turn a v8i8/v16i8 extended vecreduce into a udot/sdot and vecreduce
12502// vecreduce.add(ext(A)) to vecreduce.add(DOT(zero, A, one))
12503// vecreduce.add(mul(ext(A), ext(B))) to vecreduce.add(DOT(zero, A, B))
12504static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
12505 const AArch64Subtarget *ST) {
12506 if (!ST->hasDotProd())
12507 return performVecReduceAddCombineWithUADDLP(N, DAG);
12508
12509 SDValue Op0 = N->getOperand(0);
12510 if (N->getValueType(0) != MVT::i32 ||
12511 Op0.getValueType().getVectorElementType() != MVT::i32)
12512 return SDValue();
12513
12514 unsigned ExtOpcode = Op0.getOpcode();
12515 SDValue A = Op0;
12516 SDValue B;
12517 if (ExtOpcode == ISD::MUL) {
12518 A = Op0.getOperand(0);
12519 B = Op0.getOperand(1);
12520 if (A.getOpcode() != B.getOpcode() ||
12521 A.getOperand(0).getValueType() != B.getOperand(0).getValueType())
12522 return SDValue();
12523 ExtOpcode = A.getOpcode();
12524 }
12525 if (ExtOpcode != ISD::ZERO_EXTEND && ExtOpcode != ISD::SIGN_EXTEND)
12526 return SDValue();
12527
12528 EVT Op0VT = A.getOperand(0).getValueType();
12529 if (Op0VT != MVT::v8i8 && Op0VT != MVT::v16i8)
12530 return SDValue();
12531
12532 SDLoc DL(Op0);
12533 // For non-mla reductions B can be set to 1. For MLA we take the operand of
12534 // the extend B.
12535 if (!B)
12536 B = DAG.getConstant(1, DL, Op0VT);
12537 else
12538 B = B.getOperand(0);
12539
12540 SDValue Zeros =
12541 DAG.getConstant(0, DL, Op0VT == MVT::v8i8 ? MVT::v2i32 : MVT::v4i32);
12542 auto DotOpcode =
12543 (ExtOpcode == ISD::ZERO_EXTEND) ? AArch64ISD::UDOT : AArch64ISD::SDOT;
12544 SDValue Dot = DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros,
12545 A.getOperand(0), B);
12546 return DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
12547}
12548
12549static SDValue performXorCombine(SDNode *N, SelectionDAG &DAG,
12550 TargetLowering::DAGCombinerInfo &DCI,
12551 const AArch64Subtarget *Subtarget) {
12552 if (DCI.isBeforeLegalizeOps())
12553 return SDValue();
12554
12555 return foldVectorXorShiftIntoCmp(N, DAG, Subtarget);
12556}
12557
12558SDValue
12559AArch64TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
12560 SelectionDAG &DAG,
12561 SmallVectorImpl<SDNode *> &Created) const {
12562 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
12563 if (isIntDivCheap(N->getValueType(0), Attr))
1. Assuming the condition is false
2. Taking false branch
12564 return SDValue(N,0); // Lower SDIV as SDIV
12565
12566 // fold (sdiv X, pow2)
12567 EVT VT = N->getValueType(0);
12568 if ((VT != MVT::i32 && VT != MVT::i64) ||
3. Taking false branch
12569 !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
12570 return SDValue();
12571
12572 SDLoc DL(N);
12573 SDValue N0 = N->getOperand(0);
12574 unsigned Lg2 = Divisor.countTrailingZeros();
4. Calling 'APInt::countTrailingZeros'
20. Returning from 'APInt::countTrailingZeros'
21. 'Lg2' initialized to 64
12575 SDValue Zero = DAG.getConstant(0, DL, VT);
12576 SDValue Pow2MinusOne = DAG.getConstant((1ULL << Lg2) - 1, DL, VT);
22. The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
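// Editor's note on the warning above (not part of the original source):
// APInt::countTrailingZeros() returns the full bit width when the value is
// zero, so on the path modeled by the analyzer Lg2 is 64 and '1ULL << Lg2'
// shifts an unsigned long long by its own width, which is undefined
// behavior in C++. One way to build the same mask without the raw shift (a
// sketch only, not necessarily the upstream fix) is to stay in APInt, where
// setting Lg2 low bits is well defined for any Lg2 up to the bit width:
//   SDValue Pow2MinusOne = DAG.getConstant(
//       APInt::getLowBitsSet(VT.getSizeInBits(), Lg2), DL, VT);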
12577
12578 // Add (N0 < 0) ? Pow2 - 1 : 0;
12579 SDValue CCVal;
12580 SDValue Cmp = getAArch64Cmp(N0, Zero, ISD::SETLT, CCVal, DAG, DL);
12581 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
12582 SDValue CSel = DAG.getNode(AArch64ISD::CSEL, DL, VT, Add, N0, CCVal, Cmp);
12583
12584 Created.push_back(Cmp.getNode());
12585 Created.push_back(Add.getNode());
12586 Created.push_back(CSel.getNode());
12587
12588 // Divide by pow2.
12589 SDValue SRA =
12590 DAG.getNode(ISD::SRA, DL, VT, CSel, DAG.getConstant(Lg2, DL, MVT::i64));
12591
12592 // If we're dividing by a positive value, we're done. Otherwise, we must
12593 // negate the result.
12594 if (Divisor.isNonNegative())
12595 return SRA;
12596
12597 Created.push_back(SRA.getNode());
12598 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
12599}
12600
12601static bool IsSVECntIntrinsic(SDValue S) {
12602 switch(getIntrinsicID(S.getNode())) {
12603 default:
12604 break;
12605 case Intrinsic::aarch64_sve_cntb:
12606 case Intrinsic::aarch64_sve_cnth:
12607 case Intrinsic::aarch64_sve_cntw:
12608 case Intrinsic::aarch64_sve_cntd:
12609 return true;
12610 }
12611 return false;
12612}
12613
12614/// Calculates what the pre-extend type is, based on the extension
12615/// operation node provided by \p Extend.
12616///
12617/// In the case that \p Extend is a SIGN_EXTEND or a ZERO_EXTEND, the
12618/// pre-extend type is pulled directly from the operand, while other extend
12619/// operations need a bit more inspection to get this information.
12620///
12621/// \param Extend The SDNode from the DAG that represents the extend operation
12622/// \param DAG The SelectionDAG hosting the \p Extend node
12623///
12624/// \returns The type representing the \p Extend source type, or \p MVT::Other
12625/// if no valid type can be determined
12626static EVT calculatePreExtendType(SDValue Extend, SelectionDAG &DAG) {
12627 switch (Extend.getOpcode()) {
12628 case ISD::SIGN_EXTEND:
12629 case ISD::ZERO_EXTEND:
12630 return Extend.getOperand(0).getValueType();
12631 case ISD::AssertSext:
12632 case ISD::AssertZext:
12633 case ISD::SIGN_EXTEND_INREG: {
12634 VTSDNode *TypeNode = dyn_cast<VTSDNode>(Extend.getOperand(1));
12635 if (!TypeNode)
12636 return MVT::Other;
12637 return TypeNode->getVT();
12638 }
12639 case ISD::AND: {
12640 ConstantSDNode *Constant =
12641 dyn_cast<ConstantSDNode>(Extend.getOperand(1).getNode());
12642 if (!Constant)
12643 return MVT::Other;
12644
12645 uint32_t Mask = Constant->getZExtValue();
12646
12647 if (Mask == UCHAR_MAX)
12648 return MVT::i8;
12649 else if (Mask == USHRT_MAX)
12650 return MVT::i16;
12651 else if (Mask == UINT_MAX)
12652 return MVT::i32;
12653
12654 return MVT::Other;
12655 }
12656 default:
12657 return MVT::Other;
12658 }
12659
12660 llvm_unreachable("Code path unhandled in calculatePreExtendType!");
12661}
12662
12663/// Combines a dup(sext/zext) node pattern into sext/zext(dup)
12664/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12665static SDValue performCommonVectorExtendCombine(SDValue VectorShuffle,
12666 SelectionDAG &DAG) {
12667
12668 ShuffleVectorSDNode *ShuffleNode =
12669 dyn_cast<ShuffleVectorSDNode>(VectorShuffle.getNode());
12670 if (!ShuffleNode)
12671 return SDValue();
12672
12673 // Ensure the shuffle is a splat of lane 0 before continuing
12674 if (!ShuffleNode->isSplat() || ShuffleNode->getSplatIndex() != 0)
12675 return SDValue();
12676
12677 SDValue InsertVectorElt = VectorShuffle.getOperand(0);
12678
12679 if (InsertVectorElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
12680 return SDValue();
12681
12682 SDValue InsertLane = InsertVectorElt.getOperand(2);
12683 ConstantSDNode *Constant = dyn_cast<ConstantSDNode>(InsertLane.getNode());
12684 // Ensures the insert is inserting into lane 0
12685 if (!Constant || Constant->getZExtValue() != 0)
12686 return SDValue();
12687
12688 SDValue Extend = InsertVectorElt.getOperand(1);
12689 unsigned ExtendOpcode = Extend.getOpcode();
12690
12691 bool IsSExt = ExtendOpcode == ISD::SIGN_EXTEND ||
12692 ExtendOpcode == ISD::SIGN_EXTEND_INREG ||
12693 ExtendOpcode == ISD::AssertSext;
12694 if (!IsSExt && ExtendOpcode != ISD::ZERO_EXTEND &&
12695 ExtendOpcode != ISD::AssertZext && ExtendOpcode != ISD::AND)
12696 return SDValue();
12697
12698 EVT TargetType = VectorShuffle.getValueType();
12699 EVT PreExtendType = calculatePreExtendType(Extend, DAG);
12700
12701 if ((TargetType != MVT::v8i16 && TargetType != MVT::v4i32 &&
12702 TargetType != MVT::v2i64) ||
12703 (PreExtendType == MVT::Other))
12704 return SDValue();
12705
12706 // Restrict valid pre-extend data type
12707 if (PreExtendType != MVT::i8 && PreExtendType != MVT::i16 &&
12708 PreExtendType != MVT::i32)
12709 return SDValue();
12710
12711 EVT PreExtendVT = TargetType.changeVectorElementType(PreExtendType);
12712
12713 if (PreExtendVT.getVectorElementCount() != TargetType.getVectorElementCount())
12714 return SDValue();
12715
12716 if (TargetType.getScalarSizeInBits() != PreExtendVT.getScalarSizeInBits() * 2)
12717 return SDValue();
12718
12719 SDLoc DL(VectorShuffle);
12720
12721 SDValue InsertVectorNode = DAG.getNode(
12722 InsertVectorElt.getOpcode(), DL, PreExtendVT, DAG.getUNDEF(PreExtendVT),
12723 DAG.getAnyExtOrTrunc(Extend.getOperand(0), DL, PreExtendType),
12724 DAG.getConstant(0, DL, MVT::i64));
12725
12726 std::vector<int> ShuffleMask(TargetType.getVectorElementCount().getValue());
12727
12728 SDValue VectorShuffleNode =
12729 DAG.getVectorShuffle(PreExtendVT, DL, InsertVectorNode,
12730 DAG.getUNDEF(PreExtendVT), ShuffleMask);
12731
12732 SDValue ExtendNode = DAG.getNode(IsSExt ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
12733 DL, TargetType, VectorShuffleNode);
12734
12735 return ExtendNode;
12736}
12737
12738/// Combines a mul(dup(sext/zext)) node pattern into mul(sext/zext(dup))
12739/// making use of the vector SExt/ZExt rather than the scalar SExt/ZExt
12740static SDValue performMulVectorExtendCombine(SDNode *Mul, SelectionDAG &DAG) {
12741 // If the value type isn't a vector, none of the operands are going to be dups
12742 if (!Mul->getValueType(0).isVector())
12743 return SDValue();
12744
12745 SDValue Op0 = performCommonVectorExtendCombine(Mul->getOperand(0), DAG);
12746 SDValue Op1 = performCommonVectorExtendCombine(Mul->getOperand(1), DAG);
12747
12748 // Neither operand has been changed; don't make any further changes.
12749 if (!Op0 && !Op1)
12750 return SDValue();
12751
12752 SDLoc DL(Mul);
12753 return DAG.getNode(Mul->getOpcode(), DL, Mul->getValueType(0),
12754 Op0 ? Op0 : Mul->getOperand(0),
12755 Op1 ? Op1 : Mul->getOperand(1));
12756}
12757
12758static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
12759 TargetLowering::DAGCombinerInfo &DCI,
12760 const AArch64Subtarget *Subtarget) {
12761
12762 if (SDValue Ext = performMulVectorExtendCombine(N, DAG))
12763 return Ext;
12764
12765 if (DCI.isBeforeLegalizeOps())
12766 return SDValue();
12767
12768 // The below optimizations require a constant RHS.
12769 if (!isa<ConstantSDNode>(N->getOperand(1)))
12770 return SDValue();
12771
12772 SDValue N0 = N->getOperand(0);
12773 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
12774 const APInt &ConstValue = C->getAPIntValue();
12775
12776 // Allow the scaling to be folded into the `cnt` instruction by preventing
12777 // the scaling from being obscured here. This makes it easier to pattern match.
12778 if (IsSVECntIntrinsic(N0) ||
12779 (N0->getOpcode() == ISD::TRUNCATE &&
12780 (IsSVECntIntrinsic(N0->getOperand(0)))))
12781 if (ConstValue.sge(1) && ConstValue.sle(16))
12782 return SDValue();
12783
12784 // Multiplication of a power of two plus/minus one can be done more
12785 // cheaply as a shift+add/sub. For now, this is true unilaterally. If
12786 // future CPUs have a cheaper MADD instruction, this may need to be
12787 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
12788 // 64-bit is 5 cycles, so this is always a win.
12789 // More aggressively, some multiplications N0 * C can be lowered to
12790 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
12791 // e.g. 6=3*2=(2+1)*2.
12792 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
12793 // which equals (1+2)*16-(1+2).
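// Worked example (editor's illustration, not in the original source): for
// C = 6, TrailingZeroes is 1 and the shifted constant is 3 = 2^1 + 1, so the
// code below builds Res = (add (shl x, 1), x) and then shifts Res left by 1;
// x*6 is thus computed as ((x<<1)+x)<<1 with no MUL.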
12794
12795 // TrailingZeroes is used to test if the mul can be lowered to
12796 // shift+add+shift.
12797 unsigned TrailingZeroes = ConstValue.countTrailingZeros();
12798 if (TrailingZeroes) {
12799 // Conservatively do not lower to shift+add+shift if the mul might be
12800 // folded into smul or umul.
12801 if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
12802 isZeroExtended(N0.getNode(), DAG)))
12803 return SDValue();
12804 // Conservatively do not lower to shift+add+shift if the mul might be
12805 // folded into madd or msub.
12806 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
12807 N->use_begin()->getOpcode() == ISD::SUB))
12808 return SDValue();
12809 }
12810 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
12811 // and shift+add+shift.
12812 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
12813
12814 unsigned ShiftAmt, AddSubOpc;
12815 // Is the shifted value the LHS operand of the add/sub?
12816 bool ShiftValUseIsN0 = true;
12817 // Do we need to negate the result?
12818 bool NegateResult = false;
12819
12820 if (ConstValue.isNonNegative()) {
12821 // (mul x, 2^N + 1) => (add (shl x, N), x)
12822 // (mul x, 2^N - 1) => (sub (shl x, N), x)
12823 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
12824 APInt SCVMinus1 = ShiftedConstValue - 1;
12825 APInt CVPlus1 = ConstValue + 1;
12826 if (SCVMinus1.isPowerOf2()) {
12827 ShiftAmt = SCVMinus1.logBase2();
12828 AddSubOpc = ISD::ADD;
12829 } else if (CVPlus1.isPowerOf2()) {
12830 ShiftAmt = CVPlus1.logBase2();
12831 AddSubOpc = ISD::SUB;
12832 } else
12833 return SDValue();
12834 } else {
12835 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
12836 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
12837 APInt CVNegPlus1 = -ConstValue + 1;
12838 APInt CVNegMinus1 = -ConstValue - 1;
12839 if (CVNegPlus1.isPowerOf2()) {
12840 ShiftAmt = CVNegPlus1.logBase2();
12841 AddSubOpc = ISD::SUB;
12842 ShiftValUseIsN0 = false;
12843 } else if (CVNegMinus1.isPowerOf2()) {
12844 ShiftAmt = CVNegMinus1.logBase2();
12845 AddSubOpc = ISD::ADD;
12846 NegateResult = true;
12847 } else
12848 return SDValue();
12849 }
12850
12851 SDLoc DL(N);
12852 EVT VT = N->getValueType(0);
12853 SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
12854 DAG.getConstant(ShiftAmt, DL, MVT::i64));
12855
12856 SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
12857 SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
12858 SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
12859 assert(!(NegateResult && TrailingZeroes) &&
12860 "NegateResult and TrailingZeroes cannot both be true for now.");
12861 // Negate the result.
12862 if (NegateResult)
12863 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
12864 // Shift the result.
12865 if (TrailingZeroes)
12866 return DAG.getNode(ISD::SHL, DL, VT, Res,
12867 DAG.getConstant(TrailingZeroes, DL, MVT::i64));
12868 return Res;
12869}
12870
12871static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
12872 SelectionDAG &DAG) {
12873 // Take advantage of vector comparisons producing 0 or -1 in each lane to
12874 // optimize away operation when it's from a constant.
12875 //
12876 // The general transformation is:
12877 // UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
12878 // AND(VECTOR_CMP(x,y), constant2)
12879 // constant2 = UNARYOP(constant)
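// For instance (editor's illustration, not in the original source): with
// UNARYOP = sint_to_fp and constant = <4 x i32> <1, 1, 1, 1>, constant2 is
// <4 x float> <1.0, 1.0, 1.0, 1.0>. Because every lane of the compare is
// either 0 or all-ones, AND-ing the compare with the bit pattern of
// constant2 yields 0.0 or 1.0 directly and the per-lane conversion is gone.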
12880
12881 // Early exit if this isn't a vector operation, the operand of the
12882 // unary operation isn't a bitwise AND, or if the sizes of the operations
12883 // aren't the same.
12884 EVT VT = N->getValueType(0);
12885 if (!VT.isVector() || N->getOperand(0)->getOpcode() != ISD::AND ||
12886 N->getOperand(0)->getOperand(0)->getOpcode() != ISD::SETCC ||
12887 VT.getSizeInBits() != N->getOperand(0)->getValueType(0).getSizeInBits())
12888 return SDValue();
12889
12890 // Now check that the other operand of the AND is a constant. We could
12891 // make the transformation for non-constant splats as well, but it's unclear
12892 // that would be a benefit as it would not eliminate any operations, just
12893 // perform one more step in scalar code before moving to the vector unit.
12894 if (BuildVectorSDNode *BV =
12895 dyn_cast<BuildVectorSDNode>(N->getOperand(0)->getOperand(1))) {
12896 // Bail out if the vector isn't a constant.
12897 if (!BV->isConstant())
12898 return SDValue();
12899
12900 // Everything checks out. Build up the new and improved node.
12901 SDLoc DL(N);
12902 EVT IntVT = BV->getValueType(0);
12903 // Create a new constant of the appropriate type for the transformed
12904 // DAG.
12905 SDValue SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));
12906 // The AND node needs bitcasts to/from an integer vector type around it.
12907 SDValue MaskConst = DAG.getNode(ISD::BITCAST, DL, IntVT, SourceConst);
12908 SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT,
12909 N->getOperand(0)->getOperand(0), MaskConst);
12910 SDValue Res = DAG.getNode(ISD::BITCAST, DL, VT, NewAnd);
12911 return Res;
12912 }
12913
12914 return SDValue();
12915}
12916
12917static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
12918 const AArch64Subtarget *Subtarget) {
12919 // First try to optimize away the conversion when it's conditionally from
12920 // a constant. Vectors only.
12921 if (SDValue Res = performVectorCompareAndMaskUnaryOpCombine(N, DAG))
12922 return Res;
12923
12924 EVT VT = N->getValueType(0);
12925 if (VT != MVT::f32 && VT != MVT::f64)
12926 return SDValue();
12927
12928 // Only optimize when the source and destination types have the same width.
12929 if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
12930 return SDValue();
12931
12932 // If the result of an integer load is only used by an integer-to-float
12933 // conversion, use an fp load and an AdvSIMD scalar {S|U}CVTF instead.
12934 // This eliminates an "integer-to-vector-move" UOP and improves throughput.
12935 SDValue N0 = N->getOperand(0);
12936 if (Subtarget->hasNEON() && ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12937 // Do not change the width of a volatile load.
12938 !cast<LoadSDNode>(N0)->isVolatile()) {
12939 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12940 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
12941 LN0->getPointerInfo(), LN0->getAlignment(),
12942 LN0->getMemOperand()->getFlags());
12943
12944 // Make sure successors of the original load stay after it by updating them
12945 // to use the new Chain.
12946 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), Load.getValue(1));
12947
12948 unsigned Opcode =
12949 (N->getOpcode() == ISD::SINT_TO_FP) ? AArch64ISD::SITOF : AArch64ISD::UITOF;
12950 return DAG.getNode(Opcode, SDLoc(N), VT, Load);
12951 }
12952
12953 return SDValue();
12954}
12955
12956/// Fold a floating-point multiply by power of two into floating-point to
12957/// fixed-point conversion.
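/// For example (editor's illustration, not in the original source):
/// fp_to_sint(fmul(<2 x float> %x, <8.0, 8.0>)) can be rewritten as
/// @llvm.aarch64.neon.vcvtfp2fxs(%x, 3), i.e. a single FCVTZS with three
/// fractional bits, since 8.0 == 2^3.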
12958static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
12959 TargetLowering::DAGCombinerInfo &DCI,
12960 const AArch64Subtarget *Subtarget) {
12961 if (!Subtarget->hasNEON())
12962 return SDValue();
12963
12964 if (!N->getValueType(0).isSimple())
12965 return SDValue();
12966
12967 SDValue Op = N->getOperand(0);
12968 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
12969 Op.getOpcode() != ISD::FMUL)
12970 return SDValue();
12971
12972 SDValue ConstVec = Op->getOperand(1);
12973 if (!isa<BuildVectorSDNode>(ConstVec))
12974 return SDValue();
12975
12976 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
12977 uint32_t FloatBits = FloatTy.getSizeInBits();
12978 if (FloatBits != 32 && FloatBits != 64)
12979 return SDValue();
12980
12981 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
12982 uint32_t IntBits = IntTy.getSizeInBits();
12983 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
12984 return SDValue();
12985
12986 // Avoid conversions where iN is larger than the float (e.g., float -> i64).
12987 if (IntBits > FloatBits)
12988 return SDValue();
12989
12990 BitVector UndefElements;
12991 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
12992 int32_t Bits = IntBits == 64 ? 64 : 32;
12993 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, Bits + 1);
12994 if (C == -1 || C == 0 || C > Bits)
12995 return SDValue();
12996
12997 MVT ResTy;
12998 unsigned NumLanes = Op.getValueType().getVectorNumElements();
12999 switch (NumLanes) {
13000 default:
13001 return SDValue();
13002 case 2:
13003 ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
13004 break;
13005 case 4:
13006 ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
13007 break;
13008 }
13009
13010 if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
13011 return SDValue();
13012
13013 assert((ResTy != MVT::v4i64 || DCI.isBeforeLegalizeOps()) &&
13014 "Illegal vector type after legalization");
13015
13016 SDLoc DL(N);
13017 bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
13018 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfp2fxs
13019 : Intrinsic::aarch64_neon_vcvtfp2fxu;
13020 SDValue FixConv =
13021 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResTy,
13022 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32),
13023 Op->getOperand(0), DAG.getConstant(C, DL, MVT::i32));
13024 // We can handle smaller integers by generating an extra trunc.
13025 if (IntBits < FloatBits)
13026 FixConv = DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), FixConv);
13027
13028 return FixConv;
13029}
13030
13031/// Fold a floating-point divide by power of two into fixed-point to
13032/// floating-point conversion.
13033static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
13034 TargetLowering::DAGCombinerInfo &DCI,
13035 const AArch64Subtarget *Subtarget) {
13036 if (!Subtarget->hasNEON())
13037 return SDValue();
13038
13039 SDValue Op = N->getOperand(0);
13040 unsigned Opc = Op->getOpcode();
13041 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
13042 !Op.getOperand(0).getValueType().isSimple() ||
13043 (Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
13044 return SDValue();
13045
13046 SDValue ConstVec = N->getOperand(1);
13047 if (!isa<BuildVectorSDNode>(ConstVec))
13048 return SDValue();
13049
13050 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
13051 int32_t IntBits = IntTy.getSizeInBits();
13052 if (IntBits != 16 && IntBits != 32 && IntBits != 64)
13053 return SDValue();
13054
13055 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
13056 int32_t FloatBits = FloatTy.getSizeInBits();
13057 if (FloatBits != 32 && FloatBits != 64)
13058 return SDValue();
13059
13060 // Avoid conversions where iN is larger than the float (e.g., i64 -> float).
13061 if (IntBits > FloatBits)
13062 return SDValue();
13063
13064 BitVector UndefElements;
13065 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
13066 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, FloatBits + 1);
13067 if (C == -1 || C == 0 || C > FloatBits)
13068 return SDValue();
13069
13070 MVT ResTy;
13071 unsigned NumLanes = Op.getValueType().getVectorNumElements();
13072 switch (NumLanes) {
13073 default:
13074 return SDValue();
13075 case 2:
13076 ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
13077 break;
13078 case 4:
13079 ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
13080 break;
13081 }
13082
13083 if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
13084 return SDValue();
13085
13086 SDLoc DL(N);
13087 SDValue ConvInput = Op.getOperand(0);
13088 bool IsSigned = Opc == ISD::SINT_TO_FP;
13089 if (IntBits < FloatBits)
13090 ConvInput = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
13091 ResTy, ConvInput);
13092
13093 unsigned IntrinsicOpcode = IsSigned ? Intrinsic::aarch64_neon_vcvtfxs2fp
13094 : Intrinsic::aarch64_neon_vcvtfxu2fp;
13095 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(),
13096 DAG.getConstant(IntrinsicOpcode, DL, MVT::i32), ConvInput,
13097 DAG.getConstant(C, DL, MVT::i32));
13098}
13099
13100/// An EXTR instruction is made up of two shifts, ORed together. This helper
13101/// searches for and classifies those shifts.
13102static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
13103 bool &FromHi) {
13104 if (N.getOpcode() == ISD::SHL)
13105 FromHi = false;
13106 else if (N.getOpcode() == ISD::SRL)
13107 FromHi = true;
13108 else
13109 return false;
13110
13111 if (!isa<ConstantSDNode>(N.getOperand(1)))
13112 return false;
13113
13114 ShiftAmount = N->getConstantOperandVal(1);
13115 Src = N->getOperand(0);
13116 return true;
13117}
13118
13119/// EXTR instruction extracts a contiguous chunk of bits from two existing
13120/// registers viewed as a high/low pair. This function looks for the pattern:
13121/// <tt>(or (shl VAL1, \#N), (srl VAL2, \#RegWidth-N))</tt> and replaces it
13122/// with an EXTR. Can't quite be done in TableGen because the two immediates
13123/// aren't independent.
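/// For example (editor's illustration, not in the original source): on i64,
/// (or (shl %a, 16), (srl %b, 48)) becomes EXTR(%a, %b, #48), which reads the
/// 128-bit concatenation %a:%b starting at bit 48 and therefore produces
/// (%a << 16) | (%b >> 48) in a single instruction.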
13124static SDValue tryCombineToEXTR(SDNode *N,
13125 TargetLowering::DAGCombinerInfo &DCI) {
13126 SelectionDAG &DAG = DCI.DAG;
13127 SDLoc DL(N);
13128 EVT VT = N->getValueType(0);
13129
13130 assert(N->getOpcode() == ISD::OR && "Unexpected root");
13131
13132 if (VT != MVT::i32 && VT != MVT::i64)
13133 return SDValue();
13134
13135 SDValue LHS;
13136 uint32_t ShiftLHS = 0;
13137 bool LHSFromHi = false;
13138 if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
13139 return SDValue();
13140
13141 SDValue RHS;
13142 uint32_t ShiftRHS = 0;
13143 bool RHSFromHi = false;
13144 if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
13145 return SDValue();
13146
13147 // If they're both trying to come from the high part of the register, they're
13148 // not really an EXTR.
13149 if (LHSFromHi == RHSFromHi)
13150 return SDValue();
13151
13152 if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
13153 return SDValue();
13154
13155 if (LHSFromHi) {
13156 std::swap(LHS, RHS);
13157 std::swap(ShiftLHS, ShiftRHS);
13158 }
13159
13160 return DAG.getNode(AArch64ISD::EXTR, DL, VT, LHS, RHS,
13161 DAG.getConstant(ShiftRHS, DL, MVT::i64));
13162}
13163
13164static SDValue tryCombineToBSL(SDNode *N,
13165 TargetLowering::DAGCombinerInfo &DCI) {
13166 EVT VT = N->getValueType(0);
13167 SelectionDAG &DAG = DCI.DAG;
13168 SDLoc DL(N);
13169
13170 if (!VT.isVector())
13171 return SDValue();
13172
13173 // The combining code currently only works for NEON vectors. In particular,
13174 // it does not work for SVE when dealing with vectors wider than 128 bits.
13175 if (!VT.is64BitVector() && !VT.is128BitVector())
13176 return SDValue();
13177
13178 SDValue N0 = N->getOperand(0);
13179 if (N0.getOpcode() != ISD::AND)
13180 return SDValue();
13181
13182 SDValue N1 = N->getOperand(1);
13183 if (N1.getOpcode() != ISD::AND)
13184 return SDValue();
13185
13186 // InstCombine does (not (neg a)) => (add a -1).
13187 // Try: (or (and (neg a) b) (and (add a -1) c)) => (bsl (neg a) b c)
13188 // Loop over all combinations of AND operands.
13189 for (int i = 1; i >= 0; --i) {
13190 for (int j = 1; j >= 0; --j) {
13191 SDValue O0 = N0->getOperand(i);
13192 SDValue O1 = N1->getOperand(j);
13193 SDValue Sub, Add, SubSibling, AddSibling;
13194
13195 // Find a SUB and an ADD operand, one from each AND.
13196 if (O0.getOpcode() == ISD::SUB && O1.getOpcode() == ISD::ADD) {
13197 Sub = O0;
13198 Add = O1;
13199 SubSibling = N0->getOperand(1 - i);
13200 AddSibling = N1->getOperand(1 - j);
13201 } else if (O0.getOpcode() == ISD::ADD && O1.getOpcode() == ISD::SUB) {
13202 Add = O0;
13203 Sub = O1;
13204 AddSibling = N0->getOperand(1 - i);
13205 SubSibling = N1->getOperand(1 - j);
13206 } else
13207 continue;
13208
13209 if (!ISD::isBuildVectorAllZeros(Sub.getOperand(0).getNode()))
13210 continue;
13211
13212 // Constant ones is always the right-hand operand of the Add.
13213 if (!ISD::isBuildVectorAllOnes(Add.getOperand(1).getNode()))
13214 continue;
13215
13216 if (Sub.getOperand(1) != Add.getOperand(0))
13217 continue;
13218
13219 return DAG.getNode(AArch64ISD::BSP, DL, VT, Sub, SubSibling, AddSibling);
13220 }
13221 }
13222
13223 // (or (and a b) (and (not a) c)) => (bsl a b c)
13224 // We only have to look for constant vectors here since the general, variable
13225 // case can be handled in TableGen.
13226 unsigned Bits = VT.getScalarSizeInBits();
13227 uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
13228 for (int i = 1; i >= 0; --i)
13229 for (int j = 1; j >= 0; --j) {
13230 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(i));
13231 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(j));
13232 if (!BVN0 || !BVN1)
13233 continue;
13234
13235 bool FoundMatch = true;
13236 for (unsigned k = 0; k < VT.getVectorNumElements(); ++k) {
13237 ConstantSDNode *CN0 = dyn_cast<ConstantSDNode>(BVN0->getOperand(k));
13238 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(BVN1->getOperand(k));
13239 if (!CN0 || !CN1 ||
13240 CN0->getZExtValue() != (BitMask & ~CN1->getZExtValue())) {
13241 FoundMatch = false;
13242 break;
13243 }
13244 }
13245
13246 if (FoundMatch)
13247 return DAG.getNode(AArch64ISD::BSP, DL, VT, SDValue(BVN0, 0),
13248 N0->getOperand(1 - i), N1->getOperand(1 - j));
13249 }
13250
13251 return SDValue();
13252}
13253
13254static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13255 const AArch64Subtarget *Subtarget) {
13256 // Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
13257 SelectionDAG &DAG = DCI.DAG;
13258 EVT VT = N->getValueType(0);
13259
13260 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
13261 return SDValue();
13262
13263 if (SDValue Res = tryCombineToEXTR(N, DCI))
13264 return Res;
13265
13266 if (SDValue Res = tryCombineToBSL(N, DCI))
13267 return Res;
13268
13269 return SDValue();
13270}
13271
13272static bool isConstantSplatVectorMaskForType(SDNode *N, EVT MemVT) {
13273 if (!MemVT.getVectorElementType().isSimple())
13274 return false;
13275
13276 uint64_t MaskForTy = 0ull;
13277 switch (MemVT.getVectorElementType().getSimpleVT().SimpleTy) {
13278 case MVT::i8:
13279 MaskForTy = 0xffull;
13280 break;
13281 case MVT::i16:
13282 MaskForTy = 0xffffull;
13283 break;
13284 case MVT::i32:
13285 MaskForTy = 0xffffffffull;
13286 break;
13287 default:
13288 return false;
13289 break;
13290 }
13291
13292 if (N->getOpcode() == AArch64ISD::DUP || N->getOpcode() == ISD::SPLAT_VECTOR)
13293 if (auto *Op0 = dyn_cast<ConstantSDNode>(N->getOperand(0)))
13294 return Op0->getAPIntValue().getLimitedValue() == MaskForTy;
13295
13296 return false;
13297}
13298
13299static SDValue performSVEAndCombine(SDNode *N,
13300 TargetLowering::DAGCombinerInfo &DCI) {
13301 if (DCI.isBeforeLegalizeOps())
13302 return SDValue();
13303
13304 SelectionDAG &DAG = DCI.DAG;
13305 SDValue Src = N->getOperand(0);
13306 unsigned Opc = Src->getOpcode();
13307
13308 // Zero/any extend of an unsigned unpack
13309 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
13310 SDValue UnpkOp = Src->getOperand(0);
13311 SDValue Dup = N->getOperand(1);
13312
13313 if (Dup.getOpcode() != AArch64ISD::DUP)
13314 return SDValue();
13315
13316 SDLoc DL(N);
13317 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Dup->getOperand(0));
13318 uint64_t ExtVal = C->getZExtValue();
13319
13320 // If the mask is fully covered by the unpack, we don't need to push
13321 // a new AND onto the operand
13322 EVT EltTy = UnpkOp->getValueType(0).getVectorElementType();
13323 if ((ExtVal == 0xFF && EltTy == MVT::i8) ||
13324 (ExtVal == 0xFFFF && EltTy == MVT::i16) ||
13325 (ExtVal == 0xFFFFFFFF && EltTy == MVT::i32))
13326 return Src;
13327
13328 // Truncate to prevent a DUP with an over-wide constant
13329 APInt Mask = C->getAPIntValue().trunc(EltTy.getSizeInBits());
13330
13331 // Otherwise, make sure we propagate the AND to the operand
13332 // of the unpack
13333 Dup = DAG.getNode(AArch64ISD::DUP, DL,
13334 UnpkOp->getValueType(0),
13335 DAG.getConstant(Mask.zextOrTrunc(32), DL, MVT::i32));
13336
13337 SDValue And = DAG.getNode(ISD::AND, DL,
13338 UnpkOp->getValueType(0), UnpkOp, Dup);
13339
13340 return DAG.getNode(Opc, DL, N->getValueType(0), And);
13341 }
13342
13343 if (!EnableCombineMGatherIntrinsics)
13344 return SDValue();
13345
13346 SDValue Mask = N->getOperand(1);
13347
13348 if (!Src.hasOneUse())
13349 return SDValue();
13350
13351 EVT MemVT;
13352
13353 // SVE load instructions perform an implicit zero-extend, which makes them
13354 // perfect candidates for combining.
13355 switch (Opc) {
13356 case AArch64ISD::LD1_MERGE_ZERO:
13357 case AArch64ISD::LDNF1_MERGE_ZERO:
13358 case AArch64ISD::LDFF1_MERGE_ZERO:
13359 MemVT = cast<VTSDNode>(Src->getOperand(3))->getVT();
13360 break;
13361 case AArch64ISD::GLD1_MERGE_ZERO:
13362 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
13363 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
13364 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
13365 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
13366 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
13367 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
13368 case AArch64ISD::GLDFF1_MERGE_ZERO:
13369 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
13370 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
13371 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
13372 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
13373 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
13374 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
13375 case AArch64ISD::GLDNT1_MERGE_ZERO:
13376 MemVT = cast<VTSDNode>(Src->getOperand(4))->getVT();
13377 break;
13378 default:
13379 return SDValue();
13380 }
13381
13382 if (isConstantSplatVectorMaskForType(Mask.getNode(), MemVT))
13383 return Src;
13384
13385 return SDValue();
13386}
13387
13388static SDValue performANDCombine(SDNode *N,
13389 TargetLowering::DAGCombinerInfo &DCI) {
13390 SelectionDAG &DAG = DCI.DAG;
13391 SDValue LHS = N->getOperand(0);
13392 EVT VT = N->getValueType(0);
13393 if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))
13394 return SDValue();
13395
13396 if (VT.isScalableVector())
13397 return performSVEAndCombine(N, DCI);
13398
13399 // The combining code below works only for NEON vectors. In particular, it
13400 // does not work for SVE when dealing with vectors wider than 128 bits.
13401 if (!(VT.is64BitVector() || VT.is128BitVector()))
13402 return SDValue();
13403
13404 BuildVectorSDNode *BVN =
13405 dyn_cast<BuildVectorSDNode>(N->getOperand(1).getNode());
13406 if (!BVN)
13407 return SDValue();
13408
13409 // AND does not accept an immediate, so check if we can use a BIC immediate
13410 // instruction instead. We do this here instead of using a (and x, (mvni imm))
13411 // pattern in isel, because some immediates may be lowered to the preferred
13412 // (and x, (movi imm)) form, even though an mvni representation also exists.
13413 APInt DefBits(VT.getSizeInBits(), 0);
13414 APInt UndefBits(VT.getSizeInBits(), 0);
13415 if (resolveBuildVector(BVN, DefBits, UndefBits)) {
13416 SDValue NewOp;
13417
13418 DefBits = ~DefBits;
13419 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13420 DefBits, &LHS)) ||
13421 (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13422 DefBits, &LHS)))
13423 return NewOp;
13424
13425 UndefBits = ~UndefBits;
13426 if ((NewOp = tryAdvSIMDModImm32(AArch64ISD::BICi, SDValue(N, 0), DAG,
13427 UndefBits, &LHS)) ||
13428 (NewOp = tryAdvSIMDModImm16(AArch64ISD::BICi, SDValue(N, 0), DAG,
13429 UndefBits, &LHS)))
13430 return NewOp;
13431 }
13432
13433 return SDValue();
13434}
13435
13436static SDValue performSRLCombine(SDNode *N,
13437 TargetLowering::DAGCombinerInfo &DCI) {
13438 SelectionDAG &DAG = DCI.DAG;
13439 EVT VT = N->getValueType(0);
13440 if (VT != MVT::i32 && VT != MVT::i64)
13441 return SDValue();
13442
13443 // Canonicalize (srl (bswap i32 x), 16) to (rotr (bswap i32 x), 16), if the
13444 // high 16-bits of x are zero. Similarly, canonicalize (srl (bswap i64 x), 32)
13445 // to (rotr (bswap i64 x), 32), if the high 32-bits of x are zero.
13446 SDValue N0 = N->getOperand(0);
13447 if (N0.getOpcode() == ISD::BSWAP) {
13448 SDLoc DL(N);
13449 SDValue N1 = N->getOperand(1);
13450 SDValue N00 = N0.getOperand(0);
13451 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
13452 uint64_t ShiftAmt = C->getZExtValue();
13453 if (VT == MVT::i32 && ShiftAmt == 16 &&
13454 DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(32, 16)))
13455 return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13456 if (VT == MVT::i64 && ShiftAmt == 32 &&
13457 DAG.MaskedValueIsZero(N00, APInt::getHighBitsSet(64, 32)))
13458 return DAG.getNode(ISD::ROTR, DL, VT, N0, N1);
13459 }
13460 }
13461 return SDValue();
13462}
13463
13464// Attempt to form urhadd(OpA, OpB) from
13465// truncate(vlshr(sub(zext(OpB), xor(zext(OpA), Ones(ElemSizeInBits))), 1))
13466// or uhadd(OpA, OpB) from truncate(vlshr(add(zext(OpA), zext(OpB)), 1)).
13467// The original form of the first expression is
13468// truncate(srl(add(zext(OpB), add(zext(OpA), 1)), 1)) and the
13469// (OpA + OpB + 1) subexpression will have been changed to (OpB - (~OpA)).
13470// Before this function is called the srl will have been lowered to
13471// AArch64ISD::VLSHR.
13472// This pass can also recognize signed variants of the patterns that use sign
13473// extension instead of zero extension and form a srhadd(OpA, OpB) or a
13474// shadd(OpA, OpB) from them.
13475static SDValue
13476performVectorTruncateCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13477 SelectionDAG &DAG) {
13478 EVT VT = N->getValueType(0);
13479
13480 // Since we are looking for a right shift by a constant value of 1 and we are
13481 // operating on types at least 16 bits in length (sign/zero extended OpA and
13482 // OpB, which are at least 8 bits), it follows that the truncate will always
13483 // discard the shifted-in bit and therefore the right shift will be logical
13484 // regardless of the signedness of OpA and OpB.
13485 SDValue Shift = N->getOperand(0);
13486 if (Shift.getOpcode() != AArch64ISD::VLSHR)
13487 return SDValue();
13488
13489 // Is the right shift using an immediate value of 1?
13490 uint64_t ShiftAmount = Shift.getConstantOperandVal(1);
13491 if (ShiftAmount != 1)
13492 return SDValue();
13493
13494 SDValue ExtendOpA, ExtendOpB;
13495 SDValue ShiftOp0 = Shift.getOperand(0);
13496 unsigned ShiftOp0Opc = ShiftOp0.getOpcode();
13497 if (ShiftOp0Opc == ISD::SUB) {
13498
13499 SDValue Xor = ShiftOp0.getOperand(1);
13500 if (Xor.getOpcode() != ISD::XOR)
13501 return SDValue();
13502
13503    // Is the XOR using an all-ones constant on the right-hand side?
13504 uint64_t C;
13505 if (!isAllConstantBuildVector(Xor.getOperand(1), C))
13506 return SDValue();
13507
13508 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
13509 APInt CAsAPInt(ElemSizeInBits, C);
13510 if (CAsAPInt != APInt::getAllOnesValue(ElemSizeInBits))
13511 return SDValue();
13512
13513 ExtendOpA = Xor.getOperand(0);
13514 ExtendOpB = ShiftOp0.getOperand(0);
13515 } else if (ShiftOp0Opc == ISD::ADD) {
13516 ExtendOpA = ShiftOp0.getOperand(0);
13517 ExtendOpB = ShiftOp0.getOperand(1);
13518 } else
13519 return SDValue();
13520
13521 unsigned ExtendOpAOpc = ExtendOpA.getOpcode();
13522 unsigned ExtendOpBOpc = ExtendOpB.getOpcode();
13523 if (!(ExtendOpAOpc == ExtendOpBOpc &&
13524 (ExtendOpAOpc == ISD::ZERO_EXTEND || ExtendOpAOpc == ISD::SIGN_EXTEND)))
13525 return SDValue();
13526
13527 // Is the result of the right shift being truncated to the same value type as
13528 // the original operands, OpA and OpB?
13529 SDValue OpA = ExtendOpA.getOperand(0);
13530 SDValue OpB = ExtendOpB.getOperand(0);
13531 EVT OpAVT = OpA.getValueType();
13532  assert(ExtendOpA.getValueType() == ExtendOpB.getValueType());
13533 if (!(VT == OpAVT && OpAVT == OpB.getValueType()))
13534 return SDValue();
13535
13536 SDLoc DL(N);
13537 bool IsSignExtend = ExtendOpAOpc == ISD::SIGN_EXTEND;
13538 bool IsRHADD = ShiftOp0Opc == ISD::SUB;
13539 unsigned HADDOpc = IsSignExtend
13540 ? (IsRHADD ? AArch64ISD::SRHADD : AArch64ISD::SHADD)
13541 : (IsRHADD ? AArch64ISD::URHADD : AArch64ISD::UHADD);
13542 SDValue ResultHADD = DAG.getNode(HADDOpc, DL, VT, OpA, OpB);
13543
13544 return ResultHADD;
13545}
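// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The rounding-average idiom the combine above recognises, written element-wise
// in scalar form; over NEON vectors the same computation is expected to become
// a single URHADD (or SRHADD for the sign-extended variant). Hypothetical name.
#include <cstdint>
uint8_t rounding_avg_u8(uint8_t a, uint8_t b) {
  // zext to 16 bits, add with a +1 rounding term, shift right by one, truncate.
  return static_cast<uint8_t>((uint16_t(a) + uint16_t(b) + 1) >> 1);
}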
13546
13547static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
13548 switch (Opcode) {
13549 case ISD::FADD:
13550 return (FullFP16 && VT == MVT::f16) || VT == MVT::f32 || VT == MVT::f64;
13551 case ISD::ADD:
13552 return VT == MVT::i64;
13553 default:
13554 return false;
13555 }
13556}
13557
13558static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
13559 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13560 ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
13561
13562 EVT VT = N->getValueType(0);
13563 const bool FullFP16 =
13564 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
13565
13566 // Rewrite for pairwise fadd pattern
13567 // (f32 (extract_vector_elt
13568 // (fadd (vXf32 Other)
13569 // (vector_shuffle (vXf32 Other) undef <1,X,...> )) 0))
13570 // ->
13571 // (f32 (fadd (extract_vector_elt (vXf32 Other) 0)
13572 // (extract_vector_elt (vXf32 Other) 1))
13573 if (ConstantN1 && ConstantN1->getZExtValue() == 0 &&
13574 hasPairwiseAdd(N0->getOpcode(), VT, FullFP16)) {
13575 SDLoc DL(N0);
13576 SDValue N00 = N0->getOperand(0);
13577 SDValue N01 = N0->getOperand(1);
13578
13579 ShuffleVectorSDNode *Shuffle = dyn_cast<ShuffleVectorSDNode>(N01);
13580 SDValue Other = N00;
13581
13582 // And handle the commutative case.
13583 if (!Shuffle) {
13584 Shuffle = dyn_cast<ShuffleVectorSDNode>(N00);
13585 Other = N01;
13586 }
13587
13588 if (Shuffle && Shuffle->getMaskElt(0) == 1 &&
13589 Other == Shuffle->getOperand(0)) {
13590 return DAG.getNode(N0->getOpcode(), DL, VT,
13591 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13592 DAG.getConstant(0, DL, MVT::i64)),
13593 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Other,
13594 DAG.getConstant(1, DL, MVT::i64)));
13595 }
13596 }
13597
13598 return SDValue();
13599}
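// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The pairwise-add shape handled above, in intrinsic form: only lane 0 of the
// vector fadd result is used, so it can be rewritten as a scalar add of lanes 0
// and 1, which in practice selects a pairwise FADDP. Hypothetical function name.
#include <arm_neon.h>
float sum_first_pair(float32x2_t v) {
  return vget_lane_f32(v, 0) + vget_lane_f32(v, 1);
}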
13600
13601static SDValue performConcatVectorsCombine(SDNode *N,
13602 TargetLowering::DAGCombinerInfo &DCI,
13603 SelectionDAG &DAG) {
13604 SDLoc dl(N);
13605 EVT VT = N->getValueType(0);
13606 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
13607 unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
13608
13609 // Optimize concat_vectors of truncated vectors, where the intermediate
13610 // type is illegal, to avoid said illegality, e.g.,
13611 // (v4i16 (concat_vectors (v2i16 (truncate (v2i64))),
13612 // (v2i16 (truncate (v2i64)))))
13613 // ->
13614 // (v4i16 (truncate (vector_shuffle (v4i32 (bitcast (v2i64))),
13615 // (v4i32 (bitcast (v2i64))),
13616 // <0, 2, 4, 6>)))
13617 // This isn't really target-specific, but ISD::TRUNCATE legality isn't keyed
13618 // on both input and result type, so we might generate worse code.
13619 // On AArch64 we know it's fine for v2i64->v4i16 and v4i32->v8i8.
13620 if (N->getNumOperands() == 2 && N0Opc == ISD::TRUNCATE &&
13621 N1Opc == ISD::TRUNCATE) {
13622 SDValue N00 = N0->getOperand(0);
13623 SDValue N10 = N1->getOperand(0);
13624 EVT N00VT = N00.getValueType();
13625
13626 if (N00VT == N10.getValueType() &&
13627 (N00VT == MVT::v2i64 || N00VT == MVT::v4i32) &&
13628 N00VT.getScalarSizeInBits() == 4 * VT.getScalarSizeInBits()) {
13629 MVT MidVT = (N00VT == MVT::v2i64 ? MVT::v4i32 : MVT::v8i16);
13630 SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
13631 for (size_t i = 0; i < Mask.size(); ++i)
13632 Mask[i] = i * 2;
13633 return DAG.getNode(ISD::TRUNCATE, dl, VT,
13634 DAG.getVectorShuffle(
13635 MidVT, dl,
13636 DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
13637 DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
13638 }
13639 }
13640
13641 // Wait 'til after everything is legalized to try this. That way we have
13642 // legal vector types and such.
13643 if (DCI.isBeforeLegalizeOps())
13644 return SDValue();
13645
13646 // Optimise concat_vectors of two [us]rhadds or [us]hadds that use extracted
13647 // subvectors from the same original vectors. Combine these into a single
13648 // [us]rhadd or [us]hadd that operates on the two original vectors. Example:
13649 // (v16i8 (concat_vectors (v8i8 (urhadd (extract_subvector (v16i8 OpA, <0>),
13650 // extract_subvector (v16i8 OpB,
13651 // <0>))),
13652 // (v8i8 (urhadd (extract_subvector (v16i8 OpA, <8>),
13653 // extract_subvector (v16i8 OpB,
13654 // <8>)))))
13655 // ->
13656 // (v16i8(urhadd(v16i8 OpA, v16i8 OpB)))
13657 if (N->getNumOperands() == 2 && N0Opc == N1Opc &&
13658 (N0Opc == AArch64ISD::URHADD || N0Opc == AArch64ISD::SRHADD ||
13659 N0Opc == AArch64ISD::UHADD || N0Opc == AArch64ISD::SHADD)) {
13660 SDValue N00 = N0->getOperand(0);
13661 SDValue N01 = N0->getOperand(1);
13662 SDValue N10 = N1->getOperand(0);
13663 SDValue N11 = N1->getOperand(1);
13664
13665 EVT N00VT = N00.getValueType();
13666 EVT N10VT = N10.getValueType();
13667
13668 if (N00->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13669 N01->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13670 N10->getOpcode() == ISD::EXTRACT_SUBVECTOR &&
13671 N11->getOpcode() == ISD::EXTRACT_SUBVECTOR && N00VT == N10VT) {
13672 SDValue N00Source = N00->getOperand(0);
13673 SDValue N01Source = N01->getOperand(0);
13674 SDValue N10Source = N10->getOperand(0);
13675 SDValue N11Source = N11->getOperand(0);
13676
13677 if (N00Source == N10Source && N01Source == N11Source &&
13678 N00Source.getValueType() == VT && N01Source.getValueType() == VT) {
13679        assert(N0.getValueType() == N1.getValueType());
13680
13681 uint64_t N00Index = N00.getConstantOperandVal(1);
13682 uint64_t N01Index = N01.getConstantOperandVal(1);
13683 uint64_t N10Index = N10.getConstantOperandVal(1);
13684 uint64_t N11Index = N11.getConstantOperandVal(1);
13685
13686 if (N00Index == N01Index && N10Index == N11Index && N00Index == 0 &&
13687 N10Index == N00VT.getVectorNumElements())
13688 return DAG.getNode(N0Opc, dl, VT, N00Source, N01Source);
13689 }
13690 }
13691 }
13692
13693 // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
13694 // splat. The indexed instructions are going to be expecting a DUPLANE64, so
13695 // canonicalise to that.
13696 if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
13697    assert(VT.getScalarSizeInBits() == 64);
13698 return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
13699 DAG.getConstant(0, dl, MVT::i64));
13700 }
13701
13702 // Canonicalise concat_vectors so that the right-hand vector has as few
13703 // bit-casts as possible before its real operation. The primary matching
13704 // destination for these operations will be the narrowing "2" instructions,
13705 // which depend on the operation being performed on this right-hand vector.
13706 // For example,
13707 // (concat_vectors LHS, (v1i64 (bitconvert (v4i16 RHS))))
13708 // becomes
13709 // (bitconvert (concat_vectors (v4i16 (bitconvert LHS)), RHS))
13710
13711 if (N->getNumOperands() != 2 || N1Opc != ISD::BITCAST)
13712 return SDValue();
13713 SDValue RHS = N1->getOperand(0);
13714 MVT RHSTy = RHS.getValueType().getSimpleVT();
13715 // If the RHS is not a vector, this is not the pattern we're looking for.
13716 if (!RHSTy.isVector())
13717 return SDValue();
13718
13719  LLVM_DEBUG(
13720      dbgs() << "aarch64-lower: concat_vectors bitcast simplification\n");
13721
13722 MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
13723 RHSTy.getVectorNumElements() * 2);
13724 return DAG.getNode(ISD::BITCAST, dl, VT,
13725 DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
13726 DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
13727 RHS));
13728}
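// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// A concat of two narrowing truncates, as in the first comment of the function
// above: instead of going through the illegal v2i16 type, the values can be
// narrowed on a legal type and shuffled. The intrinsic sequence below is only a
// conceptual stand-in for the DAG nodes involved, not the code the combine emits.
#include <arm_neon.h>
uint16x4_t concat_truncated(uint64x2_t a, uint64x2_t b) {
  // (v4i16 (concat_vectors (v2i16 (truncate a)), (v2i16 (truncate b))))
  return vmovn_u32(vcombine_u32(vmovn_u64(a), vmovn_u64(b)));
}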
13729
13730static SDValue
13731performInsertSubvectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
13732 SelectionDAG &DAG) {
13733 SDValue Vec = N->getOperand(0);
13734 SDValue SubVec = N->getOperand(1);
13735 uint64_t IdxVal = N->getConstantOperandVal(2);
13736 EVT VecVT = Vec.getValueType();
13737 EVT SubVT = SubVec.getValueType();
13738
13739 // Only do this for legal fixed vector types.
13740 if (!VecVT.isFixedLengthVector() ||
13741 !DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
13742 !DAG.getTargetLoweringInfo().isTypeLegal(SubVT))
13743 return SDValue();
13744
13745 // Ignore widening patterns.
13746 if (IdxVal == 0 && Vec.isUndef())
13747 return SDValue();
13748
13749 // Subvector must be half the width and an "aligned" insertion.
13750 unsigned NumSubElts = SubVT.getVectorNumElements();
13751 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
13752 (IdxVal != 0 && IdxVal != NumSubElts))
13753 return SDValue();
13754
13755 // Fold insert_subvector -> concat_vectors
13756 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
13757 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
13758 SDLoc DL(N);
13759 SDValue Lo, Hi;
13760 if (IdxVal == 0) {
13761 Lo = SubVec;
13762 Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
13763 DAG.getVectorIdxConstant(NumSubElts, DL));
13764 } else {
13765 Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
13766 DAG.getVectorIdxConstant(0, DL));
13767 Hi = SubVec;
13768 }
13769 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
13770}
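// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The insert_subvector -> concat_vectors fold above, in intrinsic form: an
// "aligned" insertion of a half-width subvector is just a concatenation with the
// untouched half of the original vector. Hypothetical helper name.
#include <arm_neon.h>
uint32x4_t insert_low_half(uint32x4_t vec, uint32x2_t sub) {
  // insert_subvector(vec, sub, lo) == concat_vectors(sub, extract(vec, hi))
  return vcombine_u32(sub, vget_high_u32(vec));
}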
13771
13772static SDValue tryCombineFixedPointConvert(SDNode *N,
13773 TargetLowering::DAGCombinerInfo &DCI,
13774 SelectionDAG &DAG) {
13775 // Wait until after everything is legalized to try this. That way we have
13776 // legal vector types and such.
13777 if (DCI.isBeforeLegalizeOps())
13778 return SDValue();
13779 // Transform a scalar conversion of a value from a lane extract into a
13780 // lane extract of a vector conversion. E.g., from foo1 to foo2:
13781 // double foo1(int64x2_t a) { return vcvtd_n_f64_s64(a[1], 9); }
13782 // double foo2(int64x2_t a) { return vcvtq_n_f64_s64(a, 9)[1]; }
13783 //
13784 // The second form interacts better with instruction selection and the
13785 // register allocator to avoid cross-class register copies that aren't
13786 // coalescable due to a lane reference.
13787
13788 // Check the operand and see if it originates from a lane extract.
13789 SDValue Op1 = N->getOperand(1);
13790 if (Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
13791 // Yep, no additional predication needed. Perform the transform.
13792 SDValue IID = N->getOperand(0);
13793 SDValue Shift = N->getOperand(2);
13794 SDValue Vec = Op1.getOperand(0);
13795 SDValue Lane = Op1.getOperand(1);
13796 EVT ResTy = N->getValueType(0);
13797 EVT VecResTy;
13798 SDLoc DL(N);
13799
13800 // The vector width should be 128 bits by the time we get here, even
13801 // if it started as 64 bits (the extract_vector handling will have
13802 // done so).
13803    assert(Vec.getValueSizeInBits() == 128 &&
13804           "unexpected vector size on extract_vector_elt!");
13805 if (Vec.getValueType() == MVT::v4i32)
13806 VecResTy = MVT::v4f32;
13807 else if (Vec.getValueType() == MVT::v2i64)
13808 VecResTy = MVT::v2f64;
13809 else
13810      llvm_unreachable("unexpected vector type!");
13811
13812 SDValue Convert =
13813 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift);
13814 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResTy, Convert, Lane);
13815 }
13816 return SDValue();
13817}
13818
13819// AArch64 high-vector "long" operations are formed by performing the non-high
13820// version on an extract_subvector of each operand which gets the high half:
13821//
13822// (longop2 LHS, RHS) == (longop (extract_high LHS), (extract_high RHS))
13823//
13824// However, there are cases which don't have an extract_high explicitly, but
13825// have another operation that can be made compatible with one for free. For
13826// example:
13827//
13828// (dupv64 scalar) --> (extract_high (dup128 scalar))
13829//
13830// This routine does the actual conversion of such DUPs, once outer routines
13831// have determined that everything else is in order.
13832// It also supports immediate DUP-like nodes (MOVI/MVNi), which we can fold
13833// similarly here.
13834static SDValue tryExtendDUPToExtractHigh(SDValue N, SelectionDAG &DAG) {
13835 switch (N.getOpcode()) {
13836 case AArch64ISD::DUP:
13837 case AArch64ISD::DUPLANE8:
13838 case AArch64ISD::DUPLANE16:
13839 case AArch64ISD::DUPLANE32:
13840 case AArch64ISD::DUPLANE64:
13841 case AArch64ISD::MOVI:
13842 case AArch64ISD::MOVIshift:
13843 case AArch64ISD::MOVIedit:
13844 case AArch64ISD::MOVImsl:
13845 case AArch64ISD::MVNIshift:
13846 case AArch64ISD::MVNImsl:
13847 break;
13848 default:
13849 // FMOV could be supported, but isn't very useful, as it would only occur
13850    // if you passed a bitcast'd floating-point immediate to an eligible long
13851 // integer op (addl, smull, ...).
13852 return SDValue();
13853 }
13854
13855 MVT NarrowTy = N.getSimpleValueType();
13856 if (!NarrowTy.is64BitVector())
13857 return SDValue();
13858
13859 MVT ElementTy = NarrowTy.getVectorElementType();
13860 unsigned NumElems = NarrowTy.getVectorNumElements();
13861 MVT NewVT = MVT::getVectorVT(ElementTy, NumElems * 2);
13862
13863 SDLoc dl(N);
13864 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NarrowTy,
13865 DAG.getNode(N->getOpcode(), dl, NewVT, N->ops()),
13866 DAG.getConstant(NumElems, dl, MVT::i64));
13867}
13868
13869static bool isEssentiallyExtractHighSubvector(SDValue N) {
13870 if (N.getOpcode() == ISD::BITCAST)
13871 N = N.getOperand(0);
13872 if (N.getOpcode() != ISD::EXTRACT_SUBVECTOR)
13873 return false;
13874 return cast<ConstantSDNode>(N.getOperand(1))->getAPIntValue() ==
13875 N.getOperand(0).getValueType().getVectorNumElements() / 2;
13876}
13877
13878/// Helper structure to keep track of ISD::SET_CC operands.
13879struct GenericSetCCInfo {
13880 const SDValue *Opnd0;
13881 const SDValue *Opnd1;
13882 ISD::CondCode CC;
13883};
13884
13885/// Helper structure to keep track of a SET_CC lowered into AArch64 code.
13886struct AArch64SetCCInfo {
13887 const SDValue *Cmp;
13888 AArch64CC::CondCode CC;
13889};
13890
13891/// Helper structure to keep track of SetCC information.
13892union SetCCInfo {
13893 GenericSetCCInfo Generic;
13894 AArch64SetCCInfo AArch64;
13895};
13896
13897/// Helper structure to be able to read SetCC information. If set to
13898/// true, IsAArch64 field, Info is a AArch64SetCCInfo, otherwise Info is a
13899/// GenericSetCCInfo.
13900struct SetCCInfoAndKind {
13901 SetCCInfo Info;
13902 bool IsAArch64;
13903};
13904
13905/// Check whether or not \p Op is a SET_CC operation,
13906/// either a generic or
13907/// an AArch64 lowered one.
13908/// \p SetCCInfo is filled accordingly.
13909/// \post SetCCInfo is meaningful only when this function returns true.
13910/// \return True when Op is a kind of SET_CC operation.
13911static bool isSetCC(SDValue Op, SetCCInfoAndKind &SetCCInfo) {
13912  // If this is a setcc, this is straightforward.
13913 if (Op.getOpcode() == ISD::SETCC) {
13914 SetCCInfo.Info.Generic.Opnd0 = &Op.getOperand(0);
13915 SetCCInfo.Info.Generic.Opnd1 = &Op.getOperand(1);
13916 SetCCInfo.Info.Generic.CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
13917 SetCCInfo.IsAArch64 = false;
13918 return true;
13919 }
13920 // Otherwise, check if this is a matching csel instruction.
13921 // In other words:
13922 // - csel 1, 0, cc
13923 // - csel 0, 1, !cc
13924 if (Op.getOpcode() != AArch64ISD::CSEL)
13925 return false;
13926 // Set the information about the operands.
13927 // TODO: we want the operands of the Cmp not the csel
13928 SetCCInfo.Info.AArch64.Cmp = &Op.getOperand(3);
13929 SetCCInfo.IsAArch64 = true;
13930 SetCCInfo.Info.AArch64.CC = static_cast<AArch64CC::CondCode>(
13931 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
13932
13933  // Check that the operands match the constraints:
13934 // (1) Both operands must be constants.
13935 // (2) One must be 1 and the other must be 0.
13936 ConstantSDNode *TValue = dyn_cast<ConstantSDNode>(Op.getOperand(0));
13937 ConstantSDNode *FValue = dyn_cast<ConstantSDNode>(Op.getOperand(1));
13938
13939 // Check (1).
13940 if (!TValue || !FValue)
13941 return false;
13942
13943 // Check (2).
13944 if (!TValue->isOne()) {
13945 // Update the comparison when we are interested in !cc.
13946 std::swap(TValue, FValue);
13947 SetCCInfo.Info.AArch64.CC =
13948 AArch64CC::getInvertedCondCode(SetCCInfo.Info.AArch64.CC);
13949 }
13950 return TValue->isOne() && FValue->isNullValue();
13951}
13952
13953// Returns true if Op is setcc or zext of setcc.
13954static bool isSetCCOrZExtSetCC(const SDValue& Op, SetCCInfoAndKind &Info) {
13955 if (isSetCC(Op, Info))
13956 return true;
13957 return ((Op.getOpcode() == ISD::ZERO_EXTEND) &&
13958 isSetCC(Op->getOperand(0), Info));
13959}
13960
13961// The folding we want to perform is:
13962// (add x, [zext] (setcc cc ...) )
13963// -->
13964// (csel x, (add x, 1), !cc ...)
13965//
13966// The latter will get matched to a CSINC instruction.
13967static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
13968  assert(Op && Op->getOpcode() == ISD::ADD && "Unexpected operation!");
13969 SDValue LHS = Op->getOperand(0);
13970 SDValue RHS = Op->getOperand(1);
13971 SetCCInfoAndKind InfoAndKind;
13972
13973 // If both operands are a SET_CC, then we don't want to perform this
13974 // folding and create another csel as this results in more instructions
13975 // (and higher register usage).
13976 if (isSetCCOrZExtSetCC(LHS, InfoAndKind) &&
13977 isSetCCOrZExtSetCC(RHS, InfoAndKind))
13978 return SDValue();
13979
13980 // If neither operand is a SET_CC, give up.
13981 if (!isSetCCOrZExtSetCC(LHS, InfoAndKind)) {
13982 std::swap(LHS, RHS);
13983 if (!isSetCCOrZExtSetCC(LHS, InfoAndKind))
13984 return SDValue();
13985 }
13986
13987  // FIXME: This could be generalized to work for FP comparisons.
13988 EVT CmpVT = InfoAndKind.IsAArch64
13989 ? InfoAndKind.Info.AArch64.Cmp->getOperand(0).getValueType()
13990 : InfoAndKind.Info.Generic.Opnd0->getValueType();
13991 if (CmpVT != MVT::i32 && CmpVT != MVT::i64)
13992 return SDValue();
13993
13994 SDValue CCVal;
13995 SDValue Cmp;
13996 SDLoc dl(Op);
13997 if (InfoAndKind.IsAArch64) {
13998 CCVal = DAG.getConstant(
13999 AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
14000 MVT::i32);
14001 Cmp = *InfoAndKind.Info.AArch64.Cmp;
14002 } else
14003 Cmp = getAArch64Cmp(
14004 *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
14005 ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
14006 dl);
14007
14008 EVT VT = Op->getValueType(0);
14009 LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
14010 return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
14011}
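// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The add-of-setcc shape folded above: adding a boolean comparison result to an
// integer is expected to become a compare followed by CSINC, rather than a
// separate CSET plus ADD. Hypothetical function name.
#include <cstdint>
uint64_t add_flag(uint64_t x, uint64_t a, uint64_t b) {
  return x + (a < b); // (add x, (setcc ult a, b)) -> (csel x, (add x, 1), !cc)
}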
14012
14013// ADD(UADDV a, UADDV b) --> UADDV(ADD a, b)
14014static SDValue performUADDVCombine(SDNode *N, SelectionDAG &DAG) {
14015 EVT VT = N->getValueType(0);
14016 // Only scalar integer and vector types.
14017 if (N->getOpcode() != ISD::ADD || !VT.isScalarInteger())
14018 return SDValue();
14019
14020 SDValue LHS = N->getOperand(0);
14021 SDValue RHS = N->getOperand(1);
14022 if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14023 RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || LHS.getValueType() != VT)
14024 return SDValue();
14025
14026 auto *LHSN1 = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
14027 auto *RHSN1 = dyn_cast<ConstantSDNode>(RHS->getOperand(1));
14028 if (!LHSN1 || LHSN1 != RHSN1 || !RHSN1->isNullValue())
14029 return SDValue();
14030
14031 SDValue Op1 = LHS->getOperand(0);
14032 SDValue Op2 = RHS->getOperand(0);
14033 EVT OpVT1 = Op1.getValueType();
14034 EVT OpVT2 = Op2.getValueType();
14035 if (Op1.getOpcode() != AArch64ISD::UADDV || OpVT1 != OpVT2 ||
14036 Op2.getOpcode() != AArch64ISD::UADDV ||
14037 OpVT1.getVectorElementType() != VT)
14038 return SDValue();
14039
14040 SDValue Val1 = Op1.getOperand(0);
14041 SDValue Val2 = Op2.getOperand(0);
14042 EVT ValVT = Val1->getValueType(0);
14043 SDLoc DL(N);
14044 SDValue AddVal = DAG.getNode(ISD::ADD, DL, ValVT, Val1, Val2);
14045 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
14046 DAG.getNode(AArch64ISD::UADDV, DL, ValVT, AddVal),
14047 DAG.getConstant(0, DL, MVT::i64));
14048}
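// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The sum-of-reductions shape above: ADD(UADDV a, UADDV b) can be rewritten as
// UADDV(ADD a, b), i.e. one vector add followed by a single across-lanes
// reduction. Intrinsic form of the input pattern; hypothetical function name.
#include <arm_neon.h>
uint32_t sum_of_sums(uint32x4_t a, uint32x4_t b) {
  // Expected to become the equivalent of vaddvq_u32(vaddq_u32(a, b)).
  return vaddvq_u32(a) + vaddvq_u32(b);
}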
14049
14050// ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
14051static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
14052 EVT VT = N->getValueType(0);
14053 if (N->getOpcode() != ISD::ADD)
14054 return SDValue();
14055
14056 SDValue Dot = N->getOperand(0);
14057 SDValue A = N->getOperand(1);
14058  // Handle commutativity
14059 auto isZeroDot = [](SDValue Dot) {
14060 return (Dot.getOpcode() == AArch64ISD::UDOT ||
14061 Dot.getOpcode() == AArch64ISD::SDOT) &&
14062 isZerosVector(Dot.getOperand(0).getNode());
14063 };
14064 if (!isZeroDot(Dot))
14065 std::swap(Dot, A);
14066 if (!isZeroDot(Dot))
14067 return SDValue();
14068
14069 return DAG.getNode(Dot.getOpcode(), SDLoc(N), VT, A, Dot.getOperand(1),
14070 Dot.getOperand(2));
14071}
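// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The dot-product shape above: a UDOT that accumulates into zero followed by an
// add of A is the same as a UDOT that accumulates directly into A. Assumes the
// dot-product extension is available; hypothetical function name.
#include <arm_neon.h>
uint32x4_t dot_then_add(uint32x4_t acc, uint8x16_t a, uint8x16_t b) {
  // Expected to be folded into the equivalent of vdotq_u32(acc, a, b).
  return vaddq_u32(vdotq_u32(vdupq_n_u32(0), a, b), acc);
}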
14072
14073// The basic add/sub long vector instructions have variants with "2" on the end
14074// which act on the high-half of their inputs. They are normally matched by
14075// patterns like:
14076//
14077// (add (zeroext (extract_high LHS)),
14078// (zeroext (extract_high RHS)))
14079// -> uaddl2 vD, vN, vM
14080//
14081// However, if one of the extracts is something like a duplicate, this
14082// instruction can still be used profitably. This function puts the DAG into a
14083// more appropriate form for those patterns to trigger.
14084static SDValue performAddSubLongCombine(SDNode *N,
14085 TargetLowering::DAGCombinerInfo &DCI,
14086 SelectionDAG &DAG) {
14087 if (DCI.isBeforeLegalizeOps())
14088 return SDValue();
14089
14090 MVT VT = N->getSimpleValueType(0);
14091 if (!VT.is128BitVector()) {
14092 if (N->getOpcode() == ISD::ADD)
14093 return performSetccAddFolding(N, DAG);
14094 return SDValue();
14095 }
14096
14097 // Make sure both branches are extended in the same way.
14098 SDValue LHS = N->getOperand(0);
14099 SDValue RHS = N->getOperand(1);
14100 if ((LHS.getOpcode() != ISD::ZERO_EXTEND &&
14101 LHS.getOpcode() != ISD::SIGN_EXTEND) ||
14102 LHS.getOpcode() != RHS.getOpcode())
14103 return SDValue();
14104
14105 unsigned ExtType = LHS.getOpcode();
14106
14107 // It's not worth doing if at least one of the inputs isn't already an
14108 // extract, but we don't know which it'll be so we have to try both.
14109 if (isEssentiallyExtractHighSubvector(LHS.getOperand(0))) {
14110 RHS = tryExtendDUPToExtractHigh(RHS.getOperand(0), DAG);
14111 if (!RHS.getNode())
14112 return SDValue();
14113
14114 RHS = DAG.getNode(ExtType, SDLoc(N), VT, RHS);
14115 } else if (isEssentiallyExtractHighSubvector(RHS.getOperand(0))) {
14116 LHS = tryExtendDUPToExtractHigh(LHS.getOperand(0), DAG);
14117 if (!LHS.getNode())
14118 return SDValue();
14119
14120 LHS = DAG.getNode(ExtType, SDLoc(N), VT, LHS);
14121 }
14122
14123 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, LHS, RHS);
14124}
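// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// One input of this widening add already takes the high half of a vector while
// the other is a duplicated scalar; the combine above rewrites the DUP as an
// extract_high of a 128-bit DUP so that the UADDL2 pattern can still match.
// Hypothetical helper; the exact instruction choice is left to isel.
#include <arm_neon.h>
uint16x8_t widen_add_high(uint8x16_t a, uint8_t c) {
  return vaddl_u8(vget_high_u8(a), vdup_n_u8(c));
}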
14125
14126static SDValue performAddSubCombine(SDNode *N,
14127 TargetLowering::DAGCombinerInfo &DCI,
14128 SelectionDAG &DAG) {
14129 // Try to change sum of two reductions.
14130 if (SDValue Val = performUADDVCombine(N, DAG))
14131 return Val;
14132 if (SDValue Val = performAddDotCombine(N, DAG))
14133 return Val;
14134
14135 return performAddSubLongCombine(N, DCI, DAG);
14136}
14137
14138// Massage DAGs which we can use the high-half "long" operations on into
14139// something isel will recognize better. E.g.
14140//
14141// (aarch64_neon_umull (extract_high vec) (dupv64 scalar)) -->
14142// (aarch64_neon_umull (extract_high (v2i64 vec)))
14143// (extract_high (v2i64 (dup128 scalar)))))
14144//
14145static SDValue tryCombineLongOpWithDup(unsigned IID, SDNode *N,
14146 TargetLowering::DAGCombinerInfo &DCI,
14147 SelectionDAG &DAG) {
14148 if (DCI.isBeforeLegalizeOps())
14149 return SDValue();
14150
14151 SDValue LHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 0 : 1);
14152 SDValue RHS = N->getOperand((IID == Intrinsic::not_intrinsic) ? 1 : 2);
14153  assert(LHS.getValueType().is64BitVector() &&
14154         RHS.getValueType().is64BitVector() &&
14155         "unexpected shape for long operation");
14156
14157 // Either node could be a DUP, but it's not worth doing both of them (you'd
14158 // just as well use the non-high version) so look for a corresponding extract
14159 // operation on the other "wing".
14160 if (isEssentiallyExtractHighSubvector(LHS)) {
14161 RHS = tryExtendDUPToExtractHigh(RHS, DAG);
14162 if (!RHS.getNode())
14163 return SDValue();
14164 } else if (isEssentiallyExtractHighSubvector(RHS)) {
14165 LHS = tryExtendDUPToExtractHigh(LHS, DAG);
14166 if (!LHS.getNode())
14167 return SDValue();
14168 }
14169
14170 if (IID == Intrinsic::not_intrinsic)
14171 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), LHS, RHS);
14172
14173 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), N->getValueType(0),
14174 N->getOperand(0), LHS, RHS);
14175}
14176
14177static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
14178 MVT ElemTy = N->getSimpleValueType(0).getScalarType();
14179 unsigned ElemBits = ElemTy.getSizeInBits();
14180
14181 int64_t ShiftAmount;
14182 if (BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(2))) {
14183 APInt SplatValue, SplatUndef;
14184 unsigned SplatBitSize;
14185 bool HasAnyUndefs;
14186 if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
14187 HasAnyUndefs, ElemBits) ||
14188 SplatBitSize != ElemBits)
14189 return SDValue();
14190
14191 ShiftAmount = SplatValue.getSExtValue();
14192 } else if (ConstantSDNode *CVN = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
14193 ShiftAmount = CVN->getSExtValue();
14194 } else
14195 return SDValue();
14196
14197 unsigned Opcode;
14198 bool IsRightShift;
14199 switch (IID) {
14200 default:
14201    llvm_unreachable("Unknown shift intrinsic");
14202 case Intrinsic::aarch64_neon_sqshl:
14203 Opcode = AArch64ISD::SQSHL_I;
14204 IsRightShift = false;
14205 break;
14206 case Intrinsic::aarch64_neon_uqshl:
14207 Opcode = AArch64ISD::UQSHL_I;
14208 IsRightShift = false;
14209 break;
14210 case Intrinsic::aarch64_neon_srshl:
14211 Opcode = AArch64ISD::SRSHR_I;
14212 IsRightShift = true;
14213 break;
14214 case Intrinsic::aarch64_neon_urshl:
14215 Opcode = AArch64ISD::URSHR_I;
14216 IsRightShift = true;
14217 break;
14218 case Intrinsic::aarch64_neon_sqshlu:
14219 Opcode = AArch64ISD::SQSHLU_I;
14220 IsRightShift = false;
14221 break;
14222 case Intrinsic::aarch64_neon_sshl:
14223 case Intrinsic::aarch64_neon_ushl:
14224 // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
14225 // left shift for positive shift amounts. Below, we only replace the current
14226    // node with VSHL if this condition is met.
14227 Opcode = AArch64ISD::VSHL;
14228 IsRightShift = false;
14229 break;
14230 }
14231
14232 if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
14233 SDLoc dl(N);
14234 return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14235 DAG.getConstant(-ShiftAmount, dl, MVT::i32));
14236 } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
14237 SDLoc dl(N);
14238 return DAG.getNode(Opcode, dl, N->getValueType(0), N->getOperand(1),
14239 DAG.getConstant(ShiftAmount, dl, MVT::i32));
14240 }
14241
14242 return SDValue();
14243}
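// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The constant-splat case handled above for sshl/ushl: a non-negative splat
// shift amount below the element width lets the variable-shift intrinsic be
// replaced by an immediate VSHL. Hypothetical function name; negative splats
// are not rewritten along this path.
#include <arm_neon.h>
int32x4_t shift_left_by_three(int32x4_t v) {
  return vshlq_s32(v, vdupq_n_s32(3)); // expected to select "shl v.4s, #3"
}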
14244
14245// The CRC32[BH] instructions ignore the high bits of their data operand. Since
14246// the intrinsics must be legal and take an i32, this means there's almost
14247// certainly going to be a zext in the DAG which we can eliminate.
14248static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
14249 SDValue AndN = N->getOperand(2);
14250 if (AndN.getOpcode() != ISD::AND)
14251 return SDValue();
14252
14253 ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(AndN.getOperand(1));
14254 if (!CMask || CMask->getZExtValue() != Mask)
14255 return SDValue();
14256
14257 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), MVT::i32,
14258 N->getOperand(0), N->getOperand(1), AndN.getOperand(0));
14259}
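// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// CRC32B/CRC32H ignore the high bits of their data operand, so an explicit mask
// (or the zext produced for a narrow argument) is redundant and is dropped by
// the combine above. Assumes the CRC extension is available; hypothetical name.
#include <arm_acle.h>
#include <cstdint>
uint32_t crc_byte(uint32_t crc, uint32_t data) {
  return __crc32b(crc, data & 0xff); // the "& 0xff" is expected to be folded away
}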
14260
14261static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
14262 SelectionDAG &DAG) {
14263 SDLoc dl(N);
14264 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
14265 DAG.getNode(Opc, dl,
14266 N->getOperand(1).getSimpleValueType(),
14267 N->getOperand(1)),
14268 DAG.getConstant(0, dl, MVT::i64));
14269}
14270
14271static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
14272 SDLoc DL(N);
14273 SDValue Op1 = N->getOperand(1);
14274 SDValue Op2 = N->getOperand(2);
14275 EVT ScalarTy = Op2.getValueType();
14276 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14277 ScalarTy = MVT::i32;
14278
14279  // Lower index_vector(base, step) to mul(step_vector(1), splat(step)) + splat(base).
14280 SDValue StepVector = DAG.getStepVector(DL, N->getValueType(0));
14281 SDValue Step = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op2);
14282 SDValue Mul = DAG.getNode(ISD::MUL, DL, N->getValueType(0), StepVector, Step);
14283 SDValue Base = DAG.getNode(ISD::SPLAT_VECTOR, DL, N->getValueType(0), Op1);
14284 return DAG.getNode(ISD::ADD, DL, N->getValueType(0), Mul, Base);
14285}
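// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// The lowering above, phrased with ACLE intrinsics: INDEX(base, step) produces
// the same value as step * {0, 1, 2, ...} + splat(base). Assumes SVE is
// available; hypothetical function name.
#include <arm_sve.h>
svint32_t index_by_hand(int32_t base, int32_t step) {
  // Equivalent to svindex_s32(base, step).
  svbool_t all = svptrue_b32();
  svint32_t lane_ids = svindex_s32(0, 1); // step_vector: {0, 1, 2, ...}
  return svadd_s32_x(all, svmul_s32_x(all, lane_ids, svdup_n_s32(step)),
                     svdup_n_s32(base));
}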
14286
14287static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
14288 SDLoc dl(N);
14289 SDValue Scalar = N->getOperand(3);
14290 EVT ScalarTy = Scalar.getValueType();
14291
14292 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
14293 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
14294
14295 SDValue Passthru = N->getOperand(1);
14296 SDValue Pred = N->getOperand(2);
14297 return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
14298 Pred, Scalar, Passthru);
14299}
14300
14301static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
14302 SDLoc dl(N);
14303 LLVMContext &Ctx = *DAG.getContext();
14304 EVT VT = N->getValueType(0);
14305
14306  assert(VT.isScalableVector() && "Expected a scalable vector.");
14307
14308 // Current lowering only supports the SVE-ACLE types.
14309 if (VT.getSizeInBits().getKnownMinSize() != AArch64::SVEBitsPerBlock)
14310 return SDValue();
14311
14312 unsigned ElemSize = VT.getVectorElementType().getSizeInBits() / 8;
14313 unsigned ByteSize = VT.getSizeInBits().getKnownMinSize() / 8;
14314 EVT ByteVT =
14315 EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
14316
14317  // Convert everything to the domain of EXT (i.e. bytes).
14318 SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
14319 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
14320 SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
14321 DAG.getConstant(ElemSize, dl, MVT::i32));
14322
14323 SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
14324 return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
14325}
14326
14327static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
14328 TargetLowering::DAGCombinerInfo &DCI,
14329 SelectionDAG &DAG) {
14330 if (DCI.isBeforeLegalize())
14331 return SDValue();
14332
14333 SDValue Comparator = N->getOperand(3);
14334 if (Comparator.getOpcode() == AArch64ISD::DUP ||
14335 Comparator.getOpcode() == ISD::SPLAT_VECTOR) {
14336 unsigned IID = getIntrinsicID(N);
14337 EVT VT = N->getValueType(0);
14338 EVT CmpVT = N->getOperand(2).getValueType();
14339 SDValue Pred = N->getOperand(1);
14340 SDValue Imm;
14341 SDLoc DL(N);
14342
14343 switch (IID) {
14344 default:
14345      llvm_unreachable("Called with wrong intrinsic!");
14346 break;
14347
14348 // Signed comparisons
14349 case Intrinsic::aarch64_sve_cmpeq_wide:
14350 case Intrinsic::aarch64_sve_cmpne_wide:
14351 case Intrinsic::aarch64_sve_cmpge_wide:
14352 case Intrinsic::aarch64_sve_cmpgt_wide:
14353 case Intrinsic::aarch64_sve_cmplt_wide:
14354 case Intrinsic::aarch64_sve_cmple_wide: {
14355 if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14356 int64_t ImmVal = CN->getSExtValue();
14357 if (ImmVal >= -16 && ImmVal <= 15)
14358 Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14359 else
14360 return SDValue();
14361 }
14362 break;
14363 }
14364 // Unsigned comparisons
14365 case Intrinsic::aarch64_sve_cmphs_wide:
14366 case Intrinsic::aarch64_sve_cmphi_wide:
14367 case Intrinsic::aarch64_sve_cmplo_wide:
14368 case Intrinsic::aarch64_sve_cmpls_wide: {
14369 if (auto *CN = dyn_cast<ConstantSDNode>(Comparator.getOperand(0))) {
14370 uint64_t ImmVal = CN->getZExtValue();
14371 if (ImmVal <= 127)
14372 Imm = DAG.getConstant(ImmVal, DL, MVT::i32);
14373 else
14374 return SDValue();
14375 }
14376 break;
14377 }
14378 }
14379
14380 if (!Imm)
14381 return SDValue();
14382
14383 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, DL, CmpVT, Imm);
14384 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, VT, Pred,
14385 N->getOperand(2), Splat, DAG.getCondCode(CC));
14386 }
14387
14388 return SDValue();
14389}
14390
14391static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
14392 AArch64CC::CondCode Cond) {
14393 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14394
14395 SDLoc DL(Op);
14396  assert(Op.getValueType().isScalableVector() &&
14397         TLI.isTypeLegal(Op.getValueType()) &&
14398         "Expected legal scalable vector type!");
14399
14400 // Ensure target specific opcodes are using legal type.
14401 EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
14402 SDValue TVal = DAG.getConstant(1, DL, OutVT);
14403 SDValue FVal = DAG.getConstant(0, DL, OutVT);
14404
14405 // Set condition code (CC) flags.
14406 SDValue Test = DAG.getNode(AArch64ISD::PTEST, DL, MVT::Other, Pg, Op);
14407
14408 // Convert CC to integer based on requested condition.
14409 // NOTE: Cond is inverted to promote CSEL's removal when it feeds a compare.
14410 SDValue CC = DAG.getConstant(getInvertedCondCode(Cond), DL, MVT::i32);
14411 SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, OutVT, FVal, TVal, CC, Test);
14412 return DAG.getZExtOrTrunc(Res, DL, VT);
14413}
14414
14415static SDValue combineSVEReductionInt(SDNode *N, unsigned Opc,
14416 SelectionDAG &DAG) {
14417 SDLoc DL(N);
14418
14419 SDValue Pred = N->getOperand(1);
14420 SDValue VecToReduce = N->getOperand(2);
14421
14422 // NOTE: The integer reduction's result type is not always linked to the
14423 // operand's element type so we construct it from the intrinsic's result type.
14424 EVT ReduceVT = getPackedSVEVectorVT(N->getValueType(0));
14425 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14426
14427 // SVE reductions set the whole vector register with the first element
14428 // containing the reduction result, which we'll now extract.
14429 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14430 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14431 Zero);
14432}
14433
14434static SDValue combineSVEReductionFP(SDNode *N, unsigned Opc,
14435 SelectionDAG &DAG) {
14436 SDLoc DL(N);
14437
14438 SDValue Pred = N->getOperand(1);
14439 SDValue VecToReduce = N->getOperand(2);
14440
14441 EVT ReduceVT = VecToReduce.getValueType();
14442 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, VecToReduce);
14443
14444 // SVE reductions set the whole vector register with the first element
14445 // containing the reduction result, which we'll now extract.
14446 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14447 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14448 Zero);
14449}
14450
14451static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
14452 SelectionDAG &DAG) {
14453 SDLoc DL(N);
14454
14455 SDValue Pred = N->getOperand(1);
14456 SDValue InitVal = N->getOperand(2);
14457 SDValue VecToReduce = N->getOperand(3);
14458 EVT ReduceVT = VecToReduce.getValueType();
14459
14460 // Ordered reductions use the first lane of the result vector as the
14461 // reduction's initial value.
14462 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
14463 InitVal = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ReduceVT,
14464 DAG.getUNDEF(ReduceVT), InitVal, Zero);
14465
14466 SDValue Reduce = DAG.getNode(Opc, DL, ReduceVT, Pred, InitVal, VecToReduce);
14467
14468 // SVE reductions set the whole vector register with the first element
14469 // containing the reduction result, which we'll now extract.
14470 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0), Reduce,
14471 Zero);
14472}
14473
14474static bool isAllActivePredicate(SDValue N) {
14475 unsigned NumElts = N.getValueType().getVectorMinNumElements();
14476
14477 // Look through cast.
14478 while (N.getOpcode() == AArch64ISD::REINTERPRET_CAST) {
14479 N = N.getOperand(0);
14480 // When reinterpreting from a type with fewer elements the "new" elements
14481 // are not active, so bail if they're likely to be used.
14482 if (N.getValueType().getVectorMinNumElements() < NumElts)
14483 return false;
14484 }
14485
14486 // "ptrue p.<ty>, all" can be considered all active when <ty> is the same size
14487 // or smaller than the implicit element type represented by N.
14488 // NOTE: A larger element count implies a smaller element type.
14489 if (N.getOpcode() == AArch64ISD::PTRUE &&
14490 N.getConstantOperandVal(0) == AArch64SVEPredPattern::all)
14491 return N.getValueType().getVectorMinNumElements() >= NumElts;
14492
14493 return false;
14494}
14495
14496// If a merged operation has no inactive lanes we can relax it to a predicated
14497// or unpredicated operation, which potentially allows better isel (perhaps
14498// using immediate forms) or relaxing register reuse requirements.
14499static SDValue convertMergedOpToPredOp(SDNode *N, unsigned Opc,
14500 SelectionDAG &DAG,
14501 bool UnpredOp = false) {
14502  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
14503  assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
14504 SDValue Pg = N->getOperand(1);
14505
14506 // ISD way to specify an all active predicate.
14507 if (isAllActivePredicate(Pg)) {
14508 if (UnpredOp)
14509 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), N->getOperand(2),
14510 N->getOperand(3));
14511 else
14512 return DAG.getNode(Opc, SDLoc(N), N->getValueType(0), Pg,
14513 N->getOperand(2), N->getOperand(3));
14514 }
14515
14516 // FUTURE: SplatVector(true)
14517 return SDValue();
14518}
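// --- Illustrative sketch (editorial addition; not part of AArch64ISelLowering.cpp) ---
// Example of the relaxation above: a merged SVE add whose governing predicate is
// "ptrue ... all" has no inactive lanes, so it can be treated as an ordinary
// unpredicated ADD. Assumes SVE is available; hypothetical function name.
#include <arm_sve.h>
svint32_t add_all_active(svint32_t a, svint32_t b) {
  return svadd_s32_m(svptrue_b32(), a, b); // expected to lower to a plain "add z, z, z"
}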
14519
14520static SDValue performIntrinsicCombine(SDNode *N,
14521 TargetLowering::DAGCombinerInfo &DCI,
14522 const AArch64Subtarget *Subtarget) {
14523 SelectionDAG &DAG = DCI.DAG;
14524 unsigned IID = getIntrinsicID(N);
14525 switch (IID) {
14526 default:
14527 break;
14528 case Intrinsic::aarch64_neon_vcvtfxs2fp:
14529 case Intrinsic::aarch64_neon_vcvtfxu2fp:
14530 return tryCombineFixedPointConvert(N, DCI, DAG);
14531 case Intrinsic::aarch64_neon_saddv:
14532 return combineAcrossLanesIntrinsic(AArch64ISD::SADDV, N, DAG);
14533 case Intrinsic::aarch64_neon_uaddv:
14534 return combineAcrossLanesIntrinsic(AArch64ISD::UADDV, N, DAG);
14535 case Intrinsic::aarch64_neon_sminv:
14536 return combineAcrossLanesIntrinsic(AArch64ISD::SMINV, N, DAG);
14537 case Intrinsic::aarch64_neon_uminv:
14538 return combineAcrossLanesIntrinsic(AArch64ISD::UMINV, N, DAG);
14539 case Intrinsic::aarch64_neon_smaxv:
14540 return combineAcrossLanesIntrinsic(AArch64ISD::SMAXV, N, DAG);
14541 case Intrinsic::aarch64_neon_umaxv:
14542 return combineAcrossLanesIntrinsic(AArch64ISD::UMAXV, N, DAG);
14543 case Intrinsic::aarch64_neon_fmax:
14544 return DAG.getNode(ISD::FMAXIMUM, SDLoc(N), N->getValueType(0),
14545 N->getOperand(1), N->getOperand(2));
14546 case Intrinsic::aarch64_neon_fmin:
14547 return DAG.getNode(ISD::FMINIMUM, SDLoc(N), N->getValueType(0),
14548 N->getOperand(1), N->getOperand(2));
14549 case Intrinsic::aarch64_neon_fmaxnm:
14550 return DAG.getNode(ISD::FMAXNUM, SDLoc(N), N->getValueType(0),
14551 N->getOperand(1), N->getOperand(2));
14552 case Intrinsic::aarch64_neon_fminnm:
14553 return DAG.getNode(ISD::FMINNUM, SDLoc(N), N->getValueType(0),
14554 N->getOperand(1), N->getOperand(2));
14555 case Intrinsic::aarch64_neon_smull:
14556 case Intrinsic::aarch64_neon_umull:
14557 case Intrinsic::aarch64_neon_pmull:
14558 case Intrinsic::aarch64_neon_sqdmull:
14559 return tryCombineLongOpWithDup(IID, N, DCI, DAG);
14560 case Intrinsic::aarch64_neon_sqshl:
14561 case Intrinsic::aarch64_neon_uqshl:
14562 case Intrinsic::aarch64_neon_sqshlu:
14563 case Intrinsic::aarch64_neon_srshl:
14564 case Intrinsic::aarch64_neon_urshl:
14565 case Intrinsic::aarch64_neon_sshl:
14566 case Intrinsic::aarch64_neon_ushl:
14567 return tryCombineShiftImm(IID, N, DAG);
14568 case Intrinsic::aarch64_crc32b:
14569 case Intrinsic::aarch64_crc32cb:
14570 return tryCombineCRC32(0xff, N, DAG);
14571 case Intrinsic::aarch64_crc32h:
14572 case Intrinsic::aarch64_crc32ch:
14573 return tryCombineCRC32(0xffff, N, DAG);
14574 case Intrinsic::aarch64_sve_saddv:
14575 // There is no i64 version of SADDV because the sign is irrelevant.
14576 if (N->getOperand(2)->getValueType(0).getVectorElementType() == MVT::i64)
14577 return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14578 else
14579 return combineSVEReductionInt(N, AArch64ISD::SADDV_PRED, DAG);
14580 case Intrinsic::aarch64_sve_uaddv:
14581 return combineSVEReductionInt(N, AArch64ISD::UADDV_PRED, DAG);
14582 case Intrinsic::aarch64_sve_smaxv:
14583 return combineSVEReductionInt(N, AArch64ISD::SMAXV_PRED, DAG);
14584 case Intrinsic::aarch64_sve_umaxv:
14585 return combineSVEReductionInt(N, AArch64ISD::UMAXV_PRED, DAG);
14586 case Intrinsic::aarch64_sve_sminv:
14587 return combineSVEReductionInt(N, AArch64ISD::SMINV_PRED, DAG);
14588 case Intrinsic::aarch64_sve_uminv:
14589 return combineSVEReductionInt(N, AArch64ISD::UMINV_PRED, DAG);
14590 case Intrinsic::aarch64_sve_orv:
14591 return combineSVEReductionInt(N, AArch64ISD::ORV_PRED, DAG);
14592 case Intrinsic::aarch64_sve_eorv:
14593 return combineSVEReductionInt(N, AArch64ISD::EORV_PRED, DAG);
14594 case Intrinsic::aarch64_sve_andv:
14595 return combineSVEReductionInt(N, AArch64ISD::ANDV_PRED, DAG);
14596 case Intrinsic::aarch64_sve_index:
14597 return LowerSVEIntrinsicIndex(N, DAG);
14598 case Intrinsic::aarch64_sve_dup:
14599 return LowerSVEIntrinsicDUP(N, DAG);
14600 case Intrinsic::aarch64_sve_dup_x:
14601 return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), N->getValueType(0),
14602 N->getOperand(1));
14603 case Intrinsic::aarch64_sve_ext:
14604 return LowerSVEIntrinsicEXT(N, DAG);
14605 case Intrinsic::aarch64_sve_mul:
14606 return convertMergedOpToPredOp(N, AArch64ISD::MUL_PRED, DAG);
14607 case Intrinsic::aarch64_sve_smulh:
14608 return convertMergedOpToPredOp(N, AArch64ISD::MULHS_PRED, DAG);
14609 case Intrinsic::aarch64_sve_umulh:
14610 return convertMergedOpToPredOp(N, AArch64ISD::MULHU_PRED, DAG);
14611 case Intrinsic::aarch64_sve_smin:
14612 return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
14613 case Intrinsic::aarch64_sve_umin:
14614 return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
14615 case Intrinsic::aarch64_sve_smax:
14616 return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
14617 case Intrinsic::aarch64_sve_umax:
14618 return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
14619 case Intrinsic::aarch64_sve_lsl:
14620 return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
14621 case Intrinsic::aarch64_sve_lsr:
14622 return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
14623 case Intrinsic::aarch64_sve_asr:
14624 return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
14625 case Intrinsic::aarch64_sve_fadd:
14626 return convertMergedOpToPredOp(N, AArch64ISD::FADD_PRED, DAG);
14627 case Intrinsic::aarch64_sve_fsub:
14628 return convertMergedOpToPredOp(N, AArch64ISD::FSUB_PRED, DAG);
14629 case Intrinsic::aarch64_sve_fmul:
14630 return convertMergedOpToPredOp(N, AArch64ISD::FMUL_PRED, DAG);
14631 case Intrinsic::aarch64_sve_add:
14632 return convertMergedOpToPredOp(N, ISD::ADD, DAG, true);
14633 case Intrinsic::aarch64_sve_sub:
14634 return convertMergedOpToPredOp(N, ISD::SUB, DAG, true);
14635 case Intrinsic::aarch64_sve_and:
14636 return convertMergedOpToPredOp(N, ISD::AND, DAG, true);
14637 case Intrinsic::aarch64_sve_bic:
14638 return convertMergedOpToPredOp(N, AArch64ISD::BIC, DAG, true);
14639 case Intrinsic::aarch64_sve_eor:
14640 return convertMergedOpToPredOp(N, ISD::XOR, DAG, true);
14641 case Intrinsic::aarch64_sve_orr:
14642 return convertMergedOpToPredOp(N, ISD::OR, DAG, true);
14643 case Intrinsic::aarch64_sve_sqadd:
14644 return convertMergedOpToPredOp(N, ISD::SADDSAT, DAG, true);
14645 case Intrinsic::aarch64_sve_sqsub:
14646 return convertMergedOpToPredOp(N, ISD::SSUBSAT, DAG, true);
14647 case Intrinsic::aarch64_sve_uqadd:
14648 return convertMergedOpToPredOp(N, ISD::UADDSAT, DAG, true);
14649 case Intrinsic::aarch64_sve_uqsub:
14650 return convertMergedOpToPredOp(N, ISD::USUBSAT, DAG, true);
14651 case Intrinsic::aarch64_sve_sqadd_x:
14652 return DAG.getNode(ISD::SADDSAT, SDLoc(N), N->getValueType(0),
14653 N->getOperand(1), N->getOperand(2));
14654 case Intrinsic::aarch64_sve_sqsub_x:
14655 return DAG.getNode(ISD::SSUBSAT, SDLoc(N), N->getValueType(0),
14656 N->getOperand(1), N->getOperand(2));
14657 case Intrinsic::aarch64_sve_uqadd_x:
14658 return DAG.getNode(ISD::UADDSAT, SDLoc(N), N->getValueType(0),
14659 N->getOperand(1), N->getOperand(2));
14660 case Intrinsic::aarch64_sve_uqsub_x:
14661 return DAG.getNode(ISD::USUBSAT, SDLoc(N), N->getValueType(0),
14662 N->getOperand(1), N->getOperand(2));
14663 case Intrinsic::aarch64_sve_cmphs:
14664 if (!N->getOperand(2).getValueType().isFloatingPoint())
14665 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14666 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14667 N->getOperand(3), DAG.getCondCode(ISD::SETUGE));
14668 break;
14669 case Intrinsic::aarch64_sve_cmphi:
14670 if (!N->getOperand(2).getValueType().isFloatingPoint())
14671 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14672 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14673 N->getOperand(3), DAG.getCondCode(ISD::SETUGT));
14674 break;
14675 case Intrinsic::aarch64_sve_fcmpge:
14676 case Intrinsic::aarch64_sve_cmpge:
14677 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14678 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14679 N->getOperand(3), DAG.getCondCode(ISD::SETGE));
14680 break;
14681 case Intrinsic::aarch64_sve_fcmpgt:
14682 case Intrinsic::aarch64_sve_cmpgt:
14683 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14684 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14685 N->getOperand(3), DAG.getCondCode(ISD::SETGT));
14686 break;
14687 case Intrinsic::aarch64_sve_fcmpeq:
14688 case Intrinsic::aarch64_sve_cmpeq:
14689 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14690 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14691 N->getOperand(3), DAG.getCondCode(ISD::SETEQ));
14692 break;
14693 case Intrinsic::aarch64_sve_fcmpne:
14694 case Intrinsic::aarch64_sve_cmpne:
14695 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14696 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14697 N->getOperand(3), DAG.getCondCode(ISD::SETNE));
14698 break;
14699 case Intrinsic::aarch64_sve_fcmpuo:
14700 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
14701 N->getValueType(0), N->getOperand(1), N->getOperand(2),
14702 N->getOperand(3), DAG.getCondCode(ISD::SETUO));
14703 break;
14704 case Intrinsic::aarch64_sve_fadda:
14705 return combineSVEReductionOrderedFP(N, AArch64ISD::FADDA_PRED, DAG);
14706 case Intrinsic::aarch64_sve_faddv:
14707 return combineSVEReductionFP(N, AArch64ISD::FADDV_PRED, DAG);
14708 case Intrinsic::aarch64_sve_fmaxnmv:
14709 return combineSVEReductionFP(N, AArch64ISD::FMAXNMV_PRED, DAG);
14710 case Intrinsic::aarch64_sve_fmaxv:
14711 return combineSVEReductionFP(N, AArch64ISD::FMAXV_PRED, DAG);
14712 case Intrinsic::aarch64_sve_fminnmv:
14713 return combineSVEReductionFP(N, AArch64ISD::FMINNMV_PRED, DAG);
14714 case Intrinsic::aarch64_sve_fminv:
14715 return combineSVEReductionFP(N, AArch64ISD::FMINV_PRED, DAG);
14716 case Intrinsic::aarch64_sve_sel:
14717 return DAG.getNode(ISD::VSELECT, SDLoc(N), N->getValueType(0),
14718 N->getOperand(1), N->getOperand(2), N->getOperand(3));
14719 case Intrinsic::aarch64_sve_cmpeq_wide:
14720 return tryConvertSVEWideCompare(N, ISD::SETEQ, DCI, DAG);
14721 case Intrinsic::aarch64_sve_cmpne_wide:
14722 return tryConvertSVEWideCompare(N, ISD::SETNE, DCI, DAG);
14723 case Intrinsic::aarch64_sve_cmpge_wide:
14724 return tryConvertSVEWideCompare(N, ISD::SETGE, DCI, DAG);
14725 case Intrinsic::aarch64_sve_cmpgt_wide:
14726 return tryConvertSVEWideCompare(N, ISD::SETGT, DCI, DAG);
14727 case Intrinsic::aarch64_sve_cmplt_wide:
14728 return tryConvertSVEWideCompare(N, ISD::SETLT, DCI, DAG);
14729 case Intrinsic::aarch64_sve_cmple_wide:
14730 return tryConvertSVEWideCompare(N, ISD::SETLE, DCI, DAG);
14731 case Intrinsic::aarch64_sve_cmphs_wide:
14732 return tryConvertSVEWideCompare(N, ISD::SETUGE, DCI, DAG);
14733 case Intrinsic::aarch64_sve_cmphi_wide:
14734 return tryConvertSVEWideCompare(N, ISD::SETUGT, DCI, DAG);
14735 case Intrinsic::aarch64_sve_cmplo_wide:
14736 return tryConvertSVEWideCompare(N, ISD::SETULT, DCI, DAG);
14737 case Intrinsic::aarch64_sve_cmpls_wide:
14738 return tryConvertSVEWideCompare(N, ISD::SETULE, DCI, DAG);
14739 case Intrinsic::aarch64_sve_ptest_any:
14740 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14741 AArch64CC::ANY_ACTIVE);
14742 case Intrinsic::aarch64_sve_ptest_first:
14743 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14744 AArch64CC::FIRST_ACTIVE);
14745 case Intrinsic::aarch64_sve_ptest_last:
14746 return getPTest(DAG, N->getValueType(0), N->getOperand(1), N->getOperand(2),
14747 AArch64CC::LAST_ACTIVE);
14748 }
14749 return SDValue();
14750}
14751
14752static SDValue performExtendCombine(SDNode *N,
14753 TargetLowering::DAGCombinerInfo &DCI,
14754 SelectionDAG &DAG) {
14755 // If we see something like (zext (sabd (extract_high ...), (DUP ...))) then
14756 // we can convert that DUP into another extract_high (of a bigger DUP), which
14757 // helps the backend to decide that an sabdl2 would be useful, saving a real
14758 // extract_high operation.
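// A minimal sketch of the intended fold (assuming a v8i16 input X and a
// scalar Y):
//   (zext (abds (extract_subvector X, 4), (dup Y)))
// With the DUP rewritten as the high half of a wider DUP, instruction
// selection can form a single sabdl2 instead of a separate extract of the
// high lanes.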
14759 if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ZERO_EXTEND &&
14760 (N->getOperand(0).getOpcode() == ISD::ABDU ||
14761 N->getOperand(0).getOpcode() == ISD::ABDS)) {
14762 SDNode *ABDNode = N->getOperand(0).getNode();
14763 SDValue NewABD =
14764 tryCombineLongOpWithDup(Intrinsic::not_intrinsic, ABDNode, DCI, DAG);
14765 if (!NewABD.getNode())
14766 return SDValue();
14767
14768 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), NewABD);
14769 }
14770 return SDValue();
14771}
14772
14773static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
14774 SDValue SplatVal, unsigned NumVecElts) {
14775 assert(!St.isTruncatingStore() && "cannot split truncating vector store");
14776 unsigned OrigAlignment = St.getAlignment();
14777 unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
14778
14779 // Create scalar stores. This is at least as good as the code sequence for a
14780 // split unaligned store which is a dup.s, ext.b, and two stores.
14781 // Most of the time the three stores should be replaced by store pair
14782 // instructions (stp).
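// A sketch, assuming a v2i64 splat of a value X stored to [x0]: the code below
// emits scalar stores of X at offsets 0 and 8, which the load/store optimizer
// can later fuse into a single stp.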
14783 SDLoc DL(&St);
14784 SDValue BasePtr = St.getBasePtr();
14785 uint64_t BaseOffset = 0;
14786
14787 const MachinePointerInfo &PtrInfo = St.getPointerInfo();
14788 SDValue NewST1 =
14789 DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, PtrInfo,
14790 OrigAlignment, St.getMemOperand()->getFlags());
14791
14792 // As this is in ISel, we will not merge this add, which may degrade results.
14793 if (BasePtr->getOpcode() == ISD::ADD &&
14794 isa<ConstantSDNode>(BasePtr->getOperand(1))) {
14795 BaseOffset = cast<ConstantSDNode>(BasePtr->getOperand(1))->getSExtValue();
14796 BasePtr = BasePtr->getOperand(0);
14797 }
14798
14799 unsigned Offset = EltOffset;
14800 while (--NumVecElts) {
14801 unsigned Alignment = MinAlign(OrigAlignment, Offset);
14802 SDValue OffsetPtr =
14803 DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
14804 DAG.getConstant(BaseOffset + Offset, DL, MVT::i64));
14805 NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
14806 PtrInfo.getWithOffset(Offset), Alignment,
14807 St.getMemOperand()->getFlags());
14808 Offset += EltOffset;
14809 }
14810 return NewST1;
14811}
14812
14813// Returns an SVE type that ContentTy can be trivially sign or zero extended
14814// into.
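// For example, per the switch below, nxv2i8, nxv2i16 and nxv2i32 are all held
// in an nxv2i64 container, since each element occupies a 64-bit lane that can
// hold the extended value.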
14815static MVT getSVEContainerType(EVT ContentTy) {
14816 assert(ContentTy.isSimple() && "No SVE containers for extended types");
14817
14818 switch (ContentTy.getSimpleVT().SimpleTy) {
14819 default:
14820 llvm_unreachable("No known SVE container for this MVT type");
14821 case MVT::nxv2i8:
14822 case MVT::nxv2i16:
14823 case MVT::nxv2i32:
14824 case MVT::nxv2i64:
14825 case MVT::nxv2f32:
14826 case MVT::nxv2f64:
14827 return MVT::nxv2i64;
14828 case MVT::nxv4i8:
14829 case MVT::nxv4i16:
14830 case MVT::nxv4i32:
14831 case MVT::nxv4f32:
14832 return MVT::nxv4i32;
14833 case MVT::nxv8i8:
14834 case MVT::nxv8i16:
14835 case MVT::nxv8f16:
14836 case MVT::nxv8bf16:
14837 return MVT::nxv8i16;
14838 case MVT::nxv16i8:
14839 return MVT::nxv16i8;
14840 }
14841}
14842
14843static SDValue performLD1Combine(SDNode *N, SelectionDAG &DAG, unsigned Opc) {
14844 SDLoc DL(N);
14845 EVT VT = N->getValueType(0);
14846
14847 if (VT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
14848 return SDValue();
14849
14850 EVT ContainerVT = VT;
14851 if (ContainerVT.isInteger())
14852 ContainerVT = getSVEContainerType(ContainerVT);
14853
14854 SDVTList VTs = DAG.getVTList(ContainerVT, MVT::Other);
14855 SDValue Ops[] = { N->getOperand(0), // Chain
14856 N->getOperand(2), // Pg
14857 N->getOperand(3), // Base
14858 DAG.getValueType(VT) };
14859
14860 SDValue Load = DAG.getNode(Opc, DL, VTs, Ops);
14861 SDValue LoadChain = SDValue(Load.getNode(), 1);
14862
14863 if (ContainerVT.isInteger() && (VT != ContainerVT))
14864 Load = DAG.getNode(ISD::TRUNCATE, DL, VT, Load.getValue(0));
14865
14866 return DAG.getMergeValues({ Load, LoadChain }, DL);
14867}
14868
14869static SDValue performLDNT1Combine(SDNode *N, SelectionDAG &DAG) {
14870 SDLoc DL(N);
14871 EVT VT = N->getValueType(0);
14872 EVT PtrTy = N->getOperand(3).getValueType();
14873
14874 if (VT == MVT::nxv8bf16 &&
14875 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14876 return SDValue();
14877
14878 EVT LoadVT = VT;
14879 if (VT.isFloatingPoint())
14880 LoadVT = VT.changeTypeToInteger();
14881
14882 auto *MINode = cast<MemIntrinsicSDNode>(N);
14883 SDValue PassThru = DAG.getConstant(0, DL, LoadVT);
14884 SDValue L = DAG.getMaskedLoad(LoadVT, DL, MINode->getChain(),
14885 MINode->getOperand(3), DAG.getUNDEF(PtrTy),
14886 MINode->getOperand(2), PassThru,
14887 MINode->getMemoryVT(), MINode->getMemOperand(),
14888 ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
14889
14890 if (VT.isFloatingPoint()) {
14891 SDValue Ops[] = { DAG.getNode(ISD::BITCAST, DL, VT, L), L.getValue(1) };
14892 return DAG.getMergeValues(Ops, DL);
14893 }
14894
14895 return L;
14896}
14897
14898template <unsigned Opcode>
14899static SDValue performLD1ReplicateCombine(SDNode *N, SelectionDAG &DAG) {
14900 static_assert(Opcode == AArch64ISD::LD1RQ_MERGE_ZERO ||
14901 Opcode == AArch64ISD::LD1RO_MERGE_ZERO,
14902 "Unsupported opcode.");
14903 SDLoc DL(N);
14904 EVT VT = N->getValueType(0);
14905 if (VT == MVT::nxv8bf16 &&
14906 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14907 return SDValue();
14908
14909 EVT LoadVT = VT;
14910 if (VT.isFloatingPoint())
14911 LoadVT = VT.changeTypeToInteger();
14912
14913 SDValue Ops[] = {N->getOperand(0), N->getOperand(2), N->getOperand(3)};
14914 SDValue Load = DAG.getNode(Opcode, DL, {LoadVT, MVT::Other}, Ops);
14915 SDValue LoadChain = SDValue(Load.getNode(), 1);
14916
14917 if (VT.isFloatingPoint())
14918 Load = DAG.getNode(ISD::BITCAST, DL, VT, Load.getValue(0));
14919
14920 return DAG.getMergeValues({Load, LoadChain}, DL);
14921}
14922
14923static SDValue performST1Combine(SDNode *N, SelectionDAG &DAG) {
14924 SDLoc DL(N);
14925 SDValue Data = N->getOperand(2);
14926 EVT DataVT = Data.getValueType();
14927 EVT HwSrcVt = getSVEContainerType(DataVT);
14928 SDValue InputVT = DAG.getValueType(DataVT);
14929
14930 if (DataVT == MVT::nxv8bf16 &&
14931 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14932 return SDValue();
14933
14934 if (DataVT.isFloatingPoint())
14935 InputVT = DAG.getValueType(HwSrcVt);
14936
14937 SDValue SrcNew;
14938 if (Data.getValueType().isFloatingPoint())
14939 SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Data);
14940 else
14941 SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Data);
14942
14943 SDValue Ops[] = { N->getOperand(0), // Chain
14944 SrcNew,
14945 N->getOperand(4), // Base
14946 N->getOperand(3), // Pg
14947 InputVT
14948 };
14949
14950 return DAG.getNode(AArch64ISD::ST1_PRED, DL, N->getValueType(0), Ops);
14951}
14952
14953static SDValue performSTNT1Combine(SDNode *N, SelectionDAG &DAG) {
14954 SDLoc DL(N);
14955
14956 SDValue Data = N->getOperand(2);
14957 EVT DataVT = Data.getValueType();
14958 EVT PtrTy = N->getOperand(4).getValueType();
14959
14960 if (DataVT == MVT::nxv8bf16 &&
14961 !static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasBF16())
14962 return SDValue();
14963
14964 if (DataVT.isFloatingPoint())
14965 Data = DAG.getNode(ISD::BITCAST, DL, DataVT.changeTypeToInteger(), Data);
14966
14967 auto *MINode = cast<MemIntrinsicSDNode>(N);
14968 return DAG.getMaskedStore(MINode->getChain(), DL, Data, MINode->getOperand(4),
14969 DAG.getUNDEF(PtrTy), MINode->getOperand(3),
14970 MINode->getMemoryVT(), MINode->getMemOperand(),
14971 ISD::UNINDEXED, false, false);
14972}
14973
14974/// Replace a vector store of a splat of zeros with scalar stores of WZR/XZR. The
14975/// load store optimizer pass will merge them into store-pair stores. This should
14976/// be better than a movi to create the vector zero followed by a vector store
14977/// if the zero constant is not re-used, since one instruction and one register
14978/// live range will be removed.
14979///
14980/// For example, the final generated code should be:
14981///
14982/// stp xzr, xzr, [x0]
14983///
14984/// instead of:
14985///
14986/// movi v0.2d, #0
14987/// str q0, [x0]
14988///
14989static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
14990 SDValue StVal = St.getValue();
14991 EVT VT = StVal.getValueType();
14992
14993 // Avoid scalarizing zero splat stores for scalable vectors.
14994 if (VT.isScalableVector())
14995 return SDValue();
14996
14997 // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
14998 // 2, 3 or 4 i32 elements.
14999 int NumVecElts = VT.getVectorNumElements();
15000 if (!(((NumVecElts == 2 || NumVecElts == 3) &&
15001 VT.getVectorElementType().getSizeInBits() == 64) ||
15002 ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
15003 VT.getVectorElementType().getSizeInBits() == 32)))
15004 return SDValue();
15005
15006 if (StVal.getOpcode() != ISD::BUILD_VECTOR)
15007 return SDValue();
15008
15009 // If the zero constant has more than one use then the vector store could be
15010 // better since the constant mov will be amortized and stp q instructions
15011 // should be able to be formed.
15012 if (!StVal.hasOneUse())
15013 return SDValue();
15014
15015 // If the store is truncating then it's going down to i16 or smaller, which
15016 // means it can be implemented in a single store anyway.
15017 if (St.isTruncatingStore())
15018 return SDValue();
15019
15020 // If the immediate offset of the address operand is too large for the stp
15021 // instruction, then bail out.
15022 if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
15023 int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
15024 if (Offset < -512 || Offset > 504)
15025 return SDValue();
15026 }
15027
15028 for (int I = 0; I < NumVecElts; ++I) {
15029 SDValue EltVal = StVal.getOperand(I);
15030 if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
15031 return SDValue();
15032 }
15033
15034 // Use a CopyFromReg WZR/XZR here to prevent
15035 // DAGCombiner::MergeConsecutiveStores from undoing this transformation.
15036 SDLoc DL(&St);
15037 unsigned ZeroReg;
15038 EVT ZeroVT;
15039 if (VT.getVectorElementType().getSizeInBits() == 32) {
15040 ZeroReg = AArch64::WZR;
15041 ZeroVT = MVT::i32;
15042 } else {
15043 ZeroReg = AArch64::XZR;
15044 ZeroVT = MVT::i64;
15045 }
15046 SDValue SplatVal =
15047 DAG.getCopyFromReg(DAG.getEntryNode(), DL, ZeroReg, ZeroVT);
15048 return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
15049}
15050
15051/// Replace a vector store of a splatted scalar with scalar stores of the scalar
15052/// value. The load store optimizer pass will merge them into store-pair stores.
15053/// This has better performance than a splat of the scalar followed by a split
15054/// vector store. Even if the stores are not merged, it is four stores vs. a dup,
15055/// followed by an ext.b and two stores.
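/// A sketch of the intended result, assuming a splat of w1 stored as a v4i32
/// at [x0]:
///
/// str w1, [x0]
/// str w1, [x0, #4]
/// str w1, [x0, #8]
/// str w1, [x0, #12]
///
/// which the load/store optimizer is then expected to pair into two stp
/// instructions.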
15056static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
15057 SDValue StVal = St.getValue();
15058 EVT VT = StVal.getValueType();
15059
15060 // Don't replace floating point stores, they possibly won't be transformed to
15061 // stp because of the store pair suppress pass.
15062 if (VT.isFloatingPoint())
15063 return SDValue();
15064
15065 // We can express a splat as store pair(s) for 2 or 4 elements.
15066 unsigned NumVecElts = VT.getVectorNumElements();
15067 if (NumVecElts != 4 && NumVecElts != 2)
15068 return SDValue();
15069
15070 // If the store is truncating then it's going down to i16 or smaller, which
15071 // means it can be implemented in a single store anyway.
15072 if (St.isTruncatingStore())
15073 return SDValue();
15074
15075 // Check that this is a splat.
15076 // Make sure that each of the relevant vector element locations are inserted
15077 // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
15078 std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
15079 SDValue SplatVal;
15080 for (unsigned I = 0; I < NumVecElts; ++I) {
15081 // Check for insert vector elements.
15082 if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
15083 return SDValue();
15084
15085 // Check that same value is inserted at each vector element.
15086 if (I == 0)
15087 SplatVal = StVal.getOperand(1);
15088 else if (StVal.getOperand(1) != SplatVal)
15089 return SDValue();
15090
15091 // Check insert element index.
15092 ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
15093 if (!CIndex)
15094 return SDValue();
15095 uint64_t IndexVal = CIndex->getZExtValue();
15096 if (IndexVal >= NumVecElts)
15097 return SDValue();
15098 IndexNotInserted.reset(IndexVal);
15099
15100 StVal = StVal.getOperand(0);
15101 }
15102 // Check that all vector element locations were inserted to.
15103 if (IndexNotInserted.any())
15104 return SDValue();
15105
15106 return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
15107}
15108
15109static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
15110 SelectionDAG &DAG,
15111 const AArch64Subtarget *Subtarget) {
15112
15113 StoreSDNode *S = cast<StoreSDNode>(N);
15114 if (S->isVolatile() || S->isIndexed())
15115 return SDValue();
15116
15117 SDValue StVal = S->getValue();
15118 EVT VT = StVal.getValueType();
15119
15120 if (!VT.isFixedLengthVector())
15121 return SDValue();
15122
15123 // If we get a splat of zeros, convert this vector store to a store of
15124 // scalars. They will be merged into store pairs of xzr thereby removing one
15125 // instruction and one register.
15126 if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
15127 return ReplacedZeroSplat;
15128
15129 // FIXME: The logic for deciding if an unaligned store should be split should
15130 // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
15131 // a call to that function here.
15132
15133 if (!Subtarget->isMisaligned128StoreSlow())
15134 return SDValue();
15135
15136 // Don't split at -Oz.
15137 if (DAG.getMachineFunction().getFunction().hasMinSize())
15138 return SDValue();
15139
15140 // Don't split v2i64 vectors. Memcpy lowering produces those and splitting
15141 // those up regresses performance on micro-benchmarks and olden/bh.
15142 if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
15143 return SDValue();
15144
15145 // Split unaligned 16B stores. They are terrible for performance.
15146 // Don't split stores with alignment of 1 or 2. Code that uses clang vector
15147 // extensions can use this to mark that it does not want splitting to happen
15148 // (by underspecifying alignment to be 1 or 2). Furthermore, the chance of
15149 // eliminating alignment hazards is only 1 in 8 for alignment of 2.
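// A sketch, assuming a v4i32 store with 4-byte alignment: the code further
// below stores the two v2i32 halves separately at offsets 0 and 8, replacing
// one slow misaligned 128-bit store with two 64-bit stores.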
15150 if (VT.getSizeInBits() != 128 || S->getAlignment() >= 16 ||
15151 S->getAlignment() <= 2)
15152 return SDValue();
15153
15154 // If we get a splat of a scalar convert this vector store to a store of
15155 // scalars. They will be merged into store pairs thereby removing two
15156 // instructions.
15157 if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
15158 return ReplacedSplat;
15159
15160 SDLoc DL(S);
15161
15162 // Split VT into two.
15163 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
15164 unsigned NumElts = HalfVT.getVectorNumElements();
15165 SDValue SubVector0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
15166 DAG.getConstant(0, DL, MVT::i64));
15167 SDValue SubVector1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, StVal,
15168 DAG.getConstant(NumElts, DL, MVT::i64));
15169 SDValue BasePtr = S->getBasePtr();
15170 SDValue NewST1 =
15171 DAG.getStore(S->getChain(), DL, SubVector0, BasePtr, S->getPointerInfo(),
15172 S->getAlignment(), S->getMemOperand()->getFlags());
15173 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
15174 DAG.getConstant(8, DL, MVT::i64));
15175 return DAG.getStore(NewST1.getValue(0), DL, SubVector1, OffsetPtr,
15176 S->getPointerInfo(), S->getAlignment(),
15177 S->getMemOperand()->getFlags());
15178}
15179
15180static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
15181 assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
15182
15183 // splice(pg, op1, undef) -> op1
15184 if (N->getOperand(2).isUndef())
15185 return N->getOperand(1);
15186
15187 return SDValue();
15188}
15189
15190static SDValue performUzpCombine(SDNode *N, SelectionDAG &DAG) {
15191 SDLoc DL(N);
15192 SDValue Op0 = N->getOperand(0);
15193 SDValue Op1 = N->getOperand(1);
15194 EVT ResVT = N->getValueType(0);
15195
15196 // uzp1(unpklo(uzp1(x, y)), z) => uzp1(x, z)
15197 if (Op0.getOpcode() == AArch64ISD::UUNPKLO) {
15198 if (Op0.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15199 SDValue X = Op0.getOperand(0).getOperand(0);
15200 return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, X, Op1);
15201 }
15202 }
15203
15204 // uzp1(x, unpkhi(uzp1(y, z))) => uzp1(x, z)
15205 if (Op1.getOpcode() == AArch64ISD::UUNPKHI) {
15206 if (Op1.getOperand(0).getOpcode() == AArch64ISD::UZP1) {
15207 SDValue Z = Op1.getOperand(0).getOperand(1);
15208 return DAG.getNode(AArch64ISD::UZP1, DL, ResVT, Op0, Z);
15209 }
15210 }
15211
15212 return SDValue();
15213}
15214
15215static SDValue performGLD1Combine(SDNode *N, SelectionDAG &DAG) {
15216 unsigned Opc = N->getOpcode();
15217
15218 assert(((Opc >= AArch64ISD::GLD1_MERGE_ZERO && // unsigned gather loads
15219 Opc <= AArch64ISD::GLD1_IMM_MERGE_ZERO) ||
15220 (Opc >= AArch64ISD::GLD1S_MERGE_ZERO && // signed gather loads
15221 Opc <= AArch64ISD::GLD1S_IMM_MERGE_ZERO)) &&
15222 "Invalid opcode.");
15223
15224 const bool Scaled = Opc == AArch64ISD::GLD1_SCALED_MERGE_ZERO ||
15225 Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15226 const bool Signed = Opc == AArch64ISD::GLD1S_MERGE_ZERO ||
15227 Opc == AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
15228 const bool Extended = Opc == AArch64ISD::GLD1_SXTW_MERGE_ZERO ||
15229 Opc == AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO ||
15230 Opc == AArch64ISD::GLD1_UXTW_MERGE_ZERO ||
15231 Opc == AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO;
15232
15233 SDLoc DL(N);
15234 SDValue Chain = N->getOperand(0);
15235 SDValue Pg = N->getOperand(1);
15236 SDValue Base = N->getOperand(2);
15237 SDValue Offset = N->getOperand(3);
15238 SDValue Ty = N->getOperand(4);
15239
15240 EVT ResVT = N->getValueType(0);
15241
15242 const auto OffsetOpc = Offset.getOpcode();
15243 const bool OffsetIsZExt =
15244 OffsetOpc == AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU;
15245 const bool OffsetIsSExt =
15246 OffsetOpc == AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU;
15247
15248 // Fold sign/zero extensions of vector offsets into GLD1 nodes where possible.
15249 if (!Extended && (OffsetIsSExt || OffsetIsZExt)) {
15250 SDValue ExtPg = Offset.getOperand(0);
15251 VTSDNode *ExtFrom = cast<VTSDNode>(Offset.getOperand(2).getNode());
15252 EVT ExtFromEVT = ExtFrom->getVT().getVectorElementType();
15253
15254 // If the predicate for the sign- or zero-extended offset is the
15255 // same as the predicate used for this load and the sign-/zero-extension
15256 // was from a 32-bit type...
15257 if (ExtPg == Pg && ExtFromEVT == MVT::i32) {
15258 SDValue UnextendedOffset = Offset.getOperand(1);
15259
15260 unsigned NewOpc = getGatherVecOpcode(Scaled, OffsetIsSExt, true);
15261 if (Signed)
15262 NewOpc = getSignExtendedGatherOpcode(NewOpc);
15263
15264 return DAG.getNode(NewOpc, DL, {ResVT, MVT::Other},
15265 {Chain, Pg, Base, UnextendedOffset, Ty});
15266 }
15267 }
15268
15269 return SDValue();
15270}
15271
15272/// Optimize a vector shift instruction and its operand if shifted out
15273/// bits are not used.
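/// A sketch, assuming a VLSHR by 8 on 16-bit lanes: the low 8 bits of the
/// shift operand never reach the result, so SimplifyDemandedBits is queried
/// with those bits removed from the demanded mask.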
15274static SDValue performVectorShiftCombine(SDNode *N,
15275 const AArch64TargetLowering &TLI,
15276 TargetLowering::DAGCombinerInfo &DCI) {
15277 assert(N->getOpcode() == AArch64ISD::VASHR ||
15278 N->getOpcode() == AArch64ISD::VLSHR);
15279
15280 SDValue Op = N->getOperand(0);
15281 unsigned OpScalarSize = Op.getScalarValueSizeInBits();
15282
15283 unsigned ShiftImm = N->getConstantOperandVal(1);
15284 assert(OpScalarSize > ShiftImm && "Invalid shift imm");
15285
15286 APInt ShiftedOutBits = APInt::getLowBitsSet(OpScalarSize, ShiftImm);
15287 APInt DemandedMask = ~ShiftedOutBits;
15288
15289 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15290 return SDValue(N, 0);
15291
15292 return SDValue();
15293}
15294
15295/// Target-specific DAG combine function for post-increment LD1 (lane) and
15296/// post-increment LD1R.
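/// A sketch of the pattern: a scalar load feeding an insert_vector_elt (or a
/// dup of the loaded value), whose address is separately incremented by the
/// element size, can be replaced by a single LD1LANEpost/LD1DUPpost node that
/// also produces the written-back address.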
15297static SDValue performPostLD1Combine(SDNode *N,
15298 TargetLowering::DAGCombinerInfo &DCI,
15299 bool IsLaneOp) {
15300 if (DCI.isBeforeLegalizeOps())
15301 return SDValue();
15302
15303 SelectionDAG &DAG = DCI.DAG;
15304 EVT VT = N->getValueType(0);
15305
15306 if (VT.isScalableVector())
15307 return SDValue();
15308
15309 unsigned LoadIdx = IsLaneOp ? 1 : 0;
15310 SDNode *LD = N->getOperand(LoadIdx).getNode();
15311 // If it is not a LOAD, we cannot do this combine.
15312 if (LD->getOpcode() != ISD::LOAD)
15313 return SDValue();
15314
15315 // The vector lane must be a constant in the LD1LANE opcode.
15316 SDValue Lane;
15317 if (IsLaneOp) {
15318 Lane = N->getOperand(2);
15319 auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
15320 if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
15321 return SDValue();
15322 }
15323
15324 LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
15325 EVT MemVT = LoadSDN->getMemoryVT();
15326 // Check if memory operand is the same type as the vector element.
15327 if (MemVT != VT.getVectorElementType())
15328 return SDValue();
15329
15330 // Check if there are other uses. If so, do not combine as it will introduce
15331 // an extra load.
15332 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end(); UI != UE;
15333 ++UI) {
15334 if (UI.getUse().getResNo() == 1) // Ignore uses of the chain result.
15335 continue;
15336 if (*UI != N)
15337 return SDValue();
15338 }
15339
15340 SDValue Addr = LD->getOperand(1);
15341 SDValue Vector = N->getOperand(0);
15342 // Search for a use of the address operand that is an increment.
15343 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), UE =
15344 Addr.getNode()->use_end(); UI != UE; ++UI) {
15345 SDNode *User = *UI;
15346 if (User->getOpcode() != ISD::ADD
15347 || UI.getUse().getResNo() != Addr.getResNo())
15348 continue;
15349
15350 // If the increment is a constant, it must match the memory ref size.
15351 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15352 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15353 uint32_t IncVal = CInc->getZExtValue();
15354 unsigned NumBytes = VT.getScalarSizeInBits() / 8;
15355 if (IncVal != NumBytes)
15356 continue;
15357 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15358 }
15359
15360 // To avoid cycle construction make sure that neither the load nor the add
15361 // are predecessors to each other or the Vector.
15362 SmallPtrSet<const SDNode *, 32> Visited;
15363 SmallVector<const SDNode *, 16> Worklist;
15364 Visited.insert(Addr.getNode());
15365 Worklist.push_back(User);
15366 Worklist.push_back(LD);
15367 Worklist.push_back(Vector.getNode());
15368 if (SDNode::hasPredecessorHelper(LD, Visited, Worklist) ||
15369 SDNode::hasPredecessorHelper(User, Visited, Worklist))
15370 continue;
15371
15372 SmallVector<SDValue, 8> Ops;
15373 Ops.push_back(LD->getOperand(0)); // Chain
15374 if (IsLaneOp) {
15375 Ops.push_back(Vector); // The vector to be inserted
15376 Ops.push_back(Lane); // The lane to be inserted in the vector
15377 }
15378 Ops.push_back(Addr);
15379 Ops.push_back(Inc);
15380
15381 EVT Tys[3] = { VT, MVT::i64, MVT::Other };
15382 SDVTList SDTys = DAG.getVTList(Tys);
15383 unsigned NewOp = IsLaneOp ? AArch64ISD::LD1LANEpost : AArch64ISD::LD1DUPpost;
15384 SDValue UpdN = DAG.getMemIntrinsicNode(NewOp, SDLoc(N), SDTys, Ops,
15385 MemVT,
15386 LoadSDN->getMemOperand());
15387
15388 // Update the uses.
15389 SDValue NewResults[] = {
15390 SDValue(LD, 0), // The result of load
15391 SDValue(UpdN.getNode(), 2) // Chain
15392 };
15393 DCI.CombineTo(LD, NewResults);
15394 DCI.CombineTo(N, SDValue(UpdN.getNode(), 0)); // Dup/Inserted Result
15395 DCI.CombineTo(User, SDValue(UpdN.getNode(), 1)); // Write back register
15396
15397 break;
15398 }
15399 return SDValue();
15400}
15401
15402/// Simplify ``Addr`` given that the top byte of it is ignored by HW during
15403/// address translation.
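/// A sketch of the idea: with top-byte-ignore only address bits [0, 56) take
/// part in translation, so the demanded mask below covers the low 56 bits and
/// SimplifyDemandedBits may, for instance, strip an AND that merely clears the
/// tag byte in bits [56, 64).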
15404static bool performTBISimplification(SDValue Addr,
15405 TargetLowering::DAGCombinerInfo &DCI,
15406 SelectionDAG &DAG) {
15407 APInt DemandedMask = APInt::getLowBitsSet(64, 56);
15408 KnownBits Known;
15409 TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
15410 !DCI.isBeforeLegalizeOps());
15411 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15412 if (TLI.SimplifyDemandedBits(Addr, DemandedMask, Known, TLO)) {
15413 DCI.CommitTargetLoweringOpt(TLO);
15414 return true;
15415 }
15416 return false;
15417}
15418
15419static SDValue foldTruncStoreOfExt(SelectionDAG &DAG, SDNode *N) {
15420 assert((N->getOpcode() == ISD::STORE || N->getOpcode() == ISD::MSTORE) &&
15421 "Expected STORE dag node in input!");
15422
15423 if (auto Store = dyn_cast<StoreSDNode>(N)) {
15424 if (!Store->isTruncatingStore() || Store->isIndexed())
15425 return SDValue();
15426 SDValue Ext = Store->getValue();
15427 auto ExtOpCode = Ext.getOpcode();
15428 if (ExtOpCode != ISD::ZERO_EXTEND && ExtOpCode != ISD::SIGN_EXTEND &&
15429 ExtOpCode != ISD::ANY_EXTEND)
15430 return SDValue();
15431 SDValue Orig = Ext->getOperand(0);
15432 if (Store->getMemoryVT() != Orig.getValueType())
15433 return SDValue();
15434 return DAG.getStore(Store->getChain(), SDLoc(Store), Orig,
15435 Store->getBasePtr(), Store->getMemOperand());
15436 }
15437
15438 return SDValue();
15439}
15440
15441static SDValue performSTORECombine(SDNode *N,
15442 TargetLowering::DAGCombinerInfo &DCI,
15443 SelectionDAG &DAG,
15444 const AArch64Subtarget *Subtarget) {
15445 if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
15446 return Split;
15447
15448 if (Subtarget->supportsAddressTopByteIgnored() &&
15449 performTBISimplification(N->getOperand(2), DCI, DAG))
15450 return SDValue(N, 0);
15451
15452 if (SDValue Store = foldTruncStoreOfExt(DAG, N))
15453 return Store;
15454
15455 return SDValue();
15456}
15457
15458/// Target-specific DAG combine function for NEON load/store intrinsics
15459/// to merge base address updates.
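/// A sketch, assuming an aarch64.neon.ld2 of two v4i32 vectors: if the base
/// address is also incremented by 32 bytes, the load and the add can be merged
/// into a single LD2post node that additionally returns the post-incremented
/// address.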
15460static SDValue performNEONPostLDSTCombine(SDNode *N,
15461 TargetLowering::DAGCombinerInfo &DCI,
15462 SelectionDAG &DAG) {
15463 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
15464 return SDValue();
15465
15466 unsigned AddrOpIdx = N->getNumOperands() - 1;
15467 SDValue Addr = N->getOperand(AddrOpIdx);
15468
15469 // Search for a use of the address operand that is an increment.
15470 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
15471 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
15472 SDNode *User = *UI;
15473 if (User->getOpcode() != ISD::ADD ||
15474 UI.getUse().getResNo() != Addr.getResNo())
15475 continue;
15476
15477 // Check that the add is independent of the load/store. Otherwise, folding
15478 // it would create a cycle.
15479 SmallPtrSet<const SDNode *, 32> Visited;
15480 SmallVector<const SDNode *, 16> Worklist;
15481 Visited.insert(Addr.getNode());
15482 Worklist.push_back(N);
15483 Worklist.push_back(User);
15484 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
15485 SDNode::hasPredecessorHelper(User, Visited, Worklist))
15486 continue;
15487
15488 // Find the new opcode for the updating load/store.
15489 bool IsStore = false;
15490 bool IsLaneOp = false;
15491 bool IsDupOp = false;
15492 unsigned NewOpc = 0;
15493 unsigned NumVecs = 0;
15494 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
15495 switch (IntNo) {
15496 default: llvm_unreachable("unexpected intrinsic for Neon base update");
15497 case Intrinsic::aarch64_neon_ld2: NewOpc = AArch64ISD::LD2post;
15498 NumVecs = 2; break;
15499 case Intrinsic::aarch64_neon_ld3: NewOpc = AArch64ISD::LD3post;
15500 NumVecs = 3; break;
15501 case Intrinsic::aarch64_neon_ld4: NewOpc = AArch64ISD::LD4post;
15502 NumVecs = 4; break;
15503 case Intrinsic::aarch64_neon_st2: NewOpc = AArch64ISD::ST2post;
15504 NumVecs = 2; IsStore = true; break;
15505 case Intrinsic::aarch64_neon_st3: NewOpc = AArch64ISD::ST3post;
15506 NumVecs = 3; IsStore = true; break;
15507 case Intrinsic::aarch64_neon_st4: NewOpc = AArch64ISD::ST4post;
15508 NumVecs = 4; IsStore = true; break;
15509 case Intrinsic::aarch64_neon_ld1x2: NewOpc = AArch64ISD::LD1x2post;
15510 NumVecs = 2; break;
15511 case Intrinsic::aarch64_neon_ld1x3: NewOpc = AArch64ISD::LD1x3post;
15512 NumVecs = 3; break;
15513 case Intrinsic::aarch64_neon_ld1x4: NewOpc = AArch64ISD::LD1x4post;
15514 NumVecs = 4; break;
15515 case Intrinsic::aarch64_neon_st1x2: NewOpc = AArch64ISD::ST1x2post;
15516 NumVecs = 2; IsStore = true; break;
15517 case Intrinsic::aarch64_neon_st1x3: NewOpc = AArch64ISD::ST1x3post;
15518 NumVecs = 3; IsStore = true; break;
15519 case Intrinsic::aarch64_neon_st1x4: NewOpc = AArch64ISD::ST1x4post;
15520 NumVecs = 4; IsStore = true; break;
15521 case Intrinsic::aarch64_neon_ld2r: NewOpc = AArch64ISD::LD2DUPpost;
15522 NumVecs = 2; IsDupOp = true; break;
15523 case Intrinsic::aarch64_neon_ld3r: NewOpc = AArch64ISD::LD3DUPpost;
15524 NumVecs = 3; IsDupOp = true; break;
15525 case Intrinsic::aarch64_neon_ld4r: NewOpc = AArch64ISD::LD4DUPpost;
15526 NumVecs = 4; IsDupOp = true; break;
15527 case Intrinsic::aarch64_neon_ld2lane: NewOpc = AArch64ISD::LD2LANEpost;
15528 NumVecs = 2; IsLaneOp = true; break;
15529 case Intrinsic::aarch64_neon_ld3lane: NewOpc = AArch64ISD::LD3LANEpost;
15530 NumVecs = 3; IsLaneOp = true; break;
15531 case Intrinsic::aarch64_neon_ld4lane: NewOpc = AArch64ISD::LD4LANEpost;
15532 NumVecs = 4; IsLaneOp = true; break;
15533 case Intrinsic::aarch64_neon_st2lane: NewOpc = AArch64ISD::ST2LANEpost;
15534 NumVecs = 2; IsStore = true; IsLaneOp = true; break;
15535 case Intrinsic::aarch64_neon_st3lane: NewOpc = AArch64ISD::ST3LANEpost;
15536 NumVecs = 3; IsStore = true; IsLaneOp = true; break;
15537 case Intrinsic::aarch64_neon_st4lane: NewOpc = AArch64ISD::ST4LANEpost;
15538 NumVecs = 4; IsStore = true; IsLaneOp = true; break;
15539 }
15540
15541 EVT VecTy;
15542 if (IsStore)
15543 VecTy = N->getOperand(2).getValueType();
15544 else
15545 VecTy = N->getValueType(0);
15546
15547 // If the increment is a constant, it must match the memory ref size.
15548 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
15549 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
15550 uint32_t IncVal = CInc->getZExtValue();
15551 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15552 if (IsLaneOp || IsDupOp)
15553 NumBytes /= VecTy.getVectorNumElements();
15554 if (IncVal != NumBytes)
15555 continue;
15556 Inc = DAG.getRegister(AArch64::XZR, MVT::i64);
15557 }
15558 SmallVector<SDValue, 8> Ops;
15559 Ops.push_back(N->getOperand(0)); // Incoming chain
15560 // Lane loads and stores have a vector list as input.
15561 if (IsLaneOp || IsStore)
15562 for (unsigned i = 2; i < AddrOpIdx; ++i)
15563 Ops.push_back(N->getOperand(i));
15564 Ops.push_back(Addr); // Base register
15565 Ops.push_back(Inc);
15566
15567 // Return Types.
15568 EVT Tys[6];
15569 unsigned NumResultVecs = (IsStore ? 0 : NumVecs);
15570 unsigned n;
15571 for (n = 0; n < NumResultVecs; ++n)
15572 Tys[n] = VecTy;
15573 Tys[n++] = MVT::i64; // Type of write back register
15574 Tys[n] = MVT::Other; // Type of the chain
15575 SDVTList SDTys = DAG.getVTList(makeArrayRef(Tys, NumResultVecs + 2));
15576
15577 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
15578 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops,
15579 MemInt->getMemoryVT(),
15580 MemInt->getMemOperand());
15581
15582 // Update the uses.
15583 std::vector<SDValue> NewResults;
15584 for (unsigned i = 0; i < NumResultVecs; ++i) {
15585 NewResults.push_back(SDValue(UpdN.getNode(), i));
15586 }
15587 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1));
15588 DCI.CombineTo(N, NewResults);
15589 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
15590
15591 break;
15592 }
15593 return SDValue();
15594}
15595
15596// Checks to see if the value is the prescribed width and returns information
15597// about its extension mode.
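// A sketch of its use: a load of an i8 checked with width == 8 is accepted and
// reports its extension type, while a constant is accepted only if its value
// fits within the signed range of that width.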
15598static
15599bool checkValueWidth(SDValue V, unsigned width, ISD::LoadExtType &ExtType) {
15600 ExtType = ISD::NON_EXTLOAD;
15601 switch(V.getNode()->getOpcode()) {
15602 default:
15603 return false;
15604 case ISD::LOAD: {
15605 LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
15606 if ((LoadNode->getMemoryVT() == MVT::i8 && width == 8)
15607 || (LoadNode->getMemoryVT() == MVT::i16 && width == 16)) {
15608 ExtType = LoadNode->getExtensionType();
15609 return true;
15610 }
15611 return false;
15612 }
15613 case ISD::AssertSext: {
15614 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15615 if ((TypeNode->getVT() == MVT::i8 && width == 8)
15616 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15617 ExtType = ISD::SEXTLOAD;
15618 return true;
15619 }
15620 return false;
15621 }
15622 case ISD::AssertZext: {
15623 VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
15624 if ((TypeNode->getVT() == MVT::i8 && width == 8)
15625 || (TypeNode->getVT() == MVT::i16 && width == 16)) {
15626 ExtType = ISD::ZEXTLOAD;
15627 return true;
15628 }
15629 return false;
15630 }
15631 case ISD::Constant:
15632 case ISD::TargetConstant: {
15633 return std::abs(cast<ConstantSDNode>(V.getNode())->getSExtValue()) <
15634 1LL << (width - 1);
15635 }
15636 }
15637
15638 return true;
15639}
15640
15641// This function does a whole lot of voodoo to determine if the tests are
15642// equivalent without and with a mask. Essentially what happens is that given a
15643// DAG resembling:
15644//
15645// +-------------+ +-------------+ +-------------+ +-------------+
15646// | Input | | AddConstant | | CompConstant| | CC |
15647// +-------------+ +-------------+ +-------------+ +-------------+
15648// | | | |
15649// V V | +----------+
15650// +-------------+ +----+ | |
15651// | ADD | |0xff| | |
15652// +-------------+ +----+ | |
15653// | | | |
15654// V V | |
15655// +-------------+ | |
15656// | AND | | |
15657// +-------------+ | |
15658// | | |
15659// +-----+ | |
15660// | | |
15661// V V V
15662// +-------------+
15663// | CMP |
15664// +-------------+
15665//
15666// The AND node may be safely removed for some combinations of inputs. In
15667// particular we need to take into account the extension type of the Input,
15668// the exact values of AddConstant, CompConstant, and CC, along with the nominal
15669// width of the input (this can work for any width of input; the above graph is
15670// specific to 8 bits).
15671//
15672// The specific equations were worked out by generating output tables for each
15673// AArch64CC value in terms of the AddConstant (w1) and CompConstant (w2). The
15674// problem was simplified by working with 4 bit inputs, which means we only
15675// needed to reason about 24 distinct bit patterns: 8 patterns unique to zero
15676// extension (8,15), 8 patterns unique to sign extensions (-8,-1), and 8
15677// patterns present in both extensions (0,7). For every distinct set of
15678// AddConstant and CompConstant bit patterns, we can consider the masked and
15679// unmasked versions to be equivalent if the result of this function is true for
15680// all 16 distinct bit patterns of the current extension type of Input (w0).
15681//
15682// sub w8, w0, w1
15683// and w10, w8, #0x0f
15684// cmp w8, w2
15685// cset w9, AArch64CC
15686// cmp w10, w2
15687// cset w11, AArch64CC
15688// cmp w9, w11
15689// cset w0, eq
15690// ret
15691//
15692// Since the above function shows when the outputs are equivalent it defines
15693// when it is safe to remove the AND. Unfortunately it only runs on AArch64 and
15694// would be expensive to run during compiles. The equations below were written
15695// in a test harness that confirmed they gave outputs equivalent to the above
15696// function for all inputs, so they can be used to determine if the removal is
15697// legal instead.
15698//
15699// isEquivalentMaskless() is the code for testing if the AND can be removed,
15700// factored out of the DAG recognition since the DAG can take several forms.
15701
15702static bool isEquivalentMaskless(unsigned CC, unsigned width,
15703 ISD::LoadExtType ExtType, int AddConstant,
15704 int CompConstant) {
15705 // By being careful about our equations and only writing them in terms of
15706 // symbolic values and well known constants (0, 1, -1, MaxUInt) we can
15707 // make them generally applicable to all bit widths.
15708 int MaxUInt = (1 << width);
15709
15710 // For the purposes of these comparisons sign extending the type is
15711 // equivalent to zero extending the add and displacing it by half the integer
15712 // width. Provided we are careful and make sure our equations are valid over
15713 // the whole range we can just adjust the input and avoid writing equations
15714 // for sign extended inputs.
15715 if (ExtType == ISD::SEXTLOAD)
15716 AddConstant -= (1 << (width-1));
15717
15718 switch(CC) {
15719 case AArch64CC::LE:
15720 case AArch64CC::GT:
15721 if ((AddConstant == 0) ||
15722 (CompConstant == MaxUInt - 1 && AddConstant < 0) ||
15723 (AddConstant >= 0 && CompConstant < 0) ||
15724 (AddConstant <= 0 && CompConstant <= 0 && CompConstant < AddConstant))
15725 return true;
15726 break;
15727 case AArch64CC::LT:
15728 case AArch64CC::GE:
15729 if ((AddConstant == 0) ||
15730 (AddConstant >= 0 && CompConstant <= 0) ||
15731 (AddConstant <= 0 && CompConstant <= 0 && CompConstant <= AddConstant))
15732 return true;
15733 break;
15734 case AArch64CC::HI:
15735 case AArch64CC::LS:
15736 if ((AddConstant >= 0 && CompConstant < 0) ||
15737 (AddConstant <= 0 && CompConstant >= -1 &&
15738 CompConstant < AddConstant + MaxUInt))
15739 return true;
15740 break;
15741 case AArch64CC::PL:
15742 case AArch64CC::MI:
15743 if ((AddConstant == 0) ||
15744 (AddConstant > 0 && CompConstant <= 0) ||
15745 (AddConstant < 0 && CompConstant <= AddConstant))
15746 return true;
15747 break;
15748 case AArch64CC::LO:
15749 case AArch64CC::HS:
15750 if ((AddConstant >= 0 && CompConstant <= 0) ||
15751 (AddConstant <= 0 && CompConstant >= 0 &&
15752 CompConstant <= AddConstant + MaxUInt))
15753 return true;
15754 break;
15755 case AArch64CC::EQ:
15756 case AArch64CC::NE:
15757 if ((AddConstant > 0 && CompConstant < 0) ||
15758 (AddConstant < 0 && CompConstant >= 0 &&
15759 CompConstant < AddConstant + MaxUInt) ||
15760 (AddConstant >= 0 && CompConstant >= 0 &&
15761 CompConstant >= AddConstant) ||
15762 (AddConstant <= 0 && CompConstant < 0 && CompConstant < AddConstant))
15763 return true;
15764 break;
15765 case AArch64CC::VS:
15766 case AArch64CC::VC:
15767 case AArch64CC::AL:
15768 case AArch64CC::NV:
15769 return true;
15770 case AArch64CC::Invalid:
15771 break;
15772 }
15773
15774 return false;
15775}
15776
15777static
15778SDValue performCONDCombine(SDNode *N,
15779 TargetLowering::DAGCombinerInfo &DCI,
15780 SelectionDAG &DAG, unsigned CCIndex,
15781 unsigned CmpIndex) {
15782 unsigned CC = cast<ConstantSDNode>(N->getOperand(CCIndex))->getSExtValue();
15783 SDNode *SubsNode = N->getOperand(CmpIndex).getNode();
15784 unsigned CondOpcode = SubsNode->getOpcode();
15785
15786 if (CondOpcode != AArch64ISD::SUBS)
15787 return SDValue();
15788
15789 // There is a SUBS feeding this condition. Is it fed by a mask we can
15790 // use?
15791
15792 SDNode *AndNode = SubsNode->getOperand(0).getNode();
15793 unsigned MaskBits = 0;
15794
15795 if (AndNode->getOpcode() != ISD::AND)
15796 return SDValue();
15797
15798 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
15799 uint32_t CNV = CN->getZExtValue();
15800 if (CNV == 255)
15801 MaskBits = 8;
15802 else if (CNV == 65535)
15803 MaskBits = 16;
15804 }
15805
15806 if (!MaskBits)
15807 return SDValue();
15808
15809 SDValue AddValue = AndNode->getOperand(0);
15810
15811 if (AddValue.getOpcode() != ISD::ADD)
15812 return SDValue();
15813
15814 // The basic dag structure is correct, grab the inputs and validate them.
15815
15816 SDValue AddInputValue1 = AddValue.getNode()->getOperand(0);
15817 SDValue AddInputValue2 = AddValue.getNode()->getOperand(1);
15818 SDValue SubsInputValue = SubsNode->getOperand(1);
15819
15820 // The mask is present and the provenance of all the values is a smaller type,
15821 // so let's see if the mask is superfluous.
15822
15823 if (!isa<ConstantSDNode>(AddInputValue2.getNode()) ||
15824 !isa<ConstantSDNode>(SubsInputValue.getNode()))
15825 return SDValue();
15826
15827 ISD::LoadExtType ExtType;
15828
15829 if (!checkValueWidth(SubsInputValue, MaskBits, ExtType) ||
15830 !checkValueWidth(AddInputValue2, MaskBits, ExtType) ||
15831 !checkValueWidth(AddInputValue1, MaskBits, ExtType) )
15832 return SDValue();
15833
15834 if(!isEquivalentMaskless(CC, MaskBits, ExtType,
15835 cast<ConstantSDNode>(AddInputValue2.getNode())->getSExtValue(),
15836 cast<ConstantSDNode>(SubsInputValue.getNode())->getSExtValue()))
15837 return SDValue();
15838
15839 // The AND is not necessary, remove it.
15840
15841 SDVTList VTs = DAG.getVTList(SubsNode->getValueType(0),
15842 SubsNode->getValueType(1));
15843 SDValue Ops[] = { AddValue, SubsNode->getOperand(1) };
15844
15845 SDValue NewValue = DAG.getNode(CondOpcode, SDLoc(SubsNode), VTs, Ops);
15846 DAG.ReplaceAllUsesWith(SubsNode, NewValue.getNode());
15847
15848 return SDValue(N, 0);
15849}
15850
15851// Optimize compare with zero and branch.
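// A sketch at the assembly level: "cmp w8, #0; b.eq target", where the flags
// have no other use, folds to "cbz w8, target" (and likewise b.ne to cbnz).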
15852static SDValue performBRCONDCombine(SDNode *N,
15853 TargetLowering::DAGCombinerInfo &DCI,
15854 SelectionDAG &DAG) {
15855 MachineFunction &MF = DAG.getMachineFunction();
15856 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
15857 // will not be produced, as they are conditional branch instructions that do
15858 // not set flags.
15859 if (MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
15860 return SDValue();
15861
15862 if (SDValue NV = performCONDCombine(N, DCI, DAG, 2, 3))
15863 N = NV.getNode();
15864 SDValue Chain = N->getOperand(0);
15865 SDValue Dest = N->getOperand(1);
15866 SDValue CCVal = N->getOperand(2);
15867 SDValue Cmp = N->getOperand(3);
15868
15869 assert(isa<ConstantSDNode>(CCVal) && "Expected a ConstantSDNode here!");
15870 unsigned CC = cast<ConstantSDNode>(CCVal)->getZExtValue();
15871 if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
15872 return SDValue();
15873
15874 unsigned CmpOpc = Cmp.getOpcode();
15875 if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
15876 return SDValue();
15877
15878 // Only attempt folding if there is only one use of the flag and no use of the
15879 // value.
15880 if (!Cmp->hasNUsesOfValue(0, 0) || !Cmp->hasNUsesOfValue(1, 1))
15881 return SDValue();
15882
15883 SDValue LHS = Cmp.getOperand(0);
15884 SDValue RHS = Cmp.getOperand(1);
15885
15886 assert(LHS.getValueType() == RHS.getValueType() &&
15887 "Expected the value type to be the same for both operands!");
15888 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
15889 return SDValue();
15890
15891 if (isNullConstant(LHS))
15892 std::swap(LHS, RHS);
15893
15894 if (!isNullConstant(RHS))
15895 return SDValue();
15896
15897 if (LHS.getOpcode() == ISD::SHL || LHS.getOpcode() == ISD::SRA ||
15898 LHS.getOpcode() == ISD::SRL)
15899 return SDValue();
15900
15901 // Fold the compare into the branch instruction.
15902 SDValue BR;
15903 if (CC == AArch64CC::EQ)
15904 BR = DAG.getNode(AArch64ISD::CBZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
15905 else
15906 BR = DAG.getNode(AArch64ISD::CBNZ, SDLoc(N), MVT::Other, Chain, LHS, Dest);
15907
15908 // Do not add new nodes to DAG combiner worklist.
15909 DCI.CombineTo(N, BR, false);
15910
15911 return SDValue();
15912}
15913
15914// Optimize CSEL instructions
15915static SDValue performCSELCombine(SDNode *N,
15916 TargetLowering::DAGCombinerInfo &DCI,
15917 SelectionDAG &DAG) {
15918 // CSEL x, x, cc -> x
15919 if (N->getOperand(0) == N->getOperand(1))
15920 return N->getOperand(0);
15921
15922 return performCONDCombine(N, DCI, DAG, 2, 3);
15923}
15924
15925static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
15926 assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
15927 SDValue LHS = N->getOperand(0);
15928 SDValue RHS = N->getOperand(1);
15929 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(2))->get();
15930
15931 // setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
15932 if (Cond == ISD::SETNE && isOneConstant(RHS) &&
15933 LHS->getOpcode() == AArch64ISD::CSEL &&
15934 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
15935 LHS->hasOneUse()) {
15936 SDLoc DL(N);
15937
15938 // Invert CSEL's condition.
15939 auto *OpCC = cast<ConstantSDNode>(LHS.getOperand(2));
15940 auto OldCond = static_cast<AArch64CC::CondCode>(OpCC->getZExtValue());
15941 auto NewCond = getInvertedCondCode(OldCond);
15942
15943 // csel 0, 1, !cond, X
15944 SDValue CSEL =
15945 DAG.getNode(AArch64ISD::CSEL, DL, LHS.getValueType(), LHS.getOperand(0),
15946 LHS.getOperand(1), DAG.getConstant(NewCond, DL, MVT::i32),
15947 LHS.getOperand(3));
15948 return DAG.getZExtOrTrunc(CSEL, DL, N->getValueType(0));
15949 }
15950
15951 return SDValue();
15952}
15953
15954static SDValue performSetccMergeZeroCombine(SDNode *N, SelectionDAG &DAG) {
15955 assert(N->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
15956 "Unexpected opcode!");
15957
15958 SDValue Pred = N->getOperand(0);
15959 SDValue LHS = N->getOperand(1);
15960 SDValue RHS = N->getOperand(2);
15961 ISD::CondCode Cond = cast<CondCodeSDNode>(N->getOperand(3))->get();
15962
15963 // setcc_merge_zero pred (sign_extend (setcc_merge_zero ... pred ...)), 0, ne
15964 // => inner setcc_merge_zero
15965 if (Cond == ISD::SETNE && isZerosVector(RHS.getNode()) &&
15966 LHS->getOpcode() == ISD::SIGN_EXTEND &&
15967 LHS->getOperand(0)->getValueType(0) == N->getValueType(0) &&
15968 LHS->getOperand(0)->getOpcode() == AArch64ISD::SETCC_MERGE_ZERO &&
15969 LHS->getOperand(0)->getOperand(0) == Pred)
15970 return LHS->getOperand(0);
15971
15972 return SDValue();
15973}
15974
15975// Optimize some simple tbz/tbnz cases. Returns the new operand and bit to test
15976// as well as whether the test should be inverted. This code is required to
15977 // catch these cases (as opposed to standard DAG combines) because
15978// AArch64ISD::TBZ is matched during legalization.
15979static SDValue getTestBitOperand(SDValue Op, unsigned &Bit, bool &Invert,
15980 SelectionDAG &DAG) {
15981
15982 if (!Op->hasOneUse())
15983 return Op;
15984
15985 // We don't handle undef/constant-fold cases below, as they should have
15986 // already been taken care of (e.g. and of 0, test of undefined shifted bits,
15987 // etc.)
15988
15989 // (tbz (trunc x), b) -> (tbz x, b)
15990 // This case is just here to enable more of the below cases to be caught.
15991 if (Op->getOpcode() == ISD::TRUNCATE &&
15992 Bit < Op->getValueType(0).getSizeInBits()) {
15993 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
15994 }
15995
15996 // (tbz (any_ext x), b) -> (tbz x, b) if we don't use the extended bits.
15997 if (Op->getOpcode() == ISD::ANY_EXTEND &&
15998 Bit < Op->getOperand(0).getValueSizeInBits()) {
15999 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16000 }
16001
16002 if (Op->getNumOperands() != 2)
16003 return Op;
16004
16005 auto *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
16006 if (!C)
16007 return Op;
16008
16009 switch (Op->getOpcode()) {
16010 default:
16011 return Op;
16012
16013 // (tbz (and x, m), b) -> (tbz x, b)
16014 case ISD::AND:
16015 if ((C->getZExtValue() >> Bit) & 1)
16016 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16017 return Op;
16018
16019 // (tbz (shl x, c), b) -> (tbz x, b-c)
16020 case ISD::SHL:
16021 if (C->getZExtValue() <= Bit &&
16022 (Bit - C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
16023 Bit = Bit - C->getZExtValue();
16024 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16025 }
16026 return Op;
16027
16028 // (tbz (sra x, c), b) -> (tbz x, b+c) or (tbz x, msb) if b+c is > # bits in x
16029 case ISD::SRA:
16030 Bit = Bit + C->getZExtValue();
16031 if (Bit >= Op->getValueType(0).getSizeInBits())
16032 Bit = Op->getValueType(0).getSizeInBits() - 1;
16033 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16034
16035 // (tbz (srl x, c), b) -> (tbz x, b+c)
16036 case ISD::SRL:
16037 if ((Bit + C->getZExtValue()) < Op->getValueType(0).getSizeInBits()) {
16038 Bit = Bit + C->getZExtValue();
16039 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16040 }
16041 return Op;
16042
16043 // (tbz (xor x, -1), b) -> (tbnz x, b)
16044 case ISD::XOR:
16045 if ((C->getZExtValue() >> Bit) & 1)
16046 Invert = !Invert;
16047 return getTestBitOperand(Op->getOperand(0), Bit, Invert, DAG);
16048 }
16049}
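// Concrete instances of the bit-test rewrites above, for a 32-bit x
// (illustration only):
//   (tbz (shl x, 2), 5)  -> (tbz x, 3)    ; SHL: test bit 5 - 2
//   (tbz (srl x, 4), 3)  -> (tbz x, 7)    ; SRL: test bit 3 + 4
//   (tbz (sra x, 30), 5) -> (tbz x, 31)   ; SRA: 5 + 30 >= 32, clamp to the MSB
//   (tbz (xor x, -1), 3) -> (tbnz x, 3)   ; XOR with a set bit inverts the test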
16050
16051// Optimize test single bit zero/non-zero and branch.
16052static SDValue performTBZCombine(SDNode *N,
16053 TargetLowering::DAGCombinerInfo &DCI,
16054 SelectionDAG &DAG) {
16055 unsigned Bit = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
16056 bool Invert = false;
16057 SDValue TestSrc = N->getOperand(1);
16058 SDValue NewTestSrc = getTestBitOperand(TestSrc, Bit, Invert, DAG);
16059
16060 if (TestSrc == NewTestSrc)
16061 return SDValue();
16062
16063 unsigned NewOpc = N->getOpcode();
16064 if (Invert) {
16065 if (NewOpc == AArch64ISD::TBZ)
16066 NewOpc = AArch64ISD::TBNZ;
16067 else {
16068 assert(NewOpc == AArch64ISD::TBNZ);
16069 NewOpc = AArch64ISD::TBZ;
16070 }
16071 }
16072
16073 SDLoc DL(N);
16074 return DAG.getNode(NewOpc, DL, MVT::Other, N->getOperand(0), NewTestSrc,
16075 DAG.getConstant(Bit, DL, MVT::i64), N->getOperand(3));
16076}
16077
16078// vselect (v1i1 setcc) ->
16079// vselect (v1iXX setcc) (XX is the size of the compared operand type)
16080// FIXME: Currently the type legalizer can't handle VSELECT having v1i1 as
16081// condition. If it can legalize "VSELECT v1i1" correctly, no need to combine
16082// such VSELECT.
16083static SDValue performVSelectCombine(SDNode *N, SelectionDAG &DAG) {
16084 SDValue N0 = N->getOperand(0);
16085 EVT CCVT = N0.getValueType();
16086
16087 // Check for sign pattern (VSELECT setgt, iN lhs, -1, 1, -1) and transform
16088 // into (OR (ASR lhs, N-1), 1), which requires fewer instructions for the
16089 // supported types.
16090 SDValue SetCC = N->getOperand(0);
16091 if (SetCC.getOpcode() == ISD::SETCC &&
16092 SetCC.getOperand(2) == DAG.getCondCode(ISD::SETGT)) {
16093 SDValue CmpLHS = SetCC.getOperand(0);
16094 EVT VT = CmpLHS.getValueType();
16095 SDNode *CmpRHS = SetCC.getOperand(1).getNode();
16096 SDNode *SplatLHS = N->getOperand(1).getNode();
16097 SDNode *SplatRHS = N->getOperand(2).getNode();
16098 APInt SplatLHSVal;
16099 if (CmpLHS.getValueType() == N->getOperand(1).getValueType() &&
16100 VT.isSimple() &&
16101 is_contained(
16102 makeArrayRef({MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16,
16103 MVT::v2i32, MVT::v4i32, MVT::v2i64}),
16104 VT.getSimpleVT().SimpleTy) &&
16105 ISD::isConstantSplatVector(SplatLHS, SplatLHSVal) &&
16106 SplatLHSVal.isOneValue() && ISD::isConstantSplatVectorAllOnes(CmpRHS) &&
16107 ISD::isConstantSplatVectorAllOnes(SplatRHS)) {
16108 unsigned NumElts = VT.getVectorNumElements();
16109 SmallVector<SDValue, 8> Ops(
16110 NumElts, DAG.getConstant(VT.getScalarSizeInBits() - 1, SDLoc(N),
16111 VT.getScalarType()));
16112 SDValue Val = DAG.getBuildVector(VT, SDLoc(N), Ops);
16113
16114 auto Shift = DAG.getNode(ISD::SRA, SDLoc(N), VT, CmpLHS, Val);
16115 auto Or = DAG.getNode(ISD::OR, SDLoc(N), VT, Shift, N->getOperand(1));
16116 return Or;
16117 }
16118 }
16119
16120 if (N0.getOpcode() != ISD::SETCC ||
16121 CCVT.getVectorElementCount() != ElementCount::getFixed(1) ||
16122 CCVT.getVectorElementType() != MVT::i1)
16123 return SDValue();
16124
16125 EVT ResVT = N->getValueType(0);
16126 EVT CmpVT = N0.getOperand(0).getValueType();
16127 // Only combine when the result type is of the same size as the compared
16128 // operands.
16129 if (ResVT.getSizeInBits() != CmpVT.getSizeInBits())
16130 return SDValue();
16131
16132 SDValue IfTrue = N->getOperand(1);
16133 SDValue IfFalse = N->getOperand(2);
16134 SetCC = DAG.getSetCC(SDLoc(N), CmpVT.changeVectorElementTypeToInteger(),
16135 N0.getOperand(0), N0.getOperand(1),
16136 cast<CondCodeSDNode>(N0.getOperand(2))->get());
16137 return DAG.getNode(ISD::VSELECT, SDLoc(N), ResVT, SetCC,
16138 IfTrue, IfFalse);
16139}
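// A worked instance of the sign-pattern transform above for v4i32 operands
// (illustration only):
//   vselect (setgt %lhs, splat(-1)), splat(1), splat(-1)
// becomes
//   or (sra %lhs, splat(31)), splat(1)
// The arithmetic shift smears each lane's sign bit, so the OR yields +1 for
// non-negative lanes and -1 for negative lanes without materialising a compare.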
16140
16141/// A vector select: "(select vL, vR, (setcc LHS, RHS))" is best performed with
16142/// the compare-mask instructions rather than going via NZCV, even if LHS and
16143/// RHS are really scalar. This replaces any scalar setcc in the above pattern
16144/// with a vector one followed by a DUP shuffle on the result.
16145static SDValue performSelectCombine(SDNode *N,
16146 TargetLowering::DAGCombinerInfo &DCI) {
16147 SelectionDAG &DAG = DCI.DAG;
16148 SDValue N0 = N->getOperand(0);
16149 EVT ResVT = N->getValueType(0);
16150
16151 if (N0.getOpcode() != ISD::SETCC)
16152 return SDValue();
16153
16154 if (ResVT.isScalableVector())
16155 return SDValue();
16156
16157 // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
16158 // scalar SetCCResultType. We also don't expect vectors, because we assume
16159 // that selects fed by vector SETCCs are canonicalized to VSELECT.
16160 assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
16161        "Scalar-SETCC feeding SELECT has unexpected result type!");
16162
16163 // If NumMaskElts == 0, the comparison is larger than the select result. The
16164 // largest real NEON comparison is 64 bits per lane, which means the result is
16165 // at most 32 bits and an illegal vector. Just bail out for now.
16166 EVT SrcVT = N0.getOperand(0).getValueType();
16167
16168 // Don't try to do this optimization when the setcc itself has i1 operands.
16169 // There are no legal vectors of i1, so this would be pointless.
16170 if (SrcVT == MVT::i1)
16171 return SDValue();
16172
16173 int NumMaskElts = ResVT.getSizeInBits() / SrcVT.getSizeInBits();
16174 if (!ResVT.isVector() || NumMaskElts == 0)
16175 return SDValue();
16176
16177 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumMaskElts);
16178 EVT CCVT = SrcVT.changeVectorElementTypeToInteger();
16179
16180 // Also bail out if the vector CCVT isn't the same size as ResVT.
16181 // This can happen if the SETCC operand size doesn't divide the ResVT size
16182 // (e.g., f64 vs v3f32).
16183 if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
16184 return SDValue();
16185
16186 // Make sure we didn't create illegal types, if we're not supposed to.
16187 assert(DCI.isBeforeLegalize() ||
16188        DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
16189
16190 // First perform a vector comparison, where lane 0 is the one we're interested
16191 // in.
16192 SDLoc DL(N0);
16193 SDValue LHS =
16194 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(0));
16195 SDValue RHS =
16196 DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, SrcVT, N0.getOperand(1));
16197 SDValue SetCC = DAG.getNode(ISD::SETCC, DL, CCVT, LHS, RHS, N0.getOperand(2));
16198
16199 // Now duplicate the comparison mask we want across all other lanes.
16200 SmallVector<int, 8> DUPMask(CCVT.getVectorNumElements(), 0);
16201 SDValue Mask = DAG.getVectorShuffle(CCVT, DL, SetCC, SetCC, DUPMask);
16202 Mask = DAG.getNode(ISD::BITCAST, DL,
16203 ResVT.changeVectorElementTypeToInteger(), Mask);
16204
16205 return DAG.getSelect(DL, ResVT, Mask, N->getOperand(1), N->getOperand(2));
16206}
16207
16208/// Get rid of unnecessary NVCASTs (that don't change the type).
16209static SDValue performNVCASTCombine(SDNode *N) {
16210 if (N->getValueType(0) == N->getOperand(0).getValueType())
16211 return N->getOperand(0);
16212
16213 return SDValue();
16214}
16215
16216// If all users of the globaladdr are of the form (globaladdr + constant), find
16217// the smallest constant, fold it into the globaladdr's offset and rewrite the
16218// globaladdr as (globaladdr + constant) - constant.
16219static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG,
16220 const AArch64Subtarget *Subtarget,
16221 const TargetMachine &TM) {
16222 auto *GN = cast<GlobalAddressSDNode>(N);
16223 if (Subtarget->ClassifyGlobalReference(GN->getGlobal(), TM) !=
16224 AArch64II::MO_NO_FLAG)
16225 return SDValue();
16226
16227 uint64_t MinOffset = -1ull;
16228 for (SDNode *N : GN->uses()) {
16229 if (N->getOpcode() != ISD::ADD)
16230 return SDValue();
16231 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(0));
16232 if (!C)
16233 C = dyn_cast<ConstantSDNode>(N->getOperand(1));
16234 if (!C)
16235 return SDValue();
16236 MinOffset = std::min(MinOffset, C->getZExtValue());
16237 }
16238 uint64_t Offset = MinOffset + GN->getOffset();
16239
16240 // Require that the new offset is larger than the existing one. Otherwise, we
16241 // can end up oscillating between two possible DAGs, for example,
16242 // (add (add globaladdr + 10, -1), 1) and (add globaladdr + 9, 1).
16243 if (Offset <= uint64_t(GN->getOffset()))
16244 return SDValue();
16245
16246 // Check whether folding this offset is legal. It must not go out of bounds of
16247 // the referenced object to avoid violating the code model, and must be
16248 // smaller than 2^21 because this is the largest offset expressible in all
16249 // object formats.
16250 //
16251 // This check also prevents us from folding negative offsets, which will end
16252 // up being treated in the same way as large positive ones. They could also
16253 // cause code model violations, and aren't really common enough to matter.
16254 if (Offset >= (1 << 21))
16255 return SDValue();
16256
16257 const GlobalValue *GV = GN->getGlobal();
16258 Type *T = GV->getValueType();
16259 if (!T->isSized() ||
16260 Offset > GV->getParent()->getDataLayout().getTypeAllocSize(T))
16261 return SDValue();
16262
16263 SDLoc DL(GN);
16264 SDValue Result = DAG.getGlobalAddress(GV, DL, MVT::i64, Offset);
16265 return DAG.getNode(ISD::SUB, DL, MVT::i64, Result,
16266 DAG.getConstant(MinOffset, DL, MVT::i64));
16267}
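// Sketch of the rewrite above (illustration only; assumes no pre-existing
// offset on the global): with users (add g, 16) and (add g, 40), MinOffset is
// 16, so the node becomes (sub (globaladdr g + 16), 16). Later combines can
// then fold the first user to the offset global directly and turn the second
// into (add (globaladdr g + 16), 24).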
16268
16269 // Turns the vector of indices into a vector of byte offsets by scaling Offset
16270// by (BitWidth / 8).
16271static SDValue getScaledOffsetForBitWidth(SelectionDAG &DAG, SDValue Offset,
16272 SDLoc DL, unsigned BitWidth) {
16273 assert(Offset.getValueType().isScalableVector() &&
16274        "This method is only for scalable vectors of offsets");
16275
16276 SDValue Shift = DAG.getConstant(Log2_32(BitWidth / 8), DL, MVT::i64);
16277 SDValue SplatShift = DAG.getNode(ISD::SPLAT_VECTOR, DL, MVT::nxv2i64, Shift);
16278
16279 return DAG.getNode(ISD::SHL, DL, MVT::nxv2i64, Offset, SplatShift);
16280}
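// Example (illustration only): for 32-bit elements the shift amount is
// Log2_32(32 / 8) == 2, so each index i becomes the byte offset 4 * i; for
// 64-bit elements the shift amount is 3 and the byte offset is 8 * i.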
16281
16282/// Check if the value of \p OffsetInBytes can be used as an immediate for
16283/// the gather load/prefetch and scatter store instructions with vector base and
16284/// immediate offset addressing mode:
16285///
16286/// [<Zn>.[S|D]{, #<imm>}]
16287///
16288/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
16289inline static bool isValidImmForSVEVecImmAddrMode(unsigned OffsetInBytes,
16290 unsigned ScalarSizeInBytes) {
16291 // The immediate is not a multiple of the scalar size.
16292 if (OffsetInBytes % ScalarSizeInBytes)
16293 return false;
16294
16295 // The immediate is out of range.
16296 if (OffsetInBytes / ScalarSizeInBytes > 31)
16297 return false;
16298
16299 return true;
16300}
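// Worked examples for 32-bit elements, i.e. ScalarSizeInBytes == 4
// (illustration only); the accepted offsets are 0, 4, 8, ..., 124:
//   isValidImmForSVEVecImmAddrMode(8,   4) -> true   (8 = 4 * 2, k = 2 <= 31)
//   isValidImmForSVEVecImmAddrMode(6,   4) -> false  (not a multiple of 4)
//   isValidImmForSVEVecImmAddrMode(128, 4) -> false  (k = 32 > 31)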
16301
16302/// Check if the value of \p Offset represents a valid immediate for the SVE
16303 /// gather load/prefetch and scatter store instructions with vector base and
16304/// immediate offset addressing mode:
16305///
16306/// [<Zn>.[S|D]{, #<imm>}]
16307///
16308/// where <imm> = sizeof(<T>) * k, for k = 0, 1, ..., 31.
16309static bool isValidImmForSVEVecImmAddrMode(SDValue Offset,
16310 unsigned ScalarSizeInBytes) {
16311 ConstantSDNode *OffsetConst = dyn_cast<ConstantSDNode>(Offset.getNode());
16312 return OffsetConst && isValidImmForSVEVecImmAddrMode(
16313 OffsetConst->getZExtValue(), ScalarSizeInBytes);
16314}
16315
16316static SDValue performScatterStoreCombine(SDNode *N, SelectionDAG &DAG,
16317 unsigned Opcode,
16318 bool OnlyPackedOffsets = true) {
16319 const SDValue Src = N->getOperand(2);
16320 const EVT SrcVT = Src->getValueType(0);
16321 assert(SrcVT.isScalableVector() &&
16322        "Scatter stores are only possible for SVE vectors");
16323
16324 SDLoc DL(N);
16325 MVT SrcElVT = SrcVT.getVectorElementType().getSimpleVT();
16326
16327 // Make sure that source data will fit into an SVE register
16328 if (SrcVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16329 return SDValue();
16330
16331 // For FPs, ACLE only supports _packed_ single and double precision types.
16332 if (SrcElVT.isFloatingPoint())
16333 if ((SrcVT != MVT::nxv4f32) && (SrcVT != MVT::nxv2f64))
16334 return SDValue();
16335
16336 // Depending on the addressing mode, this is either a pointer or a vector of
16337 // pointers (that fits into one register)
16338 SDValue Base = N->getOperand(4);
16339 // Depending on the addressing mode, this is either a single offset or a
16340 // vector of offsets (that fits into one register)
16341 SDValue Offset = N->getOperand(5);
16342
16343 // For "scalar + vector of indices", just scale the indices. This only
16344 // applies to non-temporal scatters because there's no instruction that takes
16345 // indices.
16346 if (Opcode == AArch64ISD::SSTNT1_INDEX_PRED) {
16347 Offset =
16348 getScaledOffsetForBitWidth(DAG, Offset, DL, SrcElVT.getSizeInBits());
16349 Opcode = AArch64ISD::SSTNT1_PRED;
16350 }
16351
16352 // In the case of non-temporal scatter stores there's only one SVE instruction
16353 // per data-size: "scalar + vector", i.e.
16354 // * stnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
16355 // Since we do have intrinsics that allow the arguments to be in a different
16356 // order, we may need to swap them to match the spec.
16357 if (Opcode == AArch64ISD::SSTNT1_PRED && Offset.getValueType().isVector())
16358 std::swap(Base, Offset);
16359
16360 // SST1_IMM requires that the offset is an immediate that is:
16361 // * a multiple of #SizeInBytes,
16362 // * in the range [0, 31 x #SizeInBytes],
16363 // where #SizeInBytes is the size in bytes of the stored items. For
16364 // immediates outside that range and non-immediate scalar offsets use SST1 or
16365 // SST1_UXTW instead.
16366 if (Opcode == AArch64ISD::SST1_IMM_PRED) {
16367 if (!isValidImmForSVEVecImmAddrMode(Offset,
16368 SrcVT.getScalarSizeInBits() / 8)) {
16369 if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16370 Opcode = AArch64ISD::SST1_UXTW_PRED;
16371 else
16372 Opcode = AArch64ISD::SST1_PRED;
16373
16374 std::swap(Base, Offset);
16375 }
16376 }
16377
16378 auto &TLI = DAG.getTargetLoweringInfo();
16379 if (!TLI.isTypeLegal(Base.getValueType()))
16380 return SDValue();
16381
16382 // Some scatter store variants allow unpacked offsets, but only as nxv2i32
16383 // vectors. These are implicitly sign (sxtw) or zero (zxtw) extended to
16384 // nxv2i64. Legalize accordingly.
16385 if (!OnlyPackedOffsets &&
16386 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16387 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
16388
16389 if (!TLI.isTypeLegal(Offset.getValueType()))
16390 return SDValue();
16391
16392 // Source value type that is representable in hardware
16393 EVT HwSrcVt = getSVEContainerType(SrcVT);
16394
16395 // Keep the original type of the input data to store - this is needed to be
16396 // able to select the correct instruction, e.g. ST1B, ST1H, ST1W and ST1D. For
16397 // FP values we want the integer equivalent, so just use HwSrcVt.
16398 SDValue InputVT = DAG.getValueType(SrcVT);
16399 if (SrcVT.isFloatingPoint())
16400 InputVT = DAG.getValueType(HwSrcVt);
16401
16402 SDVTList VTs = DAG.getVTList(MVT::Other);
16403 SDValue SrcNew;
16404
16405 if (Src.getValueType().isFloatingPoint())
16406 SrcNew = DAG.getNode(ISD::BITCAST, DL, HwSrcVt, Src);
16407 else
16408 SrcNew = DAG.getNode(ISD::ANY_EXTEND, DL, HwSrcVt, Src);
16409
16410 SDValue Ops[] = {N->getOperand(0), // Chain
16411 SrcNew,
16412 N->getOperand(3), // Pg
16413 Base,
16414 Offset,
16415 InputVT};
16416
16417 return DAG.getNode(Opcode, DL, VTs, Ops);
16418}
16419
16420static SDValue performGatherLoadCombine(SDNode *N, SelectionDAG &DAG,
16421 unsigned Opcode,
16422 bool OnlyPackedOffsets = true) {
16423 const EVT RetVT = N->getValueType(0);
16424 assert(RetVT.isScalableVector() &&
16425        "Gather loads are only possible for SVE vectors");
16426
16427 SDLoc DL(N);
16428
16429 // Make sure that the loaded data will fit into an SVE register
16430 if (RetVT.getSizeInBits().getKnownMinSize() > AArch64::SVEBitsPerBlock)
16431 return SDValue();
16432
16433 // Depending on the addressing mode, this is either a pointer or a vector of
16434 // pointers (that fits into one register)
16435 SDValue Base = N->getOperand(3);
16436 // Depending on the addressing mode, this is either a single offset or a
16437 // vector of offsets (that fits into one register)
16438 SDValue Offset = N->getOperand(4);
16439
16440 // For "scalar + vector of indices", just scale the indices. This only
16441 // applies to non-temporal gathers because there's no instruction that takes
16442 // indices.
16443 if (Opcode == AArch64ISD::GLDNT1_INDEX_MERGE_ZERO) {
16444 Offset = getScaledOffsetForBitWidth(DAG, Offset, DL,
16445 RetVT.getScalarSizeInBits());
16446 Opcode = AArch64ISD::GLDNT1_MERGE_ZERO;
16447 }
16448
16449 // In the case of non-temporal gather loads there's only one SVE instruction
16450 // per data-size: "scalar + vector", i.e.
16451 // * ldnt1{b|h|w|d} { z0.s }, p0/z, [z0.s, x0]
16452 // Since we do have intrinsics that allow the arguments to be in a different
16453 // order, we may need to swap them to match the spec.
16454 if (Opcode == AArch64ISD::GLDNT1_MERGE_ZERO &&
16455 Offset.getValueType().isVector())
16456 std::swap(Base, Offset);
16457
16458 // GLD{FF}1_IMM requires that the offset is an immediate that is:
16459 // * a multiple of #SizeInBytes,
16460 // * in the range [0, 31 x #SizeInBytes],
16461 // where #SizeInBytes is the size in bytes of the loaded items. For
16462 // immediates outside that range and non-immediate scalar offsets use
16463 // GLD1_MERGE_ZERO or GLD1_UXTW_MERGE_ZERO instead.
16464 if (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO ||
16465 Opcode == AArch64ISD::GLDFF1_IMM_MERGE_ZERO) {
16466 if (!isValidImmForSVEVecImmAddrMode(Offset,
16467 RetVT.getScalarSizeInBits() / 8)) {
16468 if (MVT::nxv4i32 == Base.getValueType().getSimpleVT().SimpleTy)
16469 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16470 ? AArch64ISD::GLD1_UXTW_MERGE_ZERO
16471 : AArch64ISD::GLDFF1_UXTW_MERGE_ZERO;
16472 else
16473 Opcode = (Opcode == AArch64ISD::GLD1_IMM_MERGE_ZERO)
16474 ? AArch64ISD::GLD1_MERGE_ZERO
16475 : AArch64ISD::GLDFF1_MERGE_ZERO;
16476
16477 std::swap(Base, Offset);
16478 }
16479 }
16480
16481 auto &TLI = DAG.getTargetLoweringInfo();
16482 if (!TLI.isTypeLegal(Base.getValueType()))
16483 return SDValue();
16484
16485 // Some gather load variants allow unpacked offsets, but only as nxv2i32
16486 // vectors. These are implicitly sign (sxtw) or zero (zxtw) extended to
16487 // nxv2i64. Legalize accordingly.
16488 if (!OnlyPackedOffsets &&
16489 Offset.getValueType().getSimpleVT().SimpleTy == MVT::nxv2i32)
16490 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset).getValue(0);
16491
16492 // Return value type that is representable in hardware
16493 EVT HwRetVt = getSVEContainerType(RetVT);
16494
16495 // Keep the original output value type around - this is needed to be able to
16496 // select the correct instruction, e.g. LD1B, LD1H, LD1W and LD1D. For FP
16497 // values we want the integer equivalent, so just use HwRetVT.
16498 SDValue OutVT = DAG.getValueType(RetVT);
16499 if (RetVT.isFloatingPoint())
16500 OutVT = DAG.getValueType(HwRetVt);
16501
16502 SDVTList VTs = DAG.getVTList(HwRetVt, MVT::Other);
16503 SDValue Ops[] = {N->getOperand(0), // Chain
16504 N->getOperand(2), // Pg
16505 Base, Offset, OutVT};
16506
16507 SDValue Load = DAG.getNode(Opcode, DL, VTs, Ops);
16508 SDValue LoadChain = SDValue(Load.getNode(), 1);
16509
16510 if (RetVT.isInteger() && (RetVT != HwRetVt))
16511 Load = DAG.getNode(ISD::TRUNCATE, DL, RetVT, Load.getValue(0));
16512
16513 // If the original return value was FP, bitcast accordingly. Doing it here
16514 // means that we can avoid adding TableGen patterns for FPs.
16515 if (RetVT.isFloatingPoint())
16516 Load = DAG.getNode(ISD::BITCAST, DL, RetVT, Load.getValue(0));
16517
16518 return DAG.getMergeValues({Load, LoadChain}, DL);
16519}
16520
16521static SDValue
16522performSignExtendInRegCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
16523 SelectionDAG &DAG) {
16524 SDLoc DL(N);
16525 SDValue Src = N->getOperand(0);
16526 unsigned Opc = Src->getOpcode();
16527
16528 // Sign extend of an unsigned unpack -> signed unpack
16529 if (Opc == AArch64ISD::UUNPKHI || Opc == AArch64ISD::UUNPKLO) {
16530
16531 unsigned SOpc = Opc == AArch64ISD::UUNPKHI ? AArch64ISD::SUNPKHI
16532 : AArch64ISD::SUNPKLO;
16533
16534 // Push the sign extend to the operand of the unpack
16535 // This is necessary where, for example, the operand of the unpack
16536 // is another unpack:
16537 // 4i32 sign_extend_inreg (4i32 uunpklo(8i16 uunpklo (16i8 opnd)), from 4i8)
16538 // ->
16539 // 4i32 sunpklo (8i16 sign_extend_inreg(8i16 uunpklo (16i8 opnd), from 8i8)
16540 // ->
16541 // 4i32 sunpklo(8i16 sunpklo(16i8 opnd))
16542 SDValue ExtOp = Src->getOperand(0);
16543 auto VT = cast<VTSDNode>(N->getOperand(1))->getVT();
16544 EVT EltTy = VT.getVectorElementType();
16545 (void)EltTy;
16546
16547 assert((EltTy == MVT::i8 || EltTy == MVT::i16 || EltTy == MVT::i32) &&
16548        "Sign extending from an invalid type");
16549
16550 EVT ExtVT = VT.getDoubleNumVectorElementsVT(*DAG.getContext());
16551
16552 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ExtOp.getValueType(),
16553 ExtOp, DAG.getValueType(ExtVT));
16554
16555 return DAG.getNode(SOpc, DL, N->getValueType(0), Ext);
16556 }
16557
16558 if (DCI.isBeforeLegalizeOps())
16559 return SDValue();
16560
16561 if (!EnableCombineMGatherIntrinsics)
16562 return SDValue();
16563
16564 // SVE load nodes (e.g. AArch64ISD::GLD1) are straightforward candidates
16565 // for DAG Combine with SIGN_EXTEND_INREG. Bail out for all other nodes.
16566 unsigned NewOpc;
16567 unsigned MemVTOpNum = 4;
16568 switch (Opc) {
16569 case AArch64ISD::LD1_MERGE_ZERO:
16570 NewOpc = AArch64ISD::LD1S_MERGE_ZERO;
16571 MemVTOpNum = 3;
16572 break;
16573 case AArch64ISD::LDNF1_MERGE_ZERO:
16574 NewOpc = AArch64ISD::LDNF1S_MERGE_ZERO;
16575 MemVTOpNum = 3;
16576 break;
16577 case AArch64ISD::LDFF1_MERGE_ZERO:
16578 NewOpc = AArch64ISD::LDFF1S_MERGE_ZERO;
16579 MemVTOpNum = 3;
16580 break;
16581 case AArch64ISD::GLD1_MERGE_ZERO:
16582 NewOpc = AArch64ISD::GLD1S_MERGE_ZERO;
16583 break;
16584 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
16585 NewOpc = AArch64ISD::GLD1S_SCALED_MERGE_ZERO;
16586 break;
16587 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
16588 NewOpc = AArch64ISD::GLD1S_SXTW_MERGE_ZERO;
16589 break;
16590 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
16591 NewOpc = AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO;
16592 break;
16593 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
16594 NewOpc = AArch64ISD::GLD1S_UXTW_MERGE_ZERO;
16595 break;
16596 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
16597 NewOpc = AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO;
16598 break;
16599 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
16600 NewOpc = AArch64ISD::GLD1S_IMM_MERGE_ZERO;
16601 break;
16602 case AArch64ISD::GLDFF1_MERGE_ZERO:
16603 NewOpc = AArch64ISD::GLDFF1S_MERGE_ZERO;
16604 break;
16605 case AArch64ISD::GLDFF1_SCALED_MERGE_ZERO:
16606 NewOpc = AArch64ISD::GLDFF1S_SCALED_MERGE_ZERO;
16607 break;
16608 case AArch64ISD::GLDFF1_SXTW_MERGE_ZERO:
16609 NewOpc = AArch64ISD::GLDFF1S_SXTW_MERGE_ZERO;
16610 break;
16611 case AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO:
16612 NewOpc = AArch64ISD::GLDFF1S_SXTW_SCALED_MERGE_ZERO;
16613 break;
16614 case AArch64ISD::GLDFF1_UXTW_MERGE_ZERO:
16615 NewOpc = AArch64ISD::GLDFF1S_UXTW_MERGE_ZERO;
16616 break;
16617 case AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO:
16618 NewOpc = AArch64ISD::GLDFF1S_UXTW_SCALED_MERGE_ZERO;
16619 break;
16620 case AArch64ISD::GLDFF1_IMM_MERGE_ZERO:
16621 NewOpc = AArch64ISD::GLDFF1S_IMM_MERGE_ZERO;
16622 break;
16623 case AArch64ISD::GLDNT1_MERGE_ZERO:
16624 NewOpc = AArch64ISD::GLDNT1S_MERGE_ZERO;
16625 break;
16626 default:
16627 return SDValue();
16628 }
16629
16630 EVT SignExtSrcVT = cast<VTSDNode>(N->getOperand(1))->getVT();
16631 EVT SrcMemVT = cast<VTSDNode>(Src->getOperand(MemVTOpNum))->getVT();
16632
16633 if ((SignExtSrcVT != SrcMemVT) || !Src.hasOneUse())
16634 return SDValue();
16635
16636 EVT DstVT = N->getValueType(0);
16637 SDVTList VTs = DAG.getVTList(DstVT, MVT::Other);
16638
16639 SmallVector<SDValue, 5> Ops;
16640 for (unsigned I = 0; I < Src->getNumOperands(); ++I)
16641 Ops.push_back(Src->getOperand(I));
16642
16643 SDValue ExtLoad = DAG.getNode(NewOpc, SDLoc(N), VTs, Ops);
16644 DCI.CombineTo(N, ExtLoad);
16645 DCI.CombineTo(Src.getNode(), ExtLoad, ExtLoad.getValue(1));
16646
16647 // Return N so it doesn't get rechecked
16648 return SDValue(N, 0);
16649}
16650
16651/// Legalize the gather prefetch (scalar + vector addressing mode) when the
16652/// offset vector is an unpacked 32-bit scalable vector. The other cases (Offset
16653/// != nxv2i32) do not need legalization.
16654static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) {
16655 const unsigned OffsetPos = 4;
16656 SDValue Offset = N->getOperand(OffsetPos);
16657
16658 // Not an unpacked vector, bail out.
16659 if (Offset.getValueType().getSimpleVT().SimpleTy != MVT::nxv2i32)
16660 return SDValue();
16661
16662 // Extend the unpacked offset vector to 64-bit lanes.
16663 SDLoc DL(N);
16664 Offset = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::nxv2i64, Offset);
16665 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16666 // Replace the offset operand with the 64-bit one.
16667 Ops[OffsetPos] = Offset;
16668
16669 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16670}
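// Example (illustration only): an nxv2i32 offset operand reaching one of the
// gather-prefetch intrinsics is widened here to nxv2i64 with ANY_EXTEND (the
// upper bits of each lane are don't-care), while any other offset type simply
// takes the early bail-out above and is left untouched.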
16671
16672/// Combines a node carrying the intrinsic
16673/// `aarch64_sve_prf<T>_gather_scalar_offset` into a node that uses
16674/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to
16675/// `aarch64_sve_prf<T>_gather_scalar_offset` is not a valid immediate for the
16676 /// SVE gather prefetch instruction with vector plus immediate addressing mode.
16677static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG,
16678 unsigned ScalarSizeInBytes) {
16679 const unsigned ImmPos = 4, OffsetPos = 3;
16680 // No need to combine the node if the immediate is valid...
16681 if (isValidImmForSVEVecImmAddrMode(N->getOperand(ImmPos), ScalarSizeInBytes))
16682 return SDValue();
16683
16684 // ...otherwise swap the offset base with the offset...
16685 SmallVector<SDValue, 5> Ops(N->op_begin(), N->op_end());
16686 std::swap(Ops[ImmPos], Ops[OffsetPos]);
16687 // ...and remap the intrinsic `aarch64_sve_prf<T>_gather_scalar_offset` to
16688 // `aarch64_sve_prfb_gather_uxtw_index`.
16689 SDLoc DL(N);
16690 Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL,
16691 MVT::i64);
16692
16693 return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops);
16694}
16695
16696// Return true if the vector operation can guarantee only the first lane of its
16697// result contains data, with all bits in other lanes set to zero.
16698static bool isLanes1toNKnownZero(SDValue Op) {
16699 switch (Op.getOpcode()) {
16700 default:
16701 return false;
16702 case AArch64ISD::ANDV_PRED:
16703 case AArch64ISD::EORV_PRED:
16704 case AArch64ISD::FADDA_PRED:
16705 case AArch64ISD::FADDV_PRED:
16706 case AArch64ISD::FMAXNMV_PRED:
16707 case AArch64ISD::FMAXV_PRED:
16708 case AArch64ISD::FMINNMV_PRED:
16709 case AArch64ISD::FMINV_PRED:
16710 case AArch64ISD::ORV_PRED:
16711 case AArch64ISD::SADDV_PRED:
16712 case AArch64ISD::SMAXV_PRED:
16713 case AArch64ISD::SMINV_PRED:
16714 case AArch64ISD::UADDV_PRED:
16715 case AArch64ISD::UMAXV_PRED:
16716 case AArch64ISD::UMINV_PRED:
16717 return true;
16718 }
16719}
16720
16721static SDValue removeRedundantInsertVectorElt(SDNode *N) {
16722 assert(N->getOpcode() == ISD::INSERT_VECTOR_ELT && "Unexpected node!");
16723 SDValue InsertVec = N->getOperand(0);
16724 SDValue InsertElt = N->getOperand(1);
16725 SDValue InsertIdx = N->getOperand(2);
16726
16727 // We only care about inserts into the first element...
16728 if (!isNullConstant(InsertIdx))
16729 return SDValue();
16730 // ...of a zero'd vector...
16731 if (!ISD::isConstantSplatVectorAllZeros(InsertVec.getNode()))
16732 return SDValue();
16733 // ...where the inserted data was previously extracted...
16734 if (InsertElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16735 return SDValue();
16736
16737 SDValue ExtractVec = InsertElt.getOperand(0);
16738 SDValue ExtractIdx = InsertElt.getOperand(1);
16739
16740 // ...from the first element of a vector.
16741 if (!isNullConstant(ExtractIdx))
16742 return SDValue();
16743
16744 // If we get here we are effectively trying to zero lanes 1-N of a vector.
16745
16746 // Ensure there's no type conversion going on.
16747 if (N->getValueType(0) != ExtractVec.getValueType())
16748 return SDValue();
16749
16750 if (!isLanes1toNKnownZero(ExtractVec))
16751 return SDValue();
16752
16753 // The explicit zeroing is redundant.
16754 return ExtractVec;
16755}
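// The redundant pattern removed above, in DAG form (illustration only):
//   %r = insert_vector_elt (all-zeros vector),
//                          (extract_vector_elt %v, 0), 0
// where %v comes from one of the predicated reductions listed in
// isLanes1toNKnownZero (e.g. UADDV_PRED), which already zeroes lanes 1..N,
// so %r can simply be replaced by %v.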
16756
16757static SDValue
16758performInsertVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16759 if (SDValue Res = removeRedundantInsertVectorElt(N))
16760 return Res;
16761
16762 return performPostLD1Combine(N, DCI, true);
16763}
16764
16765SDValue performSVESpliceCombine(SDNode *N, SelectionDAG &DAG) {
16766 EVT Ty = N->getValueType(0);
16767 if (Ty.isInteger())
16768 return SDValue();
16769
16770 EVT IntTy = Ty.changeVectorElementTypeToInteger();
16771 EVT ExtIntTy = getPackedSVEVectorVT(IntTy.getVectorElementCount());
16772 if (ExtIntTy.getVectorElementType().getScalarSizeInBits() <
16773 IntTy.getVectorElementType().getScalarSizeInBits())
16774 return SDValue();
16775
16776 SDLoc DL(N);
16777 SDValue LHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(0)),
16778 DL, ExtIntTy);
16779 SDValue RHS = DAG.getAnyExtOrTrunc(DAG.getBitcast(IntTy, N->getOperand(1)),
16780 DL, ExtIntTy);
16781 SDValue Idx = N->getOperand(2);
16782 SDValue Splice = DAG.getNode(ISD::VECTOR_SPLICE, DL, ExtIntTy, LHS, RHS, Idx);
16783 SDValue Trunc = DAG.getAnyExtOrTrunc(Splice, DL, IntTy);
16784 return DAG.getBitcast(Ty, Trunc);
16785}
16786
16787SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
16788 DAGCombinerInfo &DCI) const {
16789 SelectionDAG &DAG = DCI.DAG;
16790 switch (N->getOpcode()) {
16791 default:
16792 LLVM_DEBUG(dbgs() << "Custom combining: skipping\n");
16793 break;
16794 case ISD::ADD:
16795 case ISD::SUB:
16796 return performAddSubCombine(N, DCI, DAG);
16797 case ISD::XOR:
16798 return performXorCombine(N, DAG, DCI, Subtarget);
16799 case ISD::MUL:
16800 return performMulCombine(N, DAG, DCI, Subtarget);
16801 case ISD::SINT_TO_FP:
16802 case ISD::UINT_TO_FP:
16803 return performIntToFpCombine(N, DAG, Subtarget);
16804 case ISD::FP_TO_SINT:
16805 case ISD::FP_TO_UINT:
16806 return performFpToIntCombine(N, DAG, DCI, Subtarget);
16807 case ISD::FDIV:
16808 return performFDivCombine(N, DAG, DCI, Subtarget);
16809 case ISD::OR:
16810 return performORCombine(N, DCI, Subtarget);
16811 case ISD::AND:
16812 return performANDCombine(N, DCI);
16813 case ISD::SRL:
16814 return performSRLCombine(N, DCI);
16815 case ISD::INTRINSIC_WO_CHAIN:
16816 return performIntrinsicCombine(N, DCI, Subtarget);
16817 case ISD::ANY_EXTEND:
16818 case ISD::ZERO_EXTEND:
16819 case ISD::SIGN_EXTEND:
16820 return performExtendCombine(N, DCI, DAG);
16821 case ISD::SIGN_EXTEND_INREG:
16822 return performSignExtendInRegCombine(N, DCI, DAG);
16823 case ISD::TRUNCATE:
16824 return performVectorTruncateCombine(N, DCI, DAG);
16825 case ISD::CONCAT_VECTORS:
16826 return performConcatVectorsCombine(N, DCI, DAG);
16827 case ISD::INSERT_SUBVECTOR:
16828 return performInsertSubvectorCombine(N, DCI, DAG);
16829 case ISD::SELECT:
16830 return performSelectCombine(N, DCI);
16831 case ISD::VSELECT:
16832 return performVSelectCombine(N, DCI.DAG);
16833 case ISD::SETCC:
16834 return performSETCCCombine(N, DAG);
16835 case ISD::LOAD:
16836 if (performTBISimplification(N->getOperand(1), DCI, DAG))
16837 return SDValue(N, 0);
16838 break;
16839 case ISD::STORE:
16840 return performSTORECombine(N, DCI, DAG, Subtarget);
16841 case ISD::VECTOR_SPLICE:
16842 return performSVESpliceCombine(N, DAG);
16843 case AArch64ISD::BRCOND:
16844 return performBRCONDCombine(N, DCI, DAG);
16845 case AArch64ISD::TBNZ:
16846 case AArch64ISD::TBZ:
16847 return performTBZCombine(N, DCI, DAG);
16848 case AArch64ISD::CSEL:
16849 return performCSELCombine(N, DCI, DAG);
16850 case AArch64ISD::DUP:
16851 return performPostLD1Combine(N, DCI, false);
16852 case AArch64ISD::NVCAST:
16853 return performNVCASTCombine(N);
16854 case AArch64ISD::SPLICE:
16855 return performSpliceCombine(N, DAG);
16856 case AArch64ISD::UZP1:
16857 return performUzpCombine(N, DAG);
16858 case AArch64ISD::SETCC_MERGE_ZERO:
16859 return performSetccMergeZeroCombine(N, DAG);
16860 case AArch64ISD::GLD1_MERGE_ZERO:
16861 case AArch64ISD::GLD1_SCALED_MERGE_ZERO:
16862 case AArch64ISD::GLD1_UXTW_MERGE_ZERO:
16863 case AArch64ISD::GLD1_SXTW_MERGE_ZERO:
16864 case AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO:
16865 case AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO:
16866 case AArch64ISD::GLD1_IMM_MERGE_ZERO:
16867 case AArch64ISD::GLD1S_MERGE_ZERO:
16868 case AArch64ISD::GLD1S_SCALED_MERGE_ZERO:
16869 case AArch64ISD::GLD1S_UXTW_MERGE_ZERO:
16870 case AArch64ISD::GLD1S_SXTW_MERGE_ZERO:
16871 case AArch64ISD::GLD1S_UXTW_SCALED_MERGE_ZERO:
16872 case AArch64ISD::GLD1S_SXTW_SCALED_MERGE_ZERO:
16873 case AArch64ISD::GLD1S_IMM_MERGE_ZERO:
16874 return performGLD1Combine(N, DAG);
16875 case AArch64ISD::VASHR:
16876 case AArch64ISD::VLSHR:
16877 return performVectorShiftCombine(N, *this, DCI);
16878 case ISD::INSERT_VECTOR_ELT:
16879 return performInsertVectorEltCombine(N, DCI);
16880 case ISD::EXTRACT_VECTOR_ELT:
16881 return performExtractVectorEltCombine(N, DAG);
16882 case ISD::VECREDUCE_ADD:
16883 return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
16884 case ISD::INTRINSIC_VOID:
16885 case ISD::INTRINSIC_W_CHAIN:
16886 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
16887 case Intrinsic::aarch64_sve_prfb_gather_scalar_offset:
16888 return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/);
16889 case Intrinsic::aarch64_sve_prfh_gather_scalar_offset:
16890 return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/);
16891 case Intrinsic::aarch64_sve_prfw_gather_scalar_offset:
16892 return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/);
16893 case Intrinsic::aarch64_sve_prfd_gather_scalar_offset:
16894 return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/);
16895 case Intrinsic::aarch64_sve_prfb_gather_uxtw_index:
16896 case Intrinsic::aarch64_sve_prfb_gather_sxtw_index:
16897 case Intrinsic::aarch64_sve_prfh_gather_uxtw_index:
16898 case Intrinsic::aarch64_sve_prfh_gather_sxtw_index:
16899 case Intrinsic::aarch64_sve_prfw_gather_uxtw_index:
16900 case Intrinsic::aarch64_sve_prfw_gather_sxtw_index:
16901 case Intrinsic::aarch64_sve_prfd_gather_uxtw_index:
16902 case Intrinsic::aarch64_sve_prfd_gather_sxtw_index:
16903 return legalizeSVEGatherPrefetchOffsVec(N, DAG);
16904 case Intrinsic::aarch64_neon_ld2:
16905 case Intrinsic::aarch64_neon_ld3:
16906 case Intrinsic::aarch64_neon_ld4:
16907 case Intrinsic::aarch64_neon_ld1x2:
16908 case Intrinsic::aarch64_neon_ld1x3:
16909 case Intrinsic::aarch64_neon_ld1x4:
16910 case Intrinsic::aarch64_neon_ld2lane:
16911 case Intrinsic::aarch64_neon_ld3lane:
16912 case Intrinsic::aarch64_neon_ld4lane:
16913 case Intrinsic::aarch64_neon_ld2r:
16914 case Intrinsic::aarch64_neon_ld3r:
16915 case Intrinsic::aarch64_neon_ld4r:
16916 case Intrinsic::aarch64_neon_st2:
16917 case Intrinsic::aarch64_neon_st3:
16918 case Intrinsic::aarch64_neon_st4:
16919 case Intrinsic::aarch64_neon_st1x2:
16920 case Intrinsic::aarch64_neon_st1x3:
16921 case Intrinsic::aarch64_neon_st1x4:
16922 case Intrinsic::aarch64_neon_st2lane:
16923 case Intrinsic::aarch64_neon_st3lane:
16924 case Intrinsic::aarch64_neon_st4lane:
16925 return performNEONPostLDSTCombine(N, DCI, DAG);
16926 case Intrinsic::aarch64_sve_ldnt1:
16927 return performLDNT1Combine(N, DAG);
16928 case Intrinsic::aarch64_sve_ld1rq:
16929 return performLD1ReplicateCombine<AArch64ISD::LD1RQ_MERGE_ZERO>(N, DAG);
16930 case Intrinsic::aarch64_sve_ld1ro:
16931 return performLD1ReplicateCombine<AArch64ISD::LD1RO_MERGE_ZERO>(N, DAG);
16932 case Intrinsic::aarch64_sve_ldnt1_gather_scalar_offset:
16933 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16934 case Intrinsic::aarch64_sve_ldnt1_gather:
16935 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16936 case Intrinsic::aarch64_sve_ldnt1_gather_index:
16937 return performGatherLoadCombine(N, DAG,
16938 AArch64ISD::GLDNT1_INDEX_MERGE_ZERO);
16939 case Intrinsic::aarch64_sve_ldnt1_gather_uxtw:
16940 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDNT1_MERGE_ZERO);
16941 case Intrinsic::aarch64_sve_ld1:
16942 return performLD1Combine(N, DAG, AArch64ISD::LD1_MERGE_ZERO);
16943 case Intrinsic::aarch64_sve_ldnf1:
16944 return performLD1Combine(N, DAG, AArch64ISD::LDNF1_MERGE_ZERO);
16945 case Intrinsic::aarch64_sve_ldff1:
16946 return performLD1Combine(N, DAG, AArch64ISD::LDFF1_MERGE_ZERO);
16947 case Intrinsic::aarch64_sve_st1:
16948 return performST1Combine(N, DAG);
16949 case Intrinsic::aarch64_sve_stnt1:
16950 return performSTNT1Combine(N, DAG);
16951 case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
16952 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16953 case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
16954 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16955 case Intrinsic::aarch64_sve_stnt1_scatter:
16956 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_PRED);
16957 case Intrinsic::aarch64_sve_stnt1_scatter_index:
16958 return performScatterStoreCombine(N, DAG, AArch64ISD::SSTNT1_INDEX_PRED);
16959 case Intrinsic::aarch64_sve_ld1_gather:
16960 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_MERGE_ZERO);
16961 case Intrinsic::aarch64_sve_ld1_gather_index:
16962 return performGatherLoadCombine(N, DAG,
16963 AArch64ISD::GLD1_SCALED_MERGE_ZERO);
16964 case Intrinsic::aarch64_sve_ld1_gather_sxtw:
16965 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_SXTW_MERGE_ZERO,
16966 /*OnlyPackedOffsets=*/false);
16967 case Intrinsic::aarch64_sve_ld1_gather_uxtw:
16968 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_UXTW_MERGE_ZERO,
16969 /*OnlyPackedOffsets=*/false);
16970 case Intrinsic::aarch64_sve_ld1_gather_sxtw_index:
16971 return performGatherLoadCombine(N, DAG,
16972 AArch64ISD::GLD1_SXTW_SCALED_MERGE_ZERO,
16973 /*OnlyPackedOffsets=*/false);
16974 case Intrinsic::aarch64_sve_ld1_gather_uxtw_index:
16975 return performGatherLoadCombine(N, DAG,
16976 AArch64ISD::GLD1_UXTW_SCALED_MERGE_ZERO,
16977 /*OnlyPackedOffsets=*/false);
16978 case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
16979 return performGatherLoadCombine(N, DAG, AArch64ISD::GLD1_IMM_MERGE_ZERO);
16980 case Intrinsic::aarch64_sve_ldff1_gather:
16981 return performGatherLoadCombine(N, DAG, AArch64ISD::GLDFF1_MERGE_ZERO);
16982 case Intrinsic::aarch64_sve_ldff1_gather_index:
16983 return performGatherLoadCombine(N, DAG,
16984 AArch64ISD::GLDFF1_SCALED_MERGE_ZERO);
16985 case Intrinsic::aarch64_sve_ldff1_gather_sxtw:
16986 return performGatherLoadCombine(N, DAG,
16987 AArch64ISD::GLDFF1_SXTW_MERGE_ZERO,
16988 /*OnlyPackedOffsets=*/false);
16989 case Intrinsic::aarch64_sve_ldff1_gather_uxtw:
16990 return performGatherLoadCombine(N, DAG,
16991 AArch64ISD::GLDFF1_UXTW_MERGE_ZERO,
16992 /*OnlyPackedOffsets=*/false);
16993 case Intrinsic::aarch64_sve_ldff1_gather_sxtw_index:
16994 return performGatherLoadCombine(N, DAG,
16995 AArch64ISD::GLDFF1_SXTW_SCALED_MERGE_ZERO,
16996 /*OnlyPackedOffsets=*/false);
16997 case Intrinsic::aarch64_sve_ldff1_gather_uxtw_index:
16998 return performGatherLoadCombine(N, DAG,
16999 AArch64ISD::GLDFF1_UXTW_SCALED_MERGE_ZERO,
17000 /*OnlyPackedOffsets=*/false);
17001 case Intrinsic::aarch64_sve_ldff1_gather_scalar_offset:
17002 return performGatherLoadCombine(N, DAG,
17003 AArch64ISD::GLDFF1_IMM_MERGE_ZERO);
17004 case Intrinsic::aarch64_sve_st1_scatter:
17005 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_PRED);
17006 case Intrinsic::aarch64_sve_st1_scatter_index:
17007 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SCALED_PRED);
17008 case Intrinsic::aarch64_sve_st1_scatter_sxtw:
17009 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_SXTW_PRED,
17010 /*OnlyPackedOffsets=*/false);
17011 case Intrinsic::aarch64_sve_st1_scatter_uxtw:
17012 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_UXTW_PRED,
17013 /*OnlyPackedOffsets=*/false);
17014 case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
17015 return performScatterStoreCombine(N, DAG,
17016 AArch64ISD::SST1_SXTW_SCALED_PRED,
17017 /*OnlyPackedOffsets=*/false);
17018 case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
17019 return performScatterStoreCombine(N, DAG,
17020 AArch64ISD::SST1_UXTW_SCALED_PRED,
17021 /*OnlyPackedOffsets=*/false);
17022 case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
17023 return performScatterStoreCombine(N, DAG, AArch64ISD::SST1_IMM_PRED);
17024 case Intrinsic::aarch64_sve_tuple_get: {
17025 SDLoc DL(N);
17026 SDValue Chain = N->getOperand(0);
17027 SDValue Src1 = N->getOperand(2);
17028 SDValue Idx = N->getOperand(3);
17029
17030 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
17031 EVT ResVT = N->getValueType(0);
17032 uint64_t NumLanes = ResVT.getVectorElementCount().getKnownMinValue();
17033 SDValue ExtIdx = DAG.getVectorIdxConstant(IdxConst * NumLanes, DL);
17034 SDValue Val =
17035 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, Src1, ExtIdx);
17036 return DAG.getMergeValues({Val, Chain}, DL);
17037 }
17038 case Intrinsic::aarch64_sve_tuple_set: {
17039 SDLoc DL(N);
17040 SDValue Chain = N->getOperand(0);
17041 SDValue Tuple = N->getOperand(2);
17042 SDValue Idx = N->getOperand(3);
17043 SDValue Vec = N->getOperand(4);
17044
17045 EVT TupleVT = Tuple.getValueType();
17046 uint64_t TupleLanes = TupleVT.getVectorElementCount().getKnownMinValue();
17047
17048 uint64_t IdxConst = cast<ConstantSDNode>(Idx)->getZExtValue();
17049 uint64_t NumLanes =
17050 Vec.getValueType().getVectorElementCount().getKnownMinValue();
17051
17052 if ((TupleLanes % NumLanes) != 0)
17053 report_fatal_error("invalid tuple vector!");
17054
17055 uint64_t NumVecs = TupleLanes / NumLanes;
17056
17057 SmallVector<SDValue, 4> Opnds;
17058 for (unsigned I = 0; I < NumVecs; ++I) {
17059 if (I == IdxConst)
17060 Opnds.push_back(Vec);
17061 else {
17062 SDValue ExtIdx = DAG.getVectorIdxConstant(I * NumLanes, DL);
17063 Opnds.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL,
17064 Vec.getValueType(), Tuple, ExtIdx));
17065 }
17066 }
17067 SDValue Concat =
17068 DAG.getNode(ISD::CONCAT_VECTORS, DL, Tuple.getValueType(), Opnds);
17069 return DAG.getMergeValues({Concat, Chain}, DL);
17070 }
17071 case Intrinsic::aarch64_sve_tuple_create2:
17072 case Intrinsic::aarch64_sve_tuple_create3:
17073 case Intrinsic::aarch64_sve_tuple_create4: {
17074 SDLoc DL(N);
17075 SDValue Chain = N->getOperand(0);
17076
17077 SmallVector<SDValue, 4> Opnds;
17078 for (unsigned I = 2; I < N->getNumOperands(); ++I)
17079 Opnds.push_back(N->getOperand(I));
17080
17081 EVT VT = Opnds[0].getValueType();
17082 EVT EltVT = VT.getVectorElementType();
17083 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
17084 VT.getVectorElementCount() *
17085 (N->getNumOperands() - 2));
17086 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Opnds);
17087 return DAG.getMergeValues({Concat, Chain}, DL);
17088 }
17089 case Intrinsic::aarch64_sve_ld2:
17090 case Intrinsic::aarch64_sve_ld3:
17091 case Intrinsic::aarch64_sve_ld4: {
17092 SDLoc DL(N);
17093 SDValue Chain = N->getOperand(0);
17094 SDValue Mask = N->getOperand(2);
17095 SDValue BasePtr = N->getOperand(3);
17096 SDValue LoadOps[] = {Chain, Mask, BasePtr};
17097 unsigned IntrinsicID =
17098 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
17099 SDValue Result =
17100 LowerSVEStructLoad(IntrinsicID, LoadOps, N->getValueType(0), DAG, DL);
17101 return DAG.getMergeValues({Result, Chain}, DL);
17102 }
17103 case Intrinsic::aarch64_rndr:
17104 case Intrinsic::aarch64_rndrrs: {
17105 unsigned IntrinsicID =
17106 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
17107 auto Register =
17108 (IntrinsicID == Intrinsic::aarch64_rndr ? AArch64SysReg::RNDR
17109 : AArch64SysReg::RNDRRS);
17110 SDLoc DL(N);
17111 SDValue A = DAG.getNode(
17112 AArch64ISD::MRS, DL, DAG.getVTList(MVT::i64, MVT::Glue, MVT::Other),
17113 N->getOperand(0), DAG.getConstant(Register, DL, MVT::i64));
17114 SDValue B = DAG.getNode(
17115 AArch64ISD::CSINC, DL, MVT::i32, DAG.getConstant(0, DL, MVT::i32),
17116 DAG.getConstant(0, DL, MVT::i32),
17117 DAG.getConstant(AArch64CC::NE, DL, MVT::i32), A.getValue(1));
17118 return DAG.getMergeValues(
17119 {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL);
17120 }
17121 default:
17122 break;
17123 }
17124 break;
17125 case ISD::GlobalAddress:
17126 return performGlobalAddressCombine(N, DAG, Subtarget, getTargetMachine());
17127 }
17128 return SDValue();
17129}
17130
17131 // Check if the return value is used only as a return value, as otherwise
17132// we can't perform a tail-call. In particular, we need to check for
17133// target ISD nodes that are returns and any other "odd" constructs
17134// that the generic analysis code won't necessarily catch.
17135bool AArch64TargetLowering::isUsedByReturnOnly(SDNode *N,
17136 SDValue &Chain) const {
17137 if (N->getNumValues() != 1)
17138 return false;
17139 if (!N->hasNUsesOfValue(1, 0))
17140 return false;
17141
17142 SDValue TCChain = Chain;
17143 SDNode *Copy = *N->use_begin();
17144 if (Copy->getOpcode() == ISD::CopyToReg) {
17145 // If the copy has a glue operand, we conservatively assume it isn't safe to
17146 // perform a tail call.
17147 if (Copy->getOperand(Copy->getNumOperands() - 1).getValueType() ==
17148 MVT::Glue)
17149 return false;
17150 TCChain = Copy->getOperand(0);
17151 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
17152 return false;
17153
17154 bool HasRet = false;
17155 for (SDNode *Node : Copy->uses()) {
17156 if (Node->getOpcode() != AArch64ISD::RET_FLAG)
17157 return false;
17158 HasRet = true;
17159 }
17160
17161 if (!HasRet)
17162 return false;
17163
17164 Chain = TCChain;
17165 return true;
17166}
17167
17168 // Return whether an instruction can potentially be optimized to a tail
17169// call. This will cause the optimizers to attempt to move, or duplicate,
17170// return instructions to help enable tail call optimizations for this
17171// instruction.
17172bool AArch64TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17173 return CI->isTailCall();
17174}
17175
17176bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
17177 SDValue &Offset,
17178 ISD::MemIndexedMode &AM,
17179 bool &IsInc,
17180 SelectionDAG &DAG) const {
17181 if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB)
17182 return false;
17183
17184 Base = Op->getOperand(0);
17185 // All of the indexed addressing mode instructions take a signed
17186 // 9 bit immediate offset.
17187 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
17188 int64_t RHSC = RHS->getSExtValue();
17189 if (Op->getOpcode() == ISD::SUB)
17190 RHSC = -(uint64_t)RHSC;
17191 if (!isInt<9>(RHSC))
17192 return false;
17193 IsInc = (Op->getOpcode() == ISD::ADD);
17194 Offset = Op->getOperand(1);
17195 return true;
17196 }
17197 return false;
17198}
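// Example (illustration only): the signed 9-bit immediate accepted above
// covers offsets in [-256, 255], so (add p, 255) and (sub p, 256) are valid
// pre/post-index candidates, while (add p, 256) is rejected.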
17199
17200bool AArch64TargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
17201 SDValue &Offset,
17202 ISD::MemIndexedMode &AM,
17203 SelectionDAG &DAG) const {
17204 EVT VT;
17205 SDValue Ptr;
17206 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17207 VT = LD->getMemoryVT();
17208 Ptr = LD->getBasePtr();
17209 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17210 VT = ST->getMemoryVT();
17211 Ptr = ST->getBasePtr();
17212 } else
17213 return false;
17214
17215 bool IsInc;
17216 if (!getIndexedAddressParts(Ptr.getNode(), Base, Offset, AM, IsInc, DAG))
17217 return false;
17218 AM = IsInc ? ISD::PRE_INC : ISD::PRE_DEC;
17219 return true;
17220}
17221
17222bool AArch64TargetLowering::getPostIndexedAddressParts(
17223 SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset,
17224 ISD::MemIndexedMode &AM, SelectionDAG &DAG) const {
17225 EVT VT;
17226 SDValue Ptr;
17227 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
17228 VT = LD->getMemoryVT();
17229 Ptr = LD->getBasePtr();
17230 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
17231 VT = ST->getMemoryVT();
17232 Ptr = ST->getBasePtr();
17233 } else
17234 return false;
17235
17236 bool IsInc;
17237 if (!getIndexedAddressParts(Op, Base, Offset, AM, IsInc, DAG))
17238 return false;
17239 // Post-indexing updates the base, so it's not a valid transform
17240 // if that's not the same as the load's pointer.
17241 if (Ptr != Base)
17242 return false;
17243 AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
17244 return true;
17245}
17246
17247void AArch64TargetLowering::ReplaceBITCASTResults(
17248 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17249 SDLoc DL(N);
17250 SDValue Op = N->getOperand(0);
17251 EVT VT = N->getValueType(0);
17252 EVT SrcVT = Op.getValueType();
17253
17254 if (VT.isScalableVector() && !isTypeLegal(VT) && isTypeLegal(SrcVT)) {
17255     assert(!VT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17256            "Expected fp->int bitcast!");
17257 SDValue CastResult = getSVESafeBitCast(getSVEContainerType(VT), Op, DAG);
17258 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, CastResult));
17259 return;
17260 }
17261
17262 if (VT != MVT::i16 || (SrcVT != MVT::f16 && SrcVT != MVT::bf16))
17263 return;
17264
17265 Op = SDValue(
17266 DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f32,
17267 DAG.getUNDEF(MVT::i32), Op,
17268 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
17269 0);
17270 Op = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Op);
17271 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Op));
17272}
17273
17274static void ReplaceReductionResults(SDNode *N,
17275 SmallVectorImpl<SDValue> &Results,
17276 SelectionDAG &DAG, unsigned InterOp,
17277 unsigned AcrossOp) {
17278 EVT LoVT, HiVT;
17279 SDValue Lo, Hi;
17280 SDLoc dl(N);
17281 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
17282 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
17283 SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
17284 SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
17285 Results.push_back(SplitVal);
17286}
17287
17288static std::pair<SDValue, SDValue> splitInt128(SDValue N, SelectionDAG &DAG) {
17289 SDLoc DL(N);
17290 SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, N);
17291 SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
17292 DAG.getNode(ISD::SRL, DL, MVT::i128, N,
17293 DAG.getConstant(64, DL, MVT::i64)));
17294 return std::make_pair(Lo, Hi);
17295}
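// Worked example for the helper above (illustrative values): splitting the
// i128 constant 0x0123456789ABCDEF'FEDCBA9876543210 yields
//   Lo = 0xFEDCBA9876543210   (plain truncation to i64)
//   Hi = 0x0123456789ABCDEF   (logical shift right by 64, then truncation)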
17296
17297void AArch64TargetLowering::ReplaceExtractSubVectorResults(
17298 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17299 SDValue In = N->getOperand(0);
17300 EVT InVT = In.getValueType();
17301
17302 // Common code will handle these just fine.
17303 if (!InVT.isScalableVector() || !InVT.isInteger())
17304 return;
17305
17306 SDLoc DL(N);
17307 EVT VT = N->getValueType(0);
17308
17309 // The following checks bail if this is not a halving operation.
17310
17311 ElementCount ResEC = VT.getVectorElementCount();
17312
17313 if (InVT.getVectorElementCount() != (ResEC * 2))
17314 return;
17315
17316 auto *CIndex = dyn_cast<ConstantSDNode>(N->getOperand(1));
17317 if (!CIndex)
17318 return;
17319
17320 unsigned Index = CIndex->getZExtValue();
17321 if ((Index != 0) && (Index != ResEC.getKnownMinValue()))
17322 return;
17323
17324 unsigned Opcode = (Index == 0) ? AArch64ISD::UUNPKLO : AArch64ISD::UUNPKHI;
17325 EVT ExtendedHalfVT = VT.widenIntegerVectorElementType(*DAG.getContext());
17326
17327 SDValue Half = DAG.getNode(Opcode, DL, ExtendedHalfVT, N->getOperand(0));
17328 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
17329}
17330
17331// Create an even/odd pair of X registers holding integer value V.
17332static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
17333 SDLoc dl(V.getNode());
17334 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i64);
17335 SDValue VHi = DAG.getAnyExtOrTrunc(
17336 DAG.getNode(ISD::SRL, dl, MVT::i128, V, DAG.getConstant(64, dl, MVT::i64)),
17337 dl, MVT::i64);
17338 if (DAG.getDataLayout().isBigEndian())
17339 std::swap (VLo, VHi);
17340 SDValue RegClass =
17341 DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
17342 SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
17343 SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
17344 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
17345 return SDValue(
17346 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
17347}
17348
17349static void ReplaceCMP_SWAP_128Results(SDNode *N,
17350 SmallVectorImpl<SDValue> &Results,
17351 SelectionDAG &DAG,
17352 const AArch64Subtarget *Subtarget) {
17353   assert(N->getValueType(0) == MVT::i128 &&
17354          "AtomicCmpSwap on types less than 128 should be legal");
17355
17356 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
17357 if (Subtarget->hasLSE() || Subtarget->outlineAtomics()) {
17358 // LSE has a 128-bit compare and swap (CASP), but i128 is not a legal type,
17359 // so lower it here, wrapped in REG_SEQUENCE and EXTRACT_SUBREG.
17360 SDValue Ops[] = {
17361 createGPRPairNode(DAG, N->getOperand(2)), // Compare value
17362 createGPRPairNode(DAG, N->getOperand(3)), // Store value
17363 N->getOperand(1), // Ptr
17364 N->getOperand(0), // Chain in
17365 };
17366
17367 unsigned Opcode;
17368 switch (MemOp->getMergedOrdering()) {
17369 case AtomicOrdering::Monotonic:
17370 Opcode = AArch64::CASPX;
17371 break;
17372 case AtomicOrdering::Acquire:
17373 Opcode = AArch64::CASPAX;
17374 break;
17375 case AtomicOrdering::Release:
17376 Opcode = AArch64::CASPLX;
17377 break;
17378 case AtomicOrdering::AcquireRelease:
17379 case AtomicOrdering::SequentiallyConsistent:
17380 Opcode = AArch64::CASPALX;
17381 break;
17382 default:
17383       llvm_unreachable("Unexpected ordering!");
17384 }
17385
17386 MachineSDNode *CmpSwap = DAG.getMachineNode(
17387 Opcode, SDLoc(N), DAG.getVTList(MVT::Untyped, MVT::Other), Ops);
17388 DAG.setNodeMemRefs(CmpSwap, {MemOp});
17389
17390 unsigned SubReg1 = AArch64::sube64, SubReg2 = AArch64::subo64;
17391 if (DAG.getDataLayout().isBigEndian())
17392 std::swap(SubReg1, SubReg2);
17393 SDValue Lo = DAG.getTargetExtractSubreg(SubReg1, SDLoc(N), MVT::i64,
17394 SDValue(CmpSwap, 0));
17395 SDValue Hi = DAG.getTargetExtractSubreg(SubReg2, SDLoc(N), MVT::i64,
17396 SDValue(CmpSwap, 0));
17397 Results.push_back(
17398 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128, Lo, Hi));
17399 Results.push_back(SDValue(CmpSwap, 1)); // Chain out
17400 return;
17401 }
17402
17403 unsigned Opcode;
17404 switch (MemOp->getMergedOrdering()) {
17405 case AtomicOrdering::Monotonic:
17406 Opcode = AArch64::CMP_SWAP_128_MONOTONIC;
17407 break;
17408 case AtomicOrdering::Acquire:
17409 Opcode = AArch64::CMP_SWAP_128_ACQUIRE;
17410 break;
17411 case AtomicOrdering::Release:
17412 Opcode = AArch64::CMP_SWAP_128_RELEASE;
17413 break;
17414 case AtomicOrdering::AcquireRelease:
17415 case AtomicOrdering::SequentiallyConsistent:
17416 Opcode = AArch64::CMP_SWAP_128;
17417 break;
17418 default:
17419     llvm_unreachable("Unexpected ordering!");
17420 }
17421
17422 auto Desired = splitInt128(N->getOperand(2), DAG);
17423 auto New = splitInt128(N->getOperand(3), DAG);
17424 SDValue Ops[] = {N->getOperand(1), Desired.first, Desired.second,
17425 New.first, New.second, N->getOperand(0)};
17426 SDNode *CmpSwap = DAG.getMachineNode(
17427 Opcode, SDLoc(N), DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other),
17428 Ops);
17429 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
17430
17431 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17432 SDValue(CmpSwap, 0), SDValue(CmpSwap, 1)));
17433 Results.push_back(SDValue(CmpSwap, 3));
17434}
17435
17436void AArch64TargetLowering::ReplaceNodeResults(
17437 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
17438 switch (N->getOpcode()) {
17439 default:
17440     llvm_unreachable("Don't know how to custom expand this");
17441 case ISD::BITCAST:
17442 ReplaceBITCASTResults(N, Results, DAG);
17443 return;
17444 case ISD::VECREDUCE_ADD:
17445 case ISD::VECREDUCE_SMAX:
17446 case ISD::VECREDUCE_SMIN:
17447 case ISD::VECREDUCE_UMAX:
17448 case ISD::VECREDUCE_UMIN:
17449 Results.push_back(LowerVECREDUCE(SDValue(N, 0), DAG));
17450 return;
17451
17452 case ISD::CTPOP:
17453 if (SDValue Result = LowerCTPOP(SDValue(N, 0), DAG))
17454 Results.push_back(Result);
17455 return;
17456 case AArch64ISD::SADDV:
17457 ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::SADDV);
17458 return;
17459 case AArch64ISD::UADDV:
17460 ReplaceReductionResults(N, Results, DAG, ISD::ADD, AArch64ISD::UADDV);
17461 return;
17462 case AArch64ISD::SMINV:
17463 ReplaceReductionResults(N, Results, DAG, ISD::SMIN, AArch64ISD::SMINV);
17464 return;
17465 case AArch64ISD::UMINV:
17466 ReplaceReductionResults(N, Results, DAG, ISD::UMIN, AArch64ISD::UMINV);
17467 return;
17468 case AArch64ISD::SMAXV:
17469 ReplaceReductionResults(N, Results, DAG, ISD::SMAX, AArch64ISD::SMAXV);
17470 return;
17471 case AArch64ISD::UMAXV:
17472 ReplaceReductionResults(N, Results, DAG, ISD::UMAX, AArch64ISD::UMAXV);
17473 return;
17474 case ISD::FP_TO_UINT:
17475 case ISD::FP_TO_SINT:
17476 case ISD::STRICT_FP_TO_SINT:
17477 case ISD::STRICT_FP_TO_UINT:
17478     assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
17479 // Let normal code take care of it by not adding anything to Results.
17480 return;
17481 case ISD::ATOMIC_CMP_SWAP:
17482 ReplaceCMP_SWAP_128Results(N, Results, DAG, Subtarget);
17483 return;
17484 case ISD::LOAD: {
17485     assert(SDValue(N, 0).getValueType() == MVT::i128 &&
17486            "unexpected load's value type");
17487 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
17488 if (!LoadNode->isVolatile() || LoadNode->getMemoryVT() != MVT::i128) {
17489 // Non-volatile loads are optimized later in AArch64's load/store
17490 // optimizer.
17491 return;
17492 }
17493
17494 SDValue Result = DAG.getMemIntrinsicNode(
17495 AArch64ISD::LDP, SDLoc(N),
17496 DAG.getVTList({MVT::i64, MVT::i64, MVT::Other}),
17497 {LoadNode->getChain(), LoadNode->getBasePtr()}, LoadNode->getMemoryVT(),
17498 LoadNode->getMemOperand());
17499
17500 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i128,
17501 Result.getValue(0), Result.getValue(1));
17502 Results.append({Pair, Result.getValue(2) /* Chain */});
17503 return;
17504 }
17505 case ISD::EXTRACT_SUBVECTOR:
17506 ReplaceExtractSubVectorResults(N, Results, DAG);
17507 return;
17508 case ISD::INSERT_SUBVECTOR:
17509 // Custom lowering has been requested for INSERT_SUBVECTOR -- but delegate
17510 // to common code for result type legalisation
17511 return;
17512 case ISD::INTRINSIC_WO_CHAIN: {
17513 EVT VT = N->getValueType(0);
17514     assert((VT == MVT::i8 || VT == MVT::i16) &&
17515            "custom lowering for unexpected type");
17516
17517 ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(0));
17518 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
17519 switch (IntID) {
17520 default:
17521 return;
17522 case Intrinsic::aarch64_sve_clasta_n: {
17523 SDLoc DL(N);
17524 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17525 auto V = DAG.getNode(AArch64ISD::CLASTA_N, DL, MVT::i32,
17526 N->getOperand(1), Op2, N->getOperand(3));
17527 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17528 return;
17529 }
17530 case Intrinsic::aarch64_sve_clastb_n: {
17531 SDLoc DL(N);
17532 auto Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, N->getOperand(2));
17533 auto V = DAG.getNode(AArch64ISD::CLASTB_N, DL, MVT::i32,
17534 N->getOperand(1), Op2, N->getOperand(3));
17535 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17536 return;
17537 }
17538 case Intrinsic::aarch64_sve_lasta: {
17539 SDLoc DL(N);
17540 auto V = DAG.getNode(AArch64ISD::LASTA, DL, MVT::i32,
17541 N->getOperand(1), N->getOperand(2));
17542 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17543 return;
17544 }
17545 case Intrinsic::aarch64_sve_lastb: {
17546 SDLoc DL(N);
17547 auto V = DAG.getNode(AArch64ISD::LASTB, DL, MVT::i32,
17548 N->getOperand(1), N->getOperand(2));
17549 Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
17550 return;
17551 }
17552 }
17553 }
17554 }
17555}
17556
17557bool AArch64TargetLowering::useLoadStackGuardNode() const {
17558 if (Subtarget->isTargetAndroid() || Subtarget->isTargetFuchsia())
17559 return TargetLowering::useLoadStackGuardNode();
17560 return true;
17561}
17562
17563unsigned AArch64TargetLowering::combineRepeatedFPDivisors() const {
17564 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
17565 // reciprocal if there are three or more FDIVs.
17566 return 3;
17567}
17568
17569TargetLoweringBase::LegalizeTypeAction
17570AArch64TargetLowering::getPreferredVectorAction(MVT VT) const {
17571 // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8,
17572 // v4i16, v2i32 instead of to promote.
17573 if (VT == MVT::v1i8 || VT == MVT::v1i16 || VT == MVT::v1i32 ||
17574 VT == MVT::v1f32)
17575 return TypeWidenVector;
17576
17577 return TargetLoweringBase::getPreferredVectorAction(VT);
17578}
17579
17580// Loads and stores less than 128-bits are already atomic; ones above that
17581// are doomed anyway, so defer to the default libcall and blame the OS when
17582// things go wrong.
17583bool AArch64TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
17584 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
17585 return Size == 128;
17586}
17587
17588// Loads and stores less than 128-bits are already atomic; ones above that
17589// are doomed anyway, so defer to the default libcall and blame the OS when
17590// things go wrong.
17591TargetLowering::AtomicExpansionKind
17592AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
17593 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
17594 return Size == 128 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
17595}
17596
17597 // For the real atomic operations, we have ldxr/stxr up to 128 bits.
17598TargetLowering::AtomicExpansionKind
17599AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
17600 if (AI->isFloatingPointOperation())
17601 return AtomicExpansionKind::CmpXChg;
17602
17603 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
17604 if (Size > 128) return AtomicExpansionKind::None;
17605
17606 // Nand is not supported in LSE.
17607 // Leave 128 bits to LLSC or CmpXChg.
17608 if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
17609 if (Subtarget->hasLSE())
17610 return AtomicExpansionKind::None;
17611 if (Subtarget->outlineAtomics()) {
17612       // [U]Min/[U]Max RMW atomics are used in __sync_fetch_ libcalls so far.
17613 // Don't outline them unless
17614 // (1) high level <atomic> support approved:
17615 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p0493r1.pdf
17616 // (2) low level libgcc and compiler-rt support implemented by:
17617 // min/max outline atomics helpers
17618 if (AI->getOperation() != AtomicRMWInst::Min &&
17619 AI->getOperation() != AtomicRMWInst::Max &&
17620 AI->getOperation() != AtomicRMWInst::UMin &&
17621 AI->getOperation() != AtomicRMWInst::UMax) {
17622 return AtomicExpansionKind::None;
17623 }
17624 }
17625 }
17626
17627 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17628 // implement atomicrmw without spilling. If the target address is also on the
17629 // stack and close enough to the spill slot, this can lead to a situation
17630 // where the monitor always gets cleared and the atomic operation can never
17631 // succeed. So at -O0 lower this operation to a CAS loop.
17632 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17633 return AtomicExpansionKind::CmpXChg;
17634
17635 return AtomicExpansionKind::LLSC;
17636}
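// Summary of the decision above (editorial restatement of the code, not a
// normative table):
//   - floating-point RMW                                  -> CmpXChg
//   - size > 128 bits                                     -> None (libcall)
//   - size < 128, op != Nand, LSE available               -> None (native LSE)
//   - size < 128, op != Nand, outlined atomics,
//     op not [U]Min/[U]Max                                -> None (outlined)
//   - remaining cases at -O0                              -> CmpXChg
//   - otherwise                                           -> LL/SC expansion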
17637
17638TargetLowering::AtomicExpansionKind
17639AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
17640 AtomicCmpXchgInst *AI) const {
17641 // If subtarget has LSE, leave cmpxchg intact for codegen.
17642 if (Subtarget->hasLSE() || Subtarget->outlineAtomics())
17643 return AtomicExpansionKind::None;
17644 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
17645 // implement cmpxchg without spilling. If the address being exchanged is also
17646 // on the stack and close enough to the spill slot, this can lead to a
17647 // situation where the monitor always gets cleared and the atomic operation
17648 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
17649 if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
17650 return AtomicExpansionKind::None;
17651
17652 // 128-bit atomic cmpxchg is weird; AtomicExpand doesn't know how to expand
17653 // it.
17654 unsigned Size = AI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
17655 if (Size > 64)
17656 return AtomicExpansionKind::None;
17657
17658 return AtomicExpansionKind::LLSC;
17659}
17660
17661Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
17662 Type *ValueTy, Value *Addr,
17663 AtomicOrdering Ord) const {
17664 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17665 bool IsAcquire = isAcquireOrStronger(Ord);
17666
17667 // Since i128 isn't legal and intrinsics don't get type-lowered, the ldrexd
17668 // intrinsic must return {i64, i64} and we have to recombine them into a
17669 // single i128 here.
17670 if (ValueTy->getPrimitiveSizeInBits() == 128) {
17671 Intrinsic::ID Int =
17672 IsAcquire ? Intrinsic::aarch64_ldaxp : Intrinsic::aarch64_ldxp;
17673 Function *Ldxr = Intrinsic::getDeclaration(M, Int);
17674
17675 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
17676 Value *LoHi = Builder.CreateCall(Ldxr, Addr, "lohi");
17677
17678 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
17679 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
17680 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
17681 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
17682 return Builder.CreateOr(
17683 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
17684 }
17685
17686 Type *Tys[] = { Addr->getType() };
17687 Intrinsic::ID Int =
17688 IsAcquire ? Intrinsic::aarch64_ldaxr : Intrinsic::aarch64_ldxr;
17689 Function *Ldxr = Intrinsic::getDeclaration(M, Int, Tys);
17690
17691 const DataLayout &DL = M->getDataLayout();
17692 IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
17693 Value *Trunc = Builder.CreateTrunc(Builder.CreateCall(Ldxr, Addr), IntEltTy);
17694
17695 return Builder.CreateBitCast(Trunc, ValueTy);
17696}
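// Rough sketch of the IR built by the 128-bit path above for an acquire load
// (most value names mirror the strings used in the code; the sketch itself is
// illustrative and not part of the original source):
//
//   %lohi  = call { i64, i64 } @llvm.aarch64.ldaxp(i8* %addr)
//   %lo    = extractvalue { i64, i64 } %lohi, 0
//   %hi    = extractvalue { i64, i64 } %lohi, 1
//   %lo64  = zext i64 %lo to i128
//   %hi64  = zext i64 %hi to i128
//   %shl   = shl i128 %hi64, 64
//   %val64 = or i128 %lo64, %shl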
17697
17698void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
17699 IRBuilderBase &Builder) const {
17700 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17701 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
17702}
17703
17704Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
17705 Value *Val, Value *Addr,
17706 AtomicOrdering Ord) const {
17707 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
17708 bool IsRelease = isReleaseOrStronger(Ord);
17709
17710 // Since the intrinsics must have legal type, the i128 intrinsics take two
17711 // parameters: "i64, i64". We must marshal Val into the appropriate form
17712 // before the call.
17713 if (Val->getType()->getPrimitiveSizeInBits() == 128) {
17714 Intrinsic::ID Int =
17715 IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
17716 Function *Stxr = Intrinsic::getDeclaration(M, Int);
17717 Type *Int64Ty = Type::getInt64Ty(M->getContext());
17718
17719 Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
17720 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
17721 Addr = Builder.CreateBitCast(Addr, Type::getInt8PtrTy(M->getContext()));
17722 return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
17723 }
17724
17725 Intrinsic::ID Int =
17726 IsRelease ? Intrinsic::aarch64_stlxr : Intrinsic::aarch64_stxr;
17727 Type *Tys[] = { Addr->getType() };
17728 Function *Stxr = Intrinsic::getDeclaration(M, Int, Tys);
17729
17730 const DataLayout &DL = M->getDataLayout();
17731 IntegerType *IntValTy = Builder.getIntNTy(DL.getTypeSizeInBits(Val->getType()));
17732 Val = Builder.CreateBitCast(Val, IntValTy);
17733
17734 return Builder.CreateCall(Stxr,
17735 {Builder.CreateZExtOrBitCast(
17736 Val, Stxr->getFunctionType()->getParamType(0)),
17737 Addr});
17738}
17739
17740bool AArch64TargetLowering::functionArgumentNeedsConsecutiveRegisters(
17741 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
17742 const DataLayout &DL) const {
17743 if (!Ty->isArrayTy()) {
17744 const TypeSize &TySize = Ty->getPrimitiveSizeInBits();
17745 return TySize.isScalable() && TySize.getKnownMinSize() > 128;
17746 }
17747
17748   // All non-aggregate members of the type must have the same type.
17749 SmallVector<EVT> ValueVTs;
17750 ComputeValueVTs(*this, DL, Ty, ValueVTs);
17751 return is_splat(ValueVTs);
17752}
17753
17754bool AArch64TargetLowering::shouldNormalizeToSelectSequence(LLVMContext &,
17755 EVT) const {
17756 return false;
17757}
17758
17759static Value *UseTlsOffset(IRBuilderBase &IRB, unsigned Offset) {
17760 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
17761 Function *ThreadPointerFunc =
17762 Intrinsic::getDeclaration(M, Intrinsic::thread_pointer);
17763 return IRB.CreatePointerCast(
17764 IRB.CreateConstGEP1_32(IRB.getInt8Ty(), IRB.CreateCall(ThreadPointerFunc),
17765 Offset),
17766 IRB.getInt8PtrTy()->getPointerTo(0));
17767}
17768
17769Value *AArch64TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
17770 // Android provides a fixed TLS slot for the stack cookie. See the definition
17771 // of TLS_SLOT_STACK_GUARD in
17772 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
17773 if (Subtarget->isTargetAndroid())
17774 return UseTlsOffset(IRB, 0x28);
17775
17776 // Fuchsia is similar.
17777 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
17778 if (Subtarget->isTargetFuchsia())
17779 return UseTlsOffset(IRB, -0x10);
17780
17781 return TargetLowering::getIRStackGuard(IRB);
17782}
17783
17784void AArch64TargetLowering::insertSSPDeclarations(Module &M) const {
17785 // MSVC CRT provides functionalities for stack protection.
17786 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment()) {
17787 // MSVC CRT has a global variable holding security cookie.
17788 M.getOrInsertGlobal("__security_cookie",
17789 Type::getInt8PtrTy(M.getContext()));
17790
17791 // MSVC CRT has a function to validate security cookie.
17792 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
17793 "__security_check_cookie", Type::getVoidTy(M.getContext()),
17794 Type::getInt8PtrTy(M.getContext()));
17795 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
17796 F->setCallingConv(CallingConv::Win64);
17797 F->addParamAttr(0, Attribute::AttrKind::InReg);
17798 }
17799 return;
17800 }
17801 TargetLowering::insertSSPDeclarations(M);
17802}
17803
17804Value *AArch64TargetLowering::getSDagStackGuard(const Module &M) const {
17805 // MSVC CRT has a global variable holding security cookie.
17806 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
17807 return M.getGlobalVariable("__security_cookie");
17808 return TargetLowering::getSDagStackGuard(M);
17809}
17810
17811Function *AArch64TargetLowering::getSSPStackGuardCheck(const Module &M) const {
17812 // MSVC CRT has a function to validate security cookie.
17813 if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
17814 return M.getFunction("__security_check_cookie");
17815 return TargetLowering::getSSPStackGuardCheck(M);
17816}
17817
17818Value *
17819AArch64TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
17820 // Android provides a fixed TLS slot for the SafeStack pointer. See the
17821 // definition of TLS_SLOT_SAFESTACK in
17822 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
17823 if (Subtarget->isTargetAndroid())
17824 return UseTlsOffset(IRB, 0x48);
17825
17826 // Fuchsia is similar.
17827 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
17828 if (Subtarget->isTargetFuchsia())
17829 return UseTlsOffset(IRB, -0x8);
17830
17831 return TargetLowering::getSafeStackPointerLocation(IRB);
17832}
17833
17834bool AArch64TargetLowering::isMaskAndCmp0FoldingBeneficial(
17835 const Instruction &AndI) const {
17836 // Only sink 'and' mask to cmp use block if it is masking a single bit, since
17837   // this likely allows the and/cmp/br to fold into a single tbz instruction. It
17838 // may be beneficial to sink in other cases, but we would have to check that
17839 // the cmp would not get folded into the br to form a cbz for these to be
17840 // beneficial.
17841 ConstantInt* Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
17842 if (!Mask)
17843 return false;
17844 return Mask->getValue().isPowerOf2();
17845}
17846
17847bool AArch64TargetLowering::
17848 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
17849 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
17850 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
17851 SelectionDAG &DAG) const {
17852 // Does baseline recommend not to perform the fold by default?
17853 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
17854 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
17855 return false;
17856 // Else, if this is a vector shift, prefer 'shl'.
17857 return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL;
17858}
17859
17860bool AArch64TargetLowering::shouldExpandShift(SelectionDAG &DAG,
17861 SDNode *N) const {
17862 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17863 !Subtarget->isTargetWindows() && !Subtarget->isTargetDarwin())
17864 return false;
17865 return true;
17866}
17867
17868void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
17869   // Update IsSplitCSR in AArch64FunctionInfo.
17870 AArch64FunctionInfo *AFI = Entry->getParent()->getInfo<AArch64FunctionInfo>();
17871 AFI->setIsSplitCSR(true);
17872}
17873
17874void AArch64TargetLowering::insertCopiesSplitCSR(
17875 MachineBasicBlock *Entry,
17876 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
17877 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
17878 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
17879 if (!IStart)
17880 return;
17881
17882 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
17883 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
17884 MachineBasicBlock::iterator MBBI = Entry->begin();
17885 for (const MCPhysReg *I = IStart; *I; ++I) {
17886 const TargetRegisterClass *RC = nullptr;
17887 if (AArch64::GPR64RegClass.contains(*I))
17888 RC = &AArch64::GPR64RegClass;
17889 else if (AArch64::FPR64RegClass.contains(*I))
17890 RC = &AArch64::FPR64RegClass;
17891 else
17892       llvm_unreachable("Unexpected register class in CSRsViaCopy!");
17893
17894 Register NewVR = MRI->createVirtualRegister(RC);
17895 // Create copy from CSR to a virtual register.
17896 // FIXME: this currently does not emit CFI pseudo-instructions, it works
17897 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
17898 // nounwind. If we want to generalize this later, we may need to emit
17899 // CFI pseudo-instructions.
17900     assert(Entry->getParent()->getFunction().hasFnAttribute(
17901                Attribute::NoUnwind) &&
17902            "Function should be nounwind in insertCopiesSplitCSR!");
17903 Entry->addLiveIn(*I);
17904 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
17905 .addReg(*I);
17906
17907 // Insert the copy-back instructions right before the terminator.
17908 for (auto *Exit : Exits)
17909 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
17910 TII->get(TargetOpcode::COPY), *I)
17911 .addReg(NewVR);
17912 }
17913}
17914
17915bool AArch64TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
17916 // Integer division on AArch64 is expensive. However, when aggressively
17917 // optimizing for code size, we prefer to use a div instruction, as it is
17918 // usually smaller than the alternative sequence.
17919 // The exception to this is vector division. Since AArch64 doesn't have vector
17920 // integer division, leaving the division as-is is a loss even in terms of
17921 // size, because it will have to be scalarized, while the alternative code
17922 // sequence can be performed in vector form.
17923 bool OptSize = Attr.hasFnAttr(Attribute::MinSize);
17924 return OptSize && !VT.isVector();
17925}
17926
17927bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
17928 // We want inc-of-add for scalars and sub-of-not for vectors.
17929 return VT.isScalarInteger();
17930}
17931
17932bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
17933 return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
17934}
17935
17936unsigned
17937AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
17938 if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
17939 return getPointerTy(DL).getSizeInBits();
17940
17941 return 3 * getPointerTy(DL).getSizeInBits() + 2 * 32;
17942}
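// The arithmetic above corresponds to the AAPCS64 ELF va_list layout,
// { void *__stack, void *__gr_top, void *__vr_top, int __gr_offs,
//   int __vr_offs }, i.e. 3 * 64 + 2 * 32 = 256 bits, whereas Darwin and
// Windows use a plain pointer-sized char * va_list.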
17943
17944void AArch64TargetLowering::finalizeLowering(MachineFunction &MF) const {
17945 MF.getFrameInfo().computeMaxCallFrameSize(MF);
17946 TargetLoweringBase::finalizeLowering(MF);
17947}
17948
17949// Unlike X86, we let frame lowering assign offsets to all catch objects.
17950bool AArch64TargetLowering::needsFixedCatchObjects() const {
17951 return false;
17952}
17953
17954bool AArch64TargetLowering::shouldLocalize(
17955 const MachineInstr &MI, const TargetTransformInfo *TTI) const {
17956 switch (MI.getOpcode()) {
17957 case TargetOpcode::G_GLOBAL_VALUE: {
17958 // On Darwin, TLS global vars get selected into function calls, which
17959     // we don't want localized, as they can get moved into the middle of
17960     // another call sequence.
17961 const GlobalValue &GV = *MI.getOperand(1).getGlobal();
17962 if (GV.isThreadLocal() && Subtarget->isTargetMachO())
17963 return false;
17964 break;
17965 }
17966 // If we legalized G_GLOBAL_VALUE into ADRP + G_ADD_LOW, mark both as being
17967 // localizable.
17968 case AArch64::ADRP:
17969 case AArch64::G_ADD_LOW:
17970 return true;
17971 default:
17972 break;
17973 }
17974 return TargetLoweringBase::shouldLocalize(MI, TTI);
17975}
17976
17977bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
17978 if (isa<ScalableVectorType>(Inst.getType()))
17979 return true;
17980
17981 for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
17982 if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
17983 return true;
17984
17985 if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
17986 if (isa<ScalableVectorType>(AI->getAllocatedType()))
17987 return true;
17988 }
17989
17990 return false;
17991}
17992
17993// Return the largest legal scalable vector type that matches VT's element type.
17994static EVT getContainerForFixedLengthVector(SelectionDAG &DAG, EVT VT) {
17995   assert(VT.isFixedLengthVector() &&
17996          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
17997          "Expected legal fixed length vector!");
17998 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
17999 default:
18000     llvm_unreachable("unexpected element type for SVE container");
18001 case MVT::i8:
18002 return EVT(MVT::nxv16i8);
18003 case MVT::i16:
18004 return EVT(MVT::nxv8i16);
18005 case MVT::i32:
18006 return EVT(MVT::nxv4i32);
18007 case MVT::i64:
18008 return EVT(MVT::nxv2i64);
18009 case MVT::f16:
18010 return EVT(MVT::nxv8f16);
18011 case MVT::f32:
18012 return EVT(MVT::nxv4f32);
18013 case MVT::f64:
18014 return EVT(MVT::nxv2f64);
18015 }
18016}
18017
18018// Return a PTRUE with active lanes corresponding to the extent of VT.
18019static SDValue getPredicateForFixedLengthVector(SelectionDAG &DAG, SDLoc &DL,
18020 EVT VT) {
18021   assert(VT.isFixedLengthVector() &&
18022          DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
18023          "Expected legal fixed length vector!");
18024
18025 unsigned PgPattern =
18026 getSVEPredPatternFromNumElements(VT.getVectorNumElements());
18027   assert(PgPattern && "Unexpected element count for SVE predicate");
18028
18029 // For vectors that are exactly getMaxSVEVectorSizeInBits big, we can use
18030 // AArch64SVEPredPattern::all, which can enable the use of unpredicated
18031 // variants of instructions when available.
18032 const auto &Subtarget =
18033 static_cast<const AArch64Subtarget &>(DAG.getSubtarget());
18034 unsigned MinSVESize = Subtarget.getMinSVEVectorSizeInBits();
18035 unsigned MaxSVESize = Subtarget.getMaxSVEVectorSizeInBits();
18036 if (MaxSVESize && MinSVESize == MaxSVESize &&
18037 MaxSVESize == VT.getSizeInBits())
18038 PgPattern = AArch64SVEPredPattern::all;
18039
18040 MVT MaskVT;
18041 switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
18042 default:
18043     llvm_unreachable("unexpected element type for SVE predicate");
18044 case MVT::i8:
18045 MaskVT = MVT::nxv16i1;
18046 break;
18047 case MVT::i16:
18048 case MVT::f16:
18049 MaskVT = MVT::nxv8i1;
18050 break;
18051 case MVT::i32:
18052 case MVT::f32:
18053 MaskVT = MVT::nxv4i1;
18054 break;
18055 case MVT::i64:
18056 case MVT::f64:
18057 MaskVT = MVT::nxv2i1;
18058 break;
18059 }
18060
18061 return getPTrue(DAG, DL, MaskVT, PgPattern);
18062}
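// Illustrative example (values assumed, not from the original source): for a
// fixed v8i32 operand this returns a PTRUE of type nxv4i1 with pattern VL8,
// activating exactly the first eight 32-bit lanes; when the minimum and
// maximum SVE vector lengths are both pinned to 256 bits (the size of v8i32)
// the pattern is relaxed to "all" so unpredicated instructions can be used.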
18063
18064static SDValue getPredicateForScalableVector(SelectionDAG &DAG, SDLoc &DL,
18065 EVT VT) {
18066   assert(VT.isScalableVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
18067          "Expected legal scalable vector!");
18068 auto PredTy = VT.changeVectorElementType(MVT::i1);
18069 return getPTrue(DAG, DL, PredTy, AArch64SVEPredPattern::all);
18070}
18071
18072static SDValue getPredicateForVector(SelectionDAG &DAG, SDLoc &DL, EVT VT) {
18073 if (VT.isFixedLengthVector())
18074 return getPredicateForFixedLengthVector(DAG, DL, VT);
18075
18076 return getPredicateForScalableVector(DAG, DL, VT);
18077}
18078
18079// Grow V to consume an entire SVE register.
18080static SDValue convertToScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
18081   assert(VT.isScalableVector() &&
18082          "Expected to convert into a scalable vector!");
18083   assert(V.getValueType().isFixedLengthVector() &&
18084          "Expected a fixed length vector operand!");
18085 SDLoc DL(V);
18086 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18087 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
18088}
18089
18090// Shrink V so it's just big enough to maintain a VT's worth of data.
18091static SDValue convertFromScalableVector(SelectionDAG &DAG, EVT VT, SDValue V) {
18092   assert(VT.isFixedLengthVector() &&
18093          "Expected to convert into a fixed length vector!");
18094   assert(V.getValueType().isScalableVector() &&
18095          "Expected a scalable vector operand!");
18096 SDLoc DL(V);
18097 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18098 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
18099}
18100
18101// Convert all fixed length vector loads larger than NEON to masked_loads.
18102SDValue AArch64TargetLowering::LowerFixedLengthVectorLoadToSVE(
18103 SDValue Op, SelectionDAG &DAG) const {
18104 auto Load = cast<LoadSDNode>(Op);
18105
18106 SDLoc DL(Op);
18107 EVT VT = Op.getValueType();
18108 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18109
18110 auto NewLoad = DAG.getMaskedLoad(
18111 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
18112 getPredicateForFixedLengthVector(DAG, DL, VT), DAG.getUNDEF(ContainerVT),
18113 Load->getMemoryVT(), Load->getMemOperand(), Load->getAddressingMode(),
18114 Load->getExtensionType());
18115
18116 auto Result = convertFromScalableVector(DAG, VT, NewLoad);
18117 SDValue MergedValues[2] = {Result, Load->getChain()};
18118 return DAG.getMergeValues(MergedValues, DL);
18119}
18120
18121static SDValue convertFixedMaskToScalableVector(SDValue Mask,
18122 SelectionDAG &DAG) {
18123 SDLoc DL(Mask);
18124 EVT InVT = Mask.getValueType();
18125 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18126
18127 auto Op1 = convertToScalableVector(DAG, ContainerVT, Mask);
18128 auto Op2 = DAG.getConstant(0, DL, ContainerVT);
18129 auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
18130
18131 EVT CmpVT = Pg.getValueType();
18132 return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
18133 {Pg, Op1, Op2, DAG.getCondCode(ISD::SETNE)});
18134}
18135
18136// Convert all fixed length vector loads larger than NEON to masked_loads.
18137SDValue AArch64TargetLowering::LowerFixedLengthVectorMLoadToSVE(
18138 SDValue Op, SelectionDAG &DAG) const {
18139 auto Load = cast<MaskedLoadSDNode>(Op);
18140
18141 if (Load->getExtensionType() != ISD::LoadExtType::NON_EXTLOAD)
18142 return SDValue();
18143
18144 SDLoc DL(Op);
18145 EVT VT = Op.getValueType();
18146 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18147
18148 SDValue Mask = convertFixedMaskToScalableVector(Load->getMask(), DAG);
18149
18150 SDValue PassThru;
18151 bool IsPassThruZeroOrUndef = false;
18152
18153 if (Load->getPassThru()->isUndef()) {
18154 PassThru = DAG.getUNDEF(ContainerVT);
18155 IsPassThruZeroOrUndef = true;
18156 } else {
18157 if (ContainerVT.isInteger())
18158 PassThru = DAG.getConstant(0, DL, ContainerVT);
18159 else
18160 PassThru = DAG.getConstantFP(0, DL, ContainerVT);
18161 if (isZerosVector(Load->getPassThru().getNode()))
18162 IsPassThruZeroOrUndef = true;
18163 }
18164
18165 auto NewLoad = DAG.getMaskedLoad(
18166 ContainerVT, DL, Load->getChain(), Load->getBasePtr(), Load->getOffset(),
18167 Mask, PassThru, Load->getMemoryVT(), Load->getMemOperand(),
18168 Load->getAddressingMode(), Load->getExtensionType());
18169
18170 if (!IsPassThruZeroOrUndef) {
18171 SDValue OldPassThru =
18172 convertToScalableVector(DAG, ContainerVT, Load->getPassThru());
18173 NewLoad = DAG.getSelect(DL, ContainerVT, Mask, NewLoad, OldPassThru);
18174 }
18175
18176 auto Result = convertFromScalableVector(DAG, VT, NewLoad);
18177 SDValue MergedValues[2] = {Result, Load->getChain()};
18178 return DAG.getMergeValues(MergedValues, DL);
18179}
18180
18181// Convert all fixed length vector stores larger than NEON to masked_stores.
18182SDValue AArch64TargetLowering::LowerFixedLengthVectorStoreToSVE(
18183 SDValue Op, SelectionDAG &DAG) const {
18184 auto Store = cast<StoreSDNode>(Op);
18185
18186 SDLoc DL(Op);
18187 EVT VT = Store->getValue().getValueType();
18188 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18189
18190 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18191 return DAG.getMaskedStore(
18192 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18193 getPredicateForFixedLengthVector(DAG, DL, VT), Store->getMemoryVT(),
18194 Store->getMemOperand(), Store->getAddressingMode(),
18195 Store->isTruncatingStore());
18196}
18197
18198SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
18199 SDValue Op, SelectionDAG &DAG) const {
18200 auto Store = cast<MaskedStoreSDNode>(Op);
18201
18202 if (Store->isTruncatingStore())
18203 return SDValue();
18204
18205 SDLoc DL(Op);
18206 EVT VT = Store->getValue().getValueType();
18207 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18208
18209 auto NewValue = convertToScalableVector(DAG, ContainerVT, Store->getValue());
18210 SDValue Mask = convertFixedMaskToScalableVector(Store->getMask(), DAG);
18211
18212 return DAG.getMaskedStore(
18213 Store->getChain(), DL, NewValue, Store->getBasePtr(), Store->getOffset(),
18214 Mask, Store->getMemoryVT(), Store->getMemOperand(),
18215 Store->getAddressingMode(), Store->isTruncatingStore());
18216}
18217
18218SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
18219 SDValue Op, SelectionDAG &DAG) const {
18220 SDLoc dl(Op);
18221 EVT VT = Op.getValueType();
18222 EVT EltVT = VT.getVectorElementType();
18223
18224 bool Signed = Op.getOpcode() == ISD::SDIV;
18225 unsigned PredOpcode = Signed ? AArch64ISD::SDIV_PRED : AArch64ISD::UDIV_PRED;
18226
18227 // Scalable vector i32/i64 DIV is supported.
18228 if (EltVT == MVT::i32 || EltVT == MVT::i64)
18229 return LowerToPredicatedOp(Op, DAG, PredOpcode, /*OverrideNEON=*/true);
18230
18231 // Scalable vector i8/i16 DIV is not supported. Promote it to i32.
18232 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18233 EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18234 EVT FixedWidenedVT = HalfVT.widenIntegerVectorElementType(*DAG.getContext());
18235 EVT ScalableWidenedVT = getContainerForFixedLengthVector(DAG, FixedWidenedVT);
18236
18237 // If this is not a full vector, extend, div, and truncate it.
18238 EVT WidenedVT = VT.widenIntegerVectorElementType(*DAG.getContext());
18239 if (DAG.getTargetLoweringInfo().isTypeLegal(WidenedVT)) {
18240 unsigned ExtendOpcode = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
18241 SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(0));
18242 SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WidenedVT, Op.getOperand(1));
18243 SDValue Div = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0, Op1);
18244 return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
18245 }
18246
18247 // Convert the operands to scalable vectors.
18248 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18249 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18250
18251 // Extend the scalable operands.
18252 unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18253 unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
18254 SDValue Op0Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op0);
18255 SDValue Op1Lo = DAG.getNode(UnpkLo, dl, ScalableWidenedVT, Op1);
18256 SDValue Op0Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op0);
18257 SDValue Op1Hi = DAG.getNode(UnpkHi, dl, ScalableWidenedVT, Op1);
18258
18259 // Convert back to fixed vectors so the DIV can be further lowered.
18260 Op0Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op0Lo);
18261 Op1Lo = convertFromScalableVector(DAG, FixedWidenedVT, Op1Lo);
18262 Op0Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op0Hi);
18263 Op1Hi = convertFromScalableVector(DAG, FixedWidenedVT, Op1Hi);
18264 SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18265 Op0Lo, Op1Lo);
18266 SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, FixedWidenedVT,
18267 Op0Hi, Op1Hi);
18268
18269 // Convert again to scalable vectors to truncate.
18270 ResultLo = convertToScalableVector(DAG, ScalableWidenedVT, ResultLo);
18271 ResultHi = convertToScalableVector(DAG, ScalableWidenedVT, ResultHi);
18272 SDValue ScalableResult = DAG.getNode(AArch64ISD::UZP1, dl, ContainerVT,
18273 ResultLo, ResultHi);
18274
18275 return convertFromScalableVector(DAG, VT, ScalableResult);
18276}
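// Editorial sketch of the i8/i16 path above (assumed example, not from the
// original source): when the element-widened type is itself legal, the
// operands are simply extended, divided, and truncated. Otherwise, e.g. for a
// fixed v32i8 SDIV with 256-bit SVE registers:
//   1. move both operands into their nxv16i8 containers
//   2. SUNPKLO/SUNPKHI each operand into nxv8i16 low/high halves
//   3. convert the halves back to fixed v16i16 and emit two v16i16 SDIVs,
//      which re-enter this lowering
//   4. UZP1 the two widened results back into an nxv16i8 vector
//   5. extract the fixed v32i8 result from that container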
18277
18278SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
18279 SDValue Op, SelectionDAG &DAG) const {
18280 EVT VT = Op.getValueType();
18281 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18282
18283 SDLoc DL(Op);
18284 SDValue Val = Op.getOperand(0);
18285 EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18286 Val = convertToScalableVector(DAG, ContainerVT, Val);
18287
18288 bool Signed = Op.getOpcode() == ISD::SIGN_EXTEND;
18289 unsigned ExtendOpc = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
18290
18291 // Repeatedly unpack Val until the result is of the desired element type.
18292 switch (ContainerVT.getSimpleVT().SimpleTy) {
18293 default:
18294 llvm_unreachable("unimplemented container type");
18295 case MVT::nxv16i8:
18296 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv8i16, Val);
18297 if (VT.getVectorElementType() == MVT::i16)
18298 break;
18299 LLVM_FALLTHROUGH;
18300 case MVT::nxv8i16:
18301 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv4i32, Val);
18302 if (VT.getVectorElementType() == MVT::i32)
18303 break;
18304 LLVM_FALLTHROUGH;
18305 case MVT::nxv4i32:
18306 Val = DAG.getNode(ExtendOpc, DL, MVT::nxv2i64, Val);
18307 assert(VT.getVectorElementType() == MVT::i64 && "Unexpected element type!");
18308 break;
18309 }
18310
18311 return convertFromScalableVector(DAG, VT, Val);
18312}
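
One step of the SUNPKLO/UUNPKLO chain above doubles the element width of the low half of the lanes. A standalone model of a single signed unpack step (sketch only; sunpklo_model is a hypothetical name):

#include <cstddef>
#include <cstdint>
#include <vector>

// Sketch: the low half of the input lanes, each sign-extended to double width.
std::vector<int16_t> sunpklo_model(const std::vector<int8_t> &In) {
  std::vector<int16_t> Out(In.size() / 2);
  for (std::size_t I = 0; I < Out.size(); ++I)
    Out[I] = static_cast<int16_t>(In[I]);
  return Out;
}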
18313
18314SDValue AArch64TargetLowering::LowerFixedLengthVectorTruncateToSVE(
18315 SDValue Op, SelectionDAG &DAG) const {
18316 EVT VT = Op.getValueType();
18317 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18318
18319 SDLoc DL(Op);
18320 SDValue Val = Op.getOperand(0);
18321 EVT ContainerVT = getContainerForFixedLengthVector(DAG, Val.getValueType());
18322 Val = convertToScalableVector(DAG, ContainerVT, Val);
18323
18324 // Repeatedly truncate Val until the result is of the desired element type.
18325 switch (ContainerVT.getSimpleVT().SimpleTy) {
18326 default:
18327 llvm_unreachable("unimplemented container type");
18328 case MVT::nxv2i64:
18329 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv4i32, Val);
18330 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv4i32, Val, Val);
18331 if (VT.getVectorElementType() == MVT::i32)
18332 break;
18333 LLVM_FALLTHROUGH;
18334 case MVT::nxv4i32:
18335 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv8i16, Val);
18336 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv8i16, Val, Val);
18337 if (VT.getVectorElementType() == MVT::i16)
18338 break;
18339 LLVM_FALLTHROUGH;
18340 case MVT::nxv8i16:
18341 Val = DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i8, Val);
18342 Val = DAG.getNode(AArch64ISD::UZP1, DL, MVT::nxv16i8, Val, Val);
18343 assert(VT.getVectorElementType() == MVT::i8 && "Unexpected element type!");
18344 break;
18345 }
18346
18347 return convertFromScalableVector(DAG, VT, Val);
18348}
18349
18350SDValue AArch64TargetLowering::LowerFixedLengthExtractVectorElt(
18351 SDValue Op, SelectionDAG &DAG) const {
18352 EVT VT = Op.getValueType();
18353 EVT InVT = Op.getOperand(0).getValueType();
18354 assert(InVT.isFixedLengthVector() && "Expected fixed length vector type!");
18355
18356 SDLoc DL(Op);
18357 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18358 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18359
18360 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Op.getOperand(1));
18361}
18362
18363SDValue AArch64TargetLowering::LowerFixedLengthInsertVectorElt(
18364 SDValue Op, SelectionDAG &DAG) const {
18365 EVT VT = Op.getValueType();
18366 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18367
18368 SDLoc DL(Op);
18369 EVT InVT = Op.getOperand(0).getValueType();
18370 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18371 SDValue Op0 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(0));
18372
18373 auto ScalableRes = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT, Op0,
18374 Op.getOperand(1), Op.getOperand(2));
18375
18376 return convertFromScalableVector(DAG, VT, ScalableRes);
18377}
18378
18379// Convert vector operation 'Op' to an equivalent predicated operation whereby
18380// the original operation's type is used to construct a suitable predicate.
18381// NOTE: The results for inactive lanes are undefined.
18382SDValue AArch64TargetLowering::LowerToPredicatedOp(SDValue Op,
18383 SelectionDAG &DAG,
18384 unsigned NewOp,
18385 bool OverrideNEON) const {
18386 EVT VT = Op.getValueType();
18387 SDLoc DL(Op);
18388 auto Pg = getPredicateForVector(DAG, DL, VT);
18389
18390 if (useSVEForFixedLengthVectorVT(VT, OverrideNEON)) {
18391 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18392
18393 // Create list of operands by converting existing ones to scalable types.
18394 SmallVector<SDValue, 4> Operands = {Pg};
18395 for (const SDValue &V : Op->op_values()) {
18396 if (isa<CondCodeSDNode>(V)) {
18397 Operands.push_back(V);
18398 continue;
18399 }
18400
18401 if (const VTSDNode *VTNode = dyn_cast<VTSDNode>(V)) {
18402 EVT VTArg = VTNode->getVT().getVectorElementType();
18403 EVT NewVTArg = ContainerVT.changeVectorElementType(VTArg);
18404 Operands.push_back(DAG.getValueType(NewVTArg));
18405 continue;
18406 }
18407
18408 assert(useSVEForFixedLengthVectorVT(V.getValueType(), OverrideNEON) &&
18409 "Only fixed length vectors are supported!");
18410 Operands.push_back(convertToScalableVector(DAG, ContainerVT, V));
18411 }
18412
18413 if (isMergePassthruOpcode(NewOp))
18414 Operands.push_back(DAG.getUNDEF(ContainerVT));
18415
18416 auto ScalableRes = DAG.getNode(NewOp, DL, ContainerVT, Operands);
18417 return convertFromScalableVector(DAG, VT, ScalableRes);
18418 }
18419
18420 assert(VT.isScalableVector() && "Only expect to lower scalable vector op!");
18421
18422 SmallVector<SDValue, 4> Operands = {Pg};
18423 for (const SDValue &V : Op->op_values()) {
18424 assert((!V.getValueType().isVector() ||
18425 V.getValueType().isScalableVector()) &&
18426 "Only scalable vectors are supported!");
18427 Operands.push_back(V);
18428 }
18429
18430 if (isMergePassthruOpcode(NewOp))
18431 Operands.push_back(DAG.getUNDEF(VT));
18432
18433 return DAG.getNode(NewOp, DL, VT, Operands);
18434}
18435
18436// If a fixed length vector operation has no side effects when applied to
18437// undefined elements, we can safely use scalable vectors to perform the same
18438// operation without needing to worry about predication.
18439SDValue AArch64TargetLowering::LowerToScalableOp(SDValue Op,
18440 SelectionDAG &DAG) const {
18441 EVT VT = Op.getValueType();
18442 assert(useSVEForFixedLengthVectorVT(VT) &&
18443 "Only expected to lower fixed length vector operation!");
18444 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18445
18446 // Create list of operands by converting existing ones to scalable types.
18447 SmallVector<SDValue, 4> Ops;
18448 for (const SDValue &V : Op->op_values()) {
18449 assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
18450
18451 // Pass through non-vector operands.
18452 if (!V.getValueType().isVector()) {
18453 Ops.push_back(V);
18454 continue;
18455 }
18456
18457 // "cast" fixed length vector to a scalable vector.
18458 assert(useSVEForFixedLengthVectorVT(V.getValueType()) &&
18459 "Only fixed length vectors are supported!");
18460 Ops.push_back(convertToScalableVector(DAG, ContainerVT, V));
18461 }
18462
18463 auto ScalableRes = DAG.getNode(Op.getOpcode(), SDLoc(Op), ContainerVT, Ops);
18464 return convertFromScalableVector(DAG, VT, ScalableRes);
18465}
18466
18467SDValue AArch64TargetLowering::LowerVECREDUCE_SEQ_FADD(SDValue ScalarOp,
18468 SelectionDAG &DAG) const {
18469 SDLoc DL(ScalarOp);
18470 SDValue AccOp = ScalarOp.getOperand(0);
18471 SDValue VecOp = ScalarOp.getOperand(1);
18472 EVT SrcVT = VecOp.getValueType();
18473 EVT ResVT = SrcVT.getVectorElementType();
18474
18475 EVT ContainerVT = SrcVT;
18476 if (SrcVT.isFixedLengthVector()) {
18477 ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18478 VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18479 }
18480
18481 SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18482 SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
18483
18484 // Convert operands to Scalable.
18485 AccOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
18486 DAG.getUNDEF(ContainerVT), AccOp, Zero);
18487
18488 // Perform reduction.
18489 SDValue Rdx = DAG.getNode(AArch64ISD::FADDA_PRED, DL, ContainerVT,
18490 Pg, AccOp, VecOp);
18491
18492 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Rdx, Zero);
18493}
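
LowerVECREDUCE_SEQ_FADD keeps the reduction strictly ordered: the incoming accumulator is placed in lane 0 and FADDA folds the elements in lane order. A scalar model of that ordering guarantee (illustrative sketch; seq_fadd_reduce is a made-up name):

#include <vector>

// Sketch: strictly in-order floating-point accumulation; no reassociation.
float seq_fadd_reduce(float Acc, const std::vector<float> &V) {
  for (float X : V)
    Acc = Acc + X;
  return Acc;
}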
18494
18495SDValue AArch64TargetLowering::LowerPredReductionToSVE(SDValue ReduceOp,
18496 SelectionDAG &DAG) const {
18497 SDLoc DL(ReduceOp);
18498 SDValue Op = ReduceOp.getOperand(0);
18499 EVT OpVT = Op.getValueType();
18500 EVT VT = ReduceOp.getValueType();
18501
18502 if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
18503 return SDValue();
18504
18505 SDValue Pg = getPredicateForVector(DAG, DL, OpVT);
18506
18507 switch (ReduceOp.getOpcode()) {
18508 default:
18509 return SDValue();
18510 case ISD::VECREDUCE_OR:
18511 return getPTest(DAG, VT, Pg, Op, AArch64CC::ANY_ACTIVE);
18512 case ISD::VECREDUCE_AND: {
18513 Op = DAG.getNode(ISD::XOR, DL, OpVT, Op, Pg);
18514 return getPTest(DAG, VT, Pg, Op, AArch64CC::NONE_ACTIVE);
18515 }
18516 case ISD::VECREDUCE_XOR: {
18517 SDValue ID =
18518 DAG.getTargetConstant(Intrinsic::aarch64_sve_cntp, DL, MVT::i64);
18519 SDValue Cntp =
18520 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i64, ID, Pg, Op);
18521 return DAG.getAnyExtOrTrunc(Cntp, DL, VT);
18522 }
18523 }
18524
18525 return SDValue();
18526}
18527
18528SDValue AArch64TargetLowering::LowerReductionToSVE(unsigned Opcode,
18529 SDValue ScalarOp,
18530 SelectionDAG &DAG) const {
18531 SDLoc DL(ScalarOp);
18532 SDValue VecOp = ScalarOp.getOperand(0);
18533 EVT SrcVT = VecOp.getValueType();
18534
18535 if (useSVEForFixedLengthVectorVT(SrcVT, true)) {
18536 EVT ContainerVT = getContainerForFixedLengthVector(DAG, SrcVT);
18537 VecOp = convertToScalableVector(DAG, ContainerVT, VecOp);
18538 }
18539
18540 // UADDV always returns an i64 result.
18541 EVT ResVT = (Opcode == AArch64ISD::UADDV_PRED) ? MVT::i64 :
18542 SrcVT.getVectorElementType();
18543 EVT RdxVT = SrcVT;
18544 if (SrcVT.isFixedLengthVector() || Opcode == AArch64ISD::UADDV_PRED)
18545 RdxVT = getPackedSVEVectorVT(ResVT);
18546
18547 SDValue Pg = getPredicateForVector(DAG, DL, SrcVT);
18548 SDValue Rdx = DAG.getNode(Opcode, DL, RdxVT, Pg, VecOp);
18549 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT,
18550 Rdx, DAG.getConstant(0, DL, MVT::i64));
18551
18552 // The VEC_REDUCE nodes expect an element size result.
18553 if (ResVT != ScalarOp.getValueType())
18554 Res = DAG.getAnyExtOrTrunc(Res, DL, ScalarOp.getValueType());
18555
18556 return Res;
18557}
18558
18559SDValue
18560AArch64TargetLowering::LowerFixedLengthVectorSelectToSVE(SDValue Op,
18561 SelectionDAG &DAG) const {
18562 EVT VT = Op.getValueType();
18563 SDLoc DL(Op);
18564
18565 EVT InVT = Op.getOperand(1).getValueType();
18566 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18567 SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(1));
18568 SDValue Op2 = convertToScalableVector(DAG, ContainerVT, Op->getOperand(2));
18569
18570 // Convert the mask to a predicate (NOTE: We don't need to worry about
18571 // inactive lanes since VSELECT is safe when given undefined elements).
18572 EVT MaskVT = Op.getOperand(0).getValueType();
18573 EVT MaskContainerVT = getContainerForFixedLengthVector(DAG, MaskVT);
18574 auto Mask = convertToScalableVector(DAG, MaskContainerVT, Op.getOperand(0));
18575 Mask = DAG.getNode(ISD::TRUNCATE, DL,
18576 MaskContainerVT.changeVectorElementType(MVT::i1), Mask);
18577
18578 auto ScalableRes = DAG.getNode(ISD::VSELECT, DL, ContainerVT,
18579 Mask, Op1, Op2);
18580
18581 return convertFromScalableVector(DAG, VT, ScalableRes);
18582}
18583
18584SDValue AArch64TargetLowering::LowerFixedLengthVectorSetccToSVE(
18585 SDValue Op, SelectionDAG &DAG) const {
18586 SDLoc DL(Op);
18587 EVT InVT = Op.getOperand(0).getValueType();
18588 EVT ContainerVT = getContainerForFixedLengthVector(DAG, InVT);
18589
18590 assert(useSVEForFixedLengthVectorVT(InVT) &&
18591 "Only expected to lower fixed length vector operation!");
18592 assert(Op.getValueType() == InVT.changeTypeToInteger() &&
18593 "Expected integer result of the same bit length as the inputs!");
18594
18595 auto Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
18596 auto Op2 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(1));
18597 auto Pg = getPredicateForFixedLengthVector(DAG, DL, InVT);
18598
18599 EVT CmpVT = Pg.getValueType();
18600 auto Cmp = DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, DL, CmpVT,
18601 {Pg, Op1, Op2, Op.getOperand(2)});
18602
18603 EVT PromoteVT = ContainerVT.changeTypeToInteger();
18604 auto Promote = DAG.getBoolExtOrTrunc(Cmp, DL, PromoteVT, InVT);
18605 return convertFromScalableVector(DAG, Op.getValueType(), Promote);
18606}
18607
18608SDValue
18609AArch64TargetLowering::LowerFixedLengthBitcastToSVE(SDValue Op,
18610 SelectionDAG &DAG) const {
18611 SDLoc DL(Op);
18612 auto SrcOp = Op.getOperand(0);
18613 EVT VT = Op.getValueType();
18614 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18615 EVT ContainerSrcVT =
18616 getContainerForFixedLengthVector(DAG, SrcOp.getValueType());
18617
18618 SrcOp = convertToScalableVector(DAG, ContainerSrcVT, SrcOp);
18619 Op = DAG.getNode(ISD::BITCAST, DL, ContainerDstVT, SrcOp);
18620 return convertFromScalableVector(DAG, VT, Op);
18621}
18622
18623SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
18624 SDValue Op, SelectionDAG &DAG) const {
18625 SDLoc DL(Op);
18626 unsigned NumOperands = Op->getNumOperands();
18627
18628 assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
18629 "Unexpected number of operands in CONCAT_VECTORS");
18630
18631 auto SrcOp1 = Op.getOperand(0);
18632 auto SrcOp2 = Op.getOperand(1);
18633 EVT VT = Op.getValueType();
18634 EVT SrcVT = SrcOp1.getValueType();
18635
18636 if (NumOperands > 2) {
18637 SmallVector<SDValue, 4> Ops;
18638 EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
18639 for (unsigned I = 0; I < NumOperands; I += 2)
18640 Ops.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, PairVT,
18641 Op->getOperand(I), Op->getOperand(I + 1)));
18642
18643 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Ops);
18644 }
18645
18646 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18647
18648 SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, SrcVT);
18649 SrcOp1 = convertToScalableVector(DAG, ContainerVT, SrcOp1);
18650 SrcOp2 = convertToScalableVector(DAG, ContainerVT, SrcOp2);
18651
18652 Op = DAG.getNode(AArch64ISD::SPLICE, DL, ContainerVT, Pg, SrcOp1, SrcOp2);
18653
18654 return convertFromScalableVector(DAG, VT, Op);
18655}
18656
18657SDValue
18658AArch64TargetLowering::LowerFixedLengthFPExtendToSVE(SDValue Op,
18659 SelectionDAG &DAG) const {
18660 EVT VT = Op.getValueType();
18661 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18662
18663 SDLoc DL(Op);
18664 SDValue Val = Op.getOperand(0);
18665 SDValue Pg = getPredicateForVector(DAG, DL, VT);
18666 EVT SrcVT = Val.getValueType();
18667 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18668 EVT ExtendVT = ContainerVT.changeVectorElementType(
18669 SrcVT.getVectorElementType());
18670
18671 Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
18672 Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT.changeTypeToInteger(), Val);
18673
18674 Val = convertToScalableVector(DAG, ContainerVT.changeTypeToInteger(), Val);
18675 Val = getSVESafeBitCast(ExtendVT, Val, DAG);
18676 Val = DAG.getNode(AArch64ISD::FP_EXTEND_MERGE_PASSTHRU, DL, ContainerVT,
18677 Pg, Val, DAG.getUNDEF(ContainerVT));
18678
18679 return convertFromScalableVector(DAG, VT, Val);
18680}
18681
18682SDValue
18683AArch64TargetLowering::LowerFixedLengthFPRoundToSVE(SDValue Op,
18684 SelectionDAG &DAG) const {
18685 EVT VT = Op.getValueType();
18686 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18687
18688 SDLoc DL(Op);
18689 SDValue Val = Op.getOperand(0);
18690 EVT SrcVT = Val.getValueType();
18691 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18692 EVT RoundVT = ContainerSrcVT.changeVectorElementType(
18693 VT.getVectorElementType());
18694 SDValue Pg = getPredicateForVector(DAG, DL, RoundVT);
18695
18696 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18697 Val = DAG.getNode(AArch64ISD::FP_ROUND_MERGE_PASSTHRU, DL, RoundVT, Pg, Val,
18698 Op.getOperand(1), DAG.getUNDEF(RoundVT));
18699 Val = getSVESafeBitCast(ContainerSrcVT.changeTypeToInteger(), Val, DAG);
18700 Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
18701
18702 Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
18703 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
18704}
18705
18706SDValue
18707AArch64TargetLowering::LowerFixedLengthIntToFPToSVE(SDValue Op,
18708 SelectionDAG &DAG) const {
18709 EVT VT = Op.getValueType();
18710 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18711
18712 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP;
18713 unsigned Opcode = IsSigned ? AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU
18714 : AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU;
18715
18716 SDLoc DL(Op);
18717 SDValue Val = Op.getOperand(0);
18718 EVT SrcVT = Val.getValueType();
18719 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18720 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18721
18722 if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
18723 ContainerDstVT.getVectorElementType().getSizeInBits()) {
18724 SDValue Pg = getPredicateForVector(DAG, DL, VT);
18725
18726 Val = DAG.getNode(IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, DL,
18727 VT.changeTypeToInteger(), Val);
18728
18729 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18730 Val = getSVESafeBitCast(ContainerDstVT.changeTypeToInteger(), Val, DAG);
18731 // Safe to use a larger than specified operand since we just unpacked the
18732 // data, hence the upper bits are zero.
18733 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
18734 DAG.getUNDEF(ContainerDstVT));
18735 return convertFromScalableVector(DAG, VT, Val);
18736 } else {
18737 EVT CvtVT = ContainerSrcVT.changeVectorElementType(
18738 ContainerDstVT.getVectorElementType());
18739 SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
18740
18741 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18742 Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
18743 Val = getSVESafeBitCast(ContainerSrcVT, Val, DAG);
18744 Val = convertFromScalableVector(DAG, SrcVT, Val);
18745
18746 Val = DAG.getNode(ISD::TRUNCATE, DL, VT.changeTypeToInteger(), Val);
18747 return DAG.getNode(ISD::BITCAST, DL, VT, Val);
18748 }
18749}
18750
18751SDValue
18752AArch64TargetLowering::LowerFixedLengthFPToIntToSVE(SDValue Op,
18753 SelectionDAG &DAG) const {
18754 EVT VT = Op.getValueType();
18755 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18756
18757 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
18758 unsigned Opcode = IsSigned ? AArch64ISD::FCVTZS_MERGE_PASSTHRU
18759 : AArch64ISD::FCVTZU_MERGE_PASSTHRU;
18760
18761 SDLoc DL(Op);
18762 SDValue Val = Op.getOperand(0);
18763 EVT SrcVT = Val.getValueType();
18764 EVT ContainerDstVT = getContainerForFixedLengthVector(DAG, VT);
18765 EVT ContainerSrcVT = getContainerForFixedLengthVector(DAG, SrcVT);
18766
18767 if (ContainerSrcVT.getVectorElementType().getSizeInBits() <=
18768 ContainerDstVT.getVectorElementType().getSizeInBits()) {
18769 EVT CvtVT = ContainerDstVT.changeVectorElementType(
18770 ContainerSrcVT.getVectorElementType());
18771 SDValue Pg = getPredicateForVector(DAG, DL, VT);
18772
18773 Val = DAG.getNode(ISD::BITCAST, DL, SrcVT.changeTypeToInteger(), Val);
18774 Val = DAG.getNode(ISD::ANY_EXTEND, DL, VT, Val);
18775
18776 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18777 Val = getSVESafeBitCast(CvtVT, Val, DAG);
18778 Val = DAG.getNode(Opcode, DL, ContainerDstVT, Pg, Val,
18779 DAG.getUNDEF(ContainerDstVT));
18780 return convertFromScalableVector(DAG, VT, Val);
18781 } else {
18782 EVT CvtVT = ContainerSrcVT.changeTypeToInteger();
18783 SDValue Pg = getPredicateForVector(DAG, DL, CvtVT);
18784
18785 // Safe to use a larger than specified result since an fp_to_int where the
18786 // result doesn't fit into the destination is undefined.
18787 Val = convertToScalableVector(DAG, ContainerSrcVT, Val);
18788 Val = DAG.getNode(Opcode, DL, CvtVT, Pg, Val, DAG.getUNDEF(CvtVT));
18789 Val = convertFromScalableVector(DAG, SrcVT.changeTypeToInteger(), Val);
18790
18791 return DAG.getNode(ISD::TRUNCATE, DL, VT, Val);
18792 }
18793}
18794
18795SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
18796 SDValue Op, SelectionDAG &DAG) const {
18797 EVT VT = Op.getValueType();
18798 assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
18799
18800 auto *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
18801 auto ShuffleMask = SVN->getMask();
18802
18803 SDLoc DL(Op);
18804 SDValue Op1 = Op.getOperand(0);
18805 SDValue Op2 = Op.getOperand(1);
18806
18807 EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
18808 Op1 = convertToScalableVector(DAG, ContainerVT, Op1);
18809 Op2 = convertToScalableVector(DAG, ContainerVT, Op2);
18810
18811 bool ReverseEXT = false;
18812 unsigned Imm;
18813 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm) &&
18814 Imm == VT.getVectorNumElements() - 1) {
18815 if (ReverseEXT)
18816 std::swap(Op1, Op2);
18817
18818 EVT ScalarTy = VT.getVectorElementType();
18819 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
18820 ScalarTy = MVT::i32;
18821 SDValue Scalar = DAG.getNode(
18822 ISD::EXTRACT_VECTOR_ELT, DL, ScalarTy, Op1,
18823 DAG.getConstant(VT.getVectorNumElements() - 1, DL, MVT::i64));
18824 Op = DAG.getNode(AArch64ISD::INSR, DL, ContainerVT, Op2, Scalar);
18825 return convertFromScalableVector(DAG, VT, Op);
18826 }
18827
18828 return SDValue();
18829}
18830
18831SDValue AArch64TargetLowering::getSVESafeBitCast(EVT VT, SDValue Op,
18832 SelectionDAG &DAG) const {
18833 SDLoc DL(Op);
18834 EVT InVT = Op.getValueType();
18835 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18836 (void)TLI;
18837
18838 assert(VT.isScalableVector() && TLI.isTypeLegal(VT) &&
18839 InVT.isScalableVector() && TLI.isTypeLegal(InVT) &&
18840 "Only expect to cast between legal scalable vector types!");
18841 assert((VT.getVectorElementType() == MVT::i1) ==
18842 (InVT.getVectorElementType() == MVT::i1) &&
18843 "Cannot cast between data and predicate scalable vector types!");
18844
18845 if (InVT == VT)
18846 return Op;
18847
18848 if (VT.getVectorElementType() == MVT::i1)
18849 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
18850
18851 EVT PackedVT = getPackedSVEVectorVT(VT.getVectorElementType());
18852 EVT PackedInVT = getPackedSVEVectorVT(InVT.getVectorElementType());
18853
18854 // Pack input if required.
18855 if (InVT != PackedInVT)
18856 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, PackedInVT, Op);
18857
18858 Op = DAG.getNode(ISD::BITCAST, DL, PackedVT, Op);
18859
18860 // Unpack result if required.
18861 if (VT != PackedVT)
18862 Op = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, VT, Op);
18863
18864 return Op;
18865}
18866
18867bool AArch64TargetLowering::isAllActivePredicate(SDValue N) const {
18868 return ::isAllActivePredicate(N);
18869}
18870
18871EVT AArch64TargetLowering::getPromotedVTForPredicate(EVT VT) const {
18872 return ::getPromotedVTForPredicate(VT);
18873}
18874
18875bool AArch64TargetLowering::SimplifyDemandedBitsForTargetNode(
18876 SDValue Op, const APInt &OriginalDemandedBits,
18877 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
18878 unsigned Depth) const {
18879
18880 unsigned Opc = Op.getOpcode();
18881 switch (Opc) {
18882 case AArch64ISD::VSHL: {
18883 // Match (VSHL (VLSHR Val X) X)
18884 SDValue ShiftL = Op;
18885 SDValue ShiftR = Op->getOperand(0);
18886 if (ShiftR->getOpcode() != AArch64ISD::VLSHR)
18887 return false;
18888
18889 if (!ShiftL.hasOneUse() || !ShiftR.hasOneUse())
18890 return false;
18891
18892 unsigned ShiftLBits = ShiftL->getConstantOperandVal(1);
18893 unsigned ShiftRBits = ShiftR->getConstantOperandVal(1);
18894
18895 // Other cases can be handled as well, but this is not
18896 // implemented.
18897 if (ShiftRBits != ShiftLBits)
18898 return false;
18899
18900 unsigned ScalarSize = Op.getScalarValueSizeInBits();
18901 assert(ScalarSize > ShiftLBits && "Invalid shift imm");
18902
18903 APInt ZeroBits = APInt::getLowBitsSet(ScalarSize, ShiftLBits);
18904 APInt UnusedBits = ~OriginalDemandedBits;
18905
18906 if ((ZeroBits & UnusedBits) != ZeroBits)
18907 return false;
18908
18909 // All bits that are zeroed by (VSHL (VLSHR Val X) X) are not
18910 // used - simplify to just Val.
18911 return TLO.CombineTo(Op, ShiftR->getOperand(0));
18912 }
18913 }
18914
18915 return TargetLowering::SimplifyDemandedBitsForTargetNode(
18916 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
18917}
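
The VSHL case above recognises that (VSHL (VLSHR Val, N), N) only clears the low N bits, so when none of those bits are demanded the shift pair can be replaced by Val. A scalar sketch of the reasoning (hypothetical helper names; the real check is done on APInt masks):

#include <cstdint>

// (Val >> N) << N equals Val everywhere except the low N bits.
// N is assumed < 32 here, mirroring the ScalarSize > ShiftLBits assert above.
uint32_t shift_roundtrip(uint32_t Val, unsigned N) { return (Val >> N) << N; }

// The fold is safe when the caller never reads the bits the round-trip zeroes.
bool fold_is_safe(uint32_t DemandedBits, unsigned N) {
  uint32_t LowN = (N >= 32) ? ~0u : ((1u << N) - 1u); // guard the shift amount
  return (DemandedBits & LowN) == 0;
}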
18918
18919bool AArch64TargetLowering::isConstantUnsignedBitfieldExtactLegal(
18920 unsigned Opc, LLT Ty1, LLT Ty2) const {
18921 return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
18922}

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <utility>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34template <typename T> struct DenseMapInfo;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer types like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69///
70class LLVM_NODISCARD APInt {
71public:
72 typedef uint64_t WordType;
73
74 /// This enum is used to hold the constants we needed for APInt.
75 enum : unsigned {
76 /// Byte size of a word.
77 APINT_WORD_SIZE = sizeof(WordType),
78 /// Bits in a word.
79 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT
80 };
81
82 enum class Rounding {
83 DOWN,
84 TOWARD_ZERO,
85 UP,
86 };
87
88 static constexpr WordType WORDTYPE_MAX = ~WordType(0);
89
90private:
91 /// This union is used to store the integer value. When the
92 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
93 union {
94 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
95 uint64_t *pVal; ///< Used to store the >64 bits integer value.
96 } U;
97
98 unsigned BitWidth; ///< The number of bits in this APInt.
99
100 friend struct DenseMapInfo<APInt>;
101
102 friend class APSInt;
103
104 /// Fast internal constructor
105 ///
106 /// This constructor is used only internally for speed of construction of
107 /// temporaries. It is unsafe for general use so it is not public.
108 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
109 U.pVal = val;
110 }
111
112 /// Determine which word a bit is in.
113 ///
114 /// \returns the word position for the specified bit position.
115 static unsigned whichWord(unsigned bitPosition) {
116 return bitPosition / APINT_BITS_PER_WORD;
117 }
118
119 /// Determine which bit in a word a bit is in.
120 ///
121 /// \returns the bit position in a word for the specified bit position
122 /// in the APInt.
123 static unsigned whichBit(unsigned bitPosition) {
124 return bitPosition % APINT_BITS_PER_WORD;
125 }
126
127 /// Get a single bit mask.
128 ///
129 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
130 /// This method generates and returns a uint64_t (word) mask for a single
131 /// bit at a specific bit position. This is used to mask the bit in the
132 /// corresponding word.
133 static uint64_t maskBit(unsigned bitPosition) {
134 return 1ULL << whichBit(bitPosition);
135 }
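
The three helpers above locate a bit inside the multi-word representation: whichWord picks the 64-bit word, whichBit the position within it, and maskBit builds the single-bit mask. A standalone model (sketch only; the real helpers are private members of APInt):

#include <cstdint>

constexpr unsigned BitsPerWord = 64; // models APINT_BITS_PER_WORD
unsigned which_word(unsigned BitPos) { return BitPos / BitsPerWord; }
unsigned which_bit(unsigned BitPos)  { return BitPos % BitsPerWord; }
uint64_t mask_bit(unsigned BitPos)   { return 1ULL << which_bit(BitPos); }
// Example: bit 70 lives in word 1, bit 6, so mask_bit(70) == 0x40.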
136
137 /// Clear unused high order bits
138 ///
139 /// This method is used internally to clear the top "N" bits in the high order
140 /// word that are not used by the APInt. This is needed after the most
141 /// significant word is assigned a value to ensure that those bits are
142 /// zero'd out.
143 APInt &clearUnusedBits() {
144 // Compute how many bits are used in the final word
145 unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
146
147 // Mask out the high bits.
148 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
149 if (isSingleWord())
150 U.VAL &= mask;
151 else
152 U.pVal[getNumWords() - 1] &= mask;
153 return *this;
154 }
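
clearUnusedBits masks off everything above BitWidth in the most significant word. Note that WordBits is always in 1..64, so the shift amount 64 - WordBits stays in 0..63 and remains well defined, unlike the shift-by-64 pattern this report flags. A small model of the mask computation (sketch only; top_word_mask is a made-up name):

#include <cstdint>

uint64_t top_word_mask(unsigned BitWidth) {      // BitWidth assumed non-zero
  unsigned WordBits = ((BitWidth - 1) % 64) + 1; // always 1..64
  return ~0ULL >> (64 - WordBits);               // e.g. BitWidth 36 -> 0xFFFFFFFFFULL
}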
155
156 /// Get the word corresponding to a bit position
157 /// \returns the corresponding word for the specified bit position.
158 uint64_t getWord(unsigned bitPosition) const {
159 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
160 }
161
162 /// Utility method to change the bit width of this APInt to new bit width,
163 /// allocating and/or deallocating as necessary. There is no guarantee on the
164 /// value of any bits upon return. Caller should populate the bits after.
165 void reallocate(unsigned NewBitWidth);
166
167 /// Convert a char array into an APInt
168 ///
169 /// \param radix 2, 8, 10, 16, or 36
170 /// Converts a string into a number. The string must be non-empty
171 /// and well-formed as a number of the given base. The bit-width
172 /// must be sufficient to hold the result.
173 ///
174 /// This is used by the constructors that take string arguments.
175 ///
176 /// StringRef::getAsInteger is superficially similar but (1) does
177 /// not assume that the string is well-formed and (2) grows the
178 /// result to hold the input.
179 void fromString(unsigned numBits, StringRef str, uint8_t radix);
180
181 /// An internal division function for dividing APInts.
182 ///
183 /// This is used by the toString method to divide by the radix. It simply
184 /// provides a more convenient form of divide for internal use since KnuthDiv
185 /// has specific constraints on its inputs. If those constraints are not met
186 /// then it provides a simpler form of divide.
187 static void divide(const WordType *LHS, unsigned lhsWords,
188 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
189 WordType *Remainder);
190
191 /// out-of-line slow case for inline constructor
192 void initSlowCase(uint64_t val, bool isSigned);
193
194 /// shared code between two array constructors
195 void initFromArray(ArrayRef<uint64_t> array);
196
197 /// out-of-line slow case for inline copy constructor
198 void initSlowCase(const APInt &that);
199
200 /// out-of-line slow case for shl
201 void shlSlowCase(unsigned ShiftAmt);
202
203 /// out-of-line slow case for lshr.
204 void lshrSlowCase(unsigned ShiftAmt);
205
206 /// out-of-line slow case for ashr.
207 void ashrSlowCase(unsigned ShiftAmt);
208
209 /// out-of-line slow case for operator=
210 void AssignSlowCase(const APInt &RHS);
211
212 /// out-of-line slow case for operator==
213 bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY;
214
215 /// out-of-line slow case for countLeadingZeros
216 unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
217
218 /// out-of-line slow case for countLeadingOnes.
219 unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
220
221 /// out-of-line slow case for countTrailingZeros.
222 unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
223
224 /// out-of-line slow case for countTrailingOnes
225 unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
226
227 /// out-of-line slow case for countPopulation
228 unsigned countPopulationSlowCase() const LLVM_READONLY;
229
230 /// out-of-line slow case for intersects.
231 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
232
233 /// out-of-line slow case for isSubsetOf.
234 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
235
236 /// out-of-line slow case for setBits.
237 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
238
239 /// out-of-line slow case for flipAllBits.
240 void flipAllBitsSlowCase();
241
242 /// out-of-line slow case for operator&=.
243 void AndAssignSlowCase(const APInt& RHS);
244
245 /// out-of-line slow case for operator|=.
246 void OrAssignSlowCase(const APInt& RHS);
247
248 /// out-of-line slow case for operator^=.
249 void XorAssignSlowCase(const APInt& RHS);
250
251 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
252 /// to, or greater than RHS.
253 int compare(const APInt &RHS) const LLVM_READONLY;
254
255 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
256 /// to, or greater than RHS.
257 int compareSigned(const APInt &RHS) const LLVM_READONLY;
258
259public:
260 /// \name Constructors
261 /// @{
262
263 /// Create a new APInt of numBits width, initialized as val.
264 ///
265 /// If isSigned is true then val is treated as if it were a signed value
266 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
267 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
268 /// the range of val are zero filled).
269 ///
270 /// \param numBits the bit width of the constructed APInt
271 /// \param val the initial value of the APInt
272 /// \param isSigned how to treat signedness of val
273 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
274 : BitWidth(numBits) {
275 assert(BitWidth && "bitwidth too small");
276 if (isSingleWord()) {
277 U.VAL = val;
278 clearUnusedBits();
279 } else {
280 initSlowCase(val, isSigned);
281 }
282 }
283
284 /// Construct an APInt of numBits width, initialized as bigVal[].
285 ///
286 /// Note that bigVal.size() can be smaller or larger than the corresponding
287 /// bit width but any extraneous bits will be dropped.
288 ///
289 /// \param numBits the bit width of the constructed APInt
290 /// \param bigVal a sequence of words to form the initial value of the APInt
291 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
292
293 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
294 /// deprecated because this constructor is prone to ambiguity with the
295 /// APInt(unsigned, uint64_t, bool) constructor.
296 ///
297 /// If this overload is ever deleted, care should be taken to prevent calls
298 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
299 /// constructor.
300 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
301
302 /// Construct an APInt from a string representation.
303 ///
304 /// This constructor interprets the string \p str in the given radix. The
305 /// interpretation stops when the first character that is not suitable for the
306 /// radix is encountered, or the end of the string. Acceptable radix values
307 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
308 /// string to require more bits than numBits.
309 ///
310 /// \param numBits the bit width of the constructed APInt
311 /// \param str the string to be interpreted
312 /// \param radix the radix to use for the conversion
313 APInt(unsigned numBits, StringRef str, uint8_t radix);
314
315 /// Simply makes *this a copy of that.
316 /// Copy Constructor.
317 APInt(const APInt &that) : BitWidth(that.BitWidth) {
318 if (isSingleWord())
319 U.VAL = that.U.VAL;
320 else
321 initSlowCase(that);
322 }
323
324 /// Move Constructor.
325 APInt(APInt &&that) : BitWidth(that.BitWidth) {
326 memcpy(&U, &that.U, sizeof(U));
327 that.BitWidth = 0;
328 }
329
330 /// Destructor.
331 ~APInt() {
332 if (needsCleanup())
333 delete[] U.pVal;
334 }
335
336 /// Default constructor that creates an uninteresting APInt
337 /// representing a 1-bit zero value.
338 ///
339 /// This is useful for object deserialization (pair this with the static
340 /// method Read).
341 explicit APInt() : BitWidth(1) { U.VAL = 0; }
342
343 /// Returns whether this instance allocated memory.
344 bool needsCleanup() const { return !isSingleWord(); }
345
346 /// Used to insert APInt objects, or objects that contain APInt objects, into
347 /// FoldingSets.
348 void Profile(FoldingSetNodeID &id) const;
349
350 /// @}
351 /// \name Value Tests
352 /// @{
353
354 /// Determine if this APInt just has one word to store value.
355 ///
356 /// \returns true if the number of bits <= 64, false otherwise.
357 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
5.1 Field 'BitWidth' is <= APINT_BITS_PER_WORD
6 Returning the value 1, which participates in a condition later
358
359 /// Determine sign of this APInt.
360 ///
361 /// This tests the high bit of this APInt to determine if it is set.
362 ///
363 /// \returns true if this APInt is negative, false otherwise
364 bool isNegative() const { return (*this)[BitWidth - 1]; }
365
366 /// Determine if this APInt Value is non-negative (>= 0)
367 ///
368 /// This tests the high bit of the APInt to determine if it is unset.
369 bool isNonNegative() const { return !isNegative(); }
370
371 /// Determine if sign bit of this APInt is set.
372 ///
373 /// This tests the high bit of this APInt to determine if it is set.
374 ///
375 /// \returns true if this APInt has its sign bit set, false otherwise.
376 bool isSignBitSet() const { return (*this)[BitWidth-1]; }
377
378 /// Determine if sign bit of this APInt is clear.
379 ///
380 /// This tests the high bit of this APInt to determine if it is clear.
381 ///
382 /// \returns true if this APInt has its sign bit clear, false otherwise.
383 bool isSignBitClear() const { return !isSignBitSet(); }
384
385 /// Determine if this APInt Value is positive.
386 ///
387 /// This tests if the value of this APInt is positive (> 0). Note
388 /// that 0 is not a positive value.
389 ///
390 /// \returns true if this APInt is positive.
391 bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
392
393 /// Determine if this APInt Value is non-positive (<= 0).
394 ///
395 /// \returns true if this APInt is non-positive.
396 bool isNonPositive() const { return !isStrictlyPositive(); }
397
398 /// Determine if all bits are set
399 ///
400 /// This checks to see if all bits of the APInt are set or not.
401 bool isAllOnesValue() const {
402 if (isSingleWord())
403 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
404 return countTrailingOnesSlowCase() == BitWidth;
405 }
406
407 /// Determine if all bits are clear
408 ///
409 /// This checks to see if all bits of the APInt are clear or
410 /// not.
411 bool isNullValue() const { return !*this; }
412
413 /// Determine if this is a value of 1.
414 ///
415 /// This checks to see if the value of this APInt is one.
416 bool isOneValue() const {
417 if (isSingleWord())
418 return U.VAL == 1;
419 return countLeadingZerosSlowCase() == BitWidth - 1;
420 }
421
422 /// Determine if this is the largest unsigned value.
423 ///
424 /// This checks to see if the value of this APInt is the maximum unsigned
425 /// value for the APInt's bit width.
426 bool isMaxValue() const { return isAllOnesValue(); }
427
428 /// Determine if this is the largest signed value.
429 ///
430 /// This checks to see if the value of this APInt is the maximum signed
431 /// value for the APInt's bit width.
432 bool isMaxSignedValue() const {
433 if (isSingleWord())
434 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
435 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
436 }
437
438 /// Determine if this is the smallest unsigned value.
439 ///
440 /// This checks to see if the value of this APInt is the minimum unsigned
441 /// value for the APInt's bit width.
442 bool isMinValue() const { return isNullValue(); }
443
444 /// Determine if this is the smallest signed value.
445 ///
446 /// This checks to see if the value of this APInt is the minimum signed
447 /// value for the APInt's bit width.
448 bool isMinSignedValue() const {
449 if (isSingleWord())
450 return U.VAL == (WordType(1) << (BitWidth - 1));
451 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
452 }
453
454 /// Check if this APInt has an N-bits unsigned integer value.
455 bool isIntN(unsigned N) const {
456 assert(N && "N == 0 ???");
457 return getActiveBits() <= N;
458 }
459
460 /// Check if this APInt has an N-bits signed integer value.
461 bool isSignedIntN(unsigned N) const {
462 assert(N && "N == 0 ???");
463 return getMinSignedBits() <= N;
464 }
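An illustrative sketch of the two N-bit checks above (editor's addition, not part of the header; the concrete values are chosen only for illustration):

    llvm::APInt V(32, 255);
    bool FitsU8 = V.isIntN(8);        // true: 255 has 8 active bits
    bool FitsU7 = V.isIntN(7);        // false: 8 active bits > 7
    bool FitsS8 = V.isSignedIntN(8);  // false: as a signed value 255 needs 9 bits
    bool FitsS9 = V.isSignedIntN(9);  // true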
465
466 /// Check if this APInt's value is a power of two greater than zero.
467 ///
468 /// \returns true if the argument APInt value is a power of two > 0.
469 bool isPowerOf2() const {
470 if (isSingleWord())
471 return isPowerOf2_64(U.VAL);
472 return countPopulationSlowCase() == 1;
473 }
474
475 /// Check if the APInt's value is returned by getSignMask.
476 ///
477 /// \returns true if this is the value returned by getSignMask.
478 bool isSignMask() const { return isMinSignedValue(); }
479
480 /// Convert APInt to a boolean value.
481 ///
482 /// This converts the APInt to a boolean value as a test against zero.
483 bool getBoolValue() const { return !!*this; }
484
485 /// If this value is smaller than the specified limit, return it, otherwise
486 /// return the limit value. This causes the value to saturate to the limit.
487 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const {
488 return ugt(Limit) ? Limit : getZExtValue();
489 }
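A brief usage sketch of the saturating accessor above (illustrative only, not from the header):

    llvm::APInt Big(128, 500);
    uint64_t A = Big.getLimitedValue(100); // 500 ugt 100, so the limit 100 is returned
    uint64_t B = Big.getLimitedValue();    // 500 fits in a uint64_t, so 500 is returned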
490
491 /// Check if the APInt consists of a repeated bit pattern.
492 ///
493 /// e.g. 0x01010101 satisfies isSplat(8).
494 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
495 /// width without remainder.
496 bool isSplat(unsigned SplatSizeInBits) const;
497
498 /// \returns true if this APInt value is a sequence of \param numBits ones
499 /// starting at the least significant bit with the remainder zero.
500 bool isMask(unsigned numBits) const {
501 assert(numBits != 0 && "numBits must be non-zero");
502 assert(numBits <= BitWidth && "numBits out of range");
503 if (isSingleWord())
504 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
505 unsigned Ones = countTrailingOnesSlowCase();
506 return (numBits == Ones) &&
507 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
508 }
509
510 /// \returns true if this APInt is a non-empty sequence of ones starting at
511 /// the least significant bit with the remainder zero.
512 /// Ex. isMask(0x0000FFFFU) == true.
513 bool isMask() const {
514 if (isSingleWord())
515 return isMask_64(U.VAL);
516 unsigned Ones = countTrailingOnesSlowCase();
517 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
518 }
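A short sketch contrasting the two isMask overloads above (illustrative, not part of the header):

    llvm::APInt M(32, 0x0000FFFFu);
    bool A = M.isMask();    // true: 16 trailing ones, remainder zero
    bool B = M.isMask(16);  // true: exactly 16 low ones
    bool C = M.isMask(8);   // false: the run of ones is wider than 8 bits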
519
520 /// Return true if this APInt value contains a sequence of ones with
521 /// the remainder zero.
522 bool isShiftedMask() const {
523 if (isSingleWord())
524 return isShiftedMask_64(U.VAL);
525 unsigned Ones = countPopulationSlowCase();
526 unsigned LeadZ = countLeadingZerosSlowCase();
527 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
528 }
529
530 /// @}
531 /// \name Value Generators
532 /// @{
533
534 /// Gets maximum unsigned value of APInt for specific bit width.
535 static APInt getMaxValue(unsigned numBits) {
536 return getAllOnesValue(numBits);
537 }
538
539 /// Gets maximum signed value of APInt for a specific bit width.
540 static APInt getSignedMaxValue(unsigned numBits) {
541 APInt API = getAllOnesValue(numBits);
542 API.clearBit(numBits - 1);
543 return API;
544 }
545
546 /// Gets minimum unsigned value of APInt for a specific bit width.
547 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
548
549 /// Gets minimum signed value of APInt for a specific bit width.
550 static APInt getSignedMinValue(unsigned numBits) {
551 APInt API(numBits, 0);
552 API.setBit(numBits - 1);
553 return API;
554 }
555
556 /// Get the SignMask for a specific bit width.
557 ///
558 /// This is just a wrapper function of getSignedMinValue(), and it helps code
559 /// readability when we want to get a SignMask.
560 static APInt getSignMask(unsigned BitWidth) {
561 return getSignedMinValue(BitWidth);
562 }
563
564 /// Get the all-ones value.
565 ///
566 /// \returns the all-ones value for an APInt of the specified bit-width.
567 static APInt getAllOnesValue(unsigned numBits) {
568 return APInt(numBits, WORDTYPE_MAX, true);
569 }
570
571 /// Get the '0' value.
572 ///
573 /// \returns the '0' value for an APInt of the specified bit-width.
574 static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
575
576 /// Compute an APInt containing numBits highbits from this APInt.
577 ///
578 /// Get an APInt with the same BitWidth as this APInt, just zero mask
579 /// the low bits and right shift to the least significant bit.
580 ///
581 /// \returns the high "numBits" bits of this APInt.
582 APInt getHiBits(unsigned numBits) const;
583
584 /// Compute an APInt containing numBits lowbits from this APInt.
585 ///
586 /// Get an APInt with the same BitWidth as this APInt, just zero mask
587 /// the high bits.
588 ///
589 /// \returns the low "numBits" bits of this APInt.
590 APInt getLoBits(unsigned numBits) const;
591
592 /// Return an APInt with exactly one bit set in the result.
593 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
594 APInt Res(numBits, 0);
595 Res.setBit(BitNo);
596 return Res;
597 }
598
599 /// Get a value with a block of bits set.
600 ///
601 /// Constructs an APInt value that has a contiguous range of bits set. The
602 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
603 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
604 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
605 /// \p hiBit.
606 ///
607 /// \param numBits the intended bit width of the result
608 /// \param loBit the index of the lowest bit set.
609 /// \param hiBit the index of the highest bit set.
610 ///
611 /// \returns An APInt value with the requested bits set.
612 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
613 assert(loBit <= hiBit && "loBit greater than hiBit");
614 APInt Res(numBits, 0);
615 Res.setBits(loBit, hiBit);
616 return Res;
617 }
618
619 /// Wrap version of getBitsSet.
620 /// If \p hiBit is bigger than \p loBit, this is the same as getBitsSet.
621 /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
622 /// with parameters (32, 28, 4), you would get 0xF000000F.
623 /// If \p hiBit is equal to \p loBit, you would get a result with all bits
624 /// set.
625 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
626 unsigned hiBit) {
627 APInt Res(numBits, 0);
628 Res.setBitsWithWrap(loBit, hiBit);
629 return Res;
630 }
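A sketch of the two generators above, using the examples from their own comments (illustrative, not part of the header):

    // Bits [0, 16) set, as in the getBitsSet comment.
    llvm::APInt A = llvm::APInt::getBitsSet(32, 0, 16);         // 0x0000FFFF
    // loBit 28 >= hiBit 4, so the set bits wrap around the top.
    llvm::APInt B = llvm::APInt::getBitsSetWithWrap(32, 28, 4); // 0xF000000F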
631
632 /// Get a value with upper bits starting at loBit set.
633 ///
634 /// Constructs an APInt value that has a contiguous range of bits set. The
635 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
636 /// bits will be zero. For example, with parameters(32, 12) you would get
637 /// 0xFFFFF000.
638 ///
639 /// \param numBits the intended bit width of the result
640 /// \param loBit the index of the lowest bit to set.
641 ///
642 /// \returns An APInt value with the requested bits set.
643 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
644 APInt Res(numBits, 0);
645 Res.setBitsFrom(loBit);
646 return Res;
647 }
648
649 /// Get a value with high bits set
650 ///
651 /// Constructs an APInt value that has the top hiBitsSet bits set.
652 ///
653 /// \param numBits the bitwidth of the result
654 /// \param hiBitsSet the number of high-order bits set in the result.
655 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
656 APInt Res(numBits, 0);
657 Res.setHighBits(hiBitsSet);
658 return Res;
659 }
660
661 /// Get a value with low bits set
662 ///
663 /// Constructs an APInt value that has the bottom loBitsSet bits set.
664 ///
665 /// \param numBits the bitwidth of the result
666 /// \param loBitsSet the number of low-order bits set in the result.
667 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
668 APInt Res(numBits, 0);
669 Res.setLowBits(loBitsSet);
670 return Res;
671 }
672
673 /// Return a value containing V broadcasted over NewLen bits.
674 static APInt getSplat(unsigned NewLen, const APInt &V);
675
676 /// Determine if two APInts have the same value, after zero-extending
677 /// one of them (if needed!) to ensure that the bit-widths match.
678 static bool isSameValue(const APInt &I1, const APInt &I2) {
679 if (I1.getBitWidth() == I2.getBitWidth())
680 return I1 == I2;
681
682 if (I1.getBitWidth() > I2.getBitWidth())
683 return I1 == I2.zext(I1.getBitWidth());
684
685 return I1.zext(I2.getBitWidth()) == I2;
686 }
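A minimal sketch of isSameValue (illustrative, not part of the header):

    llvm::APInt Narrow(16, 5);
    llvm::APInt Wide(32, 5);
    // Widths differ, so Narrow is zero-extended to 32 bits before comparing.
    bool Same = llvm::APInt::isSameValue(Narrow, Wide); // true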
687
688 /// Overload to compute a hash_code for an APInt value.
689 friend hash_code hash_value(const APInt &Arg);
690
691 /// This function returns a pointer to the internal storage of the APInt.
692 /// This is useful for writing out the APInt in binary form without any
693 /// conversions.
694 const uint64_t *getRawData() const {
695 if (isSingleWord())
696 return &U.VAL;
697 return &U.pVal[0];
698 }
699
700 /// @}
701 /// \name Unary Operators
702 /// @{
703
704 /// Postfix increment operator.
705 ///
706 /// Increments *this by 1.
707 ///
708 /// \returns a new APInt value representing the original value of *this.
709 APInt operator++(int) {
710 APInt API(*this);
711 ++(*this);
712 return API;
713 }
714
715 /// Prefix increment operator.
716 ///
717 /// \returns *this incremented by one
718 APInt &operator++();
719
720 /// Postfix decrement operator.
721 ///
722 /// Decrements *this by 1.
723 ///
724 /// \returns a new APInt value representing the original value of *this.
725 APInt operator--(int) {
726 APInt API(*this);
727 --(*this);
728 return API;
729 }
730
731 /// Prefix decrement operator.
732 ///
733 /// \returns *this decremented by one.
734 APInt &operator--();
735
736 /// Logical negation operator.
737 ///
738 /// Performs logical negation operation on this APInt.
739 ///
740 /// \returns true if *this is zero, false otherwise.
741 bool operator!() const {
742 if (isSingleWord())
743 return U.VAL == 0;
744 return countLeadingZerosSlowCase() == BitWidth;
745 }
746
747 /// @}
748 /// \name Assignment Operators
749 /// @{
750
751 /// Copy assignment operator.
752 ///
753 /// \returns *this after assignment of RHS.
754 APInt &operator=(const APInt &RHS) {
755 // If the bitwidths are the same, we can avoid mucking with memory
756 if (isSingleWord() && RHS.isSingleWord()) {
757 U.VAL = RHS.U.VAL;
758 BitWidth = RHS.BitWidth;
759 return clearUnusedBits();
760 }
761
762 AssignSlowCase(RHS);
763 return *this;
764 }
765
766 /// Move assignment operator.
767 APInt &operator=(APInt &&that) {
768#ifdef EXPENSIVE_CHECKS
769 // Some std::shuffle implementations still do self-assignment.
770 if (this == &that)
771 return *this;
772#endif
773 assert(this != &that && "Self-move not supported");
774 if (!isSingleWord())
775 delete[] U.pVal;
776
777 // Use memcpy so that type based alias analysis sees both VAL and pVal
778 // as modified.
779 memcpy(&U, &that.U, sizeof(U));
780
781 BitWidth = that.BitWidth;
782 that.BitWidth = 0;
783
784 return *this;
785 }
786
787 /// Assignment operator.
788 ///
789 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
790 /// the bit width, the excess bits are truncated. If the bit width is larger
791 /// than 64, the value is zero filled in the unspecified high order bits.
792 ///
793 /// \returns *this after assignment of RHS value.
794 APInt &operator=(uint64_t RHS) {
795 if (isSingleWord()) {
796 U.VAL = RHS;
797 return clearUnusedBits();
798 }
799 U.pVal[0] = RHS;
800 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
801 return *this;
802 }
803
804 /// Bitwise AND assignment operator.
805 ///
806 /// Performs a bitwise AND operation on this APInt and RHS. The result is
807 /// assigned to *this.
808 ///
809 /// \returns *this after ANDing with RHS.
810 APInt &operator&=(const APInt &RHS) {
811 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
812 if (isSingleWord())
813 U.VAL &= RHS.U.VAL;
814 else
815 AndAssignSlowCase(RHS);
816 return *this;
817 }
818
819 /// Bitwise AND assignment operator.
820 ///
821 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
822 /// logically zero-extended or truncated to match the bit-width of
823 /// the LHS.
824 APInt &operator&=(uint64_t RHS) {
825 if (isSingleWord()) {
826 U.VAL &= RHS;
827 return *this;
828 }
829 U.pVal[0] &= RHS;
830 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
831 return *this;
832 }
833
834 /// Bitwise OR assignment operator.
835 ///
836 /// Performs a bitwise OR operation on this APInt and RHS. The result is
837 /// assigned to *this.
838 ///
839 /// \returns *this after ORing with RHS.
840 APInt &operator|=(const APInt &RHS) {
841 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
842 if (isSingleWord())
843 U.VAL |= RHS.U.VAL;
844 else
845 OrAssignSlowCase(RHS);
846 return *this;
847 }
848
849 /// Bitwise OR assignment operator.
850 ///
851 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
852 /// logically zero-extended or truncated to match the bit-width of
853 /// the LHS.
854 APInt &operator|=(uint64_t RHS) {
855 if (isSingleWord()) {
856 U.VAL |= RHS;
857 return clearUnusedBits();
858 }
859 U.pVal[0] |= RHS;
860 return *this;
861 }
862
863 /// Bitwise XOR assignment operator.
864 ///
865 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
866 /// assigned to *this.
867 ///
868 /// \returns *this after XORing with RHS.
869 APInt &operator^=(const APInt &RHS) {
870 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
871 if (isSingleWord())
872 U.VAL ^= RHS.U.VAL;
873 else
874 XorAssignSlowCase(RHS);
875 return *this;
876 }
877
878 /// Bitwise XOR assignment operator.
879 ///
880 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
881 /// logically zero-extended or truncated to match the bit-width of
882 /// the LHS.
883 APInt &operator^=(uint64_t RHS) {
884 if (isSingleWord()) {
885 U.VAL ^= RHS;
886 return clearUnusedBits();
887 }
888 U.pVal[0] ^= RHS;
889 return *this;
890 }
891
892 /// Multiplication assignment operator.
893 ///
894 /// Multiplies this APInt by RHS and assigns the result to *this.
895 ///
896 /// \returns *this
897 APInt &operator*=(const APInt &RHS);
898 APInt &operator*=(uint64_t RHS);
899
900 /// Addition assignment operator.
901 ///
902 /// Adds RHS to *this and assigns the result to *this.
903 ///
904 /// \returns *this
905 APInt &operator+=(const APInt &RHS);
906 APInt &operator+=(uint64_t RHS);
907
908 /// Subtraction assignment operator.
909 ///
910 /// Subtracts RHS from *this and assigns the result to *this.
911 ///
912 /// \returns *this
913 APInt &operator-=(const APInt &RHS);
914 APInt &operator-=(uint64_t RHS);
915
916 /// Left-shift assignment function.
917 ///
918 /// Shifts *this left by shiftAmt and assigns the result to *this.
919 ///
920 /// \returns *this after shifting left by ShiftAmt
921 APInt &operator<<=(unsigned ShiftAmt) {
922 assert(ShiftAmt <= BitWidth && "Invalid shift amount");
923 if (isSingleWord()) {
924 if (ShiftAmt == BitWidth)
925 U.VAL = 0;
926 else
927 U.VAL <<= ShiftAmt;
928 return clearUnusedBits();
929 }
930 shlSlowCase(ShiftAmt);
931 return *this;
932 }
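A small sketch of the in-place left shift above, including the explicitly handled full-width case (illustrative, not part of the header):

    llvm::APInt X(8, 1);
    X <<= 3;  // now 0b00001000
    X <<= 8;  // ShiftAmt == BitWidth takes the dedicated branch and yields 0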
933
934 /// Left-shift assignment function.
935 ///
936 /// Shifts *this left by shiftAmt and assigns the result to *this.
937 ///
938 /// \returns *this after shifting left by ShiftAmt
939 APInt &operator<<=(const APInt &ShiftAmt);
940
941 /// @}
942 /// \name Binary Operators
943 /// @{
944
945 /// Multiplication operator.
946 ///
947 /// Multiplies this APInt by RHS and returns the result.
948 APInt operator*(const APInt &RHS) const;
949
950 /// Left logical shift operator.
951 ///
952 /// Shifts this APInt left by \p Bits and returns the result.
953 APInt operator<<(unsigned Bits) const { return shl(Bits); }
954
955 /// Left logical shift operator.
956 ///
957 /// Shifts this APInt left by \p Bits and returns the result.
958 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
959
960 /// Arithmetic right-shift function.
961 ///
962 /// Arithmetic right-shift this APInt by shiftAmt.
963 APInt ashr(unsigned ShiftAmt) const {
964 APInt R(*this);
965 R.ashrInPlace(ShiftAmt);
966 return R;
967 }
968
969 /// Arithmetic right-shift this APInt by ShiftAmt in place.
970 void ashrInPlace(unsigned ShiftAmt) {
971 assert(ShiftAmt <= BitWidth && "Invalid shift amount");
972 if (isSingleWord()) {
973 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
974 if (ShiftAmt == BitWidth)
975 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
976 else
977 U.VAL = SExtVAL >> ShiftAmt;
978 clearUnusedBits();
979 return;
980 }
981 ashrSlowCase(ShiftAmt);
982 }
983
984 /// Logical right-shift function.
985 ///
986 /// Logical right-shift this APInt by shiftAmt.
987 APInt lshr(unsigned shiftAmt) const {
988 APInt R(*this);
989 R.lshrInPlace(shiftAmt);
990 return R;
991 }
992
993 /// Logical right-shift this APInt by ShiftAmt in place.
994 void lshrInPlace(unsigned ShiftAmt) {
995 assert(ShiftAmt <= BitWidth && "Invalid shift amount");
996 if (isSingleWord()) {
997 if (ShiftAmt == BitWidth)
998 U.VAL = 0;
999 else
1000 U.VAL >>= ShiftAmt;
1001 return;
1002 }
1003 lshrSlowCase(ShiftAmt);
1004 }
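A sketch contrasting the arithmetic and logical right shifts above (illustrative, not part of the header):

    llvm::APInt N(8, 0x80);     // sign bit set; -128 as a signed 8-bit value
    llvm::APInt A = N.ashr(7);  // sign bit replicated: 0xFF (all ones)
    llvm::APInt L = N.lshr(7);  // zero filled: 0x01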
1005
1006 /// Left-shift function.
1007 ///
1008 /// Left-shift this APInt by shiftAmt.
1009 APInt shl(unsigned shiftAmt) const {
1010 APInt R(*this);
1011 R <<= shiftAmt;
1012 return R;
1013 }
1014
1015 /// Rotate left by rotateAmt.
1016 APInt rotl(unsigned rotateAmt) const;
1017
1018 /// Rotate right by rotateAmt.
1019 APInt rotr(unsigned rotateAmt) const;
1020
1021 /// Arithmetic right-shift function.
1022 ///
1023 /// Arithmetic right-shift this APInt by shiftAmt.
1024 APInt ashr(const APInt &ShiftAmt) const {
1025 APInt R(*this);
1026 R.ashrInPlace(ShiftAmt);
1027 return R;
1028 }
1029
1030 /// Arithmetic right-shift this APInt by shiftAmt in place.
1031 void ashrInPlace(const APInt &shiftAmt);
1032
1033 /// Logical right-shift function.
1034 ///
1035 /// Logical right-shift this APInt by shiftAmt.
1036 APInt lshr(const APInt &ShiftAmt) const {
1037 APInt R(*this);
1038 R.lshrInPlace(ShiftAmt);
1039 return R;
1040 }
1041
1042 /// Logical right-shift this APInt by ShiftAmt in place.
1043 void lshrInPlace(const APInt &ShiftAmt);
1044
1045 /// Left-shift function.
1046 ///
1047 /// Left-shift this APInt by shiftAmt.
1048 APInt shl(const APInt &ShiftAmt) const {
1049 APInt R(*this);
1050 R <<= ShiftAmt;
1051 return R;
1052 }
1053
1054 /// Rotate left by rotateAmt.
1055 APInt rotl(const APInt &rotateAmt) const;
1056
1057 /// Rotate right by rotateAmt.
1058 APInt rotr(const APInt &rotateAmt) const;
1059
1060 /// Unsigned division operation.
1061 ///
1062 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
1063 /// RHS are treated as unsigned quantities for purposes of this division.
1064 ///
1065 /// \returns a new APInt value containing the division result, rounded towards
1066 /// zero.
1067 APInt udiv(const APInt &RHS) const;
1068 APInt udiv(uint64_t RHS) const;
1069
1070 /// Signed division function for APInt.
1071 ///
1072 /// Signed divide this APInt by APInt RHS.
1073 ///
1074 /// The result is rounded towards zero.
1075 APInt sdiv(const APInt &RHS) const;
1076 APInt sdiv(int64_t RHS) const;
1077
1078 /// Unsigned remainder operation.
1079 ///
1080 /// Perform an unsigned remainder operation on this APInt with RHS being the
1081 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
1082 /// of this operation. Note that this is a true remainder operation and not a
1083 /// modulo operation because the sign follows the sign of the dividend which
1084 /// is *this.
1085 ///
1086 /// \returns a new APInt value containing the remainder result
1087 APInt urem(const APInt &RHS) const;
1088 uint64_t urem(uint64_t RHS) const;
1089
1090 /// Function for signed remainder operation.
1091 ///
1092 /// Signed remainder operation on APInt.
1093 APInt srem(const APInt &RHS) const;
1094 int64_t srem(int64_t RHS) const;
1095
1096 /// Dual division/remainder interface.
1097 ///
1098 /// Sometimes it is convenient to divide two APInt values and obtain both the
1099 /// quotient and remainder. This function does both operations in the same
1100 /// computation making it a little more efficient. The pair of input arguments
1101 /// may overlap with the pair of output arguments. It is safe to call
1102 /// udivrem(X, Y, X, Y), for example.
1103 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1104 APInt &Remainder);
1105 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1106 uint64_t &Remainder);
1107
1108 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1109 APInt &Remainder);
1110 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
1111 int64_t &Remainder);
1112
1113 // Operations that return overflow indicators.
1114 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
1115 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1116 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1117 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1118 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1119 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1120 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1121 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1122 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
1123
1124 // Operations that saturate
1125 APInt sadd_sat(const APInt &RHS) const;
1126 APInt uadd_sat(const APInt &RHS) const;
1127 APInt ssub_sat(const APInt &RHS) const;
1128 APInt usub_sat(const APInt &RHS) const;
1129 APInt smul_sat(const APInt &RHS) const;
1130 APInt umul_sat(const APInt &RHS) const;
1131 APInt sshl_sat(const APInt &RHS) const;
1132 APInt ushl_sat(const APInt &RHS) const;
1133
1134 /// Array-indexing support.
1135 ///
1136 /// \returns the bit value at bitPosition
1137 bool operator[](unsigned bitPosition) const {
1138 assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
1139 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1140 }
1141
1142 /// @}
1143 /// \name Comparison Operators
1144 /// @{
1145
1146 /// Equality operator.
1147 ///
1148 /// Compares this APInt with RHS for the validity of the equality
1149 /// relationship.
1150 bool operator==(const APInt &RHS) const {
1151 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
1152 if (isSingleWord())
1153 return U.VAL == RHS.U.VAL;
1154 return EqualSlowCase(RHS);
1155 }
1156
1157 /// Equality operator.
1158 ///
1159 /// Compares this APInt with a uint64_t for the validity of the equality
1160 /// relationship.
1161 ///
1162 /// \returns true if *this == Val
1163 bool operator==(uint64_t Val) const {
1164 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1165 }
1166
1167 /// Equality comparison.
1168 ///
1169 /// Compares this APInt with RHS for the validity of the equality
1170 /// relationship.
1171 ///
1172 /// \returns true if *this == Val
1173 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1174
1175 /// Inequality operator.
1176 ///
1177 /// Compares this APInt with RHS for the validity of the inequality
1178 /// relationship.
1179 ///
1180 /// \returns true if *this != Val
1181 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1182
1183 /// Inequality operator.
1184 ///
1185 /// Compares this APInt with a uint64_t for the validity of the inequality
1186 /// relationship.
1187 ///
1188 /// \returns true if *this != Val
1189 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1190
1191 /// Inequality comparison
1192 ///
1193 /// Compares this APInt with RHS for the validity of the inequality
1194 /// relationship.
1195 ///
1196 /// \returns true if *this != Val
1197 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1198
1199 /// Unsigned less than comparison
1200 ///
1201 /// Regards both *this and RHS as unsigned quantities and compares them for
1202 /// the validity of the less-than relationship.
1203 ///
1204 /// \returns true if *this < RHS when both are considered unsigned.
1205 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1206
1207 /// Unsigned less than comparison
1208 ///
1209 /// Regards *this as an unsigned quantity and compares it with RHS for
1210 /// the validity of the less-than relationship.
1211 ///
1212 /// \returns true if *this < RHS when considered unsigned.
1213 bool ult(uint64_t RHS) const {
1214 // Only need to check active bits if not a single word.
1215 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1216 }
1217
1218 /// Signed less than comparison
1219 ///
1220 /// Regards both *this and RHS as signed quantities and compares them for
1221 /// validity of the less-than relationship.
1222 ///
1223 /// \returns true if *this < RHS when both are considered signed.
1224 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1225
1226 /// Signed less than comparison
1227 ///
1228 /// Regards *this as a signed quantity and compares it with RHS for
1229 /// the validity of the less-than relationship.
1230 ///
1231 /// \returns true if *this < RHS when considered signed.
1232 bool slt(int64_t RHS) const {
1233 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1234 : getSExtValue() < RHS;
1235 }
1236
1237 /// Unsigned less or equal comparison
1238 ///
1239 /// Regards both *this and RHS as unsigned quantities and compares them for
1240 /// validity of the less-or-equal relationship.
1241 ///
1242 /// \returns true if *this <= RHS when both are considered unsigned.
1243 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1244
1245 /// Unsigned less or equal comparison
1246 ///
1247 /// Regards *this as an unsigned quantity and compares it with RHS for
1248 /// the validity of the less-or-equal relationship.
1249 ///
1250 /// \returns true if *this <= RHS when considered unsigned.
1251 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1252
1253 /// Signed less or equal comparison
1254 ///
1255 /// Regards both *this and RHS as signed quantities and compares them for
1256 /// validity of the less-or-equal relationship.
1257 ///
1258 /// \returns true if *this <= RHS when both are considered signed.
1259 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1260
1261 /// Signed less or equal comparison
1262 ///
1263 /// Regards *this as a signed quantity and compares it with RHS for the
1264 /// validity of the less-or-equal relationship.
1265 ///
1266 /// \returns true if *this <= RHS when considered signed.
1267 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1268
1269 /// Unsigned greater than comparison
1270 ///
1271 /// Regards both *this and RHS as unsigned quantities and compares them for
1272 /// the validity of the greater-than relationship.
1273 ///
1274 /// \returns true if *this > RHS when both are considered unsigned.
1275 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1276
1277 /// Unsigned greater than comparison
1278 ///
1279 /// Regards *this as an unsigned quantity and compares it with RHS for
1280 /// the validity of the greater-than relationship.
1281 ///
1282 /// \returns true if *this > RHS when considered unsigned.
1283 bool ugt(uint64_t RHS) const {
1284 // Only need to check active bits if not a single word.
1285 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1286 }
1287
1288 /// Signed greater than comparison
1289 ///
1290 /// Regards both *this and RHS as signed quantities and compares them for the
1291 /// validity of the greater-than relationship.
1292 ///
1293 /// \returns true if *this > RHS when both are considered signed.
1294 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1295
1296 /// Signed greater than comparison
1297 ///
1298 /// Regards *this as a signed quantity and compares it with RHS for
1299 /// the validity of the greater-than relationship.
1300 ///
1301 /// \returns true if *this > RHS when considered signed.
1302 bool sgt(int64_t RHS) const {
1303 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1304 : getSExtValue() > RHS;
1305 }
1306
1307 /// Unsigned greater or equal comparison
1308 ///
1309 /// Regards both *this and RHS as unsigned quantities and compares them for
1310 /// validity of the greater-or-equal relationship.
1311 ///
1312 /// \returns true if *this >= RHS when both are considered unsigned.
1313 bool uge(const APInt &RHS) const { return !ult(RHS); }
1314
1315 /// Unsigned greater or equal comparison
1316 ///
1317 /// Regards *this as an unsigned quantity and compares it with RHS for
1318 /// the validity of the greater-or-equal relationship.
1319 ///
1320 /// \returns true if *this >= RHS when considered unsigned.
1321 bool uge(uint64_t RHS) const { return !ult(RHS); }
1322
1323 /// Signed greater or equal comparison
1324 ///
1325 /// Regards both *this and RHS as signed quantities and compares them for
1326 /// validity of the greater-or-equal relationship.
1327 ///
1328 /// \returns true if *this >= RHS when both are considered signed.
1329 bool sge(const APInt &RHS) const { return !slt(RHS); }
1330
1331 /// Signed greater or equal comparison
1332 ///
1333 /// Regards *this as a signed quantity and compares it with RHS for
1334 /// the validity of the greater-or-equal relationship.
1335 ///
1336 /// \returns true if *this >= RHS when considered signed.
1337 bool sge(int64_t RHS) const { return !slt(RHS); }
1338
1339 /// This operation tests if there are any pairs of corresponding bits
1340 /// between this APInt and RHS that are both set.
1341 bool intersects(const APInt &RHS) const {
1342 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1343 if (isSingleWord())
1344 return (U.VAL & RHS.U.VAL) != 0;
1345 return intersectsSlowCase(RHS);
1346 }
1347
1348 /// This operation checks that all bits set in this APInt are also set in RHS.
1349 bool isSubsetOf(const APInt &RHS) const {
1350 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1351 if (isSingleWord())
1352 return (U.VAL & ~RHS.U.VAL) == 0;
1353 return isSubsetOfSlowCase(RHS);
1354 }
1355
1356 /// @}
1357 /// \name Resizing Operators
1358 /// @{
1359
1360 /// Truncate to new width.
1361 ///
1362 /// Truncate the APInt to a specified width. It is an error to specify a width
1363 /// that is greater than or equal to the current width.
1364 APInt trunc(unsigned width) const;
1365
1366 /// Truncate to new width with unsigned saturation.
1367 ///
1368 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1369 /// the new bitwidth, then return truncated APInt. Else, return max value.
1370 APInt truncUSat(unsigned width) const;
1371
1372 /// Truncate to new width with signed saturation.
1373 ///
1374 /// If this APInt, treated as signed integer, can be losslessly truncated to
1375 /// the new bitwidth, then return truncated APInt. Else, return either
1376 /// signed min value if the APInt was negative, or signed max value.
1377 APInt truncSSat(unsigned width) const;
1378
1379 /// Sign extend to a new width.
1380 ///
1381 /// This operation sign extends the APInt to a new width. If the high order
1382 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1383 /// It is an error to specify a width that is less than or equal to the
1384 /// current width.
1385 APInt sext(unsigned width) const;
1386
1387 /// Zero extend to a new width.
1388 ///
1389 /// This operation zero extends the APInt to a new width. The high order bits
1390 /// are filled with 0 bits. It is an error to specify a width that is less
1391 /// than or equal to the current width.
1392 APInt zext(unsigned width) const;
1393
1394 /// Sign extend or truncate to width
1395 ///
1396 /// Make this APInt have the bit width given by \p width. The value is sign
1397 /// extended, truncated, or left alone to make it that width.
1398 APInt sextOrTrunc(unsigned width) const;
1399
1400 /// Zero extend or truncate to width
1401 ///
1402 /// Make this APInt have the bit width given by \p width. The value is zero
1403 /// extended, truncated, or left alone to make it that width.
1404 APInt zextOrTrunc(unsigned width) const;
1405
1406 /// Truncate to width
1407 ///
1408 /// Make this APInt have the bit width given by \p width. The value is
1409 /// truncated or left alone to make it that width.
1410 APInt truncOrSelf(unsigned width) const;
1411
1412 /// Sign extend or truncate to width
1413 ///
1414 /// Make this APInt have the bit width given by \p width. The value is sign
1415 /// extended, or left alone to make it that width.
1416 APInt sextOrSelf(unsigned width) const;
1417
1418 /// Zero extend or truncate to width
1419 ///
1420 /// Make this APInt have the bit width given by \p width. The value is zero
1421 /// extended, or left alone to make it that width.
1422 APInt zextOrSelf(unsigned width) const;
1423
1424 /// @}
1425 /// \name Bit Manipulation Operators
1426 /// @{
1427
1428 /// Set every bit to 1.
1429 void setAllBits() {
1430 if (isSingleWord())
1431 U.VAL = WORDTYPE_MAX;
1432 else
1433 // Set all the bits in all the words.
1434 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1435 // Clear the unused ones
1436 clearUnusedBits();
1437 }
1438
1439 /// Set a given bit to 1.
1440 ///
1441 /// Set the given bit to 1 whose position is given as "bitPosition".
1442 void setBit(unsigned BitPosition) {
1443 assert(BitPosition < BitWidth && "BitPosition out of range");
1444 WordType Mask = maskBit(BitPosition);
1445 if (isSingleWord())
1446 U.VAL |= Mask;
1447 else
1448 U.pVal[whichWord(BitPosition)] |= Mask;
1449 }
1450
1451 /// Set the sign bit to 1.
1452 void setSignBit() {
1453 setBit(BitWidth - 1);
1454 }
1455
1456 /// Set a given bit to a given value.
1457 void setBitVal(unsigned BitPosition, bool BitValue) {
1458 if (BitValue)
1459 setBit(BitPosition);
1460 else
1461 clearBit(BitPosition);
1462 }
1463
1464 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1465 /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
1466 /// setBits when \p loBit < \p hiBit.
1467 /// For \p loBit == \p hiBit wrap case, set every bit to 1.
1468 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1469 assert(hiBit <= BitWidth && "hiBit out of range");
1470 assert(loBit <= BitWidth && "loBit out of range");
1471 if (loBit < hiBit) {
1472 setBits(loBit, hiBit);
1473 return;
1474 }
1475 setLowBits(hiBit);
1476 setHighBits(BitWidth - loBit);
1477 }
1478
1479 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1480 /// This function handles case when \p loBit <= \p hiBit.
1481 void setBits(unsigned loBit, unsigned hiBit) {
1482 assert(hiBit <= BitWidth && "hiBit out of range");
1483 assert(loBit <= BitWidth && "loBit out of range");
1484 assert(loBit <= hiBit && "loBit greater than hiBit");
1485 if (loBit == hiBit)
1486 return;
1487 if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
1488 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1489 mask <<= loBit;
1490 if (isSingleWord())
1491 U.VAL |= mask;
1492 else
1493 U.pVal[0] |= mask;
1494 } else {
1495 setBitsSlowCase(loBit, hiBit);
1496 }
1497 }
1498
1499 /// Set the top bits starting from loBit.
1500 void setBitsFrom(unsigned loBit) {
1501 return setBits(loBit, BitWidth);
1502 }
1503
1504 /// Set the bottom loBits bits.
1505 void setLowBits(unsigned loBits) {
1506 return setBits(0, loBits);
1507 }
1508
1509 /// Set the top hiBits bits.
1510 void setHighBits(unsigned hiBits) {
1511 return setBits(BitWidth - hiBits, BitWidth);
1512 }
1513
1514 /// Set every bit to 0.
1515 void clearAllBits() {
1516 if (isSingleWord())
1517 U.VAL = 0;
1518 else
1519 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1520 }
1521
1522 /// Set a given bit to 0.
1523 ///
1524 /// Set the given bit to 0 whose position is given as "bitPosition".
1525 void clearBit(unsigned BitPosition) {
1526 assert(BitPosition < BitWidth && "BitPosition out of range");
1527 WordType Mask = ~maskBit(BitPosition);
1528 if (isSingleWord())
1529 U.VAL &= Mask;
1530 else
1531 U.pVal[whichWord(BitPosition)] &= Mask;
1532 }
1533
1534 /// Set bottom loBits bits to 0.
1535 void clearLowBits(unsigned loBits) {
1536 assert(loBits <= BitWidth && "More bits than bitwidth");
1537 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1538 *this &= Keep;
1539 }
1540
1541 /// Set the sign bit to 0.
1542 void clearSignBit() {
1543 clearBit(BitWidth - 1);
1544 }
1545
1546 /// Toggle every bit to its opposite value.
1547 void flipAllBits() {
1548 if (isSingleWord()) {
1549 U.VAL ^= WORDTYPE_MAX;
1550 clearUnusedBits();
1551 } else {
1552 flipAllBitsSlowCase();
1553 }
1554 }
1555
1556 /// Toggles a given bit to its opposite value.
1557 ///
1558 /// Toggle a given bit to its opposite value whose position is given
1559 /// as "bitPosition".
1560 void flipBit(unsigned bitPosition);
1561
1562 /// Negate this APInt in place.
1563 void negate() {
1564 flipAllBits();
1565 ++(*this);
1566 }
1567
1568 /// Insert the bits from a smaller APInt starting at bitPosition.
1569 void insertBits(const APInt &SubBits, unsigned bitPosition);
1570 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1571
1572 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1573 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1574 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1575
1576 /// @}
1577 /// \name Value Characterization Functions
1578 /// @{
1579
1580 /// Return the number of bits in the APInt.
1581 unsigned getBitWidth() const { return BitWidth; }
1582
1583 /// Get the number of words.
1584 ///
1585 /// Here one word's bitwidth equals that of uint64_t.
1586 ///
1587 /// \returns the number of words to hold the integer value of this APInt.
1588 unsigned getNumWords() const { return getNumWords(BitWidth); }
1589
1590 /// Get the number of words.
1591 ///
1592 /// *NOTE* Here one word's bitwidth equals that of uint64_t.
1593 ///
1594 /// \returns the number of words to hold the integer value with a given bit
1595 /// width.
1596 static unsigned getNumWords(unsigned BitWidth) {
1597 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1598 }
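A quick sketch of the word-count computation above (illustrative, not part of the header):

    // Each word is 64 bits, so the count is the bit width rounded up.
    unsigned W1  = llvm::APInt::getNumWords(1);   // 1
    unsigned W64 = llvm::APInt::getNumWords(64);  // 1
    unsigned W65 = llvm::APInt::getNumWords(65);  // 2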
1599
1600 /// Compute the number of active bits in the value
1601 ///
1602 /// This function returns the number of active bits which is defined as the
1603 /// bit width minus the number of leading zeros. This is used in several
1604 /// computations to see how "wide" the value is.
1605 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1606
1607 /// Compute the number of active words in the value of this APInt.
1608 ///
1609 /// This is used in conjunction with getActiveData to extract the raw value of
1610 /// the APInt.
1611 unsigned getActiveWords() const {
1612 unsigned numActiveBits = getActiveBits();
1613 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1614 }
1615
1616 /// Get the minimum bit size for this signed APInt
1617 ///
1618 /// Computes the minimum bit width for this APInt while considering it to be a
1619 /// signed (and probably negative) value. If the value is not negative, this
1620 /// function returns the same value as getActiveBits()+1. Otherwise, it
1621 /// returns the smallest bit width that will retain the negative value. For
1622 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1623 /// for -1, this function will always return 1.
1624 unsigned getMinSignedBits() const { return BitWidth - getNumSignBits() + 1; }
1625
1626 /// Get zero extended value
1627 ///
1628 /// This method attempts to return the value of this APInt as a zero extended
1629 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1630 /// uint64_t. Otherwise an assertion will result.
1631 uint64_t getZExtValue() const {
1632 if (isSingleWord())
1633 return U.VAL;
1634 assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
1635 return U.pVal[0];
1636 }
1637
1638 /// Get sign extended value
1639 ///
1640 /// This method attempts to return the value of this APInt as a sign extended
1641 /// int64_t. The bit width must be <= 64 or the value must fit within an
1642 /// int64_t. Otherwise an assertion will result.
1643 int64_t getSExtValue() const {
1644 if (isSingleWord())
1645 return SignExtend64(U.VAL, BitWidth);
1646 assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
1647 return int64_t(U.pVal[0]);
1648 }
1649
1650 /// Get bits required for string value.
1651 ///
1652 /// This method determines how many bits are required to hold the APInt
1653 /// equivalent of the string given by \p str.
1654 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1655
1656 /// The APInt version of the countLeadingZeros functions in
1657 /// MathExtras.h.
1658 ///
1659 /// It counts the number of zeros from the most significant bit to the first
1660 /// one bit.
1661 ///
1662 /// \returns BitWidth if the value is zero, otherwise returns the number of
1663 /// zeros from the most significant bit to the first one bit.
1664 unsigned countLeadingZeros() const {
1665 if (isSingleWord()) {
1666 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1667 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1668 }
1669 return countLeadingZerosSlowCase();
1670 }
1671
1672 /// Count the number of leading one bits.
1673 ///
1674 /// This function is an APInt version of the countLeadingOnes
1675 /// functions in MathExtras.h. It counts the number of ones from the most
1676 /// significant bit to the first zero bit.
1677 ///
1678 /// \returns 0 if the high order bit is not set, otherwise returns the number
1679 /// of 1 bits from the most significant to the least
1680 unsigned countLeadingOnes() const {
1681 if (isSingleWord())
1682 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1683 return countLeadingOnesSlowCase();
1684 }
1685
1686 /// Computes the number of leading bits of this APInt that are equal to its
1687 /// sign bit.
1688 unsigned getNumSignBits() const {
1689 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1690 }
1691
1692 /// Count the number of trailing zero bits.
1693 ///
1694 /// This function is an APInt version of the countTrailingZeros
1695 /// functions in MathExtras.h. It counts the number of zeros from the least
1696 /// significant bit to the first set bit.
1697 ///
1698 /// \returns BitWidth if the value is zero, otherwise returns the number of
1699 /// zeros from the least significant bit to the first one bit.
1700 unsigned countTrailingZeros() const {
1701 if (isSingleWord()) {
5
Calling 'APInt::isSingleWord'
7
Returning from 'APInt::isSingleWord'
8
Taking true branch
1702 unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL);
9
Calling 'countTrailingZeros<unsigned long>'
15
Returning from 'countTrailingZeros<unsigned long>'
16
'TrailingZeros' initialized to 64
1703 return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
17
Assuming 'TrailingZeros' is <= field 'BitWidth'
18
'?' condition is false
19
Returning the value 64
1704 }
1705 return countTrailingZerosSlowCase();
1706 }
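The path notes above show llvm::countTrailingZeros returning 64 for a zero single-word value, and the result then being capped only against BitWidth. A caller that feeds such a count directly into a 64-bit shift invokes undefined behaviour; a minimal, hypothetical guard sketch (editor's addition, not code from this header):

    uint64_t Val = 0;                             // hypothetical input
    unsigned TZ = llvm::countTrailingZeros(Val);  // 64 when Val == 0
    uint64_t Low = (TZ >= 64) ? 0 : (Val >> TZ);  // never shift a 64-bit word by 64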
1707
1708 /// Count the number of trailing one bits.
1709 ///
1710 /// This function is an APInt version of the countTrailingOnes
1711 /// functions in MathExtras.h. It counts the number of ones from the least
1712 /// significant bit to the first zero bit.
1713 ///
1714 /// \returns BitWidth if the value is all ones, otherwise returns the number
1715 /// of ones from the least significant bit to the first zero bit.
1716 unsigned countTrailingOnes() const {
1717 if (isSingleWord())
1718 return llvm::countTrailingOnes(U.VAL);
1719 return countTrailingOnesSlowCase();
1720 }
1721
1722 /// Count the number of bits set.
1723 ///
1724 /// This function is an APInt version of the countPopulation functions
1725 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1726 ///
1727 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1728 unsigned countPopulation() const {
1729 if (isSingleWord())
1730 return llvm::countPopulation(U.VAL);
1731 return countPopulationSlowCase();
1732 }
1733
1734 /// @}
1735 /// \name Conversion Functions
1736 /// @{
1737 void print(raw_ostream &OS, bool isSigned) const;
1738
1739 /// Converts an APInt to a string and append it to Str. Str is commonly a
1740 /// SmallString.
1741 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1742 bool formatAsCLiteral = false) const;
1743
1744 /// Considers the APInt to be unsigned and converts it into a string in the
1745 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1746 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1747 toString(Str, Radix, false, false);
1748 }
1749
1750 /// Considers the APInt to be signed and converts it into a string in the
1751 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1752 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1753 toString(Str, Radix, true, false);
1754 }
1755
1756 /// \returns a byte-swapped representation of this APInt Value.
1757 APInt byteSwap() const;
1758
1759 /// \returns the value with the bit representation reversed of this APInt
1760 /// Value.
1761 APInt reverseBits() const;
1762
1763 /// Converts this APInt to a double value.
1764 double roundToDouble(bool isSigned) const;
1765
1766 /// Converts this unsigned APInt to a double value.
1767 double roundToDouble() const { return roundToDouble(false); }
1768
1769 /// Converts this signed APInt to a double value.
1770 double signedRoundToDouble() const { return roundToDouble(true); }
1771
1772 /// Converts APInt bits to a double
1773 ///
1774 /// The conversion does not do a translation from integer to double, it just
1775 /// re-interprets the bits as a double. Note that it is valid to do this on
1776 /// any bit width. Exactly 64 bits will be translated.
1777 double bitsToDouble() const {
1778 return BitsToDouble(getWord(0));
1779 }
1780
1781 /// Converts APInt bits to a float
1782 ///
1783 /// The conversion does not do a translation from integer to float, it just
1784 /// re-interprets the bits as a float. Note that it is valid to do this on
1785 /// any bit width. Exactly 32 bits will be translated.
1786 float bitsToFloat() const {
1787 return BitsToFloat(static_cast<uint32_t>(getWord(0)));
1788 }
1789
1790 /// Converts a double to APInt bits.
1791 ///
1792 /// The conversion does not do a translation from double to integer, it just
1793 /// re-interprets the bits of the double.
1794 static APInt doubleToBits(double V) {
1795 return APInt(sizeof(double) * CHAR_BIT, DoubleToBits(V));
1796 }
1797
1798 /// Converts a float to APInt bits.
1799 ///
1800 /// The conversion does not do a translation from float to integer, it just
1801 /// re-interprets the bits of the float.
1802 static APInt floatToBits(float V) {
1803 return APInt(sizeof(float) * CHAR_BIT, FloatToBits(V));
1804 }
1805
1806 /// @}
1807 /// \name Mathematics Operations
1808 /// @{
1809
1810 /// \returns the floor log base 2 of this APInt.
1811 unsigned logBase2() const { return getActiveBits() - 1; }
1812
1813 /// \returns the ceil log base 2 of this APInt.
1814 unsigned ceilLogBase2() const {
1815 APInt temp(*this);
1816 --temp;
1817 return temp.getActiveBits();
1818 }
1819
1820 /// \returns the nearest log base 2 of this APInt. Ties round up.
1821 ///
1822 /// NOTE: When we have a BitWidth of 1, we define:
1823 ///
1824 /// log2(0) = UINT32_MAX
1825 /// log2(1) = 0
1826 ///
1827 /// to get around any mathematical concerns resulting from
1828 /// referencing 2 in a space where 2 does not exist.
1829 unsigned nearestLogBase2() const {
1830 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1831 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1832 // UINT32_MAX.
1833 if (BitWidth == 1)
1834 return U.VAL - 1;
1835
1836 // Handle the zero case.
1837 if (isNullValue())
1838 return UINT32_MAX;
1839
1840 // The non-zero case is handled by computing:
1841 //
1842 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1843 //
1844 // where x[i] is referring to the value of the ith bit of x.
1845 unsigned lg = logBase2();
1846 return lg + unsigned((*this)[lg - 1]);
1847 }
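
A minimal sketch (not part of this header) of how the formula above plays out for a value halfway between two powers of two; the demo function name is made up:

#include "llvm/ADT/APInt.h"

// Hypothetical demo (not in LLVM): 48 = 0b110000 sits exactly between 32 and 64.
static void nearestLogDemo() {
  llvm::APInt X(32, 48);
  unsigned Floor = X.logBase2();       // 5, since 2^5 = 32 <= 48 < 64
  unsigned Near = X.nearestLogBase2(); // 6: bit (Floor - 1) of 48 is set, so the tie rounds up
  (void)Floor; (void)Near;
}
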
1848
1849 /// \returns the log base 2 of this APInt if it is an exact power of two, -1
1850 /// otherwise
1851 int32_t exactLogBase2() const {
1852 if (!isPowerOf2())
1853 return -1;
1854 return logBase2();
1855 }
1856
1857 /// Compute the square root
1858 APInt sqrt() const;
1859
1860 /// Get the absolute value;
1861 ///
1862 /// If *this is < 0 then return -(*this), otherwise *this;
1863 APInt abs() const {
1864 if (isNegative())
1865 return -(*this);
1866 return *this;
1867 }
1868
1869 /// \returns the multiplicative inverse for a given modulo.
1870 APInt multiplicativeInverse(const APInt &modulo) const;
1871
1872 /// @}
1873 /// \name Support for division by constant
1874 /// @{
1875
1876 /// Calculate the magic number for signed division by a constant.
1877 struct ms;
1878 ms magic() const;
1879
1880 /// Calculate the magic number for unsigned division by a constant.
1881 struct mu;
1882 mu magicu(unsigned LeadingZeros = 0) const;
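
A sketch (not part of this header) of obtaining the magic constants above; how the multiplier and shift are then used is the caller's business, and the demo function name is made up:

#include "llvm/ADT/APInt.h"

// Hypothetical demo (not in LLVM): fetch multiply/shift constants for dividing by 7.
static void magicDemo() {
  llvm::APInt Divisor(32, 7);
  llvm::APInt::ms SignedMagic = Divisor.magic();    // .m = magic multiplier, .s = shift amount
  llvm::APInt::mu UnsignedMagic = Divisor.magicu(); // .a additionally flags a fix-up add
  (void)SignedMagic; (void)UnsignedMagic;
}
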
1883
1884 /// @}
1885 /// \name Building-block Operations for APInt and APFloat
1886 /// @{
1887
1888 // These building block operations operate on a representation of arbitrary
1889 // precision, two's-complement, bignum integer values. They should be
1890 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1891 // generally a pointer to the base of an array of integer parts, representing
1892 // an unsigned bignum, and a count of how many parts there are.
1893
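
For the building-block routines declared below, a small sketch (not part of this header) of the bignum-as-word-array representation; the demo function name is made up:

#include "llvm/ADT/APInt.h"

// Hypothetical demo (not in LLVM): a 128-bit value as two 64-bit parts, least
// significant part first, manipulated with the tc* routines declared below.
static void bignumDemo() {
  llvm::APInt::WordType A[2], B[2];
  llvm::APInt::tcSet(A, ~0ULL, 2); // A = 2^64 - 1 (low part all ones, high part zero)
  llvm::APInt::tcSet(B, 1, 2);     // B = 1
  llvm::APInt::WordType Carry = llvm::APInt::tcAdd(A, B, 0, 2); // A = 2^64, no carry out of 128 bits
  (void)Carry;
}
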
1894 /// Sets the least significant part of a bignum to the input value, and zeroes
1895 /// out higher parts.
1896 static void tcSet(WordType *, WordType, unsigned);
1897
1898 /// Assign one bignum to another.
1899 static void tcAssign(WordType *, const WordType *, unsigned);
1900
1901 /// Returns true if a bignum is zero, false otherwise.
1902 static bool tcIsZero(const WordType *, unsigned);
1903
1904 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1905 static int tcExtractBit(const WordType *, unsigned bit);
1906
1907 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1908 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1909 /// significant bit of DST. All high bits above srcBITS in DST are
1910 /// zero-filled.
1911 static void tcExtract(WordType *, unsigned dstCount,
1912 const WordType *, unsigned srcBits,
1913 unsigned srcLSB);
1914
1915 /// Set the given bit of a bignum. Zero-based.
1916 static void tcSetBit(WordType *, unsigned bit);
1917
1918 /// Clear the given bit of a bignum. Zero-based.
1919 static void tcClearBit(WordType *, unsigned bit);
1920
1921 /// Returns the bit number of the least or most significant set bit of a
1922 /// number. If the input number has no bits set -1U is returned.
1923 static unsigned tcLSB(const WordType *, unsigned n);
1924 static unsigned tcMSB(const WordType *parts, unsigned n);
1925
1926 /// Negate a bignum in-place.
1927 static void tcNegate(WordType *, unsigned);
1928
1929 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1930 static WordType tcAdd(WordType *, const WordType *,
1931 WordType carry, unsigned);
1932 /// DST += RHS. Returns the carry flag.
1933 static WordType tcAddPart(WordType *, WordType, unsigned);
1934
1935 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1936 static WordType tcSubtract(WordType *, const WordType *,
1937 WordType carry, unsigned);
1938 /// DST -= RHS. Returns the carry flag.
1939 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1940
1941 /// DST += SRC * MULTIPLIER + PART if add is true
1942 /// DST = SRC * MULTIPLIER + PART if add is false
1943 ///
1944 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1945 /// start at the same point, i.e. DST == SRC.
1946 ///
1947 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1948 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1949 /// result, and if all of the omitted higher parts were zero return zero,
1950 /// otherwise overflow occurred and return one.
1951 static int tcMultiplyPart(WordType *dst, const WordType *src,
1952 WordType multiplier, WordType carry,
1953 unsigned srcParts, unsigned dstParts,
1954 bool add);
1955
1956 /// DST = LHS * RHS, where DST has the same width as the operands and is
1957 /// filled with the least significant parts of the result. Returns one if
1958 /// overflow occurred, otherwise zero. DST must be disjoint from both
1959 /// operands.
1960 static int tcMultiply(WordType *, const WordType *, const WordType *,
1961 unsigned);
1962
1963 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1964 /// operands. No overflow occurs. DST must be disjoint from both operands.
1965 static void tcFullMultiply(WordType *, const WordType *,
1966 const WordType *, unsigned, unsigned);
1967
1968 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1969 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1970 /// REMAINDER to the remainder, return zero. i.e.
1971 ///
1972 /// OLD_LHS = RHS * LHS + REMAINDER
1973 ///
1974 /// SCRATCH is a bignum of the same size as the operands and result for use by
1975 /// the routine; its contents need not be initialized and are destroyed. LHS,
1976 /// REMAINDER and SCRATCH must be distinct.
1977 static int tcDivide(WordType *lhs, const WordType *rhs,
1978 WordType *remainder, WordType *scratch,
1979 unsigned parts);
1980
1981 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1982 /// restrictions on Count.
1983 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1984
1985 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1986 /// restrictions on Count.
1987 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1988
1989 /// The obvious AND, OR and XOR and complement operations.
1990 static void tcAnd(WordType *, const WordType *, unsigned);
1991 static void tcOr(WordType *, const WordType *, unsigned);
1992 static void tcXor(WordType *, const WordType *, unsigned);
1993 static void tcComplement(WordType *, unsigned);
1994
1995 /// Comparison (unsigned) of two bignums.
1996 static int tcCompare(const WordType *, const WordType *, unsigned);
1997
1998 /// Increment a bignum in-place. Return the carry flag.
1999 static WordType tcIncrement(WordType *dst, unsigned parts) {
2000 return tcAddPart(dst, 1, parts);
2001 }
2002
2003 /// Decrement a bignum in-place. Return the borrow flag.
2004 static WordType tcDecrement(WordType *dst, unsigned parts) {
2005 return tcSubtractPart(dst, 1, parts);
2006 }
2007
2008 /// Set the least significant BITS and clear the rest.
2009 static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
2010
2011 /// debug method
2012 void dump() const;
2013
2014 /// @}
2015};
2016
2017/// Magic data for optimising signed division by a constant.
2018struct APInt::ms {
2019 APInt m; ///< magic number
2020 unsigned s; ///< shift amount
2021};
2022
2023/// Magic data for optimising unsigned division by a constant.
2024struct APInt::mu {
2025 APInt m; ///< magic number
2026 bool a; ///< add indicator
2027 unsigned s; ///< shift amount
2028};
2029
2030inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
2031
2032inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
2033
2034/// Unary bitwise complement operator.
2035///
2036/// \returns an APInt that is the bitwise complement of \p v.
2037inline APInt operator~(APInt v) {
2038 v.flipAllBits();
2039 return v;
2040}
2041
2042inline APInt operator&(APInt a, const APInt &b) {
2043 a &= b;
2044 return a;
2045}
2046
2047inline APInt operator&(const APInt &a, APInt &&b) {
2048 b &= a;
2049 return std::move(b);
2050}
2051
2052inline APInt operator&(APInt a, uint64_t RHS) {
2053 a &= RHS;
2054 return a;
2055}
2056
2057inline APInt operator&(uint64_t LHS, APInt b) {
2058 b &= LHS;
2059 return b;
2060}
2061
2062inline APInt operator|(APInt a, const APInt &b) {
2063 a |= b;
2064 return a;
2065}
2066
2067inline APInt operator|(const APInt &a, APInt &&b) {
2068 b |= a;
2069 return std::move(b);
2070}
2071
2072inline APInt operator|(APInt a, uint64_t RHS) {
2073 a |= RHS;
2074 return a;
2075}
2076
2077inline APInt operator|(uint64_t LHS, APInt b) {
2078 b |= LHS;
2079 return b;
2080}
2081
2082inline APInt operator^(APInt a, const APInt &b) {
2083 a ^= b;
2084 return a;
2085}
2086
2087inline APInt operator^(const APInt &a, APInt &&b) {
2088 b ^= a;
2089 return std::move(b);
2090}
2091
2092inline APInt operator^(APInt a, uint64_t RHS) {
2093 a ^= RHS;
2094 return a;
2095}
2096
2097inline APInt operator^(uint64_t LHS, APInt b) {
2098 b ^= LHS;
2099 return b;
2100}
2101
2102inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2103 I.print(OS, true);
2104 return OS;
2105}
2106
2107inline APInt operator-(APInt v) {
2108 v.negate();
2109 return v;
2110}
2111
2112inline APInt operator+(APInt a, const APInt &b) {
2113 a += b;
2114 return a;
2115}
2116
2117inline APInt operator+(const APInt &a, APInt &&b) {
2118 b += a;
2119 return std::move(b);
2120}
2121
2122inline APInt operator+(APInt a, uint64_t RHS) {
2123 a += RHS;
2124 return a;
2125}
2126
2127inline APInt operator+(uint64_t LHS, APInt b) {
2128 b += LHS;
2129 return b;
2130}
2131
2132inline APInt operator-(APInt a, const APInt &b) {
2133 a -= b;
2134 return a;
2135}
2136
2137inline APInt operator-(const APInt &a, APInt &&b) {
2138 b.negate();
2139 b += a;
2140 return std::move(b);
2141}
2142
2143inline APInt operator-(APInt a, uint64_t RHS) {
2144 a -= RHS;
2145 return a;
2146}
2147
2148inline APInt operator-(uint64_t LHS, APInt b) {
2149 b.negate();
2150 b += LHS;
2151 return b;
2152}
2153
2154inline APInt operator*(APInt a, uint64_t RHS) {
2155 a *= RHS;
2156 return a;
2157}
2158
2159inline APInt operator*(uint64_t LHS, APInt b) {
2160 b *= LHS;
2161 return b;
2162}
2163
2164
2165namespace APIntOps {
2166
2167/// Determine the smaller of two APInts considered to be signed.
2168inline const APInt &smin(const APInt &A, const APInt &B) {
2169 return A.slt(B) ? A : B;
2170}
2171
2172/// Determine the larger of two APInts considered to be signed.
2173inline const APInt &smax(const APInt &A, const APInt &B) {
2174 return A.sgt(B) ? A : B;
2175}
2176
2177/// Determine the smaller of two APInts considered to be unsigned.
2178inline const APInt &umin(const APInt &A, const APInt &B) {
2179 return A.ult(B) ? A : B;
2180}
2181
2182/// Determine the larger of two APInts considered to be unsigned.
2183inline const APInt &umax(const APInt &A, const APInt &B) {
2184 return A.ugt(B) ? A : B;
2185}
2186
2187/// Compute GCD of two unsigned APInt values.
2188///
2189/// This function returns the greatest common divisor of the two APInt values
2190/// using Stein's algorithm.
2191///
2192/// \returns the greatest common divisor of A and B.
2193APInt GreatestCommonDivisor(APInt A, APInt B);
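
A short sketch (not part of this header) showing how the signed and unsigned orderings above diverge, plus GreatestCommonDivisor; the demo function name is made up:

#include "llvm/ADT/APInt.h"

// Hypothetical demo (not in LLVM): 0x80 is -128 when read signed but 128 unsigned.
static void minMaxDemo() {
  llvm::APInt A(8, 0x80), B(8, 0x01);
  const llvm::APInt &SMin = llvm::APIntOps::smin(A, B); // A: -128 < 1
  const llvm::APInt &UMax = llvm::APIntOps::umax(A, B); // A: 128 > 1
  llvm::APInt G = llvm::APIntOps::GreatestCommonDivisor(llvm::APInt(32, 48), llvm::APInt(32, 18)); // 6
  (void)SMin; (void)UMax; (void)G;
}
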
2194
2195/// Converts the given APInt to a double value.
2196///
2197/// Treats the APInt as an unsigned value for conversion purposes.
2198inline double RoundAPIntToDouble(const APInt &APIVal) {
2199 return APIVal.roundToDouble();
2200}
2201
2202/// Converts the given APInt to a double value.
2203///
2204/// Treats the APInt as a signed value for conversion purposes.
2205inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2206 return APIVal.signedRoundToDouble();
2207}
2208
2209/// Converts the given APInt to a float value.
2210inline float RoundAPIntToFloat(const APInt &APIVal) {
2211 return float(RoundAPIntToDouble(APIVal));
2212}
2213
2214/// Converts the given APInt to a float value.
2215///
2216/// Treats the APInt as a signed value for conversion purposes.
2217inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2218 return float(APIVal.signedRoundToDouble());
2219}
2220
2221/// Converts the given double value into an APInt.
2222///
2223/// This function converts a double value to an APInt value.
2224APInt RoundDoubleToAPInt(double Double, unsigned width);
2225
2226/// Converts a float value into an APInt.
2227///
2228/// Converts a float value into an APInt value.
2229inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2230 return RoundDoubleToAPInt(double(Float), width);
2231}
2232
2233/// Return A unsign-divided by B, rounded by the given rounding mode.
2234APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2235
2236/// Return A sign-divided by B, rounded by the given rounding mode.
2237APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2238
2239/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2240/// (e.g. 32 for i32).
2241/// This function finds the smallest number n, such that
2242/// (a) n >= 0 and q(n) = 0, or
2243/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2244/// integers, belong to two different intervals [Rk, Rk+R),
2245/// where R = 2^BW, and k is an integer.
2246/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2247/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2248/// subtraction (treated as addition of negated numbers) would always
2249/// count as an overflow, but here we want to allow values to decrease
2250/// and increase as long as they are within the same interval.
2251/// Specifically, adding of two negative numbers should not cause an
2252/// overflow (as long as the magnitude does not exceed the bit width).
2253/// On the other hand, given a positive number, adding a negative
2254/// number to it can give a negative result, which would cause the
2255/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2256/// treated as a special case of an overflow.
2257///
2258/// This function returns None if after finding k that minimizes the
2259/// positive solution to q(n) = kR, both solutions are contained between
2260/// two consecutive integers.
2261///
2262/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2263/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2264/// virtue of *signed* overflow. This function will *not* find such an n,
2265/// however it may find a value of n satisfying the inequalities due to
2266/// an *unsigned* overflow (if the values are treated as unsigned).
2267/// To find a solution for a signed overflow, treat it as a problem of
2268/// finding an unsigned overflow with a range width of BW-1.
2269///
2270/// The returned value may have a different bit width from the input
2271/// coefficients.
2272Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2273 unsigned RangeWidth);
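
To make the description above concrete, a sketch (not part of this header) of case (a), where the polynomial hits zero exactly before any wrap of 2^BW can occur; the demo function name is made up and the result is the expected one, not a verified output:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/Optional.h"

// Hypothetical demo (not in LLVM): q(n) = n^2 + n - 42 has q(6) = 36 + 6 - 42 = 0, so 6 is
// the expected answer for a 32-bit range (no 2^32 wrap happens for such small values).
static void quadraticDemo() {
  llvm::Optional<llvm::APInt> N = llvm::APIntOps::SolveQuadraticEquationWrap(
      llvm::APInt(32, 1), llvm::APInt(32, 1),
      llvm::APInt(32, -42, /*isSigned=*/true), /*RangeWidth=*/32);
  (void)N;
}
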
2274
2275/// Compare two values, and if they are different, return the position of the
2276/// most significant bit that is different in the values.
2277Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2278 const APInt &B);
2279
2280} // End of APIntOps namespace
2281
2282// See friend declaration above. This additional declaration is required in
2283// order to compile LLVM with IBM xlC compiler.
2284hash_code hash_value(const APInt &Arg);
2285
2286/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2287/// with the integer held in IntVal.
2288void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2289
2290/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2291/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2292void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
2293
2294/// Provide DenseMapInfo for APInt.
2295template <> struct DenseMapInfo<APInt> {
2296 static inline APInt getEmptyKey() {
2297 APInt V(nullptr, 0);
2298 V.U.VAL = 0;
2299 return V;
2300 }
2301
2302 static inline APInt getTombstoneKey() {
2303 APInt V(nullptr, 0);
2304 V.U.VAL = 1;
2305 return V;
2306 }
2307
2308 static unsigned getHashValue(const APInt &Key);
2309
2310 static bool isEqual(const APInt &LHS, const APInt &RHS) {
2311 return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
2312 }
2313};
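
A brief sketch (not part of this header) of what the specialization above enables; the demo function name is made up:

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"

// Hypothetical demo (not in LLVM): APInt as a DenseMap key, compared by bit width
// and value as in isEqual above.
static void denseMapDemo() {
  llvm::DenseMap<llvm::APInt, unsigned> UseCount;
  UseCount[llvm::APInt(32, 7)] += 1;
}
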
2314
2315} // namespace llvm
2316
2317#endif

/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/Support/MathExtras.h

1//===-- llvm/Support/MathExtras.h - Useful math functions -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains some functions that are useful for math stuff.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_SUPPORT_MATHEXTRAS_H
14#define LLVM_SUPPORT_MATHEXTRAS_H
15
16#include "llvm/Support/Compiler.h"
17#include <cassert>
18#include <climits>
19#include <cmath>
20#include <cstdint>
21#include <cstring>
22#include <limits>
23#include <type_traits>
24
25#ifdef __ANDROID_NDK__
26#include <android/api-level.h>
27#endif
28
29#ifdef _MSC_VER
30// Declare these intrinsics manually rather including intrin.h. It's very
31// expensive, and MathExtras.h is popular.
32// #include <intrin.h>
33extern "C" {
34unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
35unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
36unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
37unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
38}
39#endif
40
41namespace llvm {
42
43/// The behavior an operation has on an input of 0.
44enum ZeroBehavior {
45 /// The returned value is undefined.
46 ZB_Undefined,
47 /// The returned value is numeric_limits<T>::max()
48 ZB_Max,
49 /// The returned value is numeric_limits<T>::digits
50 ZB_Width
51};
52
53/// Mathematical constants.
54namespace numbers {
55// TODO: Track C++20 std::numbers.
56// TODO: Favor using the hexadecimal FP constants (requires C++17).
57constexpr double e = 2.7182818284590452354, // (0x1.5bf0a8b145749P+1) https://oeis.org/A001113
58 egamma = .57721566490153286061, // (0x1.2788cfc6fb619P-1) https://oeis.org/A001620
59 ln2 = .69314718055994530942, // (0x1.62e42fefa39efP-1) https://oeis.org/A002162
60 ln10 = 2.3025850929940456840, // (0x1.24bb1bbb55516P+1) https://oeis.org/A002392
61 log2e = 1.4426950408889634074, // (0x1.71547652b82feP+0)
62 log10e = .43429448190325182765, // (0x1.bcb7b1526e50eP-2)
63 pi = 3.1415926535897932385, // (0x1.921fb54442d18P+1) https://oeis.org/A000796
64 inv_pi = .31830988618379067154, // (0x1.45f306bc9c883P-2) https://oeis.org/A049541
65 sqrtpi = 1.7724538509055160273, // (0x1.c5bf891b4ef6bP+0) https://oeis.org/A002161
66 inv_sqrtpi = .56418958354775628695, // (0x1.20dd750429b6dP-1) https://oeis.org/A087197
67 sqrt2 = 1.4142135623730950488, // (0x1.6a09e667f3bcdP+0) https://oeis.org/A002193
68 inv_sqrt2 = .70710678118654752440, // (0x1.6a09e667f3bcdP-1)
69 sqrt3 = 1.7320508075688772935, // (0x1.bb67ae8584caaP+0) https://oeis.org/A002194
70 inv_sqrt3 = .57735026918962576451, // (0x1.279a74590331cP-1)
71 phi = 1.6180339887498948482; // (0x1.9e3779b97f4a8P+0) https://oeis.org/A001622
72constexpr float ef = 2.71828183F, // (0x1.5bf0a8P+1) https://oeis.org/A001113
73 egammaf = .577215665F, // (0x1.2788d0P-1) https://oeis.org/A001620
74 ln2f = .693147181F, // (0x1.62e430P-1) https://oeis.org/A002162
75 ln10f = 2.30258509F, // (0x1.26bb1cP+1) https://oeis.org/A002392
76 log2ef = 1.44269504F, // (0x1.715476P+0)
77 log10ef = .434294482F, // (0x1.bcb7b2P-2)
78 pif = 3.14159265F, // (0x1.921fb6P+1) https://oeis.org/A000796
79 inv_pif = .318309886F, // (0x1.45f306P-2) https://oeis.org/A049541
80 sqrtpif = 1.77245385F, // (0x1.c5bf8aP+0) https://oeis.org/A002161
81 inv_sqrtpif = .564189584F, // (0x1.20dd76P-1) https://oeis.org/A087197
82 sqrt2f = 1.41421356F, // (0x1.6a09e6P+0) https://oeis.org/A002193
83 inv_sqrt2f = .707106781F, // (0x1.6a09e6P-1)
84 sqrt3f = 1.73205081F, // (0x1.bb67aeP+0) https://oeis.org/A002194
85 inv_sqrt3f = .577350269F, // (0x1.279a74P-1)
86 phif = 1.61803399F; // (0x1.9e377aP+0) https://oeis.org/A001622
87} // namespace numbers
88
89namespace detail {
90template <typename T, std::size_t SizeOfT> struct TrailingZerosCounter {
91 static unsigned count(T Val, ZeroBehavior) {
92 if (!Val)
93 return std::numeric_limits<T>::digits;
94 if (Val & 0x1)
95 return 0;
96
97 // Bisection method.
98 unsigned ZeroBits = 0;
99 T Shift = std::numeric_limits<T>::digits >> 1;
100 T Mask = std::numeric_limits<T>::max() >> Shift;
101 while (Shift) {
102 if ((Val & Mask) == 0) {
103 Val >>= Shift;
104 ZeroBits |= Shift;
105 }
106 Shift >>= 1;
107 Mask >>= Shift;
108 }
109 return ZeroBits;
110 }
111};
112
113#if defined(__GNUC__) || defined(_MSC_VER)
114template <typename T> struct TrailingZerosCounter<T, 4> {
115 static unsigned count(T Val, ZeroBehavior ZB) {
116 if (ZB != ZB_Undefined && Val == 0)
117 return 32;
118
119#if __has_builtin(__builtin_ctz) || defined(__GNUC__)
120 return __builtin_ctz(Val);
121#elif defined(_MSC_VER)
122 unsigned long Index;
123 _BitScanForward(&Index, Val);
124 return Index;
125#endif
126 }
127};
128
129#if !defined(_MSC_VER) || defined(_M_X64)
130template <typename T> struct TrailingZerosCounter<T, 8> {
131 static unsigned count(T Val, ZeroBehavior ZB) {
132 if (ZB != ZB_Undefined && Val == 0)
10.1 'ZB' is not equal to ZB_Undefined
10.2 'Val' is equal to 0
11 Taking true branch
133 return 64;
12 Returning the value 64
134
135#if __has_builtin(__builtin_ctzll) || defined(__GNUC__)
136 return __builtin_ctzll(Val);
137#elif defined(_MSC_VER)
138 unsigned long Index;
139 _BitScanForward64(&Index, Val);
140 return Index;
141#endif
142 }
143};
144#endif
145#endif
146} // namespace detail
147
148/// Count number of 0's from the least significant bit to the most
149/// stopping at the first 1.
150///
151/// Only unsigned integral types are allowed.
152///
153/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
154/// valid arguments.
155template <typename T>
156unsigned countTrailingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
157 static_assert(std::numeric_limits<T>::is_integer &&
158 !std::numeric_limits<T>::is_signed,
159 "Only unsigned integral types are allowed.");
160 return llvm::detail::TrailingZerosCounter<T, sizeof(T)>::count(Val, ZB);
10 Calling 'TrailingZerosCounter::count'
13 Returning from 'TrailingZerosCounter::count'
14 Returning the value 64
161}
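
The analyzer path above hinges on the zero-input behavior documented here; a sketch (not part of this header, demo function name made up):

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): with the default ZB_Width, a zero input returns
// the full type width (64 for uint64_t), the value the analyzer tracks into the
// shift by 64 flagged in this report.
static void ctzDemo() {
  uint64_t Zero = 0;
  unsigned TZ = llvm::countTrailingZeros(Zero);                      // 64
  unsigned TZU = llvm::countTrailingZeros(Zero, llvm::ZB_Undefined); // result is undefined
  (void)TZ; (void)TZU;
}
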
162
163namespace detail {
164template <typename T, std::size_t SizeOfT> struct LeadingZerosCounter {
165 static unsigned count(T Val, ZeroBehavior) {
166 if (!Val)
167 return std::numeric_limits<T>::digits;
168
169 // Bisection method.
170 unsigned ZeroBits = 0;
171 for (T Shift = std::numeric_limits<T>::digits >> 1; Shift; Shift >>= 1) {
172 T Tmp = Val >> Shift;
173 if (Tmp)
174 Val = Tmp;
175 else
176 ZeroBits |= Shift;
177 }
178 return ZeroBits;
179 }
180};
181
182#if defined(__GNUC__) || defined(_MSC_VER)
183template <typename T> struct LeadingZerosCounter<T, 4> {
184 static unsigned count(T Val, ZeroBehavior ZB) {
185 if (ZB != ZB_Undefined && Val == 0)
186 return 32;
187
188#if __has_builtin(__builtin_clz) || defined(__GNUC__)
189 return __builtin_clz(Val);
190#elif defined(_MSC_VER)
191 unsigned long Index;
192 _BitScanReverse(&Index, Val);
193 return Index ^ 31;
194#endif
195 }
196};
197
198#if !defined(_MSC_VER) || defined(_M_X64)
199template <typename T> struct LeadingZerosCounter<T, 8> {
200 static unsigned count(T Val, ZeroBehavior ZB) {
201 if (ZB != ZB_Undefined && Val == 0)
202 return 64;
203
204#if __has_builtin(__builtin_clzll) || defined(__GNUC__)
205 return __builtin_clzll(Val);
206#elif defined(_MSC_VER)
207 unsigned long Index;
208 _BitScanReverse64(&Index, Val);
209 return Index ^ 63;
210#endif
211 }
212};
213#endif
214#endif
215} // namespace detail
216
217/// Count number of 0's from the most significant bit to the least
218/// stopping at the first 1.
219///
220/// Only unsigned integral types are allowed.
221///
222/// \param ZB the behavior on an input of 0. Only ZB_Width and ZB_Undefined are
223/// valid arguments.
224template <typename T>
225unsigned countLeadingZeros(T Val, ZeroBehavior ZB = ZB_Width) {
226 static_assert(std::numeric_limits<T>::is_integer &&
227 !std::numeric_limits<T>::is_signed,
228 "Only unsigned integral types are allowed.");
229 return llvm::detail::LeadingZerosCounter<T, sizeof(T)>::count(Val, ZB);
230}
231
232/// Get the index of the first set bit starting from the least
233/// significant bit.
234///
235/// Only unsigned integral types are allowed.
236///
237/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
238/// valid arguments.
239template <typename T> T findFirstSet(T Val, ZeroBehavior ZB = ZB_Max) {
240 if (ZB == ZB_Max && Val == 0)
241 return std::numeric_limits<T>::max();
242
243 return countTrailingZeros(Val, ZB_Undefined);
244}
245
246/// Create a bitmask with the N right-most bits set to 1, and all other
247/// bits set to 0. Only unsigned types are allowed.
248template <typename T> T maskTrailingOnes(unsigned N) {
249 static_assert(std::is_unsigned<T>::value, "Invalid type!");
250 const unsigned Bits = CHAR_BIT * sizeof(T);
251 assert(N <= Bits && "Invalid bit index");
252 return N == 0 ? 0 : (T(-1) >> (Bits - N));
253}
254
255/// Create a bitmask with the N left-most bits set to 1, and all other
256/// bits set to 0. Only unsigned types are allowed.
257template <typename T> T maskLeadingOnes(unsigned N) {
258 return ~maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
259}
260
261/// Create a bitmask with the N right-most bits set to 0, and all other
262/// bits set to 1. Only unsigned types are allowed.
263template <typename T> T maskTrailingZeros(unsigned N) {
264 return maskLeadingOnes<T>(CHAR_BIT * sizeof(T) - N);
265}
266
267/// Create a bitmask with the N left-most bits set to 0, and all other
268/// bits set to 1. Only unsigned types are allowed.
269template <typename T> T maskLeadingZeros(unsigned N) {
270 return maskTrailingOnes<T>(CHAR_BIT * sizeof(T) - N);
271}
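
A sketch (not part of this header) of the four mask helpers above for a 32-bit type and N = 8; the demo function name is made up:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): each helper with N = 8 on uint32_t.
static void maskDemo() {
  uint32_t TOnes  = llvm::maskTrailingOnes<uint32_t>(8);  // 0x000000FF
  uint32_t LOnes  = llvm::maskLeadingOnes<uint32_t>(8);   // 0xFF000000
  uint32_t TZeros = llvm::maskTrailingZeros<uint32_t>(8); // 0xFFFFFF00
  uint32_t LZeros = llvm::maskLeadingZeros<uint32_t>(8);  // 0x00FFFFFF
  (void)TOnes; (void)LOnes; (void)TZeros; (void)LZeros;
}
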
272
273/// Get the index of the last set bit starting from the least
274/// significant bit.
275///
276/// Only unsigned integral types are allowed.
277///
278/// \param ZB the behavior on an input of 0. Only ZB_Max and ZB_Undefined are
279/// valid arguments.
280template <typename T> T findLastSet(T Val, ZeroBehavior ZB = ZB_Max) {
281 if (ZB == ZB_Max && Val == 0)
282 return std::numeric_limits<T>::max();
283
284 // Use ^ instead of - because both gcc and llvm can remove the associated ^
285 // in the __builtin_clz intrinsic on x86.
286 return countLeadingZeros(Val, ZB_Undefined) ^
287 (std::numeric_limits<T>::digits - 1);
288}
289
290/// Macro compressed bit reversal table for 256 bits.
291///
292/// http://graphics.stanford.edu/~seander/bithacks.html#BitReverseTable
293static const unsigned char BitReverseTable256[256] = {
294#define R2(n) n, n + 2 * 64, n + 1 * 64, n + 3 * 64
295#define R4(n) R2(n), R2(n + 2 * 16), R2(n + 1 * 16), R2(n + 3 * 16)
296#define R6(n) R4(n), R4(n + 2 * 4), R4(n + 1 * 4), R4(n + 3 * 4)
297 R6(0), R6(2), R6(1), R6(3)
298#undef R2
299#undef R4
300#undef R6
301};
302
303/// Reverse the bits in \p Val.
304template <typename T>
305T reverseBits(T Val) {
306 unsigned char in[sizeof(Val)];
307 unsigned char out[sizeof(Val)];
308 std::memcpy(in, &Val, sizeof(Val));
309 for (unsigned i = 0; i < sizeof(Val); ++i)
310 out[(sizeof(Val) - i) - 1] = BitReverseTable256[in[i]];
311 std::memcpy(&Val, out, sizeof(Val));
312 return Val;
313}
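
A sketch (not part of this header) of the table-driven reversal above; the demo function name is made up:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): bit reversal within the full width of the type.
static void reverseDemo() {
  uint8_t R8 = llvm::reverseBits<uint8_t>(0x01);          // 0x80
  uint32_t R32 = llvm::reverseBits<uint32_t>(0x00000001); // 0x80000000
  (void)R8; (void)R32;
}
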
314
315#if __has_builtin(__builtin_bitreverse8)
316template<>
317inline uint8_t reverseBits<uint8_t>(uint8_t Val) {
318 return __builtin_bitreverse8(Val);
319}
320#endif
321
322#if __has_builtin(__builtin_bitreverse16)
323template<>
324inline uint16_t reverseBits<uint16_t>(uint16_t Val) {
325 return __builtin_bitreverse16(Val);
326}
327#endif
328
329#if __has_builtin(__builtin_bitreverse32)
330template<>
331inline uint32_t reverseBits<uint32_t>(uint32_t Val) {
332 return __builtin_bitreverse32(Val);
333}
334#endif
335
336#if __has_builtin(__builtin_bitreverse64)
337template<>
338inline uint64_t reverseBits<uint64_t>(uint64_t Val) {
339 return __builtin_bitreverse64(Val);
340}
341#endif
342
343// NOTE: The following support functions use the _32/_64 extensions instead of
344// type overloading so that signed and unsigned integers can be used without
345// ambiguity.
346
347/// Return the high 32 bits of a 64 bit value.
348constexpr inline uint32_t Hi_32(uint64_t Value) {
349 return static_cast<uint32_t>(Value >> 32);
350}
351
352/// Return the low 32 bits of a 64 bit value.
353constexpr inline uint32_t Lo_32(uint64_t Value) {
354 return static_cast<uint32_t>(Value);
355}
356
357/// Make a 64-bit integer from a high / low pair of 32-bit integers.
358constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
359 return ((uint64_t)High << 32) | (uint64_t)Low;
360}
361
362/// Checks if an integer fits into the given bit width.
363template <unsigned N> constexpr inline bool isInt(int64_t x) {
364 return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
365}
366// Template specializations to get better code for common cases.
367template <> constexpr inline bool isInt<8>(int64_t x) {
368 return static_cast<int8_t>(x) == x;
369}
370template <> constexpr inline bool isInt<16>(int64_t x) {
371 return static_cast<int16_t>(x) == x;
372}
373template <> constexpr inline bool isInt<32>(int64_t x) {
374 return static_cast<int32_t>(x) == x;
375}
376
377/// Checks if a signed integer is an N bit number shifted left by S.
378template <unsigned N, unsigned S>
379constexpr inline bool isShiftedInt(int64_t x) {
380 static_assert(
381 N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number).");
382 static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
383 return isInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
384}
385
386/// Checks if an unsigned integer fits into the given bit width.
387///
388/// This is written as two functions rather than as simply
389///
390/// return N >= 64 || X < (UINT64_C(1) << N);
391///
392/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
393/// left too many places.
394template <unsigned N>
395constexpr inline std::enable_if_t<(N < 64), bool> isUInt(uint64_t X) {
396 static_assert(N > 0, "isUInt<0> doesn't make sense");
397 return X < (UINT64_C(1) << (N));
398}
399template <unsigned N>
400constexpr inline std::enable_if_t<N >= 64, bool> isUInt(uint64_t) {
401 return true;
402}
403
404// Template specializations to get better code for common cases.
405template <> constexpr inline bool isUInt<8>(uint64_t x) {
406 return static_cast<uint8_t>(x) == x;
407}
408template <> constexpr inline bool isUInt<16>(uint64_t x) {
409 return static_cast<uint16_t>(x) == x;
410}
411template <> constexpr inline bool isUInt<32>(uint64_t x) {
412 return static_cast<uint32_t>(x) == x;
413}
414
415/// Checks if an unsigned integer is an N bit number shifted left by S.
416template <unsigned N, unsigned S>
417constexpr inline bool isShiftedUInt(uint64_t x) {
418 static_assert(
419 N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
420 static_assert(N + S <= 64,
421 "isShiftedUInt<N, S> with N + S > 64 is too wide.");
422 // Per the two static_asserts above, S must be strictly less than 64. So
423 // 1 << S is not undefined behavior.
424 return isUInt<N + S>(x) && (x % (UINT64_C(1) << S) == 0);
425}
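
A sketch (not part of this header) of the fixed-width predicates above; they are constexpr, so static_assert can exercise them:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): boundary values for the predicates above.
static_assert(llvm::isInt<8>(127) && !llvm::isInt<8>(128), "int8 range is [-128, 127]");
static_assert(llvm::isUInt<8>(255) && !llvm::isUInt<8>(256), "uint8 range is [0, 255]");
static_assert(llvm::isShiftedUInt<4, 2>(0x3C), "0x3C is a 4-bit value (0xF) shifted left by 2");
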
426
427/// Gets the maximum value for a N-bit unsigned integer.
428inline uint64_t maxUIntN(uint64_t N) {
429 assert(N > 0 && N <= 64 && "integer width out of range");
430
431 // uint64_t(1) << 64 is undefined behavior, so we can't do
432 // (uint64_t(1) << N) - 1
433 // without checking first that N != 64. But this works and doesn't have a
434 // branch.
435 return UINT64_MAX >> (64 - N);
436}
437
438/// Gets the minimum value for a N-bit signed integer.
439inline int64_t minIntN(int64_t N) {
440 assert(N > 0 && N <= 64 && "integer width out of range");
441
442 return UINT64_C(1) + ~(UINT64_C(1) << (N - 1));
443}
444
445/// Gets the maximum value for a N-bit signed integer.
446inline int64_t maxIntN(int64_t N) {
447 assert(N > 0 && N <= 64 && "integer width out of range");
448
449 // This relies on two's complement wraparound when N == 64, so we convert to
450 // int64_t only at the very end to avoid UB.
451 return (UINT64_C(1) << (N - 1)) - 1;
452}
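
A sketch (not part of this header) of the dynamic-width limits above, which deliberately avoid the same shift-by-64 hazard this report is about; the demo function name is made up:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): dynamic-width limits without evaluating 1 << 64.
static void limitsDemo() {
  uint64_t MaxU8 = llvm::maxUIntN(8); // 255
  int64_t MinI8 = llvm::minIntN(8);   // -128
  int64_t MaxI64 = llvm::maxIntN(64); // INT64_MAX, via the wraparound noted above
  (void)MaxU8; (void)MinI8; (void)MaxI64;
}
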
453
454/// Checks if an unsigned integer fits into the given (dynamic) bit width.
455inline bool isUIntN(unsigned N, uint64_t x) {
456 return N >= 64 || x <= maxUIntN(N);
457}
458
459/// Checks if a signed integer fits into the given (dynamic) bit width.
460inline bool isIntN(unsigned N, int64_t x) {
461 return N >= 64 || (minIntN(N) <= x && x <= maxIntN(N));
462}
463
464/// Return true if the argument is a non-empty sequence of ones starting at the
465/// least significant bit with the remainder zero (32 bit version).
466/// Ex. isMask_32(0x0000FFFFU) == true.
467constexpr inline bool isMask_32(uint32_t Value) {
468 return Value && ((Value + 1) & Value) == 0;
469}
470
471/// Return true if the argument is a non-empty sequence of ones starting at the
472/// least significant bit with the remainder zero (64 bit version).
473constexpr inline bool isMask_64(uint64_t Value) {
474 return Value && ((Value + 1) & Value) == 0;
475}
476
477/// Return true if the argument contains a non-empty sequence of ones with the
478/// remainder zero (32 bit version.) Ex. isShiftedMask_32(0x0000FF00U) == true.
479constexpr inline bool isShiftedMask_32(uint32_t Value) {
480 return Value && isMask_32((Value - 1) | Value);
481}
482
483/// Return true if the argument contains a non-empty sequence of ones with the
484/// remainder zero (64 bit version.)
485constexpr inline bool isShiftedMask_64(uint64_t Value) {
486 return Value && isMask_64((Value - 1) | Value);
487}
488
489/// Return true if the argument is a power of two > 0.
490/// Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
491constexpr inline bool isPowerOf2_32(uint32_t Value) {
492 return Value && !(Value & (Value - 1));
493}
494
495/// Return true if the argument is a power of two > 0 (64 bit edition.)
496constexpr inline bool isPowerOf2_64(uint64_t Value) {
497 return Value && !(Value & (Value - 1));
498}
499
500/// Count the number of ones from the most significant bit to the first
501/// zero bit.
502///
503/// Ex. countLeadingOnes(0xFF0FFF00) == 8.
504/// Only unsigned integral types are allowed.
505///
506/// \param ZB the behavior on an input of all ones. Only ZB_Width and
507/// ZB_Undefined are valid arguments.
508template <typename T>
509unsigned countLeadingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
510 static_assert(std::numeric_limits<T>::is_integer &&
511 !std::numeric_limits<T>::is_signed,
512 "Only unsigned integral types are allowed.");
513 return countLeadingZeros<T>(~Value, ZB);
514}
515
516/// Count the number of ones from the least significant bit to the first
517/// zero bit.
518///
519/// Ex. countTrailingOnes(0x00FF00FF) == 8.
520/// Only unsigned integral types are allowed.
521///
522/// \param ZB the behavior on an input of all ones. Only ZB_Width and
523/// ZB_Undefined are valid arguments.
524template <typename T>
525unsigned countTrailingOnes(T Value, ZeroBehavior ZB = ZB_Width) {
526 static_assert(std::numeric_limits<T>::is_integer &&
527 !std::numeric_limits<T>::is_signed,
528 "Only unsigned integral types are allowed.");
529 return countTrailingZeros<T>(~Value, ZB);
530}
531
532namespace detail {
533template <typename T, std::size_t SizeOfT> struct PopulationCounter {
534 static unsigned count(T Value) {
535 // Generic version, forward to 32 bits.
536 static_assert(SizeOfT <= 4, "Not implemented!");
537#if defined(__GNUC__)
538 return __builtin_popcount(Value);
539#else
540 uint32_t v = Value;
541 v = v - ((v >> 1) & 0x55555555);
542 v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
543 return ((v + (v >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
544#endif
545 }
546};
547
548template <typename T> struct PopulationCounter<T, 8> {
549 static unsigned count(T Value) {
550#if defined(__GNUC__)
551 return __builtin_popcountll(Value);
552#else
553 uint64_t v = Value;
554 v = v - ((v >> 1) & 0x5555555555555555ULL);
555 v = (v & 0x3333333333333333ULL) + ((v >> 2) & 0x3333333333333333ULL);
556 v = (v + (v >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
557 return unsigned((uint64_t)(v * 0x0101010101010101ULL) >> 56);
558#endif
559 }
560};
561} // namespace detail
562
563/// Count the number of set bits in a value.
564/// Ex. countPopulation(0xF000F000) = 8
565/// Returns 0 if the word is zero.
566template <typename T>
567inline unsigned countPopulation(T Value) {
568 static_assert(std::numeric_limits<T>::is_integer &&
569 !std::numeric_limits<T>::is_signed,
570 "Only unsigned integral types are allowed.");
571 return detail::PopulationCounter<T, sizeof(T)>::count(Value);
572}
573
574/// Compile time Log2.
575/// Valid only for positive powers of two.
576template <size_t kValue> constexpr inline size_t CTLog2() {
577 static_assert(kValue > 0 && llvm::isPowerOf2_64(kValue),
578 "Value is not a valid power of 2");
579 return 1 + CTLog2<kValue / 2>();
580}
581
582template <> constexpr inline size_t CTLog2<1>() { return 0; }
583
584/// Return the log base 2 of the specified value.
585inline double Log2(double Value) {
586#if defined(__ANDROID_API__) && __ANDROID_API__ < 18
587 return __builtin_log(Value) / __builtin_log(2.0);
588#else
589 return log2(Value);
590#endif
591}
592
593/// Return the floor log base 2 of the specified value, -1 if the value is zero.
594/// (32 bit edition.)
595/// Ex. Log2_32(32) == 5, Log2_32(1) == 0, Log2_32(0) == -1, Log2_32(6) == 2
596inline unsigned Log2_32(uint32_t Value) {
597 return 31 - countLeadingZeros(Value);
598}
599
600/// Return the floor log base 2 of the specified value, -1 if the value is zero.
601/// (64 bit edition.)
602inline unsigned Log2_64(uint64_t Value) {
603 return 63 - countLeadingZeros(Value);
604}
605
606/// Return the ceil log base 2 of the specified value, 32 if the value is zero.
607/// (32 bit edition).
608/// Ex. Log2_32_Ceil(32) == 5, Log2_32_Ceil(1) == 0, Log2_32_Ceil(6) == 3
609inline unsigned Log2_32_Ceil(uint32_t Value) {
610 return 32 - countLeadingZeros(Value - 1);
611}
612
613/// Return the ceil log base 2 of the specified value, 64 if the value is zero.
614/// (64 bit edition.)
615inline unsigned Log2_64_Ceil(uint64_t Value) {
616 return 64 - countLeadingZeros(Value - 1);
617}
618
619/// Return the greatest common divisor of the values using Euclid's algorithm.
620template <typename T>
621inline T greatestCommonDivisor(T A, T B) {
622 while (B) {
623 T Tmp = B;
624 B = A % B;
625 A = Tmp;
626 }
627 return A;
628}
629
630inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
631 return greatestCommonDivisor<uint64_t>(A, B);
632}
633
634/// This function takes a 64-bit integer and returns the bit equivalent double.
635inline double BitsToDouble(uint64_t Bits) {
636 double D;
637 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
638 memcpy(&D, &Bits, sizeof(Bits));
639 return D;
640}
641
642/// This function takes a 32-bit integer and returns the bit equivalent float.
643inline float BitsToFloat(uint32_t Bits) {
644 float F;
645 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
646 memcpy(&F, &Bits, sizeof(Bits));
647 return F;
648}
649
650/// This function takes a double and returns the bit equivalent 64-bit integer.
651/// Note that copying doubles around changes the bits of NaNs on some hosts,
652/// notably x86, so this routine cannot be used if these bits are needed.
653inline uint64_t DoubleToBits(double Double) {
654 uint64_t Bits;
655 static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
656 memcpy(&Bits, &Double, sizeof(Double));
657 return Bits;
658}
659
660/// This function takes a float and returns the bit equivalent 32-bit integer.
661/// Note that copying floats around changes the bits of NaNs on some hosts,
662/// notably x86, so this routine cannot be used if these bits are needed.
663inline uint32_t FloatToBits(float Float) {
664 uint32_t Bits;
665 static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
666 memcpy(&Bits, &Float, sizeof(Float));
667 return Bits;
668}
669
670/// A and B are either alignments or offsets. Return the minimum alignment that
671/// may be assumed after adding the two together.
672constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
673 // The largest power of 2 that divides both A and B.
674 //
675 // Replace "-Value" by "1+~Value" in the following commented code to avoid
676 // MSVC warning C4146
677 // return (A | B) & -(A | B);
678 return (A | B) & (1 + ~(A | B));
679}
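
A sketch (not part of this header): MinAlign isolates the lowest set bit of (A | B), i.e. the largest power of two dividing both inputs, and is constexpr:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): MinAlign checked statically.
static_assert(llvm::MinAlign(16, 24) == 8, "largest power of two dividing 16 and 24");
static_assert(llvm::MinAlign(32, 0) == 32, "a zero offset preserves the alignment");
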
680
681/// Returns the next power of two (in 64-bits) that is strictly greater than A.
682/// Returns zero on overflow.
683inline uint64_t NextPowerOf2(uint64_t A) {
684 A |= (A >> 1);
685 A |= (A >> 2);
686 A |= (A >> 4);
687 A |= (A >> 8);
688 A |= (A >> 16);
689 A |= (A >> 32);
690 return A + 1;
691}
692
693/// Returns the power of two which is less than or equal to the given value.
694/// Essentially, it is a floor operation across the domain of powers of two.
695inline uint64_t PowerOf2Floor(uint64_t A) {
696 if (!A) return 0;
697 return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
698}
699
700/// Returns the power of two which is greater than or equal to the given value.
701/// Essentially, it is a ceil operation across the domain of powers of two.
702inline uint64_t PowerOf2Ceil(uint64_t A) {
703 if (!A)
704 return 0;
705 return NextPowerOf2(A - 1);
706}
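
A sketch (not part of this header) of the three power-of-two helpers above; the demo function name is made up:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): strict successor vs. floor vs. ceiling.
static void pow2Demo() {
  uint64_t Next = llvm::NextPowerOf2(64);    // 128 (strictly greater than the input)
  uint64_t Floor = llvm::PowerOf2Floor(100); // 64
  uint64_t Ceil = llvm::PowerOf2Ceil(100);   // 128
  (void)Next; (void)Floor; (void)Ceil;
}
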
707
708/// Returns the next integer (mod 2**64) that is greater than or equal to
709/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
710///
711/// If non-zero \p Skew is specified, the return value will be a minimal
712/// integer that is greater than or equal to \p Value and equal to
713/// \p Align * N + \p Skew for some integer N. If \p Skew is larger than
714/// \p Align, its value is adjusted to '\p Skew mod \p Align'.
715///
716/// Examples:
717/// \code
718/// alignTo(5, 8) = 8
719/// alignTo(17, 8) = 24
720/// alignTo(~0LL, 8) = 0
721/// alignTo(321, 255) = 510
722///
723/// alignTo(5, 8, 7) = 7
724/// alignTo(17, 8, 1) = 17
725/// alignTo(~0LL, 8, 3) = 3
726/// alignTo(321, 255, 42) = 552
727/// \endcode
728inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
729 assert(Align != 0u && "Align can't be 0.");
730 Skew %= Align;
731 return (Value + Align - 1 - Skew) / Align * Align + Skew;
732}
733
734/// Returns the next integer (mod 2**64) that is greater than or equal to
735/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
736template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
737 static_assert(Align != 0u, "Align must be non-zero");
738 return (Value + Align - 1) / Align * Align;
739}
740
741/// Returns the integer ceil(Numerator / Denominator).
742inline uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator) {
743 return alignTo(Numerator, Denominator) / Denominator;
744}
745
746/// Returns the integer nearest(Numerator / Denominator).
747inline uint64_t divideNearest(uint64_t Numerator, uint64_t Denominator) {
748 return (Numerator + (Denominator / 2)) / Denominator;
749}
750
751/// Returns the largest uint64_t less than or equal to \p Value that is
752/// \p Skew mod \p Align. \p Align must be non-zero.
753inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
754 assert(Align != 0u && "Align can't be 0.");
755 Skew %= Align;
756 return (Value - Skew) / Align * Align + Skew;
757}
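
A sketch (not part of this header) contrasting the rounding directions of the helpers above; the demo function name is made up:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): rounding 17 against an alignment of 8.
static void roundingDemo() {
  uint64_t Up = llvm::alignTo(17, 8);     // 24
  uint64_t Down = llvm::alignDown(17, 8); // 16
  uint64_t Q = llvm::divideCeil(17, 8);   // 3
  (void)Up; (void)Down; (void)Q;
}
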
758
759/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
760/// Requires 0 < B <= 32.
761template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
762 static_assert(B > 0, "Bit width can't be 0.");
763 static_assert(B <= 32, "Bit width out of range.");
764 return int32_t(X << (32 - B)) >> (32 - B);
765}
766
767/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
768/// Requires 0 < B <= 32.
769inline int32_t SignExtend32(uint32_t X, unsigned B) {
770 assert(B > 0 && "Bit width can't be 0.");
771 assert(B <= 32 && "Bit width out of range.");
772 return int32_t(X << (32 - B)) >> (32 - B);
773}
774
775/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
776/// Requires 0 < B <= 64.
777template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
778 static_assert(B > 0, "Bit width can't be 0.");
779 static_assert(B <= 64, "Bit width out of range.");
780 return int64_t(x << (64 - B)) >> (64 - B);
781}
782
783/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
784/// Requires 0 < B <= 64.
785inline int64_t SignExtend64(uint64_t X, unsigned B) {
786 assert(B > 0 && "Bit width can't be 0.");
787 assert(B <= 64 && "Bit width out of range.");
788 return int64_t(X << (64 - B)) >> (64 - B);
789}
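
A sketch (not part of this header) of sign-extending a narrow bit-field with the functions above; the demo function name is made up:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): 12- and 20-bit fields extended to full width.
static void signExtendDemo() {
  int32_t AllOnes = llvm::SignExtend32<12>(0xFFF);    // -1 (all twelve bits set)
  int32_t TopBit = llvm::SignExtend32(0x800, 12);     // -2048 (only the field's sign bit set)
  int64_t Positive = llvm::SignExtend64<20>(0x7FFFF); // 524287 (sign bit clear, value unchanged)
  (void)AllOnes; (void)TopBit; (void)Positive;
}
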
790
791/// Subtract two unsigned integers, X and Y, of type T and return the absolute
792/// value of the result.
793template <typename T>
794std::enable_if_t<std::is_unsigned<T>::value, T> AbsoluteDifference(T X, T Y) {
795 return X > Y ? (X - Y) : (Y - X);
796}
797
798/// Add two unsigned integers, X and Y, of type T. Clamp the result to the
799/// maximum representable value of T on overflow. ResultOverflowed indicates if
800/// the result is larger than the maximum representable value of type T.
801template <typename T>
802std::enable_if_t<std::is_unsigned<T>::value, T>
803SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) {
804 bool Dummy;
805 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
806 // Hacker's Delight, p. 29
807 T Z = X + Y;
808 Overflowed = (Z < X || Z < Y);
809 if (Overflowed)
810 return std::numeric_limits<T>::max();
811 else
812 return Z;
813}
814
815/// Multiply two unsigned integers, X and Y, of type T. Clamp the result to the
816/// maximum representable value of T on overflow. ResultOverflowed indicates if
817/// the result is larger than the maximum representable value of type T.
818template <typename T>
819std::enable_if_t<std::is_unsigned<T>::value, T>
820SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
821 bool Dummy;
822 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
823
824 // Hacker's Delight, p. 30 has a different algorithm, but we don't use that
825 // because it fails for uint16_t (where multiplication can have undefined
826 // behavior due to promotion to int), and requires a division in addition
827 // to the multiplication.
828
829 Overflowed = false;
830
831 // Log2(Z) would be either Log2Z or Log2Z + 1.
832 // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z
833 // will necessarily be less than Log2Max as desired.
834 int Log2Z = Log2_64(X) + Log2_64(Y);
835 const T Max = std::numeric_limits<T>::max();
836 int Log2Max = Log2_64(Max);
837 if (Log2Z < Log2Max) {
838 return X * Y;
839 }
840 if (Log2Z > Log2Max) {
841 Overflowed = true;
842 return Max;
843 }
844
845 // We're going to use the top bit, and maybe overflow one
846 // bit past it. Multiply all but the bottom bit then add
847 // that on at the end.
848 T Z = (X >> 1) * Y;
849 if (Z & ~(Max >> 1)) {
850 Overflowed = true;
851 return Max;
852 }
853 Z <<= 1;
854 if (X & 1)
855 return SaturatingAdd(Z, Y, ResultOverflowed);
856
857 return Z;
858}
859
860/// Multiply two unsigned integers, X and Y, and add the unsigned integer, A to
861/// the product. Clamp the result to the maximum representable value of T on
862/// overflow. ResultOverflowed indicates if the result is larger than the
863/// maximum representable value of type T.
864template <typename T>
865std::enable_if_t<std::is_unsigned<T>::value, T>
866SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
867 bool Dummy;
868 bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
869
870 T Product = SaturatingMultiply(X, Y, &Overflowed);
871 if (Overflowed)
872 return Product;
873
874 return SaturatingAdd(A, Product, &Overflowed);
875}
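
A sketch (not part of this header) of the saturating helpers above, clamping instead of wrapping; the demo function name is made up:

#include "llvm/Support/MathExtras.h"

// Hypothetical demo (not in LLVM): uint8_t saturates at 255.
static void saturatingDemo() {
  bool Ov = false;
  uint8_t S = llvm::SaturatingAdd<uint8_t>(200, 100, &Ov);      // S = 255, Ov = true
  uint8_t P = llvm::SaturatingMultiply<uint8_t>(16, 16, &Ov);   // P = 255, Ov = true
  uint8_t Q = llvm::SaturatingMultiplyAdd<uint8_t>(10, 10, 55); // Q = 155, no overflow
  (void)S; (void)P; (void)Q;
}
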
876
877/// Use this rather than HUGE_VALF; the latter causes warnings on MSVC.
878extern const float huge_valf;
879
880
881/// Add two signed integers, computing the two's complement truncated result,
882/// returning true if overflow occurred.
883template <typename T>
884std::enable_if_t<std::is_signed<T>::value, T> AddOverflow(T X, T Y, T &Result) {
885#if __has_builtin(__builtin_add_overflow)
886 return __builtin_add_overflow(X, Y, &Result);
887#else
888 // Perform the unsigned addition.
889 using U = std::make_unsigned_t<T>;
890 const U UX = static_cast<U>(X);
891 const U UY = static_cast<U>(Y);
892 const U UResult = UX + UY;
893
894 // Convert to signed.
895 Result = static_cast<T>(UResult);
896
897 // Adding two positive numbers should result in a positive number.
898 if (X > 0 && Y > 0)
899 return Result <= 0;
900 // Adding two negatives should result in a negative number.
901 if (X < 0 && Y < 0)
902 return Result >= 0;
903 return false;
904#endif
905}
906
907/// Subtract two signed integers, computing the two's complement truncated
908/// result, returning true if an overflow occurred.
909template <typename T>
910std::enable_if_t<std::is_signed<T>::value, T> SubOverflow(T X, T Y, T &Result) {
911#if __has_builtin(__builtin_sub_overflow)
912 return __builtin_sub_overflow(X, Y, &Result);
913#else
914 // Perform the unsigned addition.
915 using U = std::make_unsigned_t<T>;
916 const U UX = static_cast<U>(X);
917 const U UY = static_cast<U>(Y);
918 const U UResult = UX - UY;
919
920 // Convert to signed.
921 Result = static_cast<T>(UResult);
922
923 // Subtracting a positive number from a negative results in a negative number.
924 if (X <= 0 && Y > 0)
925 return Result >= 0;
926 // Subtracting a negative number from a positive results in a positive number.
927 if (X >= 0 && Y < 0)
928 return Result <= 0;
929 return false;
930#endif
931}
932
933/// Multiply two signed integers, computing the two's complement truncated
934/// result, returning true if an overflow occurred.
935template <typename T>
936std::enable_if_t<std::is_signed<T>::value, T> MulOverflow(T X, T Y, T &Result) {
937 // Perform the unsigned multiplication on absolute values.
938 using U = std::make_unsigned_t<T>;
939 const U UX = X < 0 ? (0 - static_cast<U>(X)) : static_cast<U>(X);
940 const U UY = Y < 0 ? (0 - static_cast<U>(Y)) : static_cast<U>(Y);
941 const U UResult = UX * UY;
942
943 // Convert to signed.
944 const bool IsNegative = (X < 0) ^ (Y < 0);
945 Result = IsNegative ? (0 - UResult) : UResult;
946
947 // If any of the args was 0, result is 0 and no overflow occurs.
948 if (UX == 0 || UY == 0)
949 return false;
950
951 // UX and UY are in [1, 2^n], where n is the number of digits.
952 // Check how the max allowed absolute value (2^n for negative, 2^(n-1) for
953 // positive) divided by an argument compares to the other.
954 if (IsNegative)
955 return UX > (static_cast<U>(std::numeric_limits<T>::max()) + U(1)) / UY;
956 else
957 return UX > (static_cast<U>(std::numeric_limits<T>::max())) / UY;
958}
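
A sketch (not part of this header) of the checked signed helpers above; the demo function name is made up:

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Hypothetical demo (not in LLVM): overflow detection, with the truncated result still written.
static void overflowDemo() {
  int32_t R;
  bool Ov1 = llvm::AddOverflow<int32_t>(INT32_MAX, 1, R); // true; R holds the wrapped value
  bool Ov2 = llvm::MulOverflow<int32_t>(46341, 46341, R); // true; 46341^2 exceeds INT32_MAX
  bool Ov3 = llvm::MulOverflow<int32_t>(46340, 46340, R); // false; 2147395600 fits
  (void)Ov1; (void)Ov2; (void)Ov3;
}
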
959
960} // End llvm namespace
961
962#endif