1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that ARM uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "ARMISelLowering.h"
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
20#include "ARMPerfectShuffle.h"
21#include "ARMRegisterInfo.h"
22#include "ARMSelectionDAGInfo.h"
23#include "ARMSubtarget.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/Statistic.h"
38#include "llvm/ADT/StringRef.h"
40#include "llvm/ADT/Twine.h"
67#include "llvm/IR/Attributes.h"
68#include "llvm/IR/CallingConv.h"
69#include "llvm/IR/Constant.h"
70#include "llvm/IR/Constants.h"
71#include "llvm/IR/DataLayout.h"
72#include "llvm/IR/DebugLoc.h"
74#include "llvm/IR/Function.h"
75#include "llvm/IR/GlobalAlias.h"
76#include "llvm/IR/GlobalValue.h"
78#include "llvm/IR/IRBuilder.h"
79#include "llvm/IR/InlineAsm.h"
80#include "llvm/IR/Instruction.h"
83#include "llvm/IR/Intrinsics.h"
84#include "llvm/IR/IntrinsicsARM.h"
85#include "llvm/IR/Module.h"
87#include "llvm/IR/Type.h"
88#include "llvm/IR/User.h"
89#include "llvm/IR/Value.h"
90#include "llvm/MC/MCInstrDesc.h"
93#include "llvm/MC/MCSchedule.h"
100#include "llvm/Support/Debug.h"
108#include <algorithm>
109#include <cassert>
110#include <cstdint>
111#include <cstdlib>
112#include <iterator>
113#include <limits>
114#include <optional>
115#include <tuple>
116#include <utility>
117#include <vector>
118
119using namespace llvm;
120using namespace llvm::PatternMatch;
121
122#define DEBUG_TYPE "arm-isel"
123
124STATISTIC(NumTailCalls, "Number of tail calls");
125STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
126STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
127STATISTIC(NumConstpoolPromoted,
128 "Number of constants with their storage promoted into constant pools");
129
130static cl::opt<bool>
131ARMInterworking("arm-interworking", cl::Hidden,
132 cl::desc("Enable / disable ARM interworking (for debugging only)"),
133 cl::init(true));
134
136 "arm-promote-constant", cl::Hidden,
137 cl::desc("Enable / disable promotion of unnamed_addr constants into "
138 "constant pools"),
139 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
141 "arm-promote-constant-max-size", cl::Hidden,
142 cl::desc("Maximum size of constant to promote into a constant pool"),
143 cl::init(64));
145 "arm-promote-constant-max-total", cl::Hidden,
146 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
147 cl::init(128));
148
150MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden,
151 cl::desc("Maximum interleave factor for MVE VLDn to generate."),
152 cl::init(2));
153
154// The APCS parameter registers.
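// Under APCS/AAPCS these are r0-r3, the core registers that carry the first
// integer and pointer arguments of a call.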
155static const MCPhysReg GPRArgRegs[] = {
156 ARM::R0, ARM::R1, ARM::R2, ARM::R3
157};
158
159void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
160 if (VT != PromotedLdStVT) {
162 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
163
165 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
166 }
167
168 MVT ElemTy = VT.getVectorElementType();
169 if (ElemTy != MVT::f64)
173 if (ElemTy == MVT::i32) {
178 } else {
183 }
192 if (VT.isInteger()) {
196 }
197
198 // Neon does not support vector divide/remainder operations.
207
208 if (!VT.isFloatingPoint() &&
209 VT != MVT::v2i64 && VT != MVT::v1i64)
210 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
211 setOperationAction(Opcode, VT, Legal);
212 if (!VT.isFloatingPoint())
213 for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
214 setOperationAction(Opcode, VT, Legal);
215}
216
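// NEON D registers are 64 bits wide and Q registers are 128 bits wide (modelled
// as pairs of D registers), so 64-bit vector types are placed in the DPR class
// and 128-bit vector types in the DPair class below.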
217void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
218 addRegisterClass(VT, &ARM::DPRRegClass);
219 addTypeForNEON(VT, MVT::f64);
220}
221
222void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
223 addRegisterClass(VT, &ARM::DPairRegClass);
224 addTypeForNEON(VT, MVT::v2f64);
225}
226
227void ARMTargetLowering::setAllExpand(MVT VT) {
228 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
229 setOperationAction(Opc, VT, Expand);
230
231 // We support these really simple operations even on types where all
232 // the actual arithmetic has to be broken down into simpler
233 // operations or turned into library calls.
238}
239
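// Helper that applies the same legalization action to all three flavours of
// extending load (any-, zero- and sign-extend) for the given value/memory
// type pair.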
240void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
241 LegalizeAction Action) {
242 setLoadExtAction(ISD::EXTLOAD, From, To, Action);
243 setLoadExtAction(ISD::ZEXTLOAD, From, To, Action);
244 setLoadExtAction(ISD::SEXTLOAD, From, To, Action);
245}
246
247void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
248 const MVT IntTypes[] = { MVT::v16i8, MVT::v8i16, MVT::v4i32 };
249
250 for (auto VT : IntTypes) {
251 addRegisterClass(VT, &ARM::MQPRRegClass);
281
282 // No native support for these.
292
293 // Vector reductions
303
304 if (!HasMVEFP) {
309 } else {
312 }
313
314 // Pre and Post inc are supported on loads and stores
315 for (unsigned im = (unsigned)ISD::PRE_INC;
321 }
322 }
323
324 const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 };
325 for (auto VT : FloatTypes) {
326 addRegisterClass(VT, &ARM::MQPRRegClass);
327 if (!HasMVEFP)
328 setAllExpand(VT);
329
330 // These are legal or custom whether or not we have MVE.fp.
343
344 // Pre and Post inc are supported on loads and stores
345 for (unsigned im = (unsigned)ISD::PRE_INC;
351 }
352
353 if (HasMVEFP) {
361
362 // No native support for these.
376 }
377 }
378
379 // Custom-expand smaller-than-legal vector reductions to prevent false zero
380 // items being added.
389
390 // We 'support' these types up to bitcast/load/store level, regardless of
391 // MVE integer-only / float support. Only FP data processing on the FP
392 // vector types is inhibited at the integer-only level.
393 const MVT LongTypes[] = { MVT::v2i64, MVT::v2f64 };
394 for (auto VT : LongTypes) {
395 addRegisterClass(VT, &ARM::MQPRRegClass);
396 setAllExpand(VT);
402 }
404
405 // We can do bitwise operations on v2i64 vectors
406 setOperationAction(ISD::AND, MVT::v2i64, Legal);
407 setOperationAction(ISD::OR, MVT::v2i64, Legal);
408 setOperationAction(ISD::XOR, MVT::v2i64, Legal);
409
410 // It is legal to extload from v4i8 to v4i16 or v4i32.
411 addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal);
412 addAllExtLoads(MVT::v4i32, MVT::v4i16, Legal);
413 addAllExtLoads(MVT::v4i32, MVT::v4i8, Legal);
414
415 // It is legal to sign extend from v4i8/v4i16 to v4i32 or v8i8 to v8i16.
421
422 // Some truncating stores are legal too.
423 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
424 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
425 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
426
427 // Pre and Post inc on these are legal, given the correct extends
428 for (unsigned im = (unsigned)ISD::PRE_INC;
430 for (auto VT : {MVT::v8i8, MVT::v4i8, MVT::v4i16}) {
435 }
436 }
437
438 // Predicate types
439 const MVT pTypes[] = {MVT::v16i1, MVT::v8i1, MVT::v4i1, MVT::v2i1};
440 for (auto VT : pTypes) {
441 addRegisterClass(VT, &ARM::VCCRRegClass);
456
457 if (!HasMVEFP) {
462 }
463 }
467 setOperationAction(ISD::OR, MVT::v2i1, Expand);
473
482}
483
485 const ARMSubtarget &STI)
486 : TargetLowering(TM), Subtarget(&STI) {
487 RegInfo = Subtarget->getRegisterInfo();
488 Itins = Subtarget->getInstrItineraryData();
489
492
493 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
494 !Subtarget->isTargetWatchOS() && !Subtarget->isTargetDriverKit()) {
495 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
496 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
497 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
498 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
500 }
501
502 if (Subtarget->isTargetMachO()) {
503 // Uses VFP for Thumb libfuncs if available.
504 if (Subtarget->isThumb() && Subtarget->hasVFP2Base() &&
505 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
506 static const struct {
507 const RTLIB::Libcall Op;
508 const char * const Name;
509 const ISD::CondCode Cond;
510 } LibraryCalls[] = {
511 // Single-precision floating-point arithmetic.
512 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
513 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
514 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
515 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
516
517 // Double-precision floating-point arithmetic.
518 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
519 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
520 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
521 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
522
523 // Single-precision comparisons.
524 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
525 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
526 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
527 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
528 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
529 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
530 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
531
532 // Double-precision comparisons.
533 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
534 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
535 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
536 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
537 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
538 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
539 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
540
541 // Floating-point to integer conversions.
542 // i64 conversions are done via library routines even when generating VFP
543 // instructions, so use the same ones.
544 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
545 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
546 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
547 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
548
549 // Conversions between floating types.
550 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
551 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
552
553 // Integer to floating-point conversions.
554 // i64 conversions are done via library routines even when generating VFP
555 // instructions, so use the same ones.
556 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
557 // e.g., __floatunsidf vs. __floatunssidfvfp.
558 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
559 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
560 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
561 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
562 };
563
564 for (const auto &LC : LibraryCalls) {
565 setLibcallName(LC.Op, LC.Name);
566 if (LC.Cond != ISD::SETCC_INVALID)
567 setCmpLibcallCC(LC.Op, LC.Cond);
568 }
569 }
570 }
571
572 // These libcalls are not available in 32-bit.
573 setLibcallName(RTLIB::SHL_I128, nullptr);
574 setLibcallName(RTLIB::SRL_I128, nullptr);
575 setLibcallName(RTLIB::SRA_I128, nullptr);
576 setLibcallName(RTLIB::MUL_I128, nullptr);
577 setLibcallName(RTLIB::MULO_I64, nullptr);
578 setLibcallName(RTLIB::MULO_I128, nullptr);
579
580 // RTLIB
581 if (Subtarget->isAAPCS_ABI() &&
582 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
583 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
584 static const struct {
585 const RTLIB::Libcall Op;
586 const char * const Name;
587 const CallingConv::ID CC;
588 const ISD::CondCode Cond;
589 } LibraryCalls[] = {
590 // Double-precision floating-point arithmetic helper functions
591 // RTABI chapter 4.1.2, Table 2
592 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
593 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
594 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
595 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
596
597 // Double-precision floating-point comparison helper functions
598 // RTABI chapter 4.1.2, Table 3
599 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
600 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
601 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
602 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
603 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
604 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
605 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
606
607 // Single-precision floating-point arithmetic helper functions
608 // RTABI chapter 4.1.2, Table 4
609 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
610 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
611 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
612 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
613
614 // Single-precision floating-point comparison helper functions
615 // RTABI chapter 4.1.2, Table 5
616 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
617 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
618 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
619 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
620 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
621 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
622 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
623
624 // Floating-point to integer conversions.
625 // RTABI chapter 4.1.2, Table 6
626 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
627 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
628 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
629 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
630 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
631 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
632 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
633 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
634
635 // Conversions between floating types.
636 // RTABI chapter 4.1.2, Table 7
637 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
638 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
639 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
640
641 // Integer to floating-point conversions.
642 // RTABI chapter 4.1.2, Table 8
643 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
644 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
645 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
646 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
647 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
648 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
649 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
650 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
651
652 // Long long helper functions
653 // RTABI chapter 4.2, Table 9
654 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
655 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
656 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
657 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
658
659 // Integer division functions
660 // RTABI chapter 4.3.1
661 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
662 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
663 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
664 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
665 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
666 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
667 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
668 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
669 };
670
671 for (const auto &LC : LibraryCalls) {
672 setLibcallName(LC.Op, LC.Name);
673 setLibcallCallingConv(LC.Op, LC.CC);
674 if (LC.Cond != ISD::SETCC_INVALID)
675 setCmpLibcallCC(LC.Op, LC.Cond);
676 }
677
678 // EABI dependent RTLIB
679 if (TM.Options.EABIVersion == EABI::EABI4 ||
680 TM.Options.EABIVersion == EABI::EABI5) {
681 static const struct {
682 const RTLIB::Libcall Op;
683 const char *const Name;
684 const CallingConv::ID CC;
685 const ISD::CondCode Cond;
686 } MemOpsLibraryCalls[] = {
687 // Memory operations
688 // RTABI chapter 4.3.4
689 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
690 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
691 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
692 };
693
694 for (const auto &LC : MemOpsLibraryCalls) {
695 setLibcallName(LC.Op, LC.Name);
696 setLibcallCallingConv(LC.Op, LC.CC);
697 if (LC.Cond != ISD::SETCC_INVALID)
698 setCmpLibcallCC(LC.Op, LC.Cond);
699 }
700 }
701 }
702
703 if (Subtarget->isTargetWindows()) {
704 static const struct {
705 const RTLIB::Libcall Op;
706 const char * const Name;
707 const CallingConv::ID CC;
708 } LibraryCalls[] = {
709 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
710 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
711 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
712 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
713 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
714 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
715 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
716 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
717 };
718
719 for (const auto &LC : LibraryCalls) {
720 setLibcallName(LC.Op, LC.Name);
721 setLibcallCallingConv(LC.Op, LC.CC);
722 }
723 }
724
725 // Use divmod compiler-rt calls for iOS 5.0 and later.
726 if (Subtarget->isTargetMachO() &&
727 !(Subtarget->isTargetIOS() &&
728 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
729 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
730 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
731 }
732
733 // The half <-> float conversion functions are always soft-float on
734 // non-watchos platforms, but are needed for some targets which use a
735 // hard-float calling convention by default.
736 if (!Subtarget->isTargetWatchABI()) {
737 if (Subtarget->isAAPCS_ABI()) {
738 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
739 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
740 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
741 } else {
742 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
743 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
744 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
745 }
746 }
747
748 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
749 // a __gnu_ prefix (which is the default).
750 if (Subtarget->isTargetAEABI()) {
751 static const struct {
752 const RTLIB::Libcall Op;
753 const char * const Name;
754 const CallingConv::ID CC;
755 } LibraryCalls[] = {
756 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
757 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
758 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
759 };
760
761 for (const auto &LC : LibraryCalls) {
762 setLibcallName(LC.Op, LC.Name);
763 setLibcallCallingConv(LC.Op, LC.CC);
764 }
765 }
766
767 if (Subtarget->isThumb1Only())
768 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
769 else
770 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
771
772 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
773 Subtarget->hasFPRegs()) {
774 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
775 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
776
781
782 if (!Subtarget->hasVFP2Base())
783 setAllExpand(MVT::f32);
784 if (!Subtarget->hasFP64())
785 setAllExpand(MVT::f64);
786 }
787
788 if (Subtarget->hasFullFP16()) {
789 addRegisterClass(MVT::f16, &ARM::HPRRegClass);
792
795 }
796
797 if (Subtarget->hasBF16()) {
798 addRegisterClass(MVT::bf16, &ARM::HPRRegClass);
799 setAllExpand(MVT::bf16);
800 if (!Subtarget->hasFullFP16())
802 }
803
805 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
806 setTruncStoreAction(VT, InnerVT, Expand);
807 addAllExtLoads(VT, InnerVT, Expand);
808 }
809
812
814 }
815
818
821
822 if (Subtarget->hasMVEIntegerOps())
823 addMVEVectorTypes(Subtarget->hasMVEFloatOps());
824
825 // Combine low-overhead loop intrinsics so that we can lower i1 types.
826 if (Subtarget->hasLOB()) {
828 }
829
830 if (Subtarget->hasNEON()) {
831 addDRTypeForNEON(MVT::v2f32);
832 addDRTypeForNEON(MVT::v8i8);
833 addDRTypeForNEON(MVT::v4i16);
834 addDRTypeForNEON(MVT::v2i32);
835 addDRTypeForNEON(MVT::v1i64);
836
837 addQRTypeForNEON(MVT::v4f32);
838 addQRTypeForNEON(MVT::v2f64);
839 addQRTypeForNEON(MVT::v16i8);
840 addQRTypeForNEON(MVT::v8i16);
841 addQRTypeForNEON(MVT::v4i32);
842 addQRTypeForNEON(MVT::v2i64);
843
844 if (Subtarget->hasFullFP16()) {
845 addQRTypeForNEON(MVT::v8f16);
846 addDRTypeForNEON(MVT::v4f16);
847 }
848
849 if (Subtarget->hasBF16()) {
850 addQRTypeForNEON(MVT::v8bf16);
851 addDRTypeForNEON(MVT::v4bf16);
852 }
853 }
854
855 if (Subtarget->hasMVEIntegerOps() || Subtarget->hasNEON()) {
856 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
857 // none of Neon, MVE or VFP supports any arithmetic operations on it.
858 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
859 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
860 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
861 // FIXME: Code duplication: FDIV and FREM are expanded always, see
862 // ARMTargetLowering::addTypeForNEON method for details.
863 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
864 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
865 // FIXME: Create unittest.
866 // In other words, find a case where "copysign" appears in a DAG with vector
867 // operands.
869 // FIXME: Code duplication: SETCC has custom operation action, see
870 // ARMTargetLowering::addTypeForNEON method for details.
872 // FIXME: Create unittest for FNEG and for FABS.
873 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
874 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
876 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
877 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
878 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
879 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
882 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
885 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
891 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
892 }
893
894 if (Subtarget->hasNEON()) {
895 // The same applies to v4f32, but keep in mind that vadd, vsub and vmul are
896 // natively supported for v4f32.
898 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
899 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
900 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
901 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
904 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
912
913 // Mark v2f32 intrinsics.
915 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
916 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
917 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
918 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
921 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
929
930 // Neon does not support some operations on v1i64 and v2i64 types.
931 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
932 // Custom handling for some quad-vector types to detect VMULL.
933 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
934 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
935 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
936 // Custom handling for some vector types to avoid expensive expansions
937 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
939 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
941 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
942 // a destination type that is wider than the source, nor does
943 // it have a FP_TO_[SU]INT instruction with a narrower destination than
944 // source.
953
956
957 // NEON does not have single instruction CTPOP for vectors with element
958 // types wider than 8 bits. However, custom lowering can leverage the
959 // v8i8/v16i8 vcnt instruction.
966
967 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
968 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
969
970 // NEON does not have single instruction CTTZ for vectors.
972 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
973 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
974 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
975
976 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
977 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
978 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
979 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
980
985
990
994 }
995
996 // NEON only has FMA instructions as of VFP4.
997 if (!Subtarget->hasVFP4Base()) {
998 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
999 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
1000 }
1001
1004
1005 // It is legal to extload from v4i8 to v4i16 or v4i32.
1006 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
1007 MVT::v2i32}) {
1012 }
1013 }
1014
1015 for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32, MVT::v16i8, MVT::v8i16,
1016 MVT::v4i32}) {
1021 }
1022 }
1023
1024 if (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) {
1031 }
1032 if (Subtarget->hasMVEIntegerOps()) {
1035 ISD::SETCC});
1036 }
1037 if (Subtarget->hasMVEFloatOps()) {
1039 }
1040
1041 if (!Subtarget->hasFP64()) {
1042 // When targeting a floating-point unit with only single-precision
1043 // operations, f64 is legal for the few double-precision instructions which
1044 // are present. However, no double-precision operations other than moves,
1045 // loads and stores are provided by the hardware.
1083 }
1084
1085 if (!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) {
1088 if (Subtarget->hasFullFP16()) {
1091 }
1092 }
1093
1094 if (!Subtarget->hasFP16()) {
1097 }
1098
1100
1101 // ARM does not have floating-point extending loads.
1102 for (MVT VT : MVT::fp_valuetypes()) {
1103 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
1104 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
1105 }
1106
1107 // ... or truncating stores
1108 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1109 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
1110 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
1111
1112 // ARM does not have i1 sign extending load.
1113 for (MVT VT : MVT::integer_valuetypes())
1114 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
1115
1116 // ARM supports all 4 flavors of integer indexed load / store.
1117 if (!Subtarget->isThumb1Only()) {
1118 for (unsigned im = (unsigned)ISD::PRE_INC;
1120 setIndexedLoadAction(im, MVT::i1, Legal);
1121 setIndexedLoadAction(im, MVT::i8, Legal);
1122 setIndexedLoadAction(im, MVT::i16, Legal);
1123 setIndexedLoadAction(im, MVT::i32, Legal);
1124 setIndexedStoreAction(im, MVT::i1, Legal);
1125 setIndexedStoreAction(im, MVT::i8, Legal);
1126 setIndexedStoreAction(im, MVT::i16, Legal);
1127 setIndexedStoreAction(im, MVT::i32, Legal);
1128 }
1129 } else {
1130 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
1133 }
1134
1139
1142 if (Subtarget->hasDSP()) {
1151 }
1152 if (Subtarget->hasBaseDSP()) {
1155 }
1156
1157 // i64 operation support.
1160 if (Subtarget->isThumb1Only()) {
1163 }
1164 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
1165 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
1167
1177
1178 // MVE lowers 64-bit shifts to lsll and lsrl,
1179 // assuming that ISD::SRL and SRA of i64 are already marked custom.
1180 if (Subtarget->hasMVEIntegerOps())
1182
1183 // Expand to __aeabi_l{lsl,lsr,asr} calls for Thumb1.
1184 if (Subtarget->isThumb1Only()) {
1188 }
1189
1190 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
1192
1193 // ARM does not have ROTL.
1198 }
1201 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) {
1204 }
1205
1206 // @llvm.readcyclecounter requires the Performance Monitors extension.
1207 // Default to the 0 expansion on unsupported platforms.
1208 // FIXME: Technically there are older ARM CPUs that have
1209 // implementation-specific ways of obtaining this information.
1210 if (Subtarget->hasPerfMon())
1212
1213 // Only ARMv6 has BSWAP.
1214 if (!Subtarget->hasV6Ops())
1216
1217 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
1218 : Subtarget->hasDivideInARMMode();
1219 if (!hasDivide) {
1220 // These are expanded into libcalls if the cpu doesn't have HW divider.
1223 }
1224
1225 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
1228
1231 }
1232
1235
1236 // Register based DivRem for AEABI (RTABI 4.2)
1237 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
1238 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
1239 Subtarget->isTargetWindows()) {
1242 HasStandaloneRem = false;
1243
1244 if (Subtarget->isTargetWindows()) {
1245 const struct {
1246 const RTLIB::Libcall Op;
1247 const char * const Name;
1248 const CallingConv::ID CC;
1249 } LibraryCalls[] = {
1250 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
1251 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
1252 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
1253 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
1254
1255 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
1256 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
1257 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
1258 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
1259 };
1260
1261 for (const auto &LC : LibraryCalls) {
1262 setLibcallName(LC.Op, LC.Name);
1263 setLibcallCallingConv(LC.Op, LC.CC);
1264 }
1265 } else {
1266 const struct {
1267 const RTLIB::Libcall Op;
1268 const char * const Name;
1269 const CallingConv::ID CC;
1270 } LibraryCalls[] = {
1271 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1272 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1273 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
1274 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
1275
1276 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1277 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1278 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
1279 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
1280 };
1281
1282 for (const auto &LC : LibraryCalls) {
1283 setLibcallName(LC.Op, LC.Name);
1284 setLibcallCallingConv(LC.Op, LC.CC);
1285 }
1286 }
1287
1292 } else {
1295 }
1296
1297 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
1298 // MSVCRT doesn't have powi; fall back to pow
1299 setLibcallName(RTLIB::POWI_F32, nullptr);
1300 setLibcallName(RTLIB::POWI_F64, nullptr);
1301 }
1302
1307
1308 setOperationAction(ISD::TRAP, MVT::Other, Legal);
1310
1311 // Use the default implementation.
1313 setOperationAction(ISD::VAARG, MVT::Other, Expand);
1315 setOperationAction(ISD::VAEND, MVT::Other, Expand);
1318
1319 if (Subtarget->isTargetWindows())
1321 else
1323
1324 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
1325 // the default expansion.
1326 InsertFencesForAtomic = false;
1327 if (Subtarget->hasAnyDataBarrier() &&
1328 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
1329 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
1330 // to ldrex/strex loops already.
1332 if (!Subtarget->isThumb() || !Subtarget->isMClass())
1334
1335 // On v8, we have particularly efficient implementations of atomic fences
1336 // if they can be combined with nearby atomic loads and stores.
1337 if (!Subtarget->hasAcquireRelease() ||
1338 getTargetMachine().getOptLevel() == CodeGenOptLevel::None) {
1339 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
1340 InsertFencesForAtomic = true;
1341 }
1342 } else {
1343 // If there's anything we can use as a barrier, go through custom lowering
1344 // for ATOMIC_FENCE.
1345 // If target has DMB in thumb, Fences can be inserted.
1346 if (Subtarget->hasDataBarrier())
1347 InsertFencesForAtomic = true;
1348
1350 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
1351
1352 // Set them all for libcall, which will force libcalls.
1365 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1366 // Unordered/Monotonic case.
1367 if (!InsertFencesForAtomic) {
1370 }
1371 }
1372
1373 // Compute supported atomic widths.
1374 if (Subtarget->isTargetLinux() ||
1375 (!Subtarget->isMClass() && Subtarget->hasV6Ops())) {
1376 // For targets where __sync_* routines are reliably available, we use them
1377 // if necessary.
1378 //
1379 // ARM Linux always supports 64-bit atomics through kernel-assisted atomic
1380 // routines (kernel 3.1 or later). FIXME: Not with compiler-rt?
1381 //
1382 // ARMv6 targets have native instructions in ARM mode. For Thumb mode,
1383 // such targets should provide __sync_* routines, which use the ARM mode
1384 // instructions. (ARMv6 doesn't have dmb, but it has an equivalent
1385 // encoding; see ARMISD::MEMBARRIER_MCR.)
1387 } else if ((Subtarget->isMClass() && Subtarget->hasV8MBaselineOps()) ||
1388 Subtarget->hasForced32BitAtomics()) {
1389 // Cortex-M (besides Cortex-M0) has 32-bit atomics.
1391 } else {
1392 // We can't assume anything about other targets; just use libatomic
1393 // routines.
1395 }
1396
1398
1400
1401 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1402 if (!Subtarget->hasV6Ops()) {
1405 }
1407
1408 if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
1409 !Subtarget->isThumb1Only()) {
1410 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1411 // iff target supports vfp2.
1421 }
1422
1423 // We want to custom lower some of our intrinsics.
1428 if (Subtarget->useSjLjEH())
1429 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1430
1440 if (Subtarget->hasFullFP16()) {
1444 }
1445
1447
1450 if (Subtarget->hasFullFP16())
1454 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1455
1456 // We don't support sin/cos/fmod/copysign/pow
1465 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2Base() &&
1466 !Subtarget->isThumb1Only()) {
1469 }
1472
1473 if (!Subtarget->hasVFP4Base()) {
1476 }
1477
1478 // Various VFP goodness
1479 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1480 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1481 if (!Subtarget->hasFPARMv8Base() || !Subtarget->hasFP64()) {
1484 }
1485
1486 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1487 if (!Subtarget->hasFP16()) {
1490 }
1491
1492 // Strict floating-point comparisons need custom lowering.
1499 }
1500
1501 // Use __sincos_stret if available.
1502 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1503 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1506 }
1507
1508 // FP-ARMv8 implements a lot of rounding-like FP operations.
1509 if (Subtarget->hasFPARMv8Base()) {
1518 if (Subtarget->hasNEON()) {
1523 }
1524
1525 if (Subtarget->hasFP64()) {
1534 }
1535 }
1536
1537 // FP16 operations often need to be promoted to call library functions.
1538 if (Subtarget->hasFullFP16()) {
1552
1554 }
1555
1556 if (Subtarget->hasNEON()) {
1557 // vmin and vmax aren't available in a scalar form, so we can use
1558 // a NEON instruction with an undef lane instead. This has a performance
1559 // penalty on some cores, so we don't do this unless we have been
1560 // asked to by the core tuning model.
1561 if (Subtarget->useNEONForSinglePrecisionFP()) {
1566 }
1571
1572 if (Subtarget->hasFullFP16()) {
1577
1582 }
1583 }
1584
1585 // We have target-specific dag combine patterns for the following nodes:
1586 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1589
1590 if (Subtarget->hasMVEIntegerOps())
1592
1593 if (Subtarget->hasV6Ops())
1595 if (Subtarget->isThumb1Only())
1597 // Attempt to lower smin/smax to ssat/usat
1598 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) ||
1599 Subtarget->isThumb2()) {
1601 }
1602
1604
1605 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1606 !Subtarget->hasVFP2Base() || Subtarget->hasMinSize())
1608 else
1610
1611 //// temporary - rewrite interface to use type
1614 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1616 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1618
1619 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1620 // are at least 4 bytes aligned.
1622
1623 // Prefer likely predicted branches to selects on out-of-order cores.
1624 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1625
1626 setPrefLoopAlignment(Align(1ULL << Subtarget->getPrefLoopLogAlignment()));
1628
1629 setMinFunctionAlignment(Subtarget->isThumb() ? Align(2) : Align(4));
1630
1631 if (Subtarget->isThumb() || Subtarget->isThumb2())
1633}
1634
1636 return Subtarget->useSoftFloat();
1637}
1638
1639// FIXME: It might make sense to define the representative register class as the
1640// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1641 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1642// SPR's representative would be DPR_VFP2. This should work well if register
1643// pressure tracking were modified such that a register use would increment the
1644 // pressure of the register class's representative and all of its super
1645// classes' representatives transitively. We have not implemented this because
1646// of the difficulty prior to coalescing of modeling operand register classes
1647// due to the common occurrence of cross class copies and subregister insertions
1648// and extractions.
1649std::pair<const TargetRegisterClass *, uint8_t>
1651 MVT VT) const {
1652 const TargetRegisterClass *RRC = nullptr;
1653 uint8_t Cost = 1;
1654 switch (VT.SimpleTy) {
1655 default:
1657 // Use DPR as representative register class for all floating point
1658 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1659 // the cost is 1 for both f32 and f64.
1660 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1661 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1662 RRC = &ARM::DPRRegClass;
1663 // When NEON is used for SP, only half of the register file is available
1664 // because operations that define both SP and DP results will be constrained
1665 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1666 // coalescing by double-counting the SP regs. See the FIXME above.
1667 if (Subtarget->useNEONForSinglePrecisionFP())
1668 Cost = 2;
1669 break;
1670 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1671 case MVT::v4f32: case MVT::v2f64:
1672 RRC = &ARM::DPRRegClass;
1673 Cost = 2;
1674 break;
1675 case MVT::v4i64:
1676 RRC = &ARM::DPRRegClass;
1677 Cost = 4;
1678 break;
1679 case MVT::v8i64:
1680 RRC = &ARM::DPRRegClass;
1681 Cost = 8;
1682 break;
1683 }
1684 return std::make_pair(RRC, Cost);
1685}
1686
1687const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1688#define MAKE_CASE(V) \
1689 case V: \
1690 return #V;
1691 switch ((ARMISD::NodeType)Opcode) {
1693 break;
1897#undef MAKE_CASE
1898 }
1899 return nullptr;
1900}
1901
1903 EVT VT) const {
1904 if (!VT.isVector())
1905 return getPointerTy(DL);
1906
1907 // MVE has a predicate register.
1908 if ((Subtarget->hasMVEIntegerOps() &&
1909 (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
1910 VT == MVT::v16i8)) ||
1911 (Subtarget->hasMVEFloatOps() &&
1912 (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16)))
1913 return MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
1915}
1916
1917/// getRegClassFor - Return the register class that should be used for the
1918/// specified value type.
1919const TargetRegisterClass *
1920ARMTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
1921 (void)isDivergent;
1922 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1923 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1924 // load / store 4 to 8 consecutive NEON D registers, or 2 to 4 consecutive
1925 // MVE Q registers.
1926 if (Subtarget->hasNEON()) {
1927 if (VT == MVT::v4i64)
1928 return &ARM::QQPRRegClass;
1929 if (VT == MVT::v8i64)
1930 return &ARM::QQQQPRRegClass;
1931 }
1932 if (Subtarget->hasMVEIntegerOps()) {
1933 if (VT == MVT::v4i64)
1934 return &ARM::MQQPRRegClass;
1935 if (VT == MVT::v8i64)
1936 return &ARM::MQQQQPRRegClass;
1937 }
1939}
1940
1941 // memcpy and other memory intrinsics typically try to use LDM/STM if the
1942// source/dest is aligned and the copy size is large enough. We therefore want
1943// to align such objects passed to memory intrinsics.
1945 Align &PrefAlign) const {
1946 if (!isa<MemIntrinsic>(CI))
1947 return false;
1948 MinSize = 8;
1949 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1950 // cycle faster than 4-byte aligned LDM.
1951 PrefAlign =
1952 (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? Align(8) : Align(4));
1953 return true;
1954}
1955
1956// Create a fast isel object.
1957FastISel *
1959 const TargetLibraryInfo *libInfo) const {
1960 return ARM::createFastISel(funcInfo, libInfo);
1961}
1962
1964 unsigned NumVals = N->getNumValues();
1965 if (!NumVals)
1966 return Sched::RegPressure;
1967
1968 for (unsigned i = 0; i != NumVals; ++i) {
1969 EVT VT = N->getValueType(i);
1970 if (VT == MVT::Glue || VT == MVT::Other)
1971 continue;
1972 if (VT.isFloatingPoint() || VT.isVector())
1973 return Sched::ILP;
1974 }
1975
1976 if (!N->isMachineOpcode())
1977 return Sched::RegPressure;
1978
1979 // Loads are scheduled for latency even if the instruction itinerary
1980 // is not available.
1981 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1982 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1983
1984 if (MCID.getNumDefs() == 0)
1985 return Sched::RegPressure;
1986 if (!Itins->isEmpty() &&
1987 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2U)
1988 return Sched::ILP;
1989
1990 return Sched::RegPressure;
1991}
1992
1993//===----------------------------------------------------------------------===//
1994// Lowering Code
1995//===----------------------------------------------------------------------===//
1996
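// Matchers for constant shift-by-16 nodes (lsr/asr/shl by 16); isS16 below
// uses the SRA/SHL forms to recognize operands that are really sign-extended
// 16-bit values.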
1997static bool isSRL16(const SDValue &Op) {
1998 if (Op.getOpcode() != ISD::SRL)
1999 return false;
2000 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2001 return Const->getZExtValue() == 16;
2002 return false;
2003}
2004
2005static bool isSRA16(const SDValue &Op) {
2006 if (Op.getOpcode() != ISD::SRA)
2007 return false;
2008 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2009 return Const->getZExtValue() == 16;
2010 return false;
2011}
2012
2013static bool isSHL16(const SDValue &Op) {
2014 if (Op.getOpcode() != ISD::SHL)
2015 return false;
2016 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
2017 return Const->getZExtValue() == 16;
2018 return false;
2019}
2020
2021// Check for a signed 16-bit value. We special case SRA because it makes it
2022 // simpler when also looking for SRAs that aren't sign-extending a
2023// smaller value. Without the check, we'd need to take extra care with
2024// checking order for some operations.
2025static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
2026 if (isSRA16(Op))
2027 return isSHL16(Op.getOperand(0));
2028 return DAG.ComputeNumSignBits(Op) == 17;
2029}
2030
2031/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
2033 switch (CC) {
2034 default: llvm_unreachable("Unknown condition code!");
2035 case ISD::SETNE: return ARMCC::NE;
2036 case ISD::SETEQ: return ARMCC::EQ;
2037 case ISD::SETGT: return ARMCC::GT;
2038 case ISD::SETGE: return ARMCC::GE;
2039 case ISD::SETLT: return ARMCC::LT;
2040 case ISD::SETLE: return ARMCC::LE;
2041 case ISD::SETUGT: return ARMCC::HI;
2042 case ISD::SETUGE: return ARMCC::HS;
2043 case ISD::SETULT: return ARMCC::LO;
2044 case ISD::SETULE: return ARMCC::LS;
2045 }
2046}
2047
2048/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
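/// Some unordered conditions cannot be expressed with a single ARM condition
/// code, so a second code may be returned in CondCode2 (AL means unused).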
2050 ARMCC::CondCodes &CondCode2) {
2051 CondCode2 = ARMCC::AL;
2052 switch (CC) {
2053 default: llvm_unreachable("Unknown FP condition!");
2054 case ISD::SETEQ:
2055 case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
2056 case ISD::SETGT:
2057 case ISD::SETOGT: CondCode = ARMCC::GT; break;
2058 case ISD::SETGE:
2059 case ISD::SETOGE: CondCode = ARMCC::GE; break;
2060 case ISD::SETOLT: CondCode = ARMCC::MI; break;
2061 case ISD::SETOLE: CondCode = ARMCC::LS; break;
2062 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
2063 case ISD::SETO: CondCode = ARMCC::VC; break;
2064 case ISD::SETUO: CondCode = ARMCC::VS; break;
2065 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
2066 case ISD::SETUGT: CondCode = ARMCC::HI; break;
2067 case ISD::SETUGE: CondCode = ARMCC::PL; break;
2068 case ISD::SETLT:
2069 case ISD::SETULT: CondCode = ARMCC::LT; break;
2070 case ISD::SETLE:
2071 case ISD::SETULE: CondCode = ARMCC::LE; break;
2072 case ISD::SETNE:
2073 case ISD::SETUNE: CondCode = ARMCC::NE; break;
2074 }
2075}
2076
2077//===----------------------------------------------------------------------===//
2078// Calling Convention Implementation
2079//===----------------------------------------------------------------------===//
2080
2081/// getEffectiveCallingConv - Get the effective calling convention, taking into
2082/// account presence of floating point hardware and calling convention
2083/// limitations, such as support for variadic functions.
2085ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
2086 bool isVarArg) const {
2087 switch (CC) {
2088 default:
2089 report_fatal_error("Unsupported calling convention");
2092 case CallingConv::GHC:
2094 return CC;
2100 case CallingConv::Swift:
2103 case CallingConv::C:
2104 case CallingConv::Tail:
2105 if (!Subtarget->isAAPCS_ABI())
2106 return CallingConv::ARM_APCS;
2107 else if (Subtarget->hasFPRegs() && !Subtarget->isThumb1Only() &&
2108 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
2109 !isVarArg)
2111 else
2113 case CallingConv::Fast:
2115 if (!Subtarget->isAAPCS_ABI()) {
2116 if (Subtarget->hasVFP2Base() && !Subtarget->isThumb1Only() && !isVarArg)
2117 return CallingConv::Fast;
2118 return CallingConv::ARM_APCS;
2119 } else if (Subtarget->hasVFP2Base() &&
2120 !Subtarget->isThumb1Only() && !isVarArg)
2122 else
2124 }
2125}
2126
2128 bool isVarArg) const {
2129 return CCAssignFnForNode(CC, false, isVarArg);
2130}
2131
2133 bool isVarArg) const {
2134 return CCAssignFnForNode(CC, true, isVarArg);
2135}
2136
2137/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
2138/// CallingConvention.
2139CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
2140 bool Return,
2141 bool isVarArg) const {
2142 switch (getEffectiveCallingConv(CC, isVarArg)) {
2143 default:
2144 report_fatal_error("Unsupported calling convention");
2146 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
2148 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2150 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
2151 case CallingConv::Fast:
2152 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
2153 case CallingConv::GHC:
2154 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
2156 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2158 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
2160 return (Return ? RetCC_ARM_AAPCS : CC_ARM_Win32_CFGuard_Check);
2161 }
2162}
2163
2164SDValue ARMTargetLowering::MoveToHPR(const SDLoc &dl, SelectionDAG &DAG,
2165 MVT LocVT, MVT ValVT, SDValue Val) const {
2166 Val = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocVT.getSizeInBits()),
2167 Val);
2168 if (Subtarget->hasFullFP16()) {
2169 Val = DAG.getNode(ARMISD::VMOVhr, dl, ValVT, Val);
2170 } else {
2171 Val = DAG.getNode(ISD::TRUNCATE, dl,
2172 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2173 Val = DAG.getNode(ISD::BITCAST, dl, ValVT, Val);
2174 }
2175 return Val;
2176}
2177
2178SDValue ARMTargetLowering::MoveFromHPR(const SDLoc &dl, SelectionDAG &DAG,
2179 MVT LocVT, MVT ValVT,
2180 SDValue Val) const {
2181 if (Subtarget->hasFullFP16()) {
2182 Val = DAG.getNode(ARMISD::VMOVrh, dl,
2183 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2184 } else {
2185 Val = DAG.getNode(ISD::BITCAST, dl,
2186 MVT::getIntegerVT(ValVT.getSizeInBits()), Val);
2187 Val = DAG.getNode(ISD::ZERO_EXTEND, dl,
2188 MVT::getIntegerVT(LocVT.getSizeInBits()), Val);
2189 }
2190 return DAG.getNode(ISD::BITCAST, dl, LocVT, Val);
2191}
2192
2193/// LowerCallResult - Lower the result values of a call into the
2194/// appropriate copies out of appropriate physical registers.
2195SDValue ARMTargetLowering::LowerCallResult(
2196 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
2197 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2198 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
2199 SDValue ThisVal) const {
2200 // Assign locations to each value returned by this call.
2202 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2203 *DAG.getContext());
2204 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
2205
2206 // Copy all of the result registers out of their specified physreg.
2207 for (unsigned i = 0; i != RVLocs.size(); ++i) {
2208 CCValAssign VA = RVLocs[i];
2209
2210 // Pass 'this' value directly from the argument to return value, to avoid
2211 // reg unit interference
2212 if (i == 0 && isThisReturn) {
2213 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
2214 "unexpected return calling convention register assignment");
2215 InVals.push_back(ThisVal);
2216 continue;
2217 }
2218
2219 SDValue Val;
2220 if (VA.needsCustom() &&
2221 (VA.getLocVT() == MVT::f64 || VA.getLocVT() == MVT::v2f64)) {
2222 // Handle f64 or half of a v2f64.
2223 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2224 InGlue);
2225 Chain = Lo.getValue(1);
2226 InGlue = Lo.getValue(2);
2227 VA = RVLocs[++i]; // skip ahead to next loc
2228 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
2229 InGlue);
2230 Chain = Hi.getValue(1);
2231 InGlue = Hi.getValue(2);
2232 if (!Subtarget->isLittle())
2233 std::swap (Lo, Hi);
2234 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2235
2236 if (VA.getLocVT() == MVT::v2f64) {
2237 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
2238 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2239 DAG.getConstant(0, dl, MVT::i32));
2240
2241 VA = RVLocs[++i]; // skip ahead to next loc
2242 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2243 Chain = Lo.getValue(1);
2244 InGlue = Lo.getValue(2);
2245 VA = RVLocs[++i]; // skip ahead to next loc
2246 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InGlue);
2247 Chain = Hi.getValue(1);
2248 InGlue = Hi.getValue(2);
2249 if (!Subtarget->isLittle())
2250 std::swap (Lo, Hi);
2251 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
2252 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
2253 DAG.getConstant(1, dl, MVT::i32));
2254 }
2255 } else {
2256 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
2257 InGlue);
2258 Chain = Val.getValue(1);
2259 InGlue = Val.getValue(2);
2260 }
2261
2262 switch (VA.getLocInfo()) {
2263 default: llvm_unreachable("Unknown loc info!");
2264 case CCValAssign::Full: break;
2265 case CCValAssign::BCvt:
2266 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
2267 break;
2268 }
2269
2270 // f16 arguments have their size extended to 4 bytes and passed as if they
2271 // had been copied to the LSBs of a 32-bit register.
2272 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
2273 if (VA.needsCustom() &&
2274 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
2275 Val = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Val);
2276
2277 InVals.push_back(Val);
2278 }
2279
2280 return Chain;
2281}
2282
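// Compute the stack address (and MachinePointerInfo) at which an outgoing call
// argument should be stored: a fixed frame index offset by SPDiff for tail
// calls, or an offset from the current stack pointer otherwise.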
2283std::pair<SDValue, MachinePointerInfo> ARMTargetLowering::computeAddrForCallArg(
2284 const SDLoc &dl, SelectionDAG &DAG, const CCValAssign &VA, SDValue StackPtr,
2285 bool IsTailCall, int SPDiff) const {
2286 SDValue DstAddr;
2287 MachinePointerInfo DstInfo;
2288 int32_t Offset = VA.getLocMemOffset();
2290
2291 if (IsTailCall) {
2292 Offset += SPDiff;
2293 auto PtrVT = getPointerTy(DAG.getDataLayout());
2294 int Size = VA.getLocVT().getFixedSizeInBits() / 8;
2295 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
2296 DstAddr = DAG.getFrameIndex(FI, PtrVT);
2297 DstInfo =
2299 } else {
2300 SDValue PtrOff = DAG.getIntPtrConstant(Offset, dl);
2301 DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2302 StackPtr, PtrOff);
2303 DstInfo =
2305 }
2306
2307 return std::make_pair(DstAddr, DstInfo);
2308}
2309
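// Split an f64 (or half of a v2f64) argument into two i32 halves with VMOVRRD
// and pass them in the assigned register(s), storing the second half to the
// stack when only one register is available; endianness decides which half
// goes first.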
2310void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
2311 SDValue Chain, SDValue &Arg,
2312 RegsToPassVector &RegsToPass,
2313 CCValAssign &VA, CCValAssign &NextVA,
2314 SDValue &StackPtr,
2315 SmallVectorImpl<SDValue> &MemOpChains,
2316 bool IsTailCall,
2317 int SPDiff) const {
2318 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2319 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2320 unsigned id = Subtarget->isLittle() ? 0 : 1;
2321 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
2322
2323 if (NextVA.isRegLoc())
2324 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
2325 else {
2326 assert(NextVA.isMemLoc());
2327 if (!StackPtr.getNode())
2328 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
2330
2331 SDValue DstAddr;
2332 MachinePointerInfo DstInfo;
2333 std::tie(DstAddr, DstInfo) =
2334 computeAddrForCallArg(dl, DAG, NextVA, StackPtr, IsTailCall, SPDiff);
2335 MemOpChains.push_back(
2336 DAG.getStore(Chain, dl, fmrrd.getValue(1 - id), DstAddr, DstInfo));
2337 }
2338}
2339
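// Returns true when the calling convention allows tail calls to be guaranteed
// (e.g. fastcc when GuaranteedTailCallOpt is enabled).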
2340static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls) {
2341 return (CC == CallingConv::Fast && GuaranteeTailCalls) ||
2343}
2344
2345/// LowerCall - Lower a call into a callseq_start <-
2346/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
2347/// nodes.
2348SDValue
2349ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2350 SmallVectorImpl<SDValue> &InVals) const {
2351 SelectionDAG &DAG = CLI.DAG;
2352 SDLoc &dl = CLI.DL;
2354 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2356 SDValue Chain = CLI.Chain;
2357 SDValue Callee = CLI.Callee;
2358 bool &isTailCall = CLI.IsTailCall;
2359 CallingConv::ID CallConv = CLI.CallConv;
2360 bool doesNotRet = CLI.DoesNotReturn;
2361 bool isVarArg = CLI.IsVarArg;
2362
2366 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
2367 bool isThisReturn = false;
2368 bool isCmseNSCall = false;
2369 bool isSibCall = false;
2370 bool PreferIndirect = false;
2371 bool GuardWithBTI = false;
2372
2373 // Lower 'returns_twice' calls to a pseudo-instruction.
2374 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) &&
2375 !Subtarget->noBTIAtReturnTwice())
2376 GuardWithBTI = AFI->branchTargetEnforcement();
2377
2378 // Determine whether this is a non-secure function call.
2379 if (CLI.CB && CLI.CB->getAttributes().hasFnAttr("cmse_nonsecure_call"))
2380 isCmseNSCall = true;
2381
2382 // Disable tail calls if they're not supported.
2383 if (!Subtarget->supportsTailCall())
2384 isTailCall = false;
2385
2386 // For both the non-secure calls and the returns from a CMSE entry function,
 2387 // the function needs to do some extra work after the call, or before the
 2388 // return, respectively, thus it cannot end with a tail call.
2389 if (isCmseNSCall || AFI->isCmseNSEntryFunction())
2390 isTailCall = false;
2391
2392 if (isa<GlobalAddressSDNode>(Callee)) {
2393 // If we're optimizing for minimum size and the function is called three or
2394 // more times in this block, we can improve codesize by calling indirectly
2395 // as BLXr has a 16-bit encoding.
2396 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2397 if (CLI.CB) {
2398 auto *BB = CLI.CB->getParent();
2399 PreferIndirect = Subtarget->isThumb() && Subtarget->hasMinSize() &&
2400 count_if(GV->users(), [&BB](const User *U) {
2401 return isa<Instruction>(U) &&
2402 cast<Instruction>(U)->getParent() == BB;
2403 }) > 2;
2404 }
2405 }
2406 if (isTailCall) {
2407 // Check if it's really possible to do a tail call.
2408 isTailCall = IsEligibleForTailCallOptimization(
2409 Callee, CallConv, isVarArg, isStructRet,
2410 MF.getFunction().hasStructRetAttr(), Outs, OutVals, Ins, DAG,
2411 PreferIndirect);
2412
2413 if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt &&
2414 CallConv != CallingConv::Tail && CallConv != CallingConv::SwiftTail)
2415 isSibCall = true;
2416
2417 // We don't support GuaranteedTailCallOpt for ARM, only automatically
2418 // detected sibcalls.
2419 if (isTailCall)
2420 ++NumTailCalls;
2421 }
2422
2423 if (!isTailCall && CLI.CB && CLI.CB->isMustTailCall())
2424 report_fatal_error("failed to perform tail call elimination on a call "
2425 "site marked musttail");
2426 // Analyze operands of the call, assigning locations to each operand.
2428 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2429 *DAG.getContext());
2430 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
2431
2432 // Get a count of how many bytes are to be pushed on the stack.
2433 unsigned NumBytes = CCInfo.getStackSize();
2434
2435 // SPDiff is the byte offset of the call's argument area from the callee's.
2436 // Stores to callee stack arguments will be placed in FixedStackSlots offset
2437 // by this amount for a tail call. In a sibling call it must be 0 because the
2438 // caller will deallocate the entire stack and the callee still expects its
2439 // arguments to begin at SP+0. Completely unused for non-tail calls.
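// Worked example (hypothetical numbers): if the caller's incoming argument
// area holds 16 bytes but this tail call needs 24 bytes of stack arguments,
// SPDiff is -8 and the code below asks for 8 extra bytes to be reserved.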
2440 int SPDiff = 0;
2441
2442 if (isTailCall && !isSibCall) {
2443 auto FuncInfo = MF.getInfo<ARMFunctionInfo>();
2444 unsigned NumReusableBytes = FuncInfo->getArgumentStackSize();
2445
2446 // Since callee will pop argument stack as a tail call, we must keep the
2447 // popped size 16-byte aligned.
2448 Align StackAlign = DAG.getDataLayout().getStackAlignment();
2449 NumBytes = alignTo(NumBytes, StackAlign);
2450
2451 // SPDiff will be negative if this tail call requires more space than we
2452 // would automatically have in our incoming argument space. Positive if we
2453 // can actually shrink the stack.
2454 SPDiff = NumReusableBytes - NumBytes;
2455
2456 // If this call requires more stack than we have available from
2457 // LowerFormalArguments, tell FrameLowering to reserve space for it.
2458 if (SPDiff < 0 && AFI->getArgRegsSaveSize() < (unsigned)-SPDiff)
2459 AFI->setArgRegsSaveSize(-SPDiff);
2460 }
2461
2462 if (isSibCall) {
2463 // For sibling tail calls, memory operands are available in our caller's stack.
2464 NumBytes = 0;
2465 } else {
2466 // Adjust the stack pointer for the new arguments...
2467 // These operations are automatically eliminated by the prolog/epilog pass
2468 Chain = DAG.getCALLSEQ_START(Chain, isTailCall ? 0 : NumBytes, 0, dl);
2469 }
2470
2472 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
2473
2474 RegsToPassVector RegsToPass;
2475 SmallVector<SDValue, 8> MemOpChains;
2476
2477 // During a tail call, stores to the argument area must happen after all of
2478 // the function's incoming arguments have been loaded because they may alias.
2479 // This is done by folding in a TokenFactor from LowerFormalArguments, but
2480 // there's no point in doing so repeatedly so this tracks whether that's
2481 // happened yet.
2482 bool AfterFormalArgLoads = false;
2483
2484 // Walk the register/memloc assignments, inserting copies/loads. In the case
2485 // of tail call optimization, arguments are handled later.
2486 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2487 i != e;
2488 ++i, ++realArgIdx) {
2489 CCValAssign &VA = ArgLocs[i];
2490 SDValue Arg = OutVals[realArgIdx];
2491 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2492 bool isByVal = Flags.isByVal();
2493
2494 // Promote the value if needed.
2495 switch (VA.getLocInfo()) {
2496 default: llvm_unreachable("Unknown loc info!");
2497 case CCValAssign::Full: break;
2498 case CCValAssign::SExt:
2499 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
2500 break;
2501 case CCValAssign::ZExt:
2502 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
2503 break;
2504 case CCValAssign::AExt:
2505 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
2506 break;
2507 case CCValAssign::BCvt:
2508 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2509 break;
2510 }
2511
2512 if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
2513 Chain = DAG.getStackArgumentTokenFactor(Chain);
2514 AfterFormalArgLoads = true;
2515 }
2516
2517 // f16 arguments have their size extended to 4 bytes and passed as if they
2518 // had been copied to the LSBs of a 32-bit register.
2519 // For that, it's passed extended to i32 (soft ABI) or to f32 (hard ABI)
2520 if (VA.needsCustom() &&
2521 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16)) {
2522 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
2523 } else {
2524 // f16 arguments could have been extended prior to argument lowering.
 2525 // Mask those arguments if this is a CMSE nonsecure call.
2526 auto ArgVT = Outs[realArgIdx].ArgVT;
2527 if (isCmseNSCall && (ArgVT == MVT::f16)) {
2528 auto LocBits = VA.getLocVT().getSizeInBits();
2529 auto MaskValue = APInt::getLowBitsSet(LocBits, ArgVT.getSizeInBits());
2530 SDValue Mask =
2531 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
2532 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
2533 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
2534 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2535 }
2536 }
2537
2538 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
2539 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
2540 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2541 DAG.getConstant(0, dl, MVT::i32));
2542 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2543 DAG.getConstant(1, dl, MVT::i32));
2544
2545 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, VA, ArgLocs[++i],
2546 StackPtr, MemOpChains, isTailCall, SPDiff);
2547
2548 VA = ArgLocs[++i]; // skip ahead to next loc
2549 if (VA.isRegLoc()) {
2550 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, VA, ArgLocs[++i],
2551 StackPtr, MemOpChains, isTailCall, SPDiff);
2552 } else {
2553 assert(VA.isMemLoc());
2554 SDValue DstAddr;
2555 MachinePointerInfo DstInfo;
2556 std::tie(DstAddr, DstInfo) =
2557 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2558 MemOpChains.push_back(DAG.getStore(Chain, dl, Op1, DstAddr, DstInfo));
2559 }
2560 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
2561 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
2562 StackPtr, MemOpChains, isTailCall, SPDiff);
2563 } else if (VA.isRegLoc()) {
2564 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
2565 Outs[0].VT == MVT::i32) {
2566 assert(VA.getLocVT() == MVT::i32 &&
2567 "unexpected calling convention register assignment");
2568 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
2569 "unexpected use of 'returned'");
2570 isThisReturn = true;
2571 }
2572 const TargetOptions &Options = DAG.getTarget().Options;
2573 if (Options.EmitCallSiteInfo)
2574 CSInfo.emplace_back(VA.getLocReg(), i);
2575 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2576 } else if (isByVal) {
2577 assert(VA.isMemLoc());
2578 unsigned offset = 0;
2579
2580 // True if this byval aggregate will be split between registers
2581 // and memory.
2582 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
2583 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
2584
2585 if (CurByValIdx < ByValArgsCount) {
2586
2587 unsigned RegBegin, RegEnd;
2588 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
2589
2590 EVT PtrVT =
2592 unsigned int i, j;
2593 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
2594 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
2595 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
2596 SDValue Load =
2597 DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(),
2598 DAG.InferPtrAlign(AddArg));
2599 MemOpChains.push_back(Load.getValue(1));
2600 RegsToPass.push_back(std::make_pair(j, Load));
2601 }
2602
 2603 // If the parameter size exceeds the register area, the "offset" value
 2604 // helps us to calculate the stack slot for the remaining part properly.
2605 offset = RegEnd - RegBegin;
2606
2607 CCInfo.nextInRegsParam();
2608 }
2609
2610 if (Flags.getByValSize() > 4*offset) {
2611 auto PtrVT = getPointerTy(DAG.getDataLayout());
2612 SDValue Dst;
2613 MachinePointerInfo DstInfo;
2614 std::tie(Dst, DstInfo) =
2615 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2616 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
2617 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
2618 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
2619 MVT::i32);
2620 SDValue AlignNode =
2621 DAG.getConstant(Flags.getNonZeroByValAlign().value(), dl, MVT::i32);
2622
2623 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
2624 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
2625 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
2626 Ops));
2627 }
2628 } else {
2629 assert(VA.isMemLoc());
2630 SDValue DstAddr;
2631 MachinePointerInfo DstInfo;
2632 std::tie(DstAddr, DstInfo) =
2633 computeAddrForCallArg(dl, DAG, VA, StackPtr, isTailCall, SPDiff);
2634
2635 SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo);
2636 MemOpChains.push_back(Store);
2637 }
2638 }
2639
2640 if (!MemOpChains.empty())
2641 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2642
2643 // Build a sequence of copy-to-reg nodes chained together with token chain
2644 // and flag operands which copy the outgoing args into the appropriate regs.
2645 SDValue InGlue;
2646 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2647 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2648 RegsToPass[i].second, InGlue);
2649 InGlue = Chain.getValue(1);
2650 }
2651
2652 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
2653 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
2654 // node so that legalize doesn't hack it.
2655 bool isDirect = false;
2656
2658 const GlobalValue *GVal = nullptr;
2659 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
2660 GVal = G->getGlobal();
2661 bool isStub = !TM.shouldAssumeDSOLocal(GVal) && Subtarget->isTargetMachO();
2662
2663 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
2664 bool isLocalARMFunc = false;
2665 auto PtrVt = getPointerTy(DAG.getDataLayout());
2666
2667 if (Subtarget->genLongCalls()) {
2668 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
2669 "long-calls codegen is not position independent!");
2670 // Handle a global address or an external symbol. If it's not one of
2671 // those, the target's already in a register, so we don't need to do
2672 // anything extra.
2673 if (isa<GlobalAddressSDNode>(Callee)) {
2674 if (Subtarget->genExecuteOnly()) {
2675 if (Subtarget->useMovt())
2676 ++NumMovwMovt;
2677 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2678 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2679 } else {
2680 // Create a constant pool entry for the callee address
2681 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2683 GVal, ARMPCLabelIndex, ARMCP::CPValue, 0);
2684
2685 // Get the address of the callee into a register
2686 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2687 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2688 Callee = DAG.getLoad(
2689 PtrVt, dl, DAG.getEntryNode(), Addr,
2691 }
2692 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2693 const char *Sym = S->getSymbol();
2694
2695 if (Subtarget->genExecuteOnly()) {
2696 if (Subtarget->useMovt())
2697 ++NumMovwMovt;
2698 Callee = DAG.getNode(ARMISD::Wrapper, dl, PtrVt,
2699 DAG.getTargetGlobalAddress(GVal, dl, PtrVt));
2700 } else {
2701 // Create a constant pool entry for the callee address
2702 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2704 *DAG.getContext(), Sym, ARMPCLabelIndex, 0);
2705
2706 // Get the address of the callee into a register
2707 SDValue Addr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2708 Addr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Addr);
2709 Callee = DAG.getLoad(
2710 PtrVt, dl, DAG.getEntryNode(), Addr,
2712 }
2713 }
2714 } else if (isa<GlobalAddressSDNode>(Callee)) {
2715 if (!PreferIndirect) {
2716 isDirect = true;
2717 bool isDef = GVal->isStrongDefinitionForLinker();
2718
2719 // ARM call to a local ARM function is predicable.
2720 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2721 // tBX takes a register source operand.
2722 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2723 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2724 Callee = DAG.getNode(
2725 ARMISD::WrapperPIC, dl, PtrVt,
2726 DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2727 Callee = DAG.getLoad(
2728 PtrVt, dl, DAG.getEntryNode(), Callee,
2732 } else if (Subtarget->isTargetCOFF()) {
2733 assert(Subtarget->isTargetWindows() &&
2734 "Windows is the only supported COFF target");
2735 unsigned TargetFlags = ARMII::MO_NO_FLAG;
2736 if (GVal->hasDLLImportStorageClass())
2737 TargetFlags = ARMII::MO_DLLIMPORT;
2738 else if (!TM.shouldAssumeDSOLocal(GVal))
2739 TargetFlags = ARMII::MO_COFFSTUB;
2740 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, /*offset=*/0,
2741 TargetFlags);
2742 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
2743 Callee =
2744 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2745 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2747 } else {
2748 Callee = DAG.getTargetGlobalAddress(GVal, dl, PtrVt, 0, 0);
2749 }
2750 }
2751 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2752 isDirect = true;
2753 // tBX takes a register source operand.
2754 const char *Sym = S->getSymbol();
2755 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2756 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2759 ARMPCLabelIndex, 4);
2760 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, Align(4));
2761 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2762 Callee = DAG.getLoad(
2763 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2765 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2766 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2767 } else {
2768 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2769 }
2770 }
2771
2772 if (isCmseNSCall) {
2773 assert(!isARMFunc && !isDirect &&
2774 "Cannot handle call to ARM function or direct call");
2775 if (NumBytes > 0) {
2777 "call to non-secure function would "
2778 "require passing arguments on stack",
2779 dl.getDebugLoc());
2780 DAG.getContext()->diagnose(Diag);
2781 }
2782 if (isStructRet) {
2785 "call to non-secure function would return value through pointer",
2786 dl.getDebugLoc());
2787 DAG.getContext()->diagnose(Diag);
2788 }
2789 }
2790
2791 // FIXME: handle tail calls differently.
2792 unsigned CallOpc;
2793 if (Subtarget->isThumb()) {
2794 if (GuardWithBTI)
2795 CallOpc = ARMISD::t2CALL_BTI;
2796 else if (isCmseNSCall)
2797 CallOpc = ARMISD::tSECALL;
2798 else if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2799 CallOpc = ARMISD::CALL_NOLINK;
2800 else
2801 CallOpc = ARMISD::CALL;
2802 } else {
2803 if (!isDirect && !Subtarget->hasV5TOps())
2804 CallOpc = ARMISD::CALL_NOLINK;
2805 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2806 // Emit regular call when code size is the priority
2807 !Subtarget->hasMinSize())
2808 // "mov lr, pc; b _foo" to avoid confusing the RSP
2809 CallOpc = ARMISD::CALL_NOLINK;
2810 else
2811 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2812 }
2813
 2814 // We don't usually want to end the call-sequence here because we would tidy
 2815 // the frame up *after* the call. However, in the ABI-changing tail-call case
 2816 // we've carefully laid out the parameters so that when sp is reset they'll be
 2817 // in the correct location.
2818 if (isTailCall && !isSibCall) {
2819 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, dl);
2820 InGlue = Chain.getValue(1);
2821 }
2822
2823 std::vector<SDValue> Ops;
2824 Ops.push_back(Chain);
2825 Ops.push_back(Callee);
2826
2827 if (isTailCall) {
2828 Ops.push_back(DAG.getTargetConstant(SPDiff, dl, MVT::i32));
2829 }
2830
2831 // Add argument registers to the end of the list so that they are known live
2832 // into the call.
2833 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2834 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2835 RegsToPass[i].second.getValueType()));
2836
2837 // Add a register mask operand representing the call-preserved registers.
2838 const uint32_t *Mask;
2839 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2840 if (isThisReturn) {
2841 // For 'this' returns, use the R0-preserving mask if applicable
2842 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2843 if (!Mask) {
2844 // Set isThisReturn to false if the calling convention is not one that
2845 // allows 'returned' to be modeled in this way, so LowerCallResult does
2846 // not try to pass 'this' straight through
2847 isThisReturn = false;
2848 Mask = ARI->getCallPreservedMask(MF, CallConv);
2849 }
2850 } else
2851 Mask = ARI->getCallPreservedMask(MF, CallConv);
2852
2853 assert(Mask && "Missing call preserved mask for calling convention");
2854 Ops.push_back(DAG.getRegisterMask(Mask));
2855
2856 if (InGlue.getNode())
2857 Ops.push_back(InGlue);
2858
2859 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2860 if (isTailCall) {
2862 SDValue Ret = DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2863 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2864 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2865 return Ret;
2866 }
2867
2868 // Returns a chain and a flag for retval copy to use.
2869 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2870 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2871 InGlue = Chain.getValue(1);
2872 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2873
2874 // If we're guaranteeing tail-calls will be honoured, the callee must
2875 // pop its own argument stack on return. But this call is *not* a tail call so
2876 // we need to undo that after it returns to restore the status-quo.
2877 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
2878 uint64_t CalleePopBytes =
2879 canGuaranteeTCO(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : -1ULL;
2880
2881 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, CalleePopBytes, InGlue, dl);
2882 if (!Ins.empty())
2883 InGlue = Chain.getValue(1);
2884
2885 // Handle result values, copying them out of physregs into vregs that we
2886 // return.
2887 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2888 InVals, isThisReturn,
2889 isThisReturn ? OutVals[0] : SDValue());
2890}
2891
2892/// HandleByVal - Every parameter *after* a byval parameter is passed
2893/// on the stack. Remember the next parameter register to allocate,
 2894 /// and then confiscate the rest of the parameter registers to ensure
2895/// this.
2896void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2897 Align Alignment) const {
2898 // Byval (as with any stack) slots are always at least 4 byte aligned.
2899 Alignment = std::max(Alignment, Align(4));
2900
2901 unsigned Reg = State->AllocateReg(GPRArgRegs);
2902 if (!Reg)
2903 return;
2904
2905 unsigned AlignInRegs = Alignment.value() / 4;
2906 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2907 for (unsigned i = 0; i < Waste; ++i)
2908 Reg = State->AllocateReg(GPRArgRegs);
2909
2910 if (!Reg)
2911 return;
2912
2913 unsigned Excess = 4 * (ARM::R4 - Reg);
2914
 2915 // Special case when NSAA != SP and the parameter size is greater than the
 2916 // size of all remaining GPR regs. In that case we can't split the parameter;
 2917 // we must send it to the stack. We also must set the NCRN to R4, wasting all
 2918 // the remaining registers.
2919 const unsigned NSAAOffset = State->getStackSize();
2920 if (NSAAOffset != 0 && Size > Excess) {
2921 while (State->AllocateReg(GPRArgRegs))
2922 ;
2923 return;
2924 }
2925
 2926 // The first register for the byval parameter is the first register that
 2927 // wasn't allocated before this method call, so it would be "reg".
 2928 // If the parameter is small enough to be saved in the range [reg, r4), then
 2929 // the end (first after last) register would be reg + param-size-in-regs;
 2930 // otherwise the parameter would be split between registers and the stack,
 2931 // and the end register would be r4 in this case.
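// Worked example (hypothetical, assuming no stack arguments yet): a 12-byte
// byval arriving when r2 is the next free GPR gets the range [r2, r4); r2 and
// r3 carry 8 bytes and Size is later reduced to the 4 bytes left for the stack.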
2932 unsigned ByValRegBegin = Reg;
2933 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2934 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
 2935 // Note, the first register was already allocated at the beginning of this
 2936 // function; allocate the remaining registers we need.
2937 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2938 State->AllocateReg(GPRArgRegs);
2939 // A byval parameter that is split between registers and memory needs its
2940 // size truncated here.
2941 // In the case where the entire structure fits in registers, we set the
2942 // size in memory to zero.
2943 Size = std::max<int>(Size - Excess, 0);
2944}
2945
2946/// MatchingStackOffset - Return true if the given stack call argument is
2947/// already available in the same position (relatively) of the caller's
2948/// incoming argument stack.
2949static
2952 const TargetInstrInfo *TII) {
2953 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2954 int FI = std::numeric_limits<int>::max();
2955 if (Arg.getOpcode() == ISD::CopyFromReg) {
2956 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2957 if (!VR.isVirtual())
2958 return false;
2959 MachineInstr *Def = MRI->getVRegDef(VR);
2960 if (!Def)
2961 return false;
2962 if (!Flags.isByVal()) {
2963 if (!TII->isLoadFromStackSlot(*Def, FI))
2964 return false;
2965 } else {
2966 return false;
2967 }
2968 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2969 if (Flags.isByVal())
2970 // ByVal argument is passed in as a pointer but it's now being
2971 // dereferenced. e.g.
2972 // define @foo(%struct.X* %A) {
2973 // tail call @bar(%struct.X* byval %A)
2974 // }
2975 return false;
2976 SDValue Ptr = Ld->getBasePtr();
2977 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2978 if (!FINode)
2979 return false;
2980 FI = FINode->getIndex();
2981 } else
2982 return false;
2983
2984 assert(FI != std::numeric_limits<int>::max());
2985 if (!MFI.isFixedObjectIndex(FI))
2986 return false;
2987 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2988}
2989
2990/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2991/// for tail call optimization. Targets which want to do tail call
2992/// optimization should implement this function.
2993bool ARMTargetLowering::IsEligibleForTailCallOptimization(
2994 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
2995 bool isCalleeStructRet, bool isCallerStructRet,
2997 const SmallVectorImpl<SDValue> &OutVals,
2999 const bool isIndirect) const {
3001 const Function &CallerF = MF.getFunction();
3002 CallingConv::ID CallerCC = CallerF.getCallingConv();
3003
3004 assert(Subtarget->supportsTailCall());
3005
3006 // Indirect tail calls cannot be optimized for Thumb1 if the args
3007 // to the call take up r0-r3. The reason is that there are no legal registers
3008 // left to hold the pointer to the function to be called.
3009 // Similarly, if the function uses return address sign and authentication,
3010 // r12 is needed to hold the PAC and is not available to hold the callee
3011 // address.
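// Example: with four i32 arguments occupying r0-r3, Thumb1 has no legal
// register left to hold an indirect callee address, so such calls are rejected
// below; likewise, return-address signing reserves r12 for the PAC.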
3012 if (Outs.size() >= 4 &&
3013 (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) {
3014 if (Subtarget->isThumb1Only())
3015 return false;
3016 // Conservatively assume the function spills LR.
3018 return false;
3019 }
3020
3021 // Look for obvious safe cases to perform tail call optimization that do not
3022 // require ABI changes. This is what gcc calls sibcall.
3023
3024 // Exception-handling functions need a special set of instructions to indicate
3025 // a return to the hardware. Tail-calling another function would probably
3026 // break this.
3027 if (CallerF.hasFnAttribute("interrupt"))
3028 return false;
3029
3030 if (canGuaranteeTCO(CalleeCC, getTargetMachine().Options.GuaranteedTailCallOpt))
3031 return CalleeCC == CallerCC;
3032
3033 // Also avoid sibcall optimization if either caller or callee uses struct
3034 // return semantics.
3035 if (isCalleeStructRet || isCallerStructRet)
3036 return false;
3037
3038 // Externally-defined functions with weak linkage should not be
3039 // tail-called on ARM when the OS does not support dynamic
3040 // pre-emption of symbols, as the AAELF spec requires normal calls
3041 // to undefined weak functions to be replaced with a NOP or jump to the
3042 // next instruction. The behaviour of branch instructions in this
3043 // situation (as used for tail calls) is implementation-defined, so we
3044 // cannot rely on the linker replacing the tail call with a return.
3045 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3046 const GlobalValue *GV = G->getGlobal();
3048 if (GV->hasExternalWeakLinkage() &&
3049 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3050 return false;
3051 }
3052
3053 // Check that the call results are passed in the same way.
3054 LLVMContext &C = *DAG.getContext();
3056 getEffectiveCallingConv(CalleeCC, isVarArg),
3057 getEffectiveCallingConv(CallerCC, CallerF.isVarArg()), MF, C, Ins,
3058 CCAssignFnForReturn(CalleeCC, isVarArg),
3059 CCAssignFnForReturn(CallerCC, CallerF.isVarArg())))
3060 return false;
3061 // The callee has to preserve all registers the caller needs to preserve.
3062 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3063 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3064 if (CalleeCC != CallerCC) {
3065 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3066 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3067 return false;
3068 }
3069
3070 // If Caller's vararg or byval argument has been split between registers and
3071 // stack, do not perform tail call, since part of the argument is in caller's
3072 // local frame.
3073 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
3074 if (AFI_Caller->getArgRegsSaveSize())
3075 return false;
3076
3077 // If the callee takes no arguments then go on to check the results of the
3078 // call.
3079 if (!Outs.empty()) {
3080 // Check if stack adjustment is needed. For now, do not do this if any
3081 // argument is passed on the stack.
3083 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3084 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3085 if (CCInfo.getStackSize()) {
3086 // Check if the arguments are already laid out in the right way as
3087 // the caller's fixed stack objects.
3088 MachineFrameInfo &MFI = MF.getFrameInfo();
3089 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3090 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
3091 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
3092 i != e;
3093 ++i, ++realArgIdx) {
3094 CCValAssign &VA = ArgLocs[i];
3095 EVT RegVT = VA.getLocVT();
3096 SDValue Arg = OutVals[realArgIdx];
3097 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3099 return false;
3100 if (VA.needsCustom() && (RegVT == MVT::f64 || RegVT == MVT::v2f64)) {
3101 // f64 and vector types are split into multiple registers or
3102 // register/stack-slot combinations. The types will not match
3103 // the registers; give up on memory f64 refs until we figure
3104 // out what to do about this.
3105 if (!VA.isRegLoc())
3106 return false;
3107 if (!ArgLocs[++i].isRegLoc())
3108 return false;
3109 if (RegVT == MVT::v2f64) {
3110 if (!ArgLocs[++i].isRegLoc())
3111 return false;
3112 if (!ArgLocs[++i].isRegLoc())
3113 return false;
3114 }
3115 } else if (!VA.isRegLoc()) {
3116 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
3117 MFI, MRI, TII))
3118 return false;
3119 }
3120 }
3121 }
3122
3123 const MachineRegisterInfo &MRI = MF.getRegInfo();
3124 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3125 return false;
3126 }
3127
3128 return true;
3129}
3130
3131bool
3132ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
3133 MachineFunction &MF, bool isVarArg,
3135 LLVMContext &Context) const {
3137 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3138 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3139}
3140
3142 const SDLoc &DL, SelectionDAG &DAG) {
3143 const MachineFunction &MF = DAG.getMachineFunction();
3144 const Function &F = MF.getFunction();
3145
3146 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
3147
3148 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
3149 // version of the "preferred return address". These offsets affect the return
3150 // instruction if this is a return from PL1 without hypervisor extensions.
3151 // IRQ/FIQ: +4 "subs pc, lr, #4"
3152 // SWI: 0 "subs pc, lr, #0"
3153 // ABORT: +4 "subs pc, lr, #4"
3154 // UNDEF: +4/+2 "subs pc, lr, #0"
3155 // UNDEF varies depending on where the exception came from ARM or Thumb
3156 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
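// For example, an IRQ handler therefore ends up returning with
// "subs pc, lr, #4"; the offset chosen below is encoded as an extra operand
// on the ARMISD::INTRET_GLUE node.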
3157
3158 int64_t LROffset;
3159 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
3160 IntKind == "ABORT")
3161 LROffset = 4;
3162 else if (IntKind == "SWI" || IntKind == "UNDEF")
3163 LROffset = 0;
3164 else
3165 report_fatal_error("Unsupported interrupt attribute. If present, value "
3166 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
3167
3168 RetOps.insert(RetOps.begin() + 1,
3169 DAG.getConstant(LROffset, DL, MVT::i32, false));
3170
3171 return DAG.getNode(ARMISD::INTRET_GLUE, DL, MVT::Other, RetOps);
3172}
3173
3174SDValue
3175ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3176 bool isVarArg,
3178 const SmallVectorImpl<SDValue> &OutVals,
3179 const SDLoc &dl, SelectionDAG &DAG) const {
3180 // CCValAssign - represent the assignment of the return value to a location.
3182
3183 // CCState - Info about the registers and stack slots.
3184 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3185 *DAG.getContext());
3186
3187 // Analyze outgoing return values.
3188 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
3189
3190 SDValue Glue;
3192 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
3193 bool isLittleEndian = Subtarget->isLittle();
3194
3197 AFI->setReturnRegsCount(RVLocs.size());
3198
3199 // Report error if cmse entry function returns structure through first ptr arg.
3200 if (AFI->isCmseNSEntryFunction() && MF.getFunction().hasStructRetAttr()) {
3201 // Note: using an empty SDLoc(), as the first line of the function is a
3202 // better place to report than the last line.
3205 "secure entry function would return value through pointer",
3206 SDLoc().getDebugLoc());
3207 DAG.getContext()->diagnose(Diag);
3208 }
3209
3210 // Copy the result values into the output registers.
3211 for (unsigned i = 0, realRVLocIdx = 0;
3212 i != RVLocs.size();
3213 ++i, ++realRVLocIdx) {
3214 CCValAssign &VA = RVLocs[i];
3215 assert(VA.isRegLoc() && "Can only return in registers!");
3216
3217 SDValue Arg = OutVals[realRVLocIdx];
3218 bool ReturnF16 = false;
3219
3220 if (Subtarget->hasFullFP16() && Subtarget->isTargetHardFloat()) {
3221 // Half-precision return values can be returned like this:
3222 //
3223 // t11 f16 = fadd ...
3224 // t12: i16 = bitcast t11
3225 // t13: i32 = zero_extend t12
3226 // t14: f32 = bitcast t13 <~~~~~~~ Arg
3227 //
3228 // to avoid code generation for bitcasts, we simply set Arg to the node
3229 // that produces the f16 value, t11 in this case.
3230 //
3231 if (Arg.getValueType() == MVT::f32 && Arg.getOpcode() == ISD::BITCAST) {
3232 SDValue ZE = Arg.getOperand(0);
3233 if (ZE.getOpcode() == ISD::ZERO_EXTEND && ZE.getValueType() == MVT::i32) {
3234 SDValue BC = ZE.getOperand(0);
3235 if (BC.getOpcode() == ISD::BITCAST && BC.getValueType() == MVT::i16) {
3236 Arg = BC.getOperand(0);
3237 ReturnF16 = true;
3238 }
3239 }
3240 }
3241 }
3242
3243 switch (VA.getLocInfo()) {
3244 default: llvm_unreachable("Unknown loc info!");
3245 case CCValAssign::Full: break;
3246 case CCValAssign::BCvt:
3247 if (!ReturnF16)
3248 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3249 break;
3250 }
3251
3252 // Mask f16 arguments if this is a CMSE nonsecure entry.
3253 auto RetVT = Outs[realRVLocIdx].ArgVT;
3254 if (AFI->isCmseNSEntryFunction() && (RetVT == MVT::f16)) {
3255 if (VA.needsCustom() && VA.getValVT() == MVT::f16) {
3256 Arg = MoveFromHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), Arg);
3257 } else {
3258 auto LocBits = VA.getLocVT().getSizeInBits();
3259 auto MaskValue = APInt::getLowBitsSet(LocBits, RetVT.getSizeInBits());
3260 SDValue Mask =
3261 DAG.getConstant(MaskValue, dl, MVT::getIntegerVT(LocBits));
3262 Arg = DAG.getNode(ISD::BITCAST, dl, MVT::getIntegerVT(LocBits), Arg);
3263 Arg = DAG.getNode(ISD::AND, dl, MVT::getIntegerVT(LocBits), Arg, Mask);
3264 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
3265 }
3266 }
3267
3268 if (VA.needsCustom() &&
3269 (VA.getLocVT() == MVT::v2f64 || VA.getLocVT() == MVT::f64)) {
3270 if (VA.getLocVT() == MVT::v2f64) {
3271 // Extract the first half and return it in two registers.
3272 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3273 DAG.getConstant(0, dl, MVT::i32));
3274 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
3275 DAG.getVTList(MVT::i32, MVT::i32), Half);
3276
3277 Chain =
3278 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3279 HalfGPRs.getValue(isLittleEndian ? 0 : 1), Glue);
3280 Glue = Chain.getValue(1);
3281 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3282 VA = RVLocs[++i]; // skip ahead to next loc
3283 Chain =
3284 DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3285 HalfGPRs.getValue(isLittleEndian ? 1 : 0), Glue);
3286 Glue = Chain.getValue(1);
3287 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3288 VA = RVLocs[++i]; // skip ahead to next loc
3289
3290 // Extract the 2nd half and fall through to handle it as an f64 value.
3291 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
3292 DAG.getConstant(1, dl, MVT::i32));
3293 }
3294 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
3295 // available.
3296 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
3297 DAG.getVTList(MVT::i32, MVT::i32), Arg);
3298 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3299 fmrrd.getValue(isLittleEndian ? 0 : 1), Glue);
3300 Glue = Chain.getValue(1);
3301 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3302 VA = RVLocs[++i]; // skip ahead to next loc
3303 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
3304 fmrrd.getValue(isLittleEndian ? 1 : 0), Glue);
3305 } else
3306 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
3307
3308 // Guarantee that all emitted copies are
3309 // stuck together, avoiding something bad.
3310 Glue = Chain.getValue(1);
3311 RetOps.push_back(DAG.getRegister(
3312 VA.getLocReg(), ReturnF16 ? Arg.getValueType() : VA.getLocVT()));
3313 }
3314 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
3315 const MCPhysReg *I =
3316 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3317 if (I) {
3318 for (; *I; ++I) {
3319 if (ARM::GPRRegClass.contains(*I))
3320 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
3321 else if (ARM::DPRRegClass.contains(*I))
3323 else
3324 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3325 }
3326 }
3327
3328 // Update chain and glue.
3329 RetOps[0] = Chain;
3330 if (Glue.getNode())
3331 RetOps.push_back(Glue);
3332
3333 // CPUs which aren't M-class use a special sequence to return from
3334 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
3335 // though we use "subs pc, lr, #N").
3336 //
3337 // M-class CPUs actually use a normal return sequence with a special
3338 // (hardware-provided) value in LR, so the normal code path works.
3339 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
3340 !Subtarget->isMClass()) {
3341 if (Subtarget->isThumb1Only())
3342 report_fatal_error("interrupt attribute is not supported in Thumb1");
3343 return LowerInterruptReturn(RetOps, dl, DAG);
3344 }
3345
3348 return DAG.getNode(RetNode, dl, MVT::Other, RetOps);
3349}
3350
3351bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
3352 if (N->getNumValues() != 1)
3353 return false;
3354 if (!N->hasNUsesOfValue(1, 0))
3355 return false;
3356
3357 SDValue TCChain = Chain;
3358 SDNode *Copy = *N->use_begin();
3359 if (Copy->getOpcode() == ISD::CopyToReg) {
3360 // If the copy has a glue operand, we conservatively assume it isn't safe to
3361 // perform a tail call.
3362 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3363 return false;
3364 TCChain = Copy->getOperand(0);
3365 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
3366 SDNode *VMov = Copy;
3367 // f64 returned in a pair of GPRs.
3369 for (SDNode *U : VMov->uses()) {
3370 if (U->getOpcode() != ISD::CopyToReg)
3371 return false;
3372 Copies.insert(U);
3373 }
3374 if (Copies.size() > 2)
3375 return false;
3376
3377 for (SDNode *U : VMov->uses()) {
3378 SDValue UseChain = U->getOperand(0);
3379 if (Copies.count(UseChain.getNode()))
3380 // Second CopyToReg
3381 Copy = U;
3382 else {
3383 // We are at the top of this chain.
3384 // If the copy has a glue operand, we conservatively assume it
3385 // isn't safe to perform a tail call.
3386 if (U->getOperand(U->getNumOperands() - 1).getValueType() == MVT::Glue)
3387 return false;
3388 // First CopyToReg
3389 TCChain = UseChain;
3390 }
3391 }
3392 } else if (Copy->getOpcode() == ISD::BITCAST) {
3393 // f32 returned in a single GPR.
3394 if (!Copy->hasOneUse())
3395 return false;
3396 Copy = *Copy->use_begin();
3397 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
3398 return false;
3399 // If the copy has a glue operand, we conservatively assume it isn't safe to
3400 // perform a tail call.
3401 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
3402 return false;
3403 TCChain = Copy->getOperand(0);
3404 } else {
3405 return false;
3406 }
3407
3408 bool HasRet = false;
3409 for (const SDNode *U : Copy->uses()) {
3410 if (U->getOpcode() != ARMISD::RET_GLUE &&
3411 U->getOpcode() != ARMISD::INTRET_GLUE)
3412 return false;
3413 HasRet = true;
3414 }
3415
3416 if (!HasRet)
3417 return false;
3418
3419 Chain = TCChain;
3420 return true;
3421}
3422
3423bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3424 if (!Subtarget->supportsTailCall())
3425 return false;
3426
3427 if (!CI->isTailCall())
3428 return false;
3429
3430 return true;
3431}
3432
 3433 // Writing a 64-bit value, so we need to split it into two 32-bit values first
 3434 // and pass the low and high parts through.
3436 SDLoc DL(Op);
3437 SDValue WriteValue = Op->getOperand(2);
3438
3439 // This function is only supposed to be called for i64 type argument.
3440 assert(WriteValue.getValueType() == MVT::i64
3441 && "LowerWRITE_REGISTER called for non-i64 type argument.");
3442
3443 SDValue Lo, Hi;
3444 std::tie(Lo, Hi) = DAG.SplitScalar(WriteValue, DL, MVT::i32, MVT::i32);
3445 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
3446 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
3447}
3448
3449// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
3450// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
3451// one of the above mentioned nodes. It has to be wrapped because otherwise
3452// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
3453// be used to form addressing mode. These wrapped nodes will be selected
3454// into MOVi.
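// Sketch of the resulting DAG shape for a constant-pool address (illustrative
// only): (ARMISD::Wrapper (TargetConstantPool <cpidx>)), which instruction
// selection then folds into an address-forming instruction.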
3455SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
3456 SelectionDAG &DAG) const {
3457 EVT PtrVT = Op.getValueType();
3458 // FIXME there is no actual debug info here
3459 SDLoc dl(Op);
3460 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3461 SDValue Res;
3462
3463 // When generating execute-only code Constant Pools must be promoted to the
3464 // global data section. It's a bit ugly that we can't share them across basic
 3465 // blocks, but this way we guarantee that execute-only behaves correctly with
3466 // position-independent addressing modes.
3467 if (Subtarget->genExecuteOnly()) {
3468 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
3469 auto T = const_cast<Type*>(CP->getType());
3470 auto C = const_cast<Constant*>(CP->getConstVal());
3471 auto M = const_cast<Module*>(DAG.getMachineFunction().
3473 auto GV = new GlobalVariable(
3474 *M, T, /*isConstant=*/true, GlobalVariable::InternalLinkage, C,
3477 Twine(AFI->createPICLabelUId())
3478 );
3479 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
3480 dl, PtrVT);
3481 return LowerGlobalAddress(GA, DAG);
3482 }
3483
3484 // The 16-bit ADR instruction can only encode offsets that are multiples of 4,
3485 // so we need to align to at least 4 bytes when we don't have 32-bit ADR.
3486 Align CPAlign = CP->getAlign();
3487 if (Subtarget->isThumb1Only())
3488 CPAlign = std::max(CPAlign, Align(4));
3489 if (CP->isMachineConstantPoolEntry())
3490 Res =
3491 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CPAlign);
3492 else
3493 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CPAlign);
3494 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
3495}
3496
3498 // If we don't have a 32-bit pc-relative branch instruction then the jump
3499 // table consists of block addresses. Usually this is inline, but for
3500 // execute-only it must be placed out-of-line.
3501 if (Subtarget->genExecuteOnly() && !Subtarget->hasV8MBaselineOps())
3504}
3505
3506SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
3507 SelectionDAG &DAG) const {
3510 unsigned ARMPCLabelIndex = 0;
3511 SDLoc DL(Op);
3512 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3513 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
3514 SDValue CPAddr;
3515 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
3516 if (!IsPositionIndependent) {
3517 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, Align(4));
3518 } else {
3519 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3520 ARMPCLabelIndex = AFI->createPICLabelUId();
3522 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
3523 ARMCP::CPBlockAddress, PCAdj);
3524 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3525 }
3526 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
3527 SDValue Result = DAG.getLoad(
3528 PtrVT, DL, DAG.getEntryNode(), CPAddr,
3530 if (!IsPositionIndependent)
3531 return Result;
3532 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
3533 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
3534}
3535
3536/// Convert a TLS address reference into the correct sequence of loads
3537/// and calls to compute the variable's address for Darwin, and return an
3538/// SDValue containing the final node.
3539
3540/// Darwin only has one TLS scheme which must be capable of dealing with the
3541/// fully general situation, in the worst case. This means:
3542/// + "extern __thread" declaration.
3543/// + Defined in a possibly unknown dynamic library.
3544///
3545/// The general system is that each __thread variable has a [3 x i32] descriptor
3546/// which contains information used by the runtime to calculate the address. The
3547/// only part of this the compiler needs to know about is the first word, which
3548/// contains a function pointer that must be called with the address of the
3549/// entire descriptor in "r0".
3550///
3551/// Since this descriptor may be in a different unit, in general access must
3552/// proceed along the usual ARM rules. A common sequence to produce is:
3553///
3554/// movw rT1, :lower16:_var$non_lazy_ptr
3555/// movt rT1, :upper16:_var$non_lazy_ptr
3556/// ldr r0, [rT1]
3557/// ldr rT2, [r0]
3558/// blx rT2
3559/// [...address now in r0...]
3560SDValue
3561ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
3562 SelectionDAG &DAG) const {
3563 assert(Subtarget->isTargetDarwin() &&
3564 "This function expects a Darwin target");
3565 SDLoc DL(Op);
3566
 3567 // The first step is to get the address of the actual global symbol. This is where
3568 // the TLS descriptor lives.
3569 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
3570
3571 // The first entry in the descriptor is a function pointer that we must call
3572 // to obtain the address of the variable.
3573 SDValue Chain = DAG.getEntryNode();
3574 SDValue FuncTLVGet = DAG.getLoad(
3575 MVT::i32, DL, Chain, DescAddr,
3579 Chain = FuncTLVGet.getValue(1);
3580
3582 MachineFrameInfo &MFI = F.getFrameInfo();
3583 MFI.setAdjustsStack(true);
3584
3585 // TLS calls preserve all registers except those that absolutely must be
3586 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
3587 // silly).
3588 auto TRI =
3590 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
3592
 3593 // Finally, we can make the call. This is just a degenerate version of a
 3594 // normal ARM call node: r0 takes the address of the descriptor, and the
 3595 // call returns the address of the variable in this thread.
3596 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
3597 Chain =
3598 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3599 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
3600 DAG.getRegisterMask(Mask), Chain.getValue(1));
3601 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
3602}
3603
3604SDValue
3605ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
3606 SelectionDAG &DAG) const {
3607 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3608
3609 SDValue Chain = DAG.getEntryNode();
3610 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3611 SDLoc DL(Op);
3612
3613 // Load the current TEB (thread environment block)
3614 SDValue Ops[] = {Chain,
3615 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
3616 DAG.getTargetConstant(15, DL, MVT::i32),
3617 DAG.getTargetConstant(0, DL, MVT::i32),
3618 DAG.getTargetConstant(13, DL, MVT::i32),
3619 DAG.getTargetConstant(0, DL, MVT::i32),
3620 DAG.getTargetConstant(2, DL, MVT::i32)};
3621 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
3622 DAG.getVTList(MVT::i32, MVT::Other), Ops);
3623
3624 SDValue TEB = CurrentTEB.getValue(0);
3625 Chain = CurrentTEB.getValue(1);
3626
3627 // Load the ThreadLocalStoragePointer from the TEB
3628 // A pointer to the TLS array is located at offset 0x2c from the TEB.
3629 SDValue TLSArray =
3630 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
3631 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3632
3633 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
3634 // offset into the TLSArray.
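// In other words, the address computed below is (sketch):
//   SlotAddr = TLSArray + (_tls_index << 2)
//   TLS      = *SlotAddr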
3635
3636 // Load the TLS index from the C runtime
3637 SDValue TLSIndex =
3638 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
3639 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
3640 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
3641
3642 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
3643 DAG.getConstant(2, DL, MVT::i32));
3644 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
3645 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
3647
3648 // Get the offset of the start of the .tls section (section base)
3649 const auto *GA = cast<GlobalAddressSDNode>(Op);
3650 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
3651 SDValue Offset = DAG.getLoad(
3652 PtrVT, DL, Chain,
3653 DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
3654 DAG.getTargetConstantPool(CPV, PtrVT, Align(4))),
3656
3657 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
3658}
3659
3660// Lower ISD::GlobalTLSAddress using the "general dynamic" model
3661SDValue
3662ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
3663 SelectionDAG &DAG) const {
3664 SDLoc dl(GA);
3665 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3666 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3669 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3671 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3672 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
3673 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3674 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
3675 Argument = DAG.getLoad(
3676 PtrVT, dl, DAG.getEntryNode(), Argument,
3678 SDValue Chain = Argument.getValue(1);
3679
3680 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3681 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
3682
3683 // call __tls_get_addr.
3685 ArgListEntry Entry;
3686 Entry.Node = Argument;
3687 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
3688 Args.push_back(Entry);
3689
3690 // FIXME: is there useful debug info available here?
3692 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3694 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
3695
3696 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3697 return CallResult.first;
3698}
3699
3700// Lower ISD::GlobalTLSAddress using the "initial exec" or
3701// "local exec" model.
3702SDValue
3703ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
3704 SelectionDAG &DAG,
3705 TLSModel::Model model) const {
3706 const GlobalValue *GV = GA->getGlobal();
3707 SDLoc dl(GA);
3709 SDValue Chain = DAG.getEntryNode();
3710 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3711 // Get the Thread Pointer
3713
3714 if (model == TLSModel::InitialExec) {
3717 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3718 // Initial exec model.
3719 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
3721 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
3723 true);
3724 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3725 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3726 Offset = DAG.getLoad(
3727 PtrVT, dl, Chain, Offset,
3729 Chain = Offset.getValue(1);
3730
3731 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3732 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
3733
3734 Offset = DAG.getLoad(
3735 PtrVT, dl, Chain, Offset,
3737 } else {
3738 // local exec model
3739 assert(model == TLSModel::LocalExec);
3742 Offset = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3743 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
3744 Offset = DAG.getLoad(
3745 PtrVT, dl, Chain, Offset,
3747 }
3748
3749 // The address of the thread local variable is the add of the thread
3750 // pointer with the offset of the variable.
3751 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
3752}
3753
3754SDValue
3755ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
3756 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3757 if (DAG.getTarget().useEmulatedTLS())
3758 return LowerToTLSEmulatedModel(GA, DAG);
3759
3760 if (Subtarget->isTargetDarwin())
3761 return LowerGlobalTLSAddressDarwin(Op, DAG);
3762
3763 if (Subtarget->isTargetWindows())
3764 return LowerGlobalTLSAddressWindows(Op, DAG);
3765
3766 // TODO: implement the "local dynamic" model
3767 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
3769
3770 switch (model) {
3773 return LowerToTLSGeneralDynamicModel(GA, DAG);
3776 return LowerToTLSExecModels(GA, DAG, model);
3777 }
3778 llvm_unreachable("bogus TLS model");
3779}
3780
3781/// Return true if all users of V are within function F, looking through
3782/// ConstantExprs.
3783static bool allUsersAreInFunction(const Value *V, const Function *F) {
3784 SmallVector<const User*,4> Worklist(V->users());
3785 while (!Worklist.empty()) {
3786 auto *U = Worklist.pop_back_val();
3787 if (isa<ConstantExpr>(U)) {
3788 append_range(Worklist, U->users());
3789 continue;
3790 }
3791
3792 auto *I = dyn_cast<Instruction>(U);
3793 if (!I || I->getParent()->getParent() != F)
3794 return false;
3795 }
3796 return true;
3797}
3798
3800 const GlobalValue *GV, SelectionDAG &DAG,
3801 EVT PtrVT, const SDLoc &dl) {
3802 // If we're creating a pool entry for a constant global with unnamed address,
3803 // and the global is small enough, we can emit it inline into the constant pool
3804 // to save ourselves an indirection.
3805 //
3806 // This is a win if the constant is only used in one function (so it doesn't
3807 // need to be duplicated) or duplicating the constant wouldn't increase code
3808 // size (implying the constant is no larger than 4 bytes).
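// For instance (hypothetical case), a small internal constant string used by
// a single function can be emitted straight into that function's constant
// pool, so its data is reachable without first loading the global's address.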
3809 const Function &F = DAG.getMachineFunction().getFunction();
3810
 3811 // We rely on this decision to inline being idempotent and unrelated to the
 3812 // use-site. We know that if we inline a variable at one use site, we'll
 3813 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
 3814 // doesn't know about this optimization, so bail out if it's enabled, else
 3815 // we could decide to inline here (and thus never emit the GV) but require
 3816 // the GV from fast-isel generated code.
3819 return SDValue();
3820
3821 auto *GVar = dyn_cast<GlobalVariable>(GV);
3822 if (!GVar || !GVar->hasInitializer() ||
3823 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3824 !GVar->hasLocalLinkage())
3825 return SDValue();
3826
3827 // If we inline a value that contains relocations, we move the relocations
3828 // from .data to .text. This is not allowed in position-independent code.
3829 auto *Init = GVar->getInitializer();
3830 if ((TLI->isPositionIndependent() || TLI->getSubtarget()->isROPI()) &&
3831 Init->needsDynamicRelocation())
3832 return SDValue();
3833
3834 // The constant islands pass can only really deal with alignment requests
3835 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3836 // any type wanting greater alignment requirements than 4 bytes. We also
3837 // can only promote constants that are multiples of 4 bytes in size or
3838 // are paddable to a multiple of 4. Currently we only try and pad constants
3839 // that are strings for simplicity.
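// Padding example (hypothetical): a 6-byte string initializer gives
// RequiredPadding == 2 below, so it is padded with two NUL bytes to a
// PaddedSize of 8 before being placed in the constant pool.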
3840 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3841 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3842 Align PrefAlign = DAG.getDataLayout().getPreferredAlign(GVar);
3843 unsigned RequiredPadding = 4 - (Size % 4);
3844 bool PaddingPossible =
3845 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3846 if (!PaddingPossible || PrefAlign > 4 || Size > ConstpoolPromotionMaxSize ||
3847 Size == 0)
3848 return SDValue();
3849
3850 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3853
3854 // We can't bloat the constant pool too much, else the ConstantIslands pass
3855 // may fail to converge. If we haven't promoted this global yet (it may have
3856 // multiple uses), and promoting it would increase the constant pool size (Sz
3857 // > 4), ensure we have space to do so up to MaxTotal.
3858 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3859 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3860 ConstpoolPromotionMaxTotal)
3861 return SDValue();
3862
3863 // This is only valid if all users are in a single function; we can't clone
3864 // the constant in general. The LLVM IR unnamed_addr allows merging
3865 // constants, but not cloning them.
3866 //
3867 // We could potentially allow cloning if we could prove all uses of the
3868 // constant in the current function don't care about the address, like
3869 // printf format strings. But that isn't implemented for now.
3870 if (!allUsersAreInFunction(GVar, &F))
3871 return SDValue();
3872
3873 // We're going to inline this global. Pad it out if needed.
3874 if (RequiredPadding != 4) {
3875 StringRef S = CDAInit->getAsString();
3876
3877 SmallVector<uint8_t, 16> V(S.size());
3878 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3879 while (RequiredPadding--)
3880 V.push_back(0);
3881 Init = ConstantDataArray::get(*DAG.getContext(), V);
3882 }
3883
3884 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3885 SDValue CPAddr = DAG.getTargetConstantPool(CPVal, PtrVT, Align(4));
3886 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3887 AFI->markGlobalAsPromotedToConstantPool(GVar);
3888 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3889 PaddedSize - 4);
3890 }
3891 ++NumConstpoolPromoted;
3892 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3893}
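// Editorial sketch, not part of ARMISelLowering.cpp: the size/padding decision
// above restated as a standalone helper so the arithmetic is easy to check.
// The helper name and the 64-byte default (mirroring the
// arm-promote-constant-max-size option) are illustrative assumptions.
#include <optional>

static std::optional<unsigned> paddedPromotionSize(unsigned Size,
                                                   bool InitIsString,
                                                   unsigned MaxSize = 64) {
  unsigned RequiredPadding = 4 - (Size % 4); // 4 means already a multiple of 4
  bool PaddingPossible = RequiredPadding == 4 || InitIsString;
  if (!PaddingPossible || Size == 0 || Size > MaxSize)
    return std::nullopt;                     // not promotable
  return Size + (RequiredPadding == 4 ? 0 : RequiredPadding);
}
// For example, a 6-byte string initializer gets RequiredPadding = 2 and is
// emitted as an 8-byte constant-pool entry.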
3894
3895static bool isReadOnly(const GlobalValue *GV) {
3896 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3897 if (!(GV = GA->getAliaseeObject()))
3898 return false;
3899 if (const auto *V = dyn_cast<GlobalVariable>(GV))
3900 return V->isConstant();
3901 return isa<Function>(GV);
3902}
3903
3904SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3905 SelectionDAG &DAG) const {
3906 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3907 default: llvm_unreachable("unknown object format");
3908 case Triple::COFF:
3909 return LowerGlobalAddressWindows(Op, DAG);
3910 case Triple::ELF:
3911 return LowerGlobalAddressELF(Op, DAG);
3912 case Triple::MachO:
3913 return LowerGlobalAddressDarwin(Op, DAG);
3914 }
3915}
3916
3917SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3918 SelectionDAG &DAG) const {
3919 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3920 SDLoc dl(Op);
3921 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3922 bool IsRO = isReadOnly(GV);
3923
3924 // promoteToConstantPool only if not generating XO text section
3925 if (GV->isDSOLocal() && !Subtarget->genExecuteOnly())
3926 if (SDValue V = promoteToConstantPool(this, GV, DAG, PtrVT, dl))
3927 return V;
3928
3929 if (isPositionIndependent()) {
3930 SDValue G = DAG.getTargetGlobalAddress(
3931 GV, dl, PtrVT, 0, GV->isDSOLocal() ? 0 : ARMII::MO_GOT);
3932 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3933 if (!GV->isDSOLocal())
3934 Result =
3935 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3936 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3937 return Result;
3938 } else if (Subtarget->isROPI() && IsRO) {
3939 // PC-relative.
3940 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3941 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3942 return Result;
3943 } else if (Subtarget->isRWPI() && !IsRO) {
3944 // SB-relative.
3945 SDValue RelAddr;
3946 if (Subtarget->useMovt()) {
3947 ++NumMovwMovt;
3948 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3949 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3950 } else { // use literal pool for address constant
3951 ARMConstantPoolValue *CPV =
3952 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3953 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
3954 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3955 RelAddr = DAG.getLoad(
3956 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3957 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3958 }
3959 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3960 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3961 return Result;
3962 }
3963
3964 // If we have T2 ops, we can materialize the address directly via a movt/movw
3965 // pair. This is always cheaper. If we need to generate execute-only code and
3966 // only have Thumb1 available, we can't use a constant pool and are forced to
3967 // use immediate relocations.
3968 if (Subtarget->useMovt() || Subtarget->genExecuteOnly()) {
3969 if (Subtarget->useMovt())
3970 ++NumMovwMovt;
3971 // FIXME: Once remat is capable of dealing with instructions with register
3972 // operands, expand this into two nodes.
3973 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3974 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3975 } else {
3976 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, Align(4));
3977 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3978 return DAG.getLoad(
3979 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3980 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3981 }
3982}
3983
3984SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3985 SelectionDAG &DAG) const {
3986 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3987 "ROPI/RWPI not currently supported for Darwin");
3988 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3989 SDLoc dl(Op);
3990 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3991
3992 if (Subtarget->useMovt())
3993 ++NumMovwMovt;
3994
3995 // FIXME: Once remat is capable of dealing with instructions with register
3996 // operands, expand this into multiple nodes
3997 unsigned Wrapper =
3998 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3999
4000 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
4001 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
4002
4003 if (Subtarget->isGVIndirectSymbol(GV))
4004 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
4005 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4006 return Result;
4007}
4008
4009SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
4010 SelectionDAG &DAG) const {
4011 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
4012 assert(Subtarget->useMovt() &&
4013 "Windows on ARM expects to use movw/movt");
4014 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
4015 "ROPI/RWPI not currently supported for Windows");
4016
4018 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4019 ARMII::TOF TargetFlags = ARMII::MO_NO_FLAG;
4020 if (GV->hasDLLImportStorageClass())
4021 TargetFlags = ARMII::MO_DLLIMPORT;
4022 else if (!TM.shouldAssumeDSOLocal(GV))
4023 TargetFlags = ARMII::MO_COFFSTUB;
4024 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4025 SDValue Result;
4026 SDLoc DL(Op);
4027
4028 ++NumMovwMovt;
4029
4030 // FIXME: Once remat is capable of dealing with instructions with register
4031 // operands, expand this into two nodes.
4032 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
4033 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*offset=*/0,
4034 TargetFlags));
4035 if (TargetFlags & (ARMII::MO_DLLIMPORT | ARMII::MO_COFFSTUB))
4036 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4037 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4038 return Result;
4039}
4040
4041SDValue
4042ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
4043 SDLoc dl(Op);
4044 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
4045 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
4046 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
4047 Op.getOperand(1), Val);
4048}
4049
4050SDValue
4051ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
4052 SDLoc dl(Op);
4053 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
4054 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
4055}
4056
4057SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
4058 SelectionDAG &DAG) const {
4059 SDLoc dl(Op);
4060 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
4061 Op.getOperand(0));
4062}
4063
4064SDValue ARMTargetLowering::LowerINTRINSIC_VOID(
4065 SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const {
4066 unsigned IntNo =
4067 Op.getConstantOperandVal(Op.getOperand(0).getValueType() == MVT::Other);
4068 switch (IntNo) {
4069 default:
4070 return SDValue(); // Don't custom lower most intrinsics.
4071 case Intrinsic::arm_gnu_eabi_mcount: {
4072 MachineFunction &MF = DAG.getMachineFunction();
4073 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4074 SDLoc dl(Op);
4075 SDValue Chain = Op.getOperand(0);
4076 // call "\01__gnu_mcount_nc"
4077 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
4078 const uint32_t *Mask =
4079 ARI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4080 assert(Mask && "Missing call preserved mask for calling convention");
4081 // Mark LR an implicit live-in.
4082 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4083 SDValue ReturnAddress =
4084 DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, PtrVT);
4085 constexpr EVT ResultTys[] = {MVT::Other, MVT::Glue};
4086 SDValue Callee =
4087 DAG.getTargetExternalSymbol("\01__gnu_mcount_nc", PtrVT, 0);
4088 SDValue RegisterMask = DAG.getRegisterMask(Mask);
4089 if (Subtarget->isThumb())
4090 return SDValue(
4091 DAG.getMachineNode(
4092 ARM::tBL_PUSHLR, dl, ResultTys,
4093 {ReturnAddress, DAG.getTargetConstant(ARMCC::AL, dl, PtrVT),
4094 DAG.getRegister(0, PtrVT), Callee, RegisterMask, Chain}),
4095 0);
4096 return SDValue(
4097 DAG.getMachineNode(ARM::BL_PUSHLR, dl, ResultTys,
4098 {ReturnAddress, Callee, RegisterMask, Chain}),
4099 0);
4100 }
4101 }
4102}
4103
4104SDValue
4105ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
4106 const ARMSubtarget *Subtarget) const {
4107 unsigned IntNo = Op.getConstantOperandVal(0);
4108 SDLoc dl(Op);
4109 switch (IntNo) {
4110 default: return SDValue(); // Don't custom lower most intrinsics.
4111 case Intrinsic::thread_pointer: {
4112 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4113 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
4114 }
4115 case Intrinsic::arm_cls: {
4116 const SDValue &Operand = Op.getOperand(1);
4117 const EVT VTy = Op.getValueType();
4118 SDValue SRA =
4119 DAG.getNode(ISD::SRA, dl, VTy, Operand, DAG.getConstant(31, dl, VTy));
4120 SDValue XOR = DAG.getNode(ISD::XOR, dl, VTy, SRA, Operand);
4121 SDValue SHL =
4122 DAG.getNode(ISD::SHL, dl, VTy, XOR, DAG.getConstant(1, dl, VTy));
4123 SDValue OR =
4124 DAG.getNode(ISD::OR, dl, VTy, SHL, DAG.getConstant(1, dl, VTy));
4125 SDValue Result = DAG.getNode(ISD::CTLZ, dl, VTy, OR);
4126 return Result;
4127 }
4128 case Intrinsic::arm_cls64: {
4129 // cls(x) = if cls(hi(x)) != 31 then cls(hi(x))
4130 // else 31 + clz(if hi(x) == 0 then lo(x) else not(lo(x)))
4131 const SDValue &Operand = Op.getOperand(1);
4132 const EVT VTy = Op.getValueType();
4133 SDValue Lo, Hi;
4134 std::tie(Lo, Hi) = DAG.SplitScalar(Operand, dl, VTy, VTy);
4135 SDValue Constant0 = DAG.getConstant(0, dl, VTy);
4136 SDValue Constant1 = DAG.getConstant(1, dl, VTy);
4137 SDValue Constant31 = DAG.getConstant(31, dl, VTy);
4138 SDValue SRAHi = DAG.getNode(ISD::SRA, dl, VTy, Hi, Constant31);
4139 SDValue XORHi = DAG.getNode(ISD::XOR, dl, VTy, SRAHi, Hi);
4140 SDValue SHLHi = DAG.getNode(ISD::SHL, dl, VTy, XORHi, Constant1);
4141 SDValue ORHi = DAG.getNode(ISD::OR, dl, VTy, SHLHi, Constant1);
4142 SDValue CLSHi = DAG.getNode(ISD::CTLZ, dl, VTy, ORHi);
4143 SDValue CheckLo =
4144 DAG.getSetCC(dl, MVT::i1, CLSHi, Constant31, ISD::CondCode::SETEQ);
4145 SDValue HiIsZero =
4146 DAG.getSetCC(dl, MVT::i1, Hi, Constant0, ISD::CondCode::SETEQ);
4147 SDValue AdjustedLo =
4148 DAG.getSelect(dl, VTy, HiIsZero, Lo, DAG.getNOT(dl, Lo, VTy));
4149 SDValue CLZAdjustedLo = DAG.getNode(ISD::CTLZ, dl, VTy, AdjustedLo);
4150 SDValue Result =
4151 DAG.getSelect(dl, VTy, CheckLo,
4152 DAG.getNode(ISD::ADD, dl, VTy, CLZAdjustedLo, Constant31), CLSHi);
4153 return Result;
4154 }
4155 case Intrinsic::eh_sjlj_lsda: {
4156 MachineFunction &MF = DAG.getMachineFunction();
4157 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4158 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
4159 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4160 SDValue CPAddr;
4161 bool IsPositionIndependent = isPositionIndependent();
4162 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
4163 ARMConstantPoolValue *CPV =
4164 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
4165 ARMCP::CPLSDA, PCAdj);
4166 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, Align(4));
4167 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
4168 SDValue Result = DAG.getLoad(
4169 PtrVT, dl, DAG.getEntryNode(), CPAddr,
4170 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4171
4172 if (IsPositionIndependent) {
4173 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
4174 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
4175 }
4176 return Result;
4177 }
4178 case Intrinsic::arm_neon_vabs:
4179 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
4180 Op.getOperand(1));
4181 case Intrinsic::arm_neon_vmulls:
4182 case Intrinsic::arm_neon_vmullu: {
4183 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
4184 ? ARMISD::VMULLs : ARMISD::VMULLu;
4185 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4186 Op.getOperand(1), Op.getOperand(2));
4187 }
4188 case Intrinsic::arm_neon_vminnm:
4189 case Intrinsic::arm_neon_vmaxnm: {
4190 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
4191 ? ISD::FMINNUM : ISD::FMAXNUM;
4192 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4193 Op.getOperand(1), Op.getOperand(2));
4194 }
4195 case Intrinsic::arm_neon_vminu:
4196 case Intrinsic::arm_neon_vmaxu: {
4197 if (Op.getValueType().isFloatingPoint())
4198 return SDValue();
4199 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
4200 ? ISD::UMIN : ISD::UMAX;
4201 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4202 Op.getOperand(1), Op.getOperand(2));
4203 }
4204 case Intrinsic::arm_neon_vmins:
4205 case Intrinsic::arm_neon_vmaxs: {
4206 // v{min,max}s is overloaded between signed integers and floats.
4207 if (!Op.getValueType().isFloatingPoint()) {
4208 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4209 ? ISD::SMIN : ISD::SMAX;
4210 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4211 Op.getOperand(1), Op.getOperand(2));
4212 }
4213 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
4214 ? ISD::FMINIMUM : ISD::FMAXIMUM;
4215 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
4216 Op.getOperand(1), Op.getOperand(2));
4217 }
4218 case Intrinsic::arm_neon_vtbl1:
4219 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
4220 Op.getOperand(1), Op.getOperand(2));
4221 case Intrinsic::arm_neon_vtbl2:
4222 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
4223 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4224 case Intrinsic::arm_mve_pred_i2v:
4225 case Intrinsic::arm_mve_pred_v2i:
4226 return DAG.getNode(ARMISD::PREDICATE_CAST, SDLoc(Op), Op.getValueType(),
4227 Op.getOperand(1));
4228 case Intrinsic::arm_mve_vreinterpretq:
4229 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(Op), Op.getValueType(),
4230 Op.getOperand(1));
4231 case Intrinsic::arm_mve_lsll:
4232 return DAG.getNode(ARMISD::LSLL, SDLoc(Op), Op->getVTList(),
4233 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4234 case Intrinsic::arm_mve_asrl:
4235 return DAG.getNode(ARMISD::ASRL, SDLoc(Op), Op->getVTList(),
4236 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4237 }
4238}
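// Editorial sketch, not part of ARMISelLowering.cpp: a scalar reference for
// the arm_cls64 expansion above, following the formula in its comment. The
// function names are illustrative; __builtin_clz is assumed (GCC/Clang).
#include <cstdint>

static unsigned refCls32(uint32_t X) {            // count leading sign bits
  uint32_t Sign = (uint32_t)((int32_t)X >> 31);
  uint32_t V = ((X ^ Sign) << 1) | 1;             // same SRA/XOR/SHL/OR trick
  return (unsigned)__builtin_clz(V);
}

static unsigned refCls64(uint64_t X) {
  uint32_t Lo = (uint32_t)X, Hi = (uint32_t)(X >> 32);
  unsigned ClsHi = refCls32(Hi);
  if (ClsHi != 31)
    return ClsHi;
  uint32_t AdjLo = (Hi == 0) ? Lo : ~Lo;
  return 31 + (AdjLo ? (unsigned)__builtin_clz(AdjLo) : 32);
}
// e.g. refCls64(0x000000000000FFFFULL) == 31 + clz(0xFFFF) == 31 + 16 == 47.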
4239
4240static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
4241 const ARMSubtarget *Subtarget) {
4242 SDLoc dl(Op);
4243 auto SSID = static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4244 if (SSID == SyncScope::SingleThread)
4245 return Op;
4246
4247 if (!Subtarget->hasDataBarrier()) {
4248 // Some ARMv6 cpus can support data barriers with an mcr instruction.
4249 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
4250 // here.
4251 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
4252 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
4253 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
4254 DAG.getConstant(0, dl, MVT::i32));
4255 }
4256
4257 AtomicOrdering Ord =
4258 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4259 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
4260 if (Subtarget->isMClass()) {
4261 // Only a full system barrier exists in the M-class architectures.
4262 Domain = ARM_MB::SY;
4263 } else if (Subtarget->preferISHSTBarriers() &&
4264 Ord == AtomicOrdering::Release) {
4265 // Swift happens to implement ISHST barriers in a way that's compatible with
4266 // Release semantics but weaker than ISH, so we'd be fools not to use
4267 // it. Beware: other processors probably don't!
4268 Domain = ARM_MB::ISHST;
4269 }
4270
4271 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
4272 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
4273 DAG.getConstant(Domain, dl, MVT::i32));
4274}
4275
4276static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
4277 const ARMSubtarget *Subtarget) {
4278 // ARM pre-v5TE and Thumb1 do not have preload instructions.
4279 if (!(Subtarget->isThumb2() ||
4280 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
4281 // Just preserve the chain.
4282 return Op.getOperand(0);
4283
4284 SDLoc dl(Op);
4285 unsigned isRead = ~Op.getConstantOperandVal(2) & 1;
4286 if (!isRead &&
4287 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
4288 // ARMv7 with MP extension has PLDW.
4289 return Op.getOperand(0);
4290
4291 unsigned isData = Op.getConstantOperandVal(4);
4292 if (Subtarget->isThumb()) {
4293 // Invert the bits.
4294 isRead = ~isRead & 1;
4295 isData = ~isData & 1;
4296 }
4297
4298 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
4299 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
4300 DAG.getConstant(isData, dl, MVT::i32));
4301}
4302
4303static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
4304 MachineFunction &MF = DAG.getMachineFunction();
4305 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
4306
4307 // vastart just stores the address of the VarArgsFrameIndex slot into the
4308 // memory location argument.
4309 SDLoc dl(Op);
4310 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
4311 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4312 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4313 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4314 MachinePointerInfo(SV));
4315}
4316
4317SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
4318 CCValAssign &NextVA,
4319 SDValue &Root,
4320 SelectionDAG &DAG,
4321 const SDLoc &dl) const {
4322 MachineFunction &MF = DAG.getMachineFunction();
4323 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4324
4325 const TargetRegisterClass *RC;
4326 if (AFI->isThumb1OnlyFunction())
4327 RC = &ARM::tGPRRegClass;
4328 else
4329 RC = &ARM::GPRRegClass;
4330
4331 // Transform the arguments stored in physical registers into virtual ones.
4332 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4333 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4334
4335 SDValue ArgValue2;
4336 if (NextVA.isMemLoc()) {
4337 MachineFrameInfo &MFI = MF.getFrameInfo();
4338 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
4339
4340 // Create load node to retrieve arguments from the stack.
4341 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4342 ArgValue2 = DAG.getLoad(
4343 MVT::i32, dl, Root, FIN,
4344 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4345 } else {
4346 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
4347 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
4348 }
4349 if (!Subtarget->isLittle())
4350 std::swap (ArgValue, ArgValue2);
4351 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
4352}
4353
4354// The remaining GPRs hold either the beginning of variable-argument
4355// data, or the beginning of an aggregate passed by value (usually
4356// byval). Either way, we allocate stack slots adjacent to the data
4357// provided by our caller, and store the unallocated registers there.
4358// If this is a variadic function, the va_list pointer will begin with
4359// these values; otherwise, this reassembles a (byval) structure that
4360// was split between registers and memory.
4361 // Return: The frame index that the registers were stored into.
4362int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
4363 const SDLoc &dl, SDValue &Chain,
4364 const Value *OrigArg,
4365 unsigned InRegsParamRecordIdx,
4366 int ArgOffset, unsigned ArgSize) const {
4367 // Currently, two use-cases are possible:
4368 // Case #1. Non-var-args function that takes a byval parameter.
4369 // Set up the first unallocated register as the first byval register and
4370 // eat all remaining registers
4371 // (these two actions are performed by the HandleByVal method).
4372 // Then, here, we initialize the stack frame with
4373 // "store-reg" instructions.
4374 // Case #2. Var-args function that doesn't contain byval parameters.
4375 // The same: eat all remaining unallocated registers and
4376 // initialize the stack frame.
4377
4378 MachineFunction &MF = DAG.getMachineFunction();
4379 MachineFrameInfo &MFI = MF.getFrameInfo();
4380 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4381 unsigned RBegin, REnd;
4382 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
4383 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
4384 } else {
4385 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4386 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
4387 REnd = ARM::R4;
4388 }
4389
4390 if (REnd != RBegin)
4391 ArgOffset = -4 * (ARM::R4 - RBegin);
4392
4393 auto PtrVT = getPointerTy(DAG.getDataLayout());
4394 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
4395 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
4396
4398 const TargetRegisterClass *RC =
4399 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
4400
4401 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
4402 Register VReg = MF.addLiveIn(Reg, RC);
4403 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
4404 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
4405 MachinePointerInfo(OrigArg, 4 * i));
4406 MemOps.push_back(Store);
4407 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
4408 }
4409
4410 if (!MemOps.empty())
4411 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4412 return FrameIndex;
4413}
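// Editorial note (illustrative, not from the original source): for a split
// byval argument whose first in-register piece starts in r2, RBegin = r2 and
// REnd = r4, so ArgOffset becomes -4 * (r4 - r2) = -8. The fixed stack object
// therefore starts 8 bytes below the caller-pushed portion, and storing r2 and
// r3 there makes the whole aggregate contiguous in memory again.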
4414
4415// Set up the stack frame that the va_list pointer will start from.
4416void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
4417 const SDLoc &dl, SDValue &Chain,
4418 unsigned ArgOffset,
4419 unsigned TotalArgRegsSaveSize,
4420 bool ForceMutable) const {
4421 MachineFunction &MF = DAG.getMachineFunction();
4422 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4423
4424 // Try to store any remaining integer argument regs
4425 // to their spots on the stack so that they may be loaded by dereferencing
4426 // the result of va_next.
4427 // If there are no regs to be stored, just point the address after the last
4428 // argument passed via the stack.
4429 int FrameIndex = StoreByValRegs(
4430 CCInfo, DAG, dl, Chain, nullptr, CCInfo.getInRegsParamsCount(),
4431 CCInfo.getStackSize(), std::max(4U, TotalArgRegsSaveSize));
4432 AFI->setVarArgsFrameIndex(FrameIndex);
4433}
4434
4435bool ARMTargetLowering::splitValueIntoRegisterParts(
4436 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
4437 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
4438 EVT ValueVT = Val.getValueType();
4439 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4440 unsigned ValueBits = ValueVT.getSizeInBits();
4441 unsigned PartBits = PartVT.getSizeInBits();
4442 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(ValueBits), Val);
4443 Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::getIntegerVT(PartBits), Val);
4444 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
4445 Parts[0] = Val;
4446 return true;
4447 }
4448 return false;
4449}
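// Editorial sketch, not part of ARMISelLowering.cpp: the same f16-in-f32
// packing expressed on plain integers. The half's 16 bits land in the low half
// of the 32-bit container; the upper bits are unspecified by ANY_EXTEND (zero
// here for simplicity). The function name is illustrative.
#include <cstdint>
#include <cstring>

static float packHalfBitsInFloat(uint16_t HalfBits) {
  uint32_t Container = HalfBits;        // "any-extend" to 32 bits
  float F;
  std::memcpy(&F, &Container, sizeof F);
  return F;                             // bit pattern, not a numeric conversion
}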
4450
4451SDValue ARMTargetLowering::joinRegisterPartsIntoValue(
4452 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
4453 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
4454 if ((ValueVT == MVT::f16 || ValueVT == MVT::bf16) && PartVT == MVT::f32) {
4455 unsigned ValueBits = ValueVT.getSizeInBits();
4456 unsigned PartBits = PartVT.getSizeInBits();
4457 SDValue Val = Parts[0];
4458
4459 Val = DAG.getNode(ISD::BITCAST, DL, MVT::getIntegerVT(PartBits), Val);
4460 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::getIntegerVT(ValueBits), Val);
4461 Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
4462 return Val;
4463 }
4464 return SDValue();
4465}
4466
4467SDValue ARMTargetLowering::LowerFormalArguments(
4468 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4469 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4470 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4471 MachineFunction &MF = DAG.getMachineFunction();
4472 MachineFrameInfo &MFI = MF.getFrameInfo();
4473
4474 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
4475
4476 // Assign locations to all of the incoming arguments.
4477 SmallVector<CCValAssign, 16> ArgLocs;
4478 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4479 *DAG.getContext());
4480 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
4481
4482 SmallVector<SDValue, 16> ArgValues;
4483 SDValue ArgValue;
4484 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
4485 unsigned CurArgIdx = 0;
4486
4487 // Initially ArgRegsSaveSize is zero.
4488 // Then we increase this value each time we meet byval parameter.
4489 // We also increase this value in case of varargs function.
4490 AFI->setArgRegsSaveSize(0);
4491
4492 // Calculate the amount of stack space that we need to allocate to store
4493 // byval and variadic arguments that are passed in registers.
4494 // We need to know this before we allocate the first byval or variadic
4495 // argument, as they will be allocated a stack slot below the CFA (Canonical
4496 // Frame Address, the stack pointer at entry to the function).
4497 unsigned ArgRegBegin = ARM::R4;
4498 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4499 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
4500 break;
4501
4502 CCValAssign &VA = ArgLocs[i];
4503 unsigned Index = VA.getValNo();
4504 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
4505 if (!Flags.isByVal())
4506 continue;
4507
4508 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
4509 unsigned RBegin, REnd;
4510 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
4511 ArgRegBegin = std::min(ArgRegBegin, RBegin);
4512
4513 CCInfo.nextInRegsParam();
4514 }
4515 CCInfo.rewindByValRegsInfo();
4516
4517 int lastInsIndex = -1;
4518 if (isVarArg && MFI.hasVAStart()) {
4519 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
4520 if (RegIdx != std::size(GPRArgRegs))
4521 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
4522 }
4523
4524 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
4525 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
4526 auto PtrVT = getPointerTy(DAG.getDataLayout());
4527
4528 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4529 CCValAssign &VA = ArgLocs[i];
4530 if (Ins[VA.getValNo()].isOrigArg()) {
4531 std::advance(CurOrigArg,
4532 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
4533 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
4534 }
4535 // Arguments stored in registers.
4536 if (VA.isRegLoc()) {
4537 EVT RegVT = VA.getLocVT();
4538
4539 if (VA.needsCustom() && VA.getLocVT() == MVT::v2f64) {
4540 // f64 and vector types are split up into multiple registers or
4541 // combinations of registers and stack slots.
4542 SDValue ArgValue1 =
4543 GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4544 VA = ArgLocs[++i]; // skip ahead to next loc
4545 SDValue ArgValue2;
4546 if (VA.isMemLoc()) {
4547 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
4548 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4549 ArgValue2 = DAG.getLoad(
4550 MVT::f64, dl, Chain, FIN,
4551 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4552 } else {
4553 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4554 }
4555 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
4556 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4557 ArgValue1, DAG.getIntPtrConstant(0, dl));
4558 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, ArgValue,
4559 ArgValue2, DAG.getIntPtrConstant(1, dl));
4560 } else if (VA.needsCustom() && VA.getLocVT() == MVT::f64) {
4561 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
4562 } else {
4563 const TargetRegisterClass *RC;
4564
4565 if (RegVT == MVT::f16 || RegVT == MVT::bf16)
4566 RC = &ARM::HPRRegClass;
4567 else if (RegVT == MVT::f32)
4568 RC = &ARM::SPRRegClass;
4569 else if (RegVT == MVT::f64 || RegVT == MVT::v4f16 ||
4570 RegVT == MVT::v4bf16)
4571 RC = &ARM::DPRRegClass;
4572 else if (RegVT == MVT::v2f64 || RegVT == MVT::v8f16 ||
4573 RegVT == MVT::v8bf16)
4574 RC = &ARM::QPRRegClass;
4575 else if (RegVT == MVT::i32)
4576 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
4577 : &ARM::GPRRegClass;
4578 else
4579 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
4580
4581 // Transform the arguments in physical registers into virtual ones.
4582 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4583 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
4584
4585 // If this value is passed in r0 and has the returned attribute (e.g.
4586 // C++ 'structors), record this fact for later use.
4587 if (VA.getLocReg() == ARM::R0 && Ins[VA.getValNo()].Flags.isReturned()) {
4588 AFI->setPreservesR0();
4589 }
4590 }
4591
4592 // If this is an 8 or 16-bit value, it is really passed promoted
4593 // to 32 bits. Insert an assert[sz]ext to capture this, then
4594 // truncate to the right size.
4595 switch (VA.getLocInfo()) {
4596 default: llvm_unreachable("Unknown loc info!");
4597 case CCValAssign::Full: break;
4598 case CCValAssign::BCvt:
4599 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
4600 break;
4601 case CCValAssign::SExt:
4602 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
4603 DAG.getValueType(VA.getValVT()));
4604 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4605 break;
4606 case CCValAssign::ZExt:
4607 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
4608 DAG.getValueType(VA.getValVT()));
4609 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
4610 break;
4611 }
4612
4613 // f16 arguments have their size extended to 4 bytes and are passed as if
4614 // they had been copied to the LSBs of a 32-bit register.
4615 // For that, they are passed extended to i32 (soft ABI) or to f32 (hard ABI).
4616 if (VA.needsCustom() &&
4617 (VA.getValVT() == MVT::f16 || VA.getValVT() == MVT::bf16))
4618 ArgValue = MoveToHPR(dl, DAG, VA.getLocVT(), VA.getValVT(), ArgValue);
4619
4620 InVals.push_back(ArgValue);
4621 } else { // VA.isRegLoc()
4622 // Only arguments passed on the stack should make it here.
4623 assert(VA.isMemLoc());
4624 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
4625
4626 int index = VA.getValNo();
4627
4628 // Some Ins[] entries become multiple ArgLoc[] entries.
4629 // Process them only once.
4630 if (index != lastInsIndex)
4631 {
4632 ISD::ArgFlagsTy Flags = Ins[index].Flags;
4633 // FIXME: For now, all byval parameter objects are marked mutable.
4634 // This can be changed with more analysis.
4635 // In case of tail call optimization mark all arguments mutable.
4636 // Since they could be overwritten by lowering of arguments in case of
4637 // a tail call.
4638 if (Flags.isByVal()) {
4639 assert(Ins[index].isOrigArg() &&
4640 "Byval arguments cannot be implicit");
4641 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
4642
4643 int FrameIndex = StoreByValRegs(
4644 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
4645 VA.getLocMemOffset(), Flags.getByValSize());
4646 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
4647 CCInfo.nextInRegsParam();
4648 } else {
4649 unsigned FIOffset = VA.getLocMemOffset();
4650 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
4651 FIOffset, true);
4652
4653 // Create load nodes to retrieve arguments from the stack.
4654 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4655 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
4656 MachinePointerInfo::getFixedStack(
4657 DAG.getMachineFunction(), FI)));
4658 }
4659 lastInsIndex = index;
4660 }
4661 }
4662 }
4663
4664 // varargs
4665 if (isVarArg && MFI.hasVAStart()) {
4666 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getStackSize(),
4667 TotalArgRegsSaveSize);
4668 if (AFI->isCmseNSEntryFunction()) {
4669 DiagnosticInfoUnsupported Diag(
4670 DAG.getMachineFunction().getFunction(),
4671 "secure entry function must not be variadic", dl.getDebugLoc());
4672 DAG.getContext()->diagnose(Diag);
4673 }
4674 }
4675
4676 unsigned StackArgSize = CCInfo.getStackSize();
4677 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4678 if (canGuaranteeTCO(CallConv, TailCallOpt)) {
4679 // The only way to guarantee a tail call is if the callee restores its
4680 // argument area, but it must also keep the stack aligned when doing so.
4681 const DataLayout &DL = DAG.getDataLayout();
4682 StackArgSize = alignTo(StackArgSize, DL.getStackAlignment());
4683
4684 AFI->setArgumentStackToRestore(StackArgSize);
4685 }
4686 AFI->setArgumentStackSize(StackArgSize);
4687
4688 if (CCInfo.getStackSize() > 0 && AFI->isCmseNSEntryFunction()) {
4689 DiagnosticInfoUnsupported Diag(
4690 DAG.getMachineFunction().getFunction(),
4691 "secure entry function requires arguments on stack", dl.getDebugLoc());
4692 DAG.getContext()->diagnose(Diag);
4693 }
4694
4695 return Chain;
4696}
4697
4698/// isFloatingPointZero - Return true if this is +0.0.
4699static bool isFloatingPointZero(SDValue Op) {
4700 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
4701 return CFP->getValueAPF().isPosZero();
4702 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
4703 // Maybe this has already been legalized into the constant pool?
4704 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
4705 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
4706 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
4707 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
4708 return CFP->getValueAPF().isPosZero();
4709 }
4710 } else if (Op->getOpcode() == ISD::BITCAST &&
4711 Op->getValueType(0) == MVT::f64) {
4712 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
4713 // created by LowerConstantFP().
4714 SDValue BitcastOp = Op->getOperand(0);
4715 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
4716 isNullConstant(BitcastOp->getOperand(0)))
4717 return true;
4718 }
4719 return false;
4720}
4721
4722/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
4723/// the given operands.
4724SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
4725 SDValue &ARMcc, SelectionDAG &DAG,
4726 const SDLoc &dl) const {
4727 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
4728 unsigned C = RHSC->getZExtValue();
4729 if (!isLegalICmpImmediate((int32_t)C)) {
4730 // Constant does not fit, try adjusting it by one.
4731 switch (CC) {
4732 default: break;
4733 case ISD::SETLT:
4734 case ISD::SETGE:
4735 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
4736 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
4737 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4738 }
4739 break;
4740 case ISD::SETULT:
4741 case ISD::SETUGE:
4742 if (C != 0 && isLegalICmpImmediate(C-1)) {
4743 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
4744 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
4745 }
4746 break;
4747 case ISD::SETLE:
4748 case ISD::SETGT:
4749 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
4750 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
4751 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4752 }
4753 break;
4754 case ISD::SETULE:
4755 case ISD::SETUGT:
4756 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
4757 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
4758 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
4759 }
4760 break;
4761 }
4762 }
4763 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
4764 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
4765 // In ARM and Thumb-2, the compare instructions can shift their second
4766 // operand.
4767 CC = ISD::getSetCCSwappedOperands(CC);
4768 std::swap(LHS, RHS);
4769 }
4770
4771 // Thumb1 has very limited immediate modes, so turning an "and" into a
4772 // shift can save multiple instructions.
4773 //
4774 // If we have (x & C1), and C1 is an appropriate mask, we can transform it
4775 // into "((x << n) >> n)". But that isn't necessarily profitable on its
4776 // own. If it's the operand to an unsigned comparison with an immediate,
4777 // we can eliminate one of the shifts: we transform
4778 // "((x << n) >> n) == C2" to "(x << n) == (C2 << n)".
4779 //
4780 // We avoid transforming cases which aren't profitable due to encoding
4781 // details:
4782 //
4783 // 1. C2 fits into the immediate field of a cmp, and the transformed version
4784 // would not; in that case, we're essentially trading one immediate load for
4785 // another.
4786 // 2. C1 is 255 or 65535, so we can use uxtb or uxth.
4787 // 3. C2 is zero; we have other code for this special case.
4788 //
4789 // FIXME: Figure out profitability for Thumb2; we usually can't save an
4790 // instruction, since the AND is always one instruction anyway, but we could
4791 // use narrow instructions in some cases.
4792 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::AND &&
4793 LHS->hasOneUse() && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4794 LHS.getValueType() == MVT::i32 && isa<ConstantSDNode>(RHS) &&
4795 !isSignedIntSetCC(CC)) {
4796 unsigned Mask = LHS.getConstantOperandVal(1);
4797 auto *RHSC = cast<ConstantSDNode>(RHS.getNode());
4798 uint64_t RHSV = RHSC->getZExtValue();
4799 if (isMask_32(Mask) && (RHSV & ~Mask) == 0 && Mask != 255 && Mask != 65535) {
4800 unsigned ShiftBits = llvm::countl_zero(Mask);
4801 if (RHSV && (RHSV > 255 || (RHSV << ShiftBits) <= 255)) {
4802 SDValue ShiftAmt = DAG.getConstant(ShiftBits, dl, MVT::i32);
4803 LHS = DAG.getNode(ISD::SHL, dl, MVT::i32, LHS.getOperand(0), ShiftAmt);
4804 RHS = DAG.getConstant(RHSV << ShiftBits, dl, MVT::i32);
4805 }
4806 }
4807 }
4808
4809 // The specific comparison "(x<<c) > 0x80000000U" can be optimized to a
4810 // single "lsls x, c+1". The shift sets the "C" and "Z" flags the same
4811 // way a cmp would.
4812 // FIXME: Add support for ARM/Thumb2; this would need isel patterns, and
4813 // some tweaks to the heuristics for the previous and->shift transform.
4814 // FIXME: Optimize cases where the LHS isn't a shift.
4815 if (Subtarget->isThumb1Only() && LHS->getOpcode() == ISD::SHL &&
4816 isa<ConstantSDNode>(RHS) && RHS->getAsZExtVal() == 0x80000000U &&
4817 CC == ISD::SETUGT && isa<ConstantSDNode>(LHS.getOperand(1)) &&
4818 LHS.getConstantOperandVal(1) < 31) {
4819 unsigned ShiftAmt = LHS.getConstantOperandVal(1) + 1;
4820 SDValue Shift = DAG.getNode(ARMISD::LSLS, dl,
4821 DAG.getVTList(MVT::i32, MVT::i32),
4822 LHS.getOperand(0),
4823 DAG.getConstant(ShiftAmt, dl, MVT::i32));
4824 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
4825 Shift.getValue(1), SDValue());
4826 ARMcc = DAG.getConstant(ARMCC::HI, dl, MVT::i32);
4827 return Chain.getValue(1);
4828 }
4829
4830 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4831
4832 // If the RHS is a constant zero then the V (overflow) flag will never be
4833 // set. This can allow us to simplify GE to PL or LT to MI, which can be
4834 // simpler for other passes (like the peephole optimiser) to deal with.
4835 if (isNullConstant(RHS)) {
4836 switch (CondCode) {
4837 default: break;
4838 case ARMCC::GE:
4839 CondCode = ARMCC::PL;
4840 break;
4841 case ARMCC::LT:
4842 CondCode = ARMCC::MI;
4843 break;
4844 }
4845 }
4846
4847 ARMISD::NodeType CompareType;
4848 switch (CondCode) {
4849 default:
4850 CompareType = ARMISD::CMP;
4851 break;
4852 case ARMCC::EQ:
4853 case ARMCC::NE:
4854 // Uses only Z Flag
4855 CompareType = ARMISD::CMPZ;
4856 break;
4857 }
4858 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4859 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
4860}
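// Editorial note (illustrative, not from the original source): as a concrete
// case of the constant adjustment above, "x < 0x101" (SETLT) uses 0x101, which
// is not a valid ARM modified immediate, so it is rewritten as "x <= 0x100"
// (SETLE); 0x100 is encodable, and the two comparisons are equivalent for
// integer operands.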
4861
4862/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
4863SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
4864 SelectionDAG &DAG, const SDLoc &dl,
4865 bool Signaling) const {
4866 assert(Subtarget->hasFP64() || RHS.getValueType() != MVT::f64);
4867 SDValue Cmp;
4868 if (!isFloatingPointZero(RHS))
4869 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPE : ARMISD::CMPFP,
4870 dl, MVT::Glue, LHS, RHS);
4871 else
4872 Cmp = DAG.getNode(Signaling ? ARMISD::CMPFPEw0 : ARMISD::CMPFPw0,
4873 dl, MVT::Glue, LHS);
4874 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
4875}
4876
4877/// duplicateCmp - Glue values can have only one use, so this function
4878/// duplicates a comparison node.
4879SDValue
4880ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
4881 unsigned Opc = Cmp.getOpcode();
4882 SDLoc DL(Cmp);
4883 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
4884 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4885
4886 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
4887 Cmp = Cmp.getOperand(0);
4888 Opc = Cmp.getOpcode();
4889 if (Opc == ARMISD::CMPFP)
4890 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
4891 else {
4892 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
4893 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
4894 }
4895 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
4896}
4897
4898// This function returns three things: the arithmetic computation itself
4899// (Value), a comparison (OverflowCmp), and a condition code (ARMcc). The
4900// comparison and the condition code define the case in which the arithmetic
4901// computation *does not* overflow.
4902std::pair<SDValue, SDValue>
4903ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
4904 SDValue &ARMcc) const {
4905 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
4906
4907 SDValue Value, OverflowCmp;
4908 SDValue LHS = Op.getOperand(0);
4909 SDValue RHS = Op.getOperand(1);
4910 SDLoc dl(Op);
4911
4912 // FIXME: We are currently always generating CMPs because we don't support
4913 // generating CMN through the backend. This is not as good as the natural
4914 // CMP case because it causes a register dependency and cannot be folded
4915 // later.
4916
4917 switch (Op.getOpcode()) {
4918 default:
4919 llvm_unreachable("Unknown overflow instruction!");
4920 case ISD::SADDO:
4921 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4922 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
4923 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4924 break;
4925 case ISD::UADDO:
4926 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4927 // We use ADDC here to correspond to its use in LowerUnsignedALUO.
4928 // We do not use it in the USUBO case as Value may not be used.
4929 Value = DAG.getNode(ARMISD::ADDC, dl,
4930 DAG.getVTList(Op.getValueType(), MVT::i32), LHS, RHS)
4931 .getValue(0);
4932 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
4933 break;
4934 case ISD::SSUBO:
4935 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
4936 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4937 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4938 break;
4939 case ISD::USUBO:
4940 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
4941 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
4942 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
4943 break;
4944 case ISD::UMULO:
4945 // We generate a UMUL_LOHI and then check if the high word is 0.
4946 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4947 Value = DAG.getNode(ISD::UMUL_LOHI, dl,
4948 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4949 LHS, RHS);
4950 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4951 DAG.getConstant(0, dl, MVT::i32));
4952 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4953 break;
4954 case ISD::SMULO:
4955 // We generate a SMUL_LOHI and then check if all the bits of the high word
4956 // are the same as the sign bit of the low word.
4957 ARMcc = DAG.getConstant(ARMCC::EQ, dl, MVT::i32);
4958 Value = DAG.getNode(ISD::SMUL_LOHI, dl,
4959 DAG.getVTList(Op.getValueType(), Op.getValueType()),
4960 LHS, RHS);
4961 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value.getValue(1),
4962 DAG.getNode(ISD::SRA, dl, Op.getValueType(),
4963 Value.getValue(0),
4964 DAG.getConstant(31, dl, MVT::i32)));
4965 Value = Value.getValue(0); // We only want the low 32 bits for the result.
4966 break;
4967 } // switch (...)
4968
4969 return std::make_pair(Value, OverflowCmp);
4970}
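// Editorial sketch, not part of ARMISelLowering.cpp: the scalar meaning of the
// UADDO case above. The add does not overflow exactly when the result is still
// unsigned greater than or equal to one of the operands, which is what
// CMP(Value, LHS) with condition HS encodes. The function name is illustrative.
#include <cstdint>

static bool uaddNoOverflow(uint32_t LHS, uint32_t RHS, uint32_t &Value) {
  Value = LHS + RHS;       // wraps on overflow, like ARMISD::ADDC's result
  return Value >= LHS;     // corresponds to "CMP Value, LHS" being HS
}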
4971
4972SDValue
4973ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
4974 // Let legalize expand this if it isn't a legal type yet.
4975 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
4976 return SDValue();
4977
4978 SDValue Value, OverflowCmp;
4979 SDValue ARMcc;
4980 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
4981 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4982 SDLoc dl(Op);
4983 // We use 0 and 1 as false and true values.
4984 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
4985 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
4986 EVT VT = Op.getValueType();
4987
4988 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
4989 ARMcc, CCR, OverflowCmp);
4990
4991 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
4992 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4993}
4994
4995static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
4996 SelectionDAG &DAG) {
4997 SDLoc DL(BoolCarry);
4998 EVT CarryVT = BoolCarry.getValueType();
4999
5000 // This converts the boolean value carry into the carry flag by doing
5001 // ARMISD::SUBC Carry, 1
5002 SDValue Carry = DAG.getNode(ARMISD::SUBC, DL,
5003 DAG.getVTList(CarryVT, MVT::i32),
5004 BoolCarry, DAG.getConstant(1, DL, CarryVT));
5005 return Carry.getValue(1);
5006}
5007
5008static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
5009 SelectionDAG &DAG) {
5010 SDLoc DL(Flags);
5011
5012 // Now convert the carry flag into a boolean carry. We do this
5013 // using ARMISD:ADDE 0, 0, Carry
5014 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
5015 DAG.getConstant(0, DL, MVT::i32),
5016 DAG.getConstant(0, DL, MVT::i32), Flags);
5017}
5018
5019SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
5020 SelectionDAG &DAG) const {
5021 // Let legalize expand this if it isn't a legal type yet.
5022 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
5023 return SDValue();
5024
5025 SDValue LHS = Op.getOperand(0);
5026 SDValue RHS = Op.getOperand(1);
5027 SDLoc dl(Op);
5028
5029 EVT VT = Op.getValueType();
5030 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5031 SDValue Value;
5032 SDValue Overflow;
5033 switch (Op.getOpcode()) {
5034 default:
5035 llvm_unreachable("Unknown overflow instruction!");
5036 case ISD::UADDO:
5037 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
5038 // Convert the carry flag into a boolean value.
5039 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5040 break;
5041 case ISD::USUBO: {
5042 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
5043 // Convert the carry flag into a boolean value.
5044 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
5045 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
5046 // value. So compute 1 - C.
5047 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
5048 DAG.getConstant(1, dl, MVT::i32), Overflow);
5049 break;
5050 }
5051 }
5052
5053 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
5054}
5055
5056static SDValue LowerSADDSUBSAT(SDValue Op, SelectionDAG &DAG,
5057 const ARMSubtarget *Subtarget) {
5058 EVT VT = Op.getValueType();
5059 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() || Subtarget->isThumb1Only())
5060 return SDValue();
5061 if (!VT.isSimple())
5062 return SDValue();
5063
5064 unsigned NewOpcode;
5065 switch (VT.getSimpleVT().SimpleTy) {
5066 default:
5067 return SDValue();
5068 case MVT::i8:
5069 switch (Op->getOpcode()) {
5070 case ISD::UADDSAT:
5071 NewOpcode = ARMISD::UQADD8b;
5072 break;
5073 case ISD::SADDSAT:
5074 NewOpcode = ARMISD::QADD8b;
5075 break;
5076 case ISD::USUBSAT:
5077 NewOpcode = ARMISD::UQSUB8b;
5078 break;
5079 case ISD::SSUBSAT:
5080 NewOpcode = ARMISD::QSUB8b;
5081 break;
5082 }
5083 break;
5084 case MVT::i16:
5085 switch (Op->getOpcode()) {
5086 case ISD::UADDSAT:
5087 NewOpcode = ARMISD::UQADD16b;
5088 break;
5089 case ISD::SADDSAT:
5090 NewOpcode = ARMISD::QADD16b;
5091 break;
5092 case ISD::USUBSAT:
5093 NewOpcode = ARMISD::UQSUB16b;
5094 break;
5095 case ISD::SSUBSAT:
5096 NewOpcode = ARMISD::QSUB16b;
5097 break;
5098 }
5099 break;
5100 }
5101
5102 SDLoc dl(Op);
5103 SDValue Add =
5104 DAG.getNode(NewOpcode, dl, MVT::i32,
5105 DAG.getSExtOrTrunc(Op->getOperand(0), dl, MVT::i32),
5106 DAG.getSExtOrTrunc(Op->getOperand(1), dl, MVT::i32));
5107 return DAG.getNode(ISD::TRUNCATE, dl, VT, Add);
5108}
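// Editorial sketch, not part of ARMISelLowering.cpp: the per-element behaviour
// the QADD8b-based lowering above implements, shown for a single i8 lane of a
// signed saturating add. The function name is illustrative.
#include <algorithm>
#include <cstdint>

static int8_t refSAddSatI8(int8_t A, int8_t B) {
  int32_t Sum = int32_t(A) + int32_t(B);   // widen first, as the DAG code does
  Sum = std::clamp(Sum, int32_t(INT8_MIN), int32_t(INT8_MAX));
  return (int8_t)Sum;
}
// e.g. refSAddSatI8(100, 100) == 127 and refSAddSatI8(-100, -100) == -128.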
5109
5110SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
5111 SDValue Cond = Op.getOperand(0);
5112 SDValue SelectTrue = Op.getOperand(1);
5113 SDValue SelectFalse = Op.getOperand(2);
5114 SDLoc dl(Op);
5115 unsigned Opc = Cond.getOpcode();
5116
5117 if (Cond.getResNo() == 1 &&
5118 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5119 Opc == ISD::USUBO)) {
5120 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5121 return SDValue();
5122
5123 SDValue Value, OverflowCmp;
5124 SDValue ARMcc;
5125 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5126 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5127 EVT VT = Op.getValueType();
5128
5129 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
5130 OverflowCmp, DAG);
5131 }
5132
5133 // Convert:
5134 //
5135 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
5136 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
5137 //
5138 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
5139 const ConstantSDNode *CMOVTrue =
5140 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
5141 const ConstantSDNode *CMOVFalse =
5142 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
5143
5144 if (CMOVTrue && CMOVFalse) {
5145 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
5146 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
5147
5148 SDValue True;
5149 SDValue False;
5150 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
5151 True = SelectTrue;
5152 False = SelectFalse;
5153 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
5154 True = SelectFalse;
5155 False = SelectTrue;
5156 }
5157
5158 if (True.getNode() && False.getNode()) {
5159 EVT VT = Op.getValueType();
5160 SDValue ARMcc = Cond.getOperand(2);
5161 SDValue CCR = Cond.getOperand(3);
5162 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
5163 assert(True.getValueType() == VT);
5164 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
5165 }
5166 }
5167 }
5168
5169 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
5170 // undefined bits before doing a full-word comparison with zero.
5171 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
5172 DAG.getConstant(1, dl, Cond.getValueType()));
5173
5174 return DAG.getSelectCC(dl, Cond,
5175 DAG.getConstant(0, dl, Cond.getValueType()),
5176 SelectTrue, SelectFalse, ISD::SETNE);
5177}
5178
5179static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
5180 bool &swpCmpOps, bool &swpVselOps) {
5181 // Start by selecting the GE condition code for opcodes that return true for
5182 // 'equality'
5183 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
5184 CC == ISD::SETULE || CC == ISD::SETGE || CC == ISD::SETLE)
5185 CondCode = ARMCC::GE;
5186
5187 // and GT for opcodes that return false for 'equality'.
5188 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
5189 CC == ISD::SETULT || CC == ISD::SETGT || CC == ISD::SETLT)
5190 CondCode = ARMCC::GT;
5191
5192 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
5193 // to swap the compare operands.
5194 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
5195 CC == ISD::SETULT || CC == ISD::SETLE || CC == ISD::SETLT)
5196 swpCmpOps = true;
5197
5198 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
5199 // If we have an unordered opcode, we need to swap the operands to the VSEL
5200 // instruction (effectively negating the condition).
5201 //
5202 // This also has the effect of swapping which one of 'less' or 'greater'
5203 // returns true, so we also swap the compare operands. It also switches
5204 // whether we return true for 'equality', so we compensate by picking the
5205 // opposite condition code to our original choice.
5206 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
5207 CC == ISD::SETUGT) {
5208 swpCmpOps = !swpCmpOps;
5209 swpVselOps = !swpVselOps;
5210 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
5211 }
5212
5213 // 'ordered' is 'anything but unordered', so use the VS condition code and
5214 // swap the VSEL operands.
5215 if (CC == ISD::SETO) {
5216 CondCode = ARMCC::VS;
5217 swpVselOps = true;
5218 }
5219
5220 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
5221 // code and swap the VSEL operands. Also do this if we don't care about the
5222 // unordered case.
5223 if (CC == ISD::SETUNE || CC == ISD::SETNE) {
5224 CondCode = ARMCC::EQ;
5225 swpVselOps = true;
5226 }
5227}
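// Editorial note (illustrative, not from the original source): tracing the
// rules above for CC == SETOLT on (a, b): the "false for equality" group picks
// GT, and the "contains less" rule swaps the compare operands, so the ordered
// "a < b" is emitted as the ordered comparison "b > a" with no VSEL operand
// swap, which is equivalent.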
5228
5229SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
5230 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
5231 SDValue Cmp, SelectionDAG &DAG) const {
5232 if (!Subtarget->hasFP64() && VT == MVT::f64) {
5233 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5234 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
5235 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
5236 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
5237
5238 SDValue TrueLow = TrueVal.getValue(0);
5239 SDValue TrueHigh = TrueVal.getValue(1);
5240 SDValue FalseLow = FalseVal.getValue(0);
5241 SDValue FalseHigh = FalseVal.getValue(1);
5242
5243 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
5244 ARMcc, CCR, Cmp);
5245 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
5246 ARMcc, CCR, duplicateCmp(Cmp, DAG));
5247
5248 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
5249 } else {
5250 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
5251 Cmp);
5252 }
5253}
5254
5255static bool isGTorGE(ISD::CondCode CC) {
5256 return CC == ISD::SETGT || CC == ISD::SETGE;
5257}
5258
5259static bool isLTorLE(ISD::CondCode CC) {
5260 return CC == ISD::SETLT || CC == ISD::SETLE;
5261}
5262
5263// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
5264// All of these conditions (and their <= and >= counterparts) will do:
5265// x < k ? k : x
5266// x > k ? x : k
5267// k < x ? x : k
5268// k > x ? k : x
5269static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
5270 const SDValue TrueVal, const SDValue FalseVal,
5271 const ISD::CondCode CC, const SDValue K) {
5272 return (isGTorGE(CC) &&
5273 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
5274 (isLTorLE(CC) &&
5275 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
5276}
5277
5278// Check if two chained conditionals could be converted into SSAT or USAT.
5279//
5280// SSAT can replace a set of two conditional selectors that bound a number to an
5281// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
5282//
5283// x < -k ? -k : (x > k ? k : x)
5284// x < -k ? -k : (x < k ? x : k)
5285// x > -k ? (x > k ? k : x) : -k
5286// x < k ? (x < -k ? -k : x) : k
5287// etc.
5288//
5289// LLVM canonicalizes these to either a min(max()) or a max(min())
5290// pattern. This function tries to match one of these and will return a SSAT
5291// node if successful.
5292//
5293// USAT works similarly to SSAT but bounds the value to the interval [0, k],
5294// where k + 1 is a power of 2.
5295static SDValue LowerSaturatingConditional(SDValue Op, SelectionDAG &DAG) {
5296 EVT VT = Op.getValueType();
5297 SDValue V1 = Op.getOperand(0);
5298 SDValue K1 = Op.getOperand(1);
5299 SDValue TrueVal1 = Op.getOperand(2);
5300 SDValue FalseVal1 = Op.getOperand(3);
5301 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5302
5303 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
5304 if (Op2.getOpcode() != ISD::SELECT_CC)
5305 return SDValue();
5306
5307 SDValue V2 = Op2.getOperand(0);
5308 SDValue K2 = Op2.getOperand(1);
5309 SDValue TrueVal2 = Op2.getOperand(2);
5310 SDValue FalseVal2 = Op2.getOperand(3);
5311 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
5312
5313 SDValue V1Tmp = V1;
5314 SDValue V2Tmp = V2;
5315
5316 // Check that the registers and the constants match a max(min()) or min(max())
5317 // pattern
5318 if (V1Tmp != TrueVal1 || V2Tmp != TrueVal2 || K1 != FalseVal1 ||
5319 K2 != FalseVal2 ||
5320 !((isGTorGE(CC1) && isLTorLE(CC2)) || (isLTorLE(CC1) && isGTorGE(CC2))))
5321 return SDValue();
5322
5323 // Check that the constant in the lower-bound check is
5324 // the opposite of the constant in the upper-bound check
5325 // in 1's complement.
5326 if (!isa<ConstantSDNode>(K1) || !isa<ConstantSDNode>(K2))
5327 return SDValue();
5328
5329 int64_t Val1 = cast<ConstantSDNode>(K1)->getSExtValue();
5330 int64_t Val2 = cast<ConstantSDNode>(K2)->getSExtValue();
5331 int64_t PosVal = std::max(Val1, Val2);
5332 int64_t NegVal = std::min(Val1, Val2);
5333
5334 if (!((Val1 > Val2 && isLTorLE(CC1)) || (Val1 < Val2 && isLTorLE(CC2))) ||
5335 !isPowerOf2_64(PosVal + 1))
5336 return SDValue();
5337
5338 // Handle the difference between USAT (unsigned) and SSAT (signed)
5339 // saturation
5340 // At this point, PosVal is guaranteed to be positive
5341 uint64_t K = PosVal;
5342 SDLoc dl(Op);
5343 if (Val1 == ~Val2)
5344 return DAG.getNode(ARMISD::SSAT, dl, VT, V2Tmp,
5345 DAG.getConstant(llvm::countr_one(K), dl, VT));
5346 if (NegVal == 0)
5347 return DAG.getNode(ARMISD::USAT, dl, VT, V2Tmp,
5348 DAG.getConstant(llvm::countr_one(K), dl, VT));
5349
5350 return SDValue();
5351}
5352
5353// Check if a condition of the type x < k ? k : x can be converted into a
5354// bit operation instead of conditional moves.
5355// Currently this is allowed given:
5356// - The conditions and values match up
5357// - k is 0 or -1 (all ones)
5358 // This function will not check the last condition; that's up to the caller.
5359 // It returns true if the transformation can be made, and in that case
5360 // returns x in V and k in SatK.
5361 static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V,
5362 SDValue &SatK)
5363{
5364 SDValue LHS = Op.getOperand(0);
5365 SDValue RHS = Op.getOperand(1);
5366 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5367 SDValue TrueVal = Op.getOperand(2);
5368 SDValue FalseVal = Op.getOperand(3);
5369
5370 SDValue *K = isa<ConstantSDNode>(LHS) ? &LHS : isa<ConstantSDNode>(RHS)
5371 ? &RHS
5372 : nullptr;
5373
5374 // No constant operand in the comparison, early out
5375 if (!K)
5376 return false;
5377
5378 SDValue KTmp = isa<ConstantSDNode>(TrueVal) ? TrueVal : FalseVal;
5379 V = (KTmp == TrueVal) ? FalseVal : TrueVal;
5380 SDValue VTmp = (K && *K == LHS) ? RHS : LHS;
5381
5382 // If the constant in the comparison and in the select, or the variable in
5383 // the comparison and in the select, does not match, early out
5384 if (*K != KTmp || V != VTmp)
5385 return false;
5386
5387 if (isLowerSaturate(LHS, RHS, TrueVal, FalseVal, CC, *K)) {
5388 SatK = *K;
5389 return true;
5390 }
5391
5392 return false;
5393}
5394
5395bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
5396 if (VT == MVT::f32)
5397 return !Subtarget->hasVFP2Base();
5398 if (VT == MVT::f64)
5399 return !Subtarget->hasFP64();
5400 if (VT == MVT::f16)
5401 return !Subtarget->hasFullFP16();
5402 return false;
5403}
5404
5405SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
5406 EVT VT = Op.getValueType();
5407 SDLoc dl(Op);
5408
5409 // Try to convert two saturating conditional selects into a single SSAT
5410 if ((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2())
5411 if (SDValue SatValue = LowerSaturatingConditional(Op, DAG))
5412 return SatValue;
5413
5414 // Try to convert expressions of the form x < k ? k : x (and similar forms)
5415 // into more efficient bit operations, which is possible when k is 0 or -1
5416 // On ARM and Thumb-2, which have a flexible second operand, this will result
5417 // in single instructions. On Thumb-1 the shift and the bit operation will be two
5418 // instructions.
5419 // Only allow this transformation on full-width (32-bit) operations
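// For example, max(x, 0), i.e. x < 0 ? 0 : x, becomes x & ~(x >> 31), and
// max(x, -1), i.e. x < -1 ? -1 : x, becomes x | (x >> 31) (arithmetic shift).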
5420 SDValue LowerSatConstant;
5421 SDValue SatValue;
5422 if (VT == MVT::i32 &&
5423 isLowerSaturatingConditional(Op, SatValue, LowerSatConstant)) {
5424 SDValue ShiftV = DAG.getNode(ISD::SRA, dl, VT, SatValue,
5425 DAG.getConstant(31, dl, VT));
5426 if (isNullConstant(LowerSatConstant)) {
5427 SDValue NotShiftV = DAG.getNode(ISD::XOR, dl, VT, ShiftV,
5428 DAG.getAllOnesConstant(dl, VT));
5429 return DAG.getNode(ISD::AND, dl, VT, SatValue, NotShiftV);
5430 } else if (isAllOnesConstant(LowerSatConstant))
5431 return DAG.getNode(ISD::OR, dl, VT, SatValue, ShiftV);
5432 }
5433
5434 SDValue LHS = Op.getOperand(0);
5435 SDValue RHS = Op.getOperand(1);
5436 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5437 SDValue TrueVal = Op.getOperand(2);
5438 SDValue FalseVal = Op.getOperand(3);
5439 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FalseVal);
5440 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TrueVal);
5441
5442 if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
5443 LHS.getValueType() == MVT::i32 && RHS.getValueType() == MVT::i32) {
5444 unsigned TVal = CTVal->getZExtValue();
5445 unsigned FVal = CFVal->getZExtValue();
5446 unsigned Opcode = 0;
5447
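// For example, select(c, 5, -6) maps to CSINV (-6 == ~5), select(c, 5, -5)
// maps to CSNEG (-5 == -(5)), and select(c, 4, 5) maps to CSINC (4 + 1 == 5).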
5448 if (TVal == ~FVal) {
5449 Opcode = ARMISD::CSINV;
5450 } else if (TVal == ~FVal + 1) {
5451 Opcode = ARMISD::CSNEG;
5452 } else if (TVal + 1 == FVal) {
5453 Opcode = ARMISD::CSINC;
5454 } else if (TVal == FVal + 1) {
5455 Opcode = ARMISD::CSINC;
5456 std::swap(TrueVal, FalseVal);
5457 std::swap(TVal, FVal);
5458 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5459 }
5460
5461 if (Opcode) {
5462 // If one of the constants is cheaper than another, materialise the
5463 // cheaper one and let the csel generate the other.
5464 if (Opcode != ARMISD::CSINC &&
5465 HasLowerConstantMaterializationCost(FVal, TVal, Subtarget)) {
5466 std::swap(TrueVal, FalseVal);
5467 std::swap(TVal, FVal);
5468 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5469 }
5470
5471 // Attempt to use ZR by checking whether TVal is 0, possibly inverting the
5472 // condition to get there. CSINC is not invertible like the other two
5473 // (~(~a) == a and -(-a) == a, but (a+1)+1 != a).
5474 if (FVal == 0 && Opcode != ARMISD::CSINC) {
5475 std::swap(TrueVal, FalseVal);
5476 std::swap(TVal, FVal);
5477 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5478 }
5479
5480 // Drops F's value because we can get it by inverting/negating TVal.
5481 FalseVal = TrueVal;
5482
5483 SDValue ARMcc;
5484 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5485 EVT VT = TrueVal.getValueType();
5486 return DAG.getNode(Opcode, dl, VT, TrueVal, FalseVal, ARMcc, Cmp);
5487 }
5488 }
5489
5490 if (isUnsupportedFloatingType(LHS.getValueType())) {
5491 DAG.getTargetLoweringInfo().softenSetCCOperands(
5492 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5493
5494 // If softenSetCCOperands only returned one value, we should compare it to
5495 // zero.
5496 if (!RHS.getNode()) {
5497 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5498 CC = ISD::SETNE;
5499 }
5500 }
5501
5502 if (LHS.getValueType() == MVT::i32) {
5503 // Try to generate VSEL on ARMv8.
5504 // The VSEL instruction can't use all the usual ARM condition
5505 // codes: it only has two bits to select the condition code, so it's
5506 // constrained to use only GE, GT, VS and EQ.
5507 //
5508 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
5509 // swap the operands of the previous compare instruction (effectively
5510 // inverting the compare condition, swapping 'less' and 'greater') and
5511 // sometimes need to swap the operands to the VSEL (which inverts the
5512 // condition in the sense of firing whenever the previous condition didn't)
5513 if (Subtarget->hasFPARMv8Base() && (TrueVal.getValueType() == MVT::f16 ||
5514 TrueVal.getValueType() == MVT::f32 ||
5515 TrueVal.getValueType() == MVT::f64)) {
5516 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5517 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
5518 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
5519 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5520 std::swap(TrueVal, FalseVal);
5521 }
5522 }
5523
5524 SDValue ARMcc;
5525 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5526 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5527 // Choose GE over PL, which vsel does not support
5528 if (ARMcc->getAsZExtVal() == ARMCC::PL)
5529 ARMcc = DAG.getConstant(ARMCC::GE, dl, MVT::i32);
5530 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5531 }
5532
5533 ARMCC::CondCodes CondCode, CondCode2;
5534 FPCCToARMCC(CC, CondCode, CondCode2);
5535
5536 // Normalize the fp compare. If RHS is zero we prefer to keep it there so we
5537 // match CMPFPw0 instead of CMPFP, though we don't do this for f16 because we
5538 // must use VSEL (limited condition codes), due to not having conditional f16
5539 // moves.
5540 if (Subtarget->hasFPARMv8Base() &&
5541 !(isFloatingPointZero(RHS) && TrueVal.getValueType() != MVT::f16) &&
5542 (TrueVal.getValueType() == MVT::f16 ||
5543 TrueVal.getValueType() == MVT::f32 ||
5544 TrueVal.getValueType() == MVT::f64)) {
5545 bool swpCmpOps = false;
5546 bool swpVselOps = false;
5547 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
5548
5549 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
5550 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
5551 if (swpCmpOps)
5552 std::swap(LHS, RHS);
5553 if (swpVselOps)
5554 std::swap(TrueVal, FalseVal);
5555 }
5556 }
5557
5558 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5559 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5560 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5561 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
5562 if (CondCode2 != ARMCC::AL) {
5563 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
5564 // FIXME: Needs another CMP because flag can have but one use.
5565 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
5566 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
5567 }
5568 return Result;
5569}
5570
5571/// canChangeToInt - Given the fp compare operand, return true if it is suitable
5572/// to morph to an integer compare sequence.
5573static bool canChangeToInt(SDValue Op, bool &SeenZero,
5574 const ARMSubtarget *Subtarget) {
5575 SDNode *N = Op.getNode();
5576 if (!N->hasOneUse())
5577 // Otherwise it requires moving the value from fp to integer registers.
5578 return false;
5579 if (!N->getNumValues())
5580 return false;
5581 EVT VT = Op.getValueType();
5582 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
5583 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
5584 // vmrs are very slow, e.g. cortex-a8.
5585 return false;
5586
5587 if (isFloatingPointZero(Op)) {
5588 SeenZero = true;
5589 return true;
5590 }
5591 return ISD::isNormalLoad(N);
5592}
5593
5594 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
5595 if (isFloatingPointZero(Op))
5596 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
5597
5598 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
5599 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
5600 Ld->getPointerInfo(), Ld->getAlign(),
5601 Ld->getMemOperand()->getFlags());
5602
5603 llvm_unreachable("Unknown VFP cmp argument!");
5604}
5605
5606 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
5607 SDValue &RetVal1, SDValue &RetVal2) {
5608 SDLoc dl(Op);
5609
5610 if (isFloatingPointZero(Op)) {
5611 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
5612 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
5613 return;
5614 }
5615
5616 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
5617 SDValue Ptr = Ld->getBasePtr();
5618 RetVal1 =
5619 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
5620 Ld->getAlign(), Ld->getMemOperand()->getFlags());
5621
5622 EVT PtrType = Ptr.getValueType();
5623 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
5624 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
5625 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
5626 Ld->getPointerInfo().getWithOffset(4),
5627 commonAlignment(Ld->getAlign(), 4),
5628 Ld->getMemOperand()->getFlags());
5629 return;
5630 }
5631
5632 llvm_unreachable("Unknown VFP cmp argument!");
5633}
5634
5635/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
5636/// f32 and even f64 comparisons to integer ones.
5637SDValue
5638ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
5639 SDValue Chain = Op.getOperand(0);
5640 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5641 SDValue LHS = Op.getOperand(2);
5642 SDValue RHS = Op.getOperand(3);
5643 SDValue Dest = Op.getOperand(4);
5644 SDLoc dl(Op);
5645
5646 bool LHSSeenZero = false;
5647 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
5648 bool RHSSeenZero = false;
5649 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
5650 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
5651 // If unsafe fp math optimization is enabled and there are no other uses of
5652 // the CMP operands, and the condition code is EQ or NE, we can optimize it
5653 // to an integer comparison.
5654 if (CC == ISD::SETOEQ)
5655 CC = ISD::SETEQ;
5656 else if (CC == ISD::SETUNE)
5657 CC = ISD::SETNE;
5658
5659 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
5660 SDValue ARMcc;
5661 if (LHS.getValueType() == MVT::f32) {
5662 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5663 bitcastf32Toi32(LHS, DAG), Mask);
5664 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
5665 bitcastf32Toi32(RHS, DAG), Mask);
5666 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5667 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5668 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5669 Chain, Dest, ARMcc, CCR, Cmp);
5670 }
5671
5672 SDValue LHS1, LHS2;
5673 SDValue RHS1, RHS2;
5674 expandf64Toi32(LHS, DAG, LHS1, LHS2);
5675 expandf64Toi32(RHS, DAG, RHS1, RHS2);
5676 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
5677 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
5678 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
5679 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5680 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5681 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
5682 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
5683 }
5684
5685 return SDValue();
5686}
5687
5688SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
5689 SDValue Chain = Op.getOperand(0);
5690 SDValue Cond = Op.getOperand(1);
5691 SDValue Dest = Op.getOperand(2);
5692 SDLoc dl(Op);
5693
5694 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5695 // instruction.
5696 unsigned Opc = Cond.getOpcode();
5697 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5698 !Subtarget->isThumb1Only();
5699 if (Cond.getResNo() == 1 &&
5700 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5701 Opc == ISD::USUBO || OptimizeMul)) {
5702 // Only lower legal XALUO ops.
5703 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
5704 return SDValue();
5705
5706 // The actual operation with overflow check.
5707 SDValue Value, OverflowCmp;
5708 SDValue ARMcc;
5709 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
5710
5711 // Reverse the condition code.
5712 ARMCC::CondCodes CondCode =
5713 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5714 CondCode = ARMCC::getOppositeCondition(CondCode);
5715 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5716 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5717
5718 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5719 OverflowCmp);
5720 }
5721
5722 return SDValue();
5723}
5724
5725SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5726 SDValue Chain = Op.getOperand(0);
5727 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5728 SDValue LHS = Op.getOperand(2);
5729 SDValue RHS = Op.getOperand(3);
5730 SDValue Dest = Op.getOperand(4);
5731 SDLoc dl(Op);
5732
5733 if (isUnsupportedFloatingType(LHS.getValueType())) {
5734 DAG.getTargetLoweringInfo().softenSetCCOperands(
5735 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS);
5736
5737 // If softenSetCCOperands only returned one value, we should compare it to
5738 // zero.
5739 if (!RHS.getNode()) {
5740 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5741 CC = ISD::SETNE;
5742 }
5743 }
5744
5745 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5746 // instruction.
5747 unsigned Opc = LHS.getOpcode();
5748 bool OptimizeMul = (Opc == ISD::SMULO || Opc == ISD::UMULO) &&
5749 !Subtarget->isThumb1Only();
5750 if (LHS.getResNo() == 1 && (isOneConstant(RHS) || isNullConstant(RHS)) &&
5751 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
5752 Opc == ISD::USUBO || OptimizeMul) &&
5753 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5754 // Only lower legal XALUO ops.
5755 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5756 return SDValue();
5757
5758 // The actual operation with overflow check.
5759 SDValue Value, OverflowCmp;
5760 SDValue ARMcc;
5761 std::tie(Value, OverflowCmp) = getARMXALUOOp(LHS.getValue(0), DAG, ARMcc);
5762
5763 if ((CC == ISD::SETNE) != isOneConstant(RHS)) {
5764 // Reverse the condition code.
5765 ARMCC::CondCodes CondCode =
5766 (ARMCC::CondCodes)cast<const ConstantSDNode>(ARMcc)->getZExtValue();
5767 CondCode = ARMCC::getOppositeCondition(CondCode);
5768 ARMcc = DAG.getConstant(CondCode, SDLoc(ARMcc), MVT::i32);
5769 }
5770 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5771
5772 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, Chain, Dest, ARMcc, CCR,
5773 OverflowCmp);
5774 }
5775
5776 if (LHS.getValueType() == MVT::i32) {
5777 SDValue ARMcc;
5778 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
5779 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5780 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
5781 Chain, Dest, ARMcc, CCR, Cmp);
5782 }
5783
5784 if (getTargetMachine().Options.UnsafeFPMath &&
5785 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
5786 CC == ISD::SETNE || CC == ISD::SETUNE)) {
5787 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
5788 return Result;
5789 }
5790
5791 ARMCC::CondCodes CondCode, CondCode2;
5792 FPCCToARMCC(CC, CondCode, CondCode2);
5793
5794 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
5795 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
5796 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5797 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
5798 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
5799 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5800 if (CondCode2 != ARMCC::AL) {
5801 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
5802 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
5803 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
5804 }
5805 return Res;
5806}
5807
5808SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
5809 SDValue Chain = Op.getOperand(0);
5810 SDValue Table = Op.getOperand(1);
5811 SDValue Index = Op.getOperand(2);
5812 SDLoc dl(Op);
5813
5814 EVT PTy = getPointerTy(DAG.getDataLayout());
5815 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
5816 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
5817 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
5818 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
5819 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
5820 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
5821 // Thumb2 and ARMv8-M use a two-level jump. That is, the branch first jumps
5822 // into the jump table, which then does another jump to the destination. This
5823 // also makes it easier to translate it to TBB / TBH later (Thumb2 only).
5824 // FIXME: This might not work if the function is extremely large.
5825 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
5826 Addr, Op.getOperand(2), JTI);
5827 }
5828 if (isPositionIndependent() || Subtarget->isROPI()) {
5829 Addr =
5830 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
5831 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5832 Chain = Addr.getValue(1);
5833 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
5834 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5835 } else {
5836 Addr =
5837 DAG.getLoad(PTy, dl, Chain, Addr,
5838 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
5839 Chain = Addr.getValue(1);
5840 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
5841 }
5842}
5843
5844 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
5845 EVT VT = Op.getValueType();
5846 SDLoc dl(Op);
5847
5848 if (Op.getValueType().getVectorElementType() == MVT::i32) {
5849 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
5850 return Op;
5851 return DAG.UnrollVectorOp(Op.getNode());
5852 }
5853
5854 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5855
5856 EVT NewTy;
5857 const EVT OpTy = Op.getOperand(0).getValueType();
5858 if (OpTy == MVT::v4f32)
5859 NewTy = MVT::v4i32;
5860 else if (OpTy == MVT::v4f16 && HasFullFP16)
5861 NewTy = MVT::v4i16;
5862 else if (OpTy == MVT::v8f16 && HasFullFP16)
5863 NewTy = MVT::v8i16;
5864 else
5865 llvm_unreachable("Invalid type for custom lowering!");
5866
5867 if (VT != MVT::v4i16 && VT != MVT::v8i16)
5868 return DAG.UnrollVectorOp(Op.getNode());
5869
5870 Op = DAG.getNode(Op.getOpcode(), dl, NewTy, Op.getOperand(0));
5871 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
5872}
5873
5874SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
5875 EVT VT = Op.getValueType();
5876 if (VT.isVector())
5877 return LowerVectorFP_TO_INT(Op, DAG);
5878
5879 bool IsStrict = Op->isStrictFPOpcode();
5880 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
5881
5882 if (isUnsupportedFloatingType(SrcVal.getValueType())) {
5883 RTLIB::Libcall LC;
5884 if (Op.getOpcode() == ISD::FP_TO_SINT ||
5885 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
5886 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(),
5887 Op.getValueType());
5888 else
5889 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(),
5890 Op.getValueType());
5891 SDLoc Loc(Op);
5892 MakeLibCallOptions CallOptions;
5893 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
5894 SDValue Result;
5895 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
5896 CallOptions, Loc, Chain);
5897 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
5898 }
5899
5900 // FIXME: Remove this when we have strict fp instruction selection patterns
5901 if (IsStrict) {
5902 SDLoc Loc(Op);
5903 SDValue Result =
5904 DAG.getNode(Op.getOpcode() == ISD::STRICT_FP_TO_SINT ? ISD::FP_TO_SINT
5905 : ISD::FP_TO_UINT,
5906 Loc, Op.getValueType(), SrcVal);
5907 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
5908 }
5909
5910 return Op;
5911}
5912
5913 static SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG,
5914 const ARMSubtarget *Subtarget) {
5915 EVT VT = Op.getValueType();
5916 EVT ToVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
5917 EVT FromVT = Op.getOperand(0).getValueType();
5918
5919 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f32)
5920 return Op;
5921 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f64 &&
5922 Subtarget->hasFP64())
5923 return Op;
5924 if (VT == MVT::i32 && ToVT == MVT::i32 && FromVT == MVT::f16 &&
5925 Subtarget->hasFullFP16())
5926 return Op;
5927 if (VT == MVT::v4i32 && ToVT == MVT::i32 && FromVT == MVT::v4f32 &&
5928 Subtarget->hasMVEFloatOps())
5929 return Op;
5930 if (VT == MVT::v8i16 && ToVT == MVT::i16 && FromVT == MVT::v8f16 &&
5931 Subtarget->hasMVEFloatOps())
5932 return Op;
5933
5934 if (FromVT != MVT::v4f32 && FromVT != MVT::v8f16)
5935 return SDValue();
5936
5937 SDLoc DL(Op);
5938 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT_SAT;
5939 unsigned BW = ToVT.getScalarSizeInBits() - IsSigned;
5940 SDValue CVT = DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
5941 DAG.getValueType(VT.getScalarType()));
5942 SDValue Max = DAG.getNode(IsSigned ? ISD::SMIN : ISD::UMIN, DL, VT, CVT,
5943 DAG.getConstant((1 << BW) - 1, DL, VT));
5944 if (IsSigned)
5945 Max = DAG.getNode(ISD::SMAX, DL, VT, Max,
5946 DAG.getConstant(-(1 << BW), DL, VT));
5947 return Max;
5948}
5949
5950 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
5951 EVT VT = Op.getValueType();
5952 SDLoc dl(Op);
5953
5954 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
5955 if (VT.getVectorElementType() == MVT::f32)
5956 return Op;
5957 return DAG.UnrollVectorOp(Op.getNode());
5958 }
5959
5960 assert((Op.getOperand(0).getValueType() == MVT::v4i16 ||
5961 Op.getOperand(0).getValueType() == MVT::v8i16) &&
5962 "Invalid type for custom lowering!");
5963
5964 const bool HasFullFP16 = DAG.getSubtarget<ARMSubtarget>().hasFullFP16();
5965
5966 EVT DestVecType;
5967 if (VT == MVT::v4f32)
5968 DestVecType = MVT::v4i32;
5969 else if (VT == MVT::v4f16 && HasFullFP16)
5970 DestVecType = MVT::v4i16;
5971 else if (VT == MVT::v8f16 && HasFullFP16)
5972 DestVecType = MVT::v8i16;
5973 else
5974 return DAG.UnrollVectorOp(Op.getNode());
5975
5976 unsigned CastOpc;
5977 unsigned Opc;
5978 switch (Op.getOpcode()) {
5979 default: llvm_unreachable("Invalid opcode!");
5980 case ISD::SINT_TO_FP:
5981 CastOpc = ISD::SIGN_EXTEND;
5982 Opc = ISD::SINT_TO_FP;
5983 break;
5984 case ISD::UINT_TO_FP:
5985 CastOpc = ISD::ZERO_EXTEND;
5986 Opc = ISD::UINT_TO_FP;
5987 break;
5988 }
5989
5990 Op = DAG.getNode(CastOpc, dl, DestVecType, Op.getOperand(0));
5991 return DAG.getNode(Opc, dl, VT, Op);
5992}
5993
5994SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
5995 EVT VT = Op.getValueType();
5996 if (VT.isVector())
5997 return LowerVectorINT_TO_FP(Op, DAG);
5998 if (isUnsupportedFloatingType(VT)) {
5999 RTLIB::Libcall LC;
6000 if (Op.getOpcode() == ISD::SINT_TO_FP)
6001 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
6002 Op.getValueType());
6003 else
6004 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
6005 Op.getValueType());
6006 MakeLibCallOptions CallOptions;
6007 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
6008 CallOptions, SDLoc(Op)).first;
6009 }
6010
6011 return Op;
6012}
6013
6014SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
6015 // Implement fcopysign with a fabs and a conditional fneg.
6016 SDValue Tmp0 = Op.getOperand(0);
6017 SDValue Tmp1 = Op.getOperand(1);
6018 SDLoc dl(Op);
6019 EVT VT = Op.getValueType();
6020 EVT SrcVT = Tmp1.getValueType();
6021 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
6022 Tmp0.getOpcode() == ARMISD::VMOVDRR;
6023 bool UseNEON = !InGPR && Subtarget->hasNEON();
6024
6025 if (UseNEON) {
6026 // Use VBSL to copy the sign bit.
6027 unsigned EncodedVal = ARM_AM::createVMOVModImm(0x6, 0x80);
6028 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
6029 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
6030 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
6031 if (VT == MVT::f64)
6032 Mask = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6033 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
6034 DAG.getConstant(32, dl, MVT::i32));
6035 else /*if (VT == MVT::f32)*/
6036 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
6037 if (SrcVT == MVT::f32) {
6038 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
6039 if (VT == MVT::f64)
6040 Tmp1 = DAG.getNode(ARMISD::VSHLIMM, dl, OpVT,
6041 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
6042 DAG.getConstant(32, dl, MVT::i32));
6043 } else if (VT == MVT::f32)
6044 Tmp1 = DAG.getNode(ARMISD::VSHRuIMM, dl, MVT::v1i64,
6045 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
6046 DAG.getConstant(32, dl, MVT::i32));
6047 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
6048 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
6049
6050 SDValue AllOnes = DAG.getConstant(ARM_AM::createVMOVModImm(0xe, 0xff),
6051 dl, MVT::i32);
6052 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
6053 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
6054 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
6055
6056 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
6057 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
6058 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
6059 if (VT == MVT::f32) {
6060 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
6061 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
6062 DAG.getConstant(0, dl, MVT::i32));
6063 } else {
6064 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
6065 }
6066
6067 return Res;
6068 }
6069
6070 // Bitcast operand 1 to i32.
6071 if (SrcVT == MVT::f64)
6072 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6073 Tmp1).getValue(1);
6074 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
6075
6076 // Or in the signbit with integer operations.
6077 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
6078 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
6079 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
6080 if (VT == MVT::f32) {
6081 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
6082 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
6083 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
6084 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
6085 }
6086
6087 // f64: Or the high part with signbit and then combine two parts.
6088 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
6089 Tmp0);
6090 SDValue Lo = Tmp0.getValue(0);
6091 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
6092 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
6093 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
6094}
6095
6096SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
6097 MachineFunction &MF = DAG.getMachineFunction();
6098 MachineFrameInfo &MFI = MF.getFrameInfo();
6099 MFI.setReturnAddressIsTaken(true);
6100
6101 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
6102 return SDValue();
6103
6104 EVT VT = Op.getValueType();
6105 SDLoc dl(Op);
6106 unsigned Depth = Op.getConstantOperandVal(0);
6107 if (Depth) {
6108 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
6109 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
6110 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
6111 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
6112 MachinePointerInfo());
6113 }
6114
6115 // Return LR, which contains the return address. Mark it an implicit live-in.
6116 Register Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
6117 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
6118}
6119
6120SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
6121 const ARMBaseRegisterInfo &ARI =
6122 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
6123 MachineFunction &MF = DAG.getMachineFunction();
6124 MachineFrameInfo &MFI = MF.getFrameInfo();
6125 MFI.setFrameAddressIsTaken(true);
6126
6127 EVT VT = Op.getValueType();
6128 SDLoc dl(Op); // FIXME probably not meaningful
6129 unsigned Depth = Op.getConstantOperandVal(0);
6130 Register FrameReg = ARI.getFrameRegister(MF);
6131 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
6132 while (Depth--)
6133 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
6134 MachinePointerInfo());
6135 return FrameAddr;
6136}
6137
6138// FIXME? Maybe this could be a TableGen attribute on some registers and
6139// this table could be generated automatically from RegInfo.
6140Register ARMTargetLowering::getRegisterByName(const char* RegName, LLT VT,
6141 const MachineFunction &MF) const {
6143 .Case("sp", ARM::SP)
6144 .Default(0);
6145 if (Reg)
6146 return Reg;
6147 report_fatal_error(Twine("Invalid register name \""
6148 + StringRef(RegName) + "\"."));
6149}
6150
6151// Result is 64 bit value so split into two 32 bit values and return as a
6152// pair of values.
6153 static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
6154 SelectionDAG &DAG) {
6155 SDLoc DL(N);
6156
6157 // This function is only supposed to be called for i64 type destination.
6158 assert(N->getValueType(0) == MVT::i64
6159 && "ExpandREAD_REGISTER called for non-i64 type result.");
6160
6161 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
6162 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
6163 N->getOperand(0),
6164 N->getOperand(1));
6165
6166 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
6167 Read.getValue(1)));
6168 Results.push_back(Read.getOperand(0));
6169}
6170
6171/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
6172/// When \p DstVT, the destination type of \p BC, is on the vector
6173/// register bank and the source of bitcast, \p Op, operates on the same bank,
6174/// it might be possible to combine them, such that everything stays on the
6175/// vector register bank.
6176 /// \return The node that would replace \p BC, if the combine
6177/// is possible.
6178 static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
6179 SelectionDAG &DAG) {
6180 SDValue Op = BC->getOperand(0);
6181 EVT DstVT = BC->getValueType(0);
6182
6183 // The only vector instruction that can produce a scalar (remember,
6184 // since the bitcast was about to be turned into VMOVDRR, the source
6185 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
6186 // Moreover, we can do this combine only if there is one use.
6187 // Finally, if the destination type is not a vector, there is not
6188 // much point in forcing everything onto the vector bank.
6189 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6190 !Op.hasOneUse())
6191 return SDValue();
6192
6193 // If the index is not constant, we will introduce an additional
6194 // multiply that will stick.
6195 // Give up in that case.
6196 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6197 if (!Index)
6198 return SDValue();
6199 unsigned DstNumElt = DstVT.getVectorNumElements();
6200
6201 // Compute the new index.
6202 const APInt &APIntIndex = Index->getAPIntValue();
6203 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
6204 NewIndex *= APIntIndex;
6205 // Check if the new constant index fits into i32.
6206 if (NewIndex.getBitWidth() > 32)
6207 return SDValue();
6208
6209 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
6210 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
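// For example:
//   v2f32 (bitcast (i64 extractelt v2i64 src, 1))
//     -> v2f32 (extractsubvector (v4f32 bitcast v2i64 src), 2)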
6211 SDLoc dl(Op);
6212 SDValue ExtractSrc = Op.getOperand(0);
6213 EVT VecVT = EVT::getVectorVT(
6214 *DAG.getContext(), DstVT.getScalarType(),
6215 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
6216 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
6217 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
6218 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
6219}
6220
6221/// ExpandBITCAST - If the target supports VFP, this function is called to
6222/// expand a bit convert where either the source or destination type is i64 to
6223/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
6224/// operand type is illegal (e.g., v2f32 for a target that doesn't support
6225/// vectors), since the legalizer won't know what to do with that.
6226SDValue ARMTargetLowering::ExpandBITCAST(SDNode *N, SelectionDAG &DAG,
6227 const ARMSubtarget *Subtarget) const {
6228 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6229 SDLoc dl(N);
6230 SDValue Op = N->getOperand(0);
6231
6232 // This function is only supposed to be called for i16 and i64 types, either
6233 // as the source or destination of the bit convert.
6234 EVT SrcVT = Op.getValueType();
6235 EVT DstVT = N->getValueType(0);
6236
6237 if ((SrcVT == MVT::i16 || SrcVT == MVT::i32) &&
6238 (DstVT == MVT::f16 || DstVT == MVT::bf16))
6239 return MoveToHPR(SDLoc(N), DAG, MVT::i32, DstVT.getSimpleVT(),
6240 DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), MVT::i32, Op));
6241
6242 if ((DstVT == MVT::i16 || DstVT == MVT::i32) &&
6243 (SrcVT == MVT::f16 || SrcVT == MVT::bf16))
6244 return DAG.getNode(
6245 ISD::TRUNCATE, SDLoc(N), DstVT,
6246 MoveFromHPR(SDLoc(N), DAG, MVT::i32, SrcVT.getSimpleVT(), Op));
6247
6248 if (!(SrcVT == MVT::i64 || DstVT == MVT::i64))
6249 return SDValue();
6250
6251 // Turn i64->f64 into VMOVDRR.
6252 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
6253 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
6254 // if we can combine the bitcast with its source.
6255 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
6256 return Val;
6257 SDValue Lo, Hi;
6258 std::tie(Lo, Hi) = DAG.SplitScalar(Op, dl, MVT::i32, MVT::i32);
6259 return DAG.getNode(ISD::BITCAST, dl, DstVT,
6260 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
6261 }
6262
6263 // Turn f64->i64 into VMOVRRD.
6264 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
6265 SDValue Cvt;
6266 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
6267 SrcVT.getVectorNumElements() > 1)
6268 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6269 DAG.getVTList(MVT::i32, MVT::i32),
6270 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
6271 else
6272 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
6273 DAG.getVTList(MVT::i32, MVT::i32), Op);
6274 // Merge the pieces into a single i64 value.
6275 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
6276 }
6277
6278 return SDValue();
6279}
6280
6281/// getZeroVector - Returns a vector of specified type with all zero elements.
6282/// Zero vectors are used to represent vector negation and in those cases
6283/// will be implemented with the NEON VNEG instruction. However, VNEG does
6284/// not support i64 elements, so sometimes the zero vectors will need to be
6285/// explicitly constructed. Regardless, use a canonical VMOV to create the
6286/// zero vector.
6287static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
6288 assert(VT.isVector() && "Expected a vector type");
6289 // The canonical modified immediate encoding of a zero vector is....0!
6290 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
6291 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
6292 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
6293 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6294}
6295
6296/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
6297 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6298SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
6299 SelectionDAG &DAG) const {
6300 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6301 EVT VT = Op.getValueType();
6302 unsigned VTBits = VT.getSizeInBits();
6303 SDLoc dl(Op);
6304 SDValue ShOpLo = Op.getOperand(0);
6305 SDValue ShOpHi = Op.getOperand(1);
6306 SDValue ShAmt = Op.getOperand(2);
6307 SDValue ARMcc;
6308 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6309 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
6310
6311 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
6312
6313 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6314 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6315 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
6316 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6317 DAG.getConstant(VTBits, dl, MVT::i32));
6318 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
6319 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6320 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
6321 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6322 ISD::SETGE, ARMcc, DAG, dl);
6323 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
6324 ARMcc, CCR, CmpLo);
6325
6326 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
6327 SDValue HiBigShift = Opc == ISD::SRA
6328 ? DAG.getNode(Opc, dl, VT, ShOpHi,
6329 DAG.getConstant(VTBits - 1, dl, VT))
6330 : DAG.getConstant(0, dl, VT);
6331 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6332 ISD::SETGE, ARMcc, DAG, dl);
6333 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6334 ARMcc, CCR, CmpHi);
6335
6336 SDValue Ops[2] = { Lo, Hi };
6337 return DAG.getMergeValues(Ops, dl);
6338}
6339
6340/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
6341 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
6342SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
6343 SelectionDAG &DAG) const {
6344 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
6345 EVT VT = Op.getValueType();
6346 unsigned VTBits = VT.getSizeInBits();
6347 SDLoc dl(Op);
6348 SDValue ShOpLo = Op.getOperand(0);
6349 SDValue ShOpHi = Op.getOperand(1);
6350 SDValue ShAmt = Op.getOperand(2);
6351 SDValue ARMcc;
6352 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6353
6354 assert(Op.getOpcode() == ISD::SHL_PARTS);
6355 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6356 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
6357 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
6358 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
6359 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
6360
6361 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
6362 DAG.getConstant(VTBits, dl, MVT::i32));
6363 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
6364 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6365 ISD::SETGE, ARMcc, DAG, dl);
6366 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
6367 ARMcc, CCR, CmpHi);
6368
6369 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
6370 ISD::SETGE, ARMcc, DAG, dl);
6371 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
6372 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
6373 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
6374
6375 SDValue Ops[2] = { Lo, Hi };
6376 return DAG.getMergeValues(Ops, dl);
6377}
6378
6379SDValue ARMTargetLowering::LowerGET_ROUNDING(SDValue Op,
6380 SelectionDAG &DAG) const {
6381 // The rounding mode is in bits 23:22 of the FPSCR.
6382 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
6383 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3),
6384 // written so that the shift and the AND get folded into a bitfield extract.
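// For example, an FPSCR rounding field of 0b10 gives ((2 + 1) & 3) == 3, and
// 0b11 wraps around to ((3 + 1) & 3) == 0.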
6385 SDLoc dl(Op);
6386 SDValue Chain = Op.getOperand(0);
6387 SDValue Ops[] = {Chain,
6388 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32)};
6389
6390 SDValue FPSCR =
6391 DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, {MVT::i32, MVT::Other}, Ops);
6392 Chain = FPSCR.getValue(1);
6393 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
6394 DAG.getConstant(1U << 22, dl, MVT::i32));
6395 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
6396 DAG.getConstant(22, dl, MVT::i32));
6397 SDValue And = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
6398 DAG.getConstant(3, dl, MVT::i32));
6399 return DAG.getMergeValues({And, Chain}, dl);
6400}
6401
6402SDValue ARMTargetLowering::LowerSET_ROUNDING(SDValue Op,
6403 SelectionDAG &DAG) const {
6404 SDLoc DL(Op);
6405 SDValue Chain = Op->getOperand(0);
6406 SDValue RMValue = Op->getOperand(1);
6407
6408 // The rounding mode is in bits 23:22 of the FPSCR.
6409 // The llvm.set.rounding argument value to ARM rounding mode value mapping
6410 // is 0->3, 1->0, 2->1, 3->2. The formula we use to implement this is
6411 // (((arg - 1) & 3) << 22).
6412 //
6413 // It is expected that the argument of llvm.set.rounding is within the
6414 // segment [0, 3], so NearestTiesToAway (4) is not handled here. It is the
6415 // responsibility of the code that generates llvm.set.rounding to ensure this
6416 // condition.
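// For example, an argument of 1 gives ((1 - 1) & 3) << 22 == 0, while an
// argument of 0 wraps around to ((0 - 1) & 3) << 22 == 3 << 22.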
6417
6418 // Calculate new value of FPSCR[23:22].
6419 RMValue = DAG.getNode(ISD::SUB, DL, MVT::i32, RMValue,
6420 DAG.getConstant(1, DL, MVT::i32));
6421 RMValue = DAG.getNode(ISD::AND, DL, MVT::i32, RMValue,
6422 DAG.getConstant(0x3, DL, MVT::i32));
6423 RMValue = DAG.getNode(ISD::SHL, DL, MVT::i32, RMValue,
6424 DAG.getConstant(ARM::RoundingBitsPos, DL, MVT::i32));
6425
6426 // Get current value of FPSCR.
6427 SDValue Ops[] = {Chain,
6428 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6429 SDValue FPSCR =
6430 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6431 Chain = FPSCR.getValue(1);
6432 FPSCR = FPSCR.getValue(0);
6433
6434 // Put new rounding mode into FPSCR[23:22].
6435 const unsigned RMMask = ~(ARM::Rounding::rmMask << ARM::RoundingBitsPos);
6436 FPSCR = DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6437 DAG.getConstant(RMMask, DL, MVT::i32));
6438 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCR, RMValue);
6439 SDValue Ops2[] = {
6440 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6441 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6442}
6443
6444SDValue ARMTargetLowering::LowerSET_FPMODE(SDValue Op,
6445 SelectionDAG &DAG) const {
6446 SDLoc DL(Op);
6447 SDValue Chain = Op->getOperand(0);
6448 SDValue Mode = Op->getOperand(1);
6449
6450 // Generate nodes to build:
6451 // FPSCR = (FPSCR & FPStatusBits) | (Mode & ~FPStatusBits)
6452 SDValue Ops[] = {Chain,
6453 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6454 SDValue FPSCR =
6455 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6456 Chain = FPSCR.getValue(1);
6457 FPSCR = FPSCR.getValue(0);
6458
6459 SDValue FPSCRMasked =
6460 DAG.getNode(ISD::AND, DL, MVT::i32, FPSCR,
6461 DAG.getConstant(ARM::FPStatusBits, DL, MVT::i32));
6462 SDValue InputMasked =
6463 DAG.getNode(ISD::AND, DL, MVT::i32, Mode,
6464 DAG.getConstant(~ARM::FPStatusBits, DL, MVT::i32));
6465 FPSCR = DAG.getNode(ISD::OR, DL, MVT::i32, FPSCRMasked, InputMasked);
6466
6467 SDValue Ops2[] = {
6468 Chain, DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32), FPSCR};
6469 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6470}
6471
6472SDValue ARMTargetLowering::LowerRESET_FPMODE(SDValue Op,
6473 SelectionDAG &DAG) const {
6474 SDLoc DL(Op);
6475 SDValue Chain = Op->getOperand(0);
6476
6477 // To get the default FP mode all control bits are cleared:
6478 // FPSCR = FPSCR & (FPStatusBits | FPReservedBits)
6479 SDValue Ops[] = {Chain,
6480 DAG.getConstant(Intrinsic::arm_get_fpscr, DL, MVT::i32)};
6481 SDValue FPSCR =
6482 DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, {MVT::i32, MVT::Other}, Ops);
6483 Chain = FPSCR.getValue(1);
6484 FPSCR = FPSCR.getValue(0);
6485
6486 SDValue FPSCRMasked = DAG.getNode(
6487 ISD::AND, DL, MVT::i32, FPSCR,
6488 DAG.getConstant(ARM::FPStatusBits | ARM::FPReservedBits, DL, MVT::i32));
6489 SDValue Ops2[] = {Chain,
6490 DAG.getConstant(Intrinsic::arm_set_fpscr, DL, MVT::i32),
6491 FPSCRMasked};
6492 return DAG.getNode(ISD::INTRINSIC_VOID, DL, MVT::Other, Ops2);
6493}
6494
6495 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
6496 const ARMSubtarget *ST) {
6497 SDLoc dl(N);
6498 EVT VT = N->getValueType(0);
6499 if (VT.isVector() && ST->hasNEON()) {
6500
6501 // Compute the least significant set bit: LSB = X & -X
6502 SDValue X = N->getOperand(0);
6503 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
6504 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
6505
6506 EVT ElemTy = VT.getVectorElementType();
6507
6508 if (ElemTy == MVT::i8) {
6509 // Compute with: cttz(x) = ctpop(lsb - 1)
6510 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6511 DAG.getTargetConstant(1, dl, ElemTy));
6512 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6513 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6514 }
6515
6516 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
6517 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
6518 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
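// For example, for a 32-bit lane x = 0b...01100: lsb = 0b00100,
// ctlz(lsb) = 29, so cttz(x) = 31 - 29 = 2.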
6519 unsigned NumBits = ElemTy.getSizeInBits();
6520 SDValue WidthMinus1 =
6521 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6522 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
6523 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
6524 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
6525 }
6526
6527 // Compute with: cttz(x) = ctpop(lsb - 1)
6528
6529 // Compute LSB - 1.
6530 SDValue Bits;
6531 if (ElemTy == MVT::i64) {
6532 // Load constant 0xffff'ffff'ffff'ffff to register.
6533 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6534 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
6535 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
6536 } else {
6537 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
6538 DAG.getTargetConstant(1, dl, ElemTy));
6539 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
6540 }
6541 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
6542 }
6543
6544 if (!ST->hasV6T2Ops())
6545 return SDValue();
6546
6547 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
6548 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
6549}
6550
6551 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
6552 const ARMSubtarget *ST) {
6553 EVT VT = N->getValueType(0);
6554 SDLoc DL(N);
6555
6556 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
6557 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
6558 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
6559 "Unexpected type for custom ctpop lowering");
6560
6561 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6562 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
6563 SDValue Res = DAG.getBitcast(VT8Bit, N->getOperand(0));
6564 Res = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Res);
6565
6566 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
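// For example, for VT == v4i32 the v16i8 CTPOP is widened twice:
// v16i8 -> vpaddlu -> v8i16 -> vpaddlu -> v4i32.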
6567 unsigned EltSize = 8;
6568 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
6569 while (EltSize != VT.getScalarSizeInBits()) {
6570 SmallVector<SDValue, 8> Ops;
6571 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddlu, DL,
6572 TLI.getPointerTy(DAG.getDataLayout())));
6573 Ops.push_back(Res);
6574
6575 EltSize *= 2;
6576 NumElts /= 2;
6577 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
6578 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, WidenVT, Ops);
6579 }
6580
6581 return Res;
6582}
6583
6584 /// getVShiftImm - Check if this is a valid build_vector for the immediate
6585/// operand of a vector shift operation, where all the elements of the
6586/// build_vector must have the same constant integer value.
6587static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
6588 // Ignore bit_converts.
6589 while (Op.getOpcode() == ISD::BITCAST)
6590 Op = Op.getOperand(0);
6591 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
6592 APInt SplatBits, SplatUndef;
6593 unsigned SplatBitSize;
6594 bool HasAnyUndefs;
6595 if (!BVN ||
6596 !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6597 ElementBits) ||
6598 SplatBitSize > ElementBits)
6599 return false;
6600 Cnt = SplatBits.getSExtValue();
6601 return true;
6602}
6603
6604/// isVShiftLImm - Check if this is a valid build_vector for the immediate
6605/// operand of a vector shift left operation. That value must be in the range:
6606/// 0 <= Value < ElementBits for a left shift; or
6607/// 0 <= Value <= ElementBits for a long left shift.
6608static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
6609 assert(VT.isVector() && "vector shift count is not a vector type");
6610 int64_t ElementBits = VT.getScalarSizeInBits();
6611 if (!getVShiftImm(Op, ElementBits, Cnt))
6612 return false;
6613 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
6614}
6615
6616/// isVShiftRImm - Check if this is a valid build_vector for the immediate
6617/// operand of a vector shift right operation. For a shift opcode, the value
6618 /// is positive, but for an intrinsic the value must be negative. The
6619/// absolute value must be in the range:
6620/// 1 <= |Value| <= ElementBits for a right shift; or
6621/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
6622static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
6623 int64_t &Cnt) {
6624 assert(VT.isVector() && "vector shift count is not a vector type");
6625 int64_t ElementBits = VT.getScalarSizeInBits();
6626 if (!getVShiftImm(Op, ElementBits, Cnt))
6627 return false;
6628 if (!isIntrinsic)
6629 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
6630 if (Cnt >= -(isNarrow ? ElementBits / 2 : ElementBits) && Cnt <= -1) {
6631 Cnt = -Cnt;
6632 return true;
6633 }
6634 return false;
6635}
6636
6637 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
6638 const ARMSubtarget *ST) {
6639 EVT VT = N->getValueType(0);
6640 SDLoc dl(N);
6641 int64_t Cnt;
6642
6643 if (!VT.isVector())
6644 return SDValue();
6645
6646 // We essentially have two forms here. Shift by an immediate and shift by a
6647 // vector register (there is also shift by a GPR, but that is just handled
6648 // with a tablegen pattern). We cannot easily match shift by an immediate in
6649 // tablegen so we do that here and generate a VSHLIMM/VSHRsIMM/VSHRuIMM.
6650 // For shifting by a vector, we don't have VSHR, only VSHL (which can be
6651 // signed or unsigned, and a negative shift indicates a shift right).
6652 if (N->getOpcode() == ISD::SHL) {
6653 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
6654 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
6655 DAG.getConstant(Cnt, dl, MVT::i32));
6656 return DAG.getNode(ARMISD::VSHLu, dl, VT, N->getOperand(0),
6657 N->getOperand(1));
6658 }
6659
6660 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
6661 "unexpected vector shift opcode");
6662
6663 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
6664 unsigned VShiftOpc =
6665 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
6666 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
6667 DAG.getConstant(Cnt, dl, MVT::i32));
6668 }
6669
6670 // Other right shifts we don't have operations for (we use a shift left by a
6671 // negative number).
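// For example, a per-lane logical shift right by y becomes VSHLu(x, 0 - y),
// since VSHL treats a negative per-lane count as a right shift.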
6672 EVT ShiftVT = N->getOperand(1).getValueType();
6673 SDValue NegatedCount = DAG.getNode(
6674 ISD::SUB, dl, ShiftVT, getZeroVector(ShiftVT, DAG, dl), N->getOperand(1));
6675 unsigned VShiftOpc =
6676 (N->getOpcode() == ISD::SRA ? ARMISD::VSHLs : ARMISD::VSHLu);
6677 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0), NegatedCount);
6678}
6679
6680 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
6681 const ARMSubtarget *ST) {
6682 EVT VT = N->getValueType(0);
6683 SDLoc dl(N);
6684
6685 // We can get here for a node like i32 = ISD::SHL i32, i64
6686 if (VT != MVT::i64)
6687 return SDValue();
6688
6689 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA ||
6690 N->getOpcode() == ISD::SHL) &&
6691 "Unknown shift to lower!");
6692
6693 unsigned ShOpc = N->getOpcode();
6694 if (ST->hasMVEIntegerOps()) {
6695 SDValue ShAmt = N->getOperand(1);
6696 unsigned ShPartsOpc = ARMISD::LSLL;
6697 ConstantSDNode *Con = dyn_cast<ConstantSDNode>(ShAmt);
6698
6699 // If the shift amount is zero, is 32 or more, or has a bitwidth greater than
6700 // 64, then fall back to the default expansion.
6701 if ((!Con && ShAmt->getValueType(0).getSizeInBits() > 64) ||
6702 (Con && (Con->getAPIntValue() == 0 || Con->getAPIntValue().uge(32))))
6703 return SDValue();
6704
6705 // Extract the lower 32 bits of the shift amount if it's not an i32
6706 if (ShAmt->getValueType(0) != MVT::i32)
6707 ShAmt = DAG.getZExtOrTrunc(ShAmt, dl, MVT::i32);
6708
6709 if (ShOpc == ISD::SRL) {
6710 if (!Con)
6711 // There is no t2LSRLr instruction so negate and perform an lsll if the
6712 // shift amount is in a register, emulating a right shift.
6713 ShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
6714 DAG.getConstant(0, dl, MVT::i32), ShAmt);
6715 else
6716 // Else generate an lsrl on the immediate shift amount
6717 ShPartsOpc = ARMISD::LSRL;
6718 } else if (ShOpc == ISD::SRA)
6719 ShPartsOpc = ARMISD::ASRL;
6720
6721 // Split Lower/Upper 32 bits of the destination/source
6722 SDValue Lo, Hi;
6723 std::tie(Lo, Hi) =
6724 DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6725 // Generate the shift operation as computed above
6726 Lo = DAG.getNode(ShPartsOpc, dl, DAG.getVTList(MVT::i32, MVT::i32), Lo, Hi,
6727 ShAmt);
6728 // The upper 32 bits come from the second return value of lsll
6729 Hi = SDValue(Lo.getNode(), 1);
6730 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6731 }
6732
6733 // We only lower SRA, SRL of 1 here, all others use generic lowering.
6734 if (!isOneConstant(N->getOperand(1)) || N->getOpcode() == ISD::SHL)
6735 return SDValue();
6736
6737 // If we are in thumb mode, we don't have RRX.
6738 if (ST->isThumb1Only())
6739 return SDValue();
6740
6741 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
6742 SDValue Lo, Hi;
6743 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(0), dl, MVT::i32, MVT::i32);
6744
6745 // First, build a SRA_GLUE/SRL_GLUE op, which shifts the top part by one and
6746 // captures the result into a carry flag.
6747 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_GLUE:ARMISD::SRA_GLUE;
6748 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
6749
6750 // The low part is an ARMISD::RRX operand, which shifts the carry in.
6751 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
6752
6753 // Merge the pieces into a single i64 value.
6754 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6755}
6756
6757 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG,
6758 const ARMSubtarget *ST) {
6759 bool Invert = false;
6760 bool Swap = false;
6761 unsigned Opc = ARMCC::AL;
6762
6763 SDValue Op0 = Op.getOperand(0);
6764 SDValue Op1 = Op.getOperand(1);
6765 SDValue CC = Op.getOperand(2);
6766 EVT VT = Op.getValueType();
6767 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
6768 SDLoc dl(Op);
6769
6770 EVT CmpVT;
6771 if (ST->hasNEON())
6772 CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
6773 else {
6774 assert(ST->hasMVEIntegerOps() &&
6775 "No hardware support for integer vector comparison!");
6776
6777 if (Op.getValueType().getVectorElementType() != MVT::i1)
6778 return SDValue();
6779
6780 // Make sure we expand floating point setcc to scalar if we do not have
6781 // mve.fp, so that we can handle them from there.
6782 if (Op0.getValueType().isFloatingPoint() && !ST->hasMVEFloatOps())
6783 return SDValue();
6784
6785 CmpVT = VT;
6786 }
6787
6788 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
6789 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
6790 // Special-case integer 64-bit equality comparisons. They aren't legal,
6791 // but they can be lowered with a few vector instructions.
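// For example, a v2i64 seteq is done as a v4i32 seteq, followed by a VREV64
// to swap the two i32 halves of each 64-bit lane and an AND, so a lane is
// all-ones only if both halves compared equal.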
6792 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
6793 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
6794 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
6795 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
6796 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
6797 DAG.getCondCode(ISD::SETEQ));
6798 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
6799 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
6800 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
6801 if (SetCCOpcode == ISD::SETNE)
6802 Merged = DAG.getNOT(dl, Merged, CmpVT);
6803 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
6804 return Merged;
6805 }
6806
6807 if (CmpVT.getVectorElementType() == MVT::i64)
6808 // 64-bit comparisons are not legal in general.
6809 return SDValue();
6810
6811 if (Op1.getValueType().isFloatingPoint()) {
6812 switch (SetCCOpcode) {
6813 default: llvm_unreachable("Illegal FP comparison");
6814 case ISD::SETUNE:
6815 case ISD::SETNE:
6816 if (ST->hasMVEFloatOps()) {
6817 Opc = ARMCC::NE; break;
6818 } else {
6819 Invert = true; [[fallthrough]];
6820 }
6821 case ISD::SETOEQ:
6822 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6823 case ISD::SETOLT:
6824 case ISD::SETLT: Swap = true; [[fallthrough]];
6825 case ISD::SETOGT:
6826 case ISD::SETGT: Opc = ARMCC::GT; break;
6827 case ISD::SETOLE:
6828 case ISD::SETLE: Swap = true; [[fallthrough]];
6829 case ISD::SETOGE:
6830 case ISD::SETGE: Opc = ARMCC::GE; break;
6831 case ISD::SETUGE: Swap = true; [[fallthrough]];
6832 case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break;
6833 case ISD::SETUGT: Swap = true; [[fallthrough]];
6834 case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break;
6835 case ISD::SETUEQ: Invert = true; [[fallthrough]];
6836 case ISD::SETONE: {
6837 // Expand this to (OLT | OGT).
6838 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6839 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6840 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6841 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6842 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6843 if (Invert)
6844 Result = DAG.getNOT(dl, Result, VT);
6845 return Result;
6846 }
6847 case ISD::SETUO: Invert = true; [[fallthrough]];
6848 case ISD::SETO: {
6849 // Expand this to (OLT | OGE).
6850 SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0,
6851 DAG.getConstant(ARMCC::GT, dl, MVT::i32));
6852 SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6853 DAG.getConstant(ARMCC::GE, dl, MVT::i32));
6854 SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1);
6855 if (Invert)
6856 Result = DAG.getNOT(dl, Result, VT);
6857 return Result;
6858 }
6859 }
6860 } else {
6861 // Integer comparisons.
6862 switch (SetCCOpcode) {
6863 default: llvm_unreachable("Illegal integer comparison");
6864 case ISD::SETNE:
6865 if (ST->hasMVEIntegerOps()) {
6866 Opc = ARMCC::NE; break;
6867 } else {
6868 Invert = true; [[fallthrough]];
6869 }
6870 case ISD::SETEQ: Opc = ARMCC::EQ; break;
6871 case ISD::SETLT: Swap = true; [[fallthrough]];
6872 case ISD::SETGT: Opc = ARMCC::GT; break;
6873 case ISD::SETLE: Swap = true; [[fallthrough]];
6874 case ISD::SETGE: Opc = ARMCC::GE; break;
6875 case ISD::SETULT: Swap = true; [[fallthrough]];
6876 case ISD::SETUGT: Opc = ARMCC::HI; break;
6877 case ISD::SETULE: Swap = true; [[fallthrough]];
6878 case ISD::SETUGE: Opc = ARMCC::HS; break;
6879 }
6880
6881 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
6882 if (ST->hasNEON() && Opc == ARMCC::EQ) {
6883 SDValue AndOp;
6884 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
6885 AndOp = Op0;
6886 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
6887 AndOp = Op1;
6888
6889 // Ignore bitconvert.
6890 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
6891 AndOp = AndOp.getOperand(0);
6892
6893 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
6894 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
6895 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
6896 SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
6897 if (!Invert)
6898 Result = DAG.getNOT(dl, Result, VT);
6899 return Result;
6900 }
6901 }
6902 }
6903
6904 if (Swap)
6905 std::swap(Op0, Op1);
6906
6907 // If one of the operands is a constant vector zero, attempt to fold the
6908 // comparison to a specialized compare-against-zero form.
6909 if (ISD::isBuildVectorAllZeros(Op0.getNode()) &&
6910 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::EQ ||
6911 Opc == ARMCC::NE)) {
6912 if (Opc == ARMCC::GE)
6913 Opc = ARMCC::LE;
6914 else if (Opc == ARMCC::GT)
6915 Opc = ARMCC::LT;
6916 std::swap(Op0, Op1);
6917 }
6918
6919 SDValue Result;
6920 if (ISD::isBuildVectorAllZeros(Op1.getNode()) &&
6921 (Opc == ARMCC::GE || Opc == ARMCC::GT || Opc == ARMCC::LE ||
6922 Opc == ARMCC::LT || Opc == ARMCC::NE || Opc == ARMCC::EQ))
6923 Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, Op0,
6924 DAG.getConstant(Opc, dl, MVT::i32));
6925 else
6926 Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
6927 DAG.getConstant(Opc, dl, MVT::i32));
6928
6929 Result = DAG.getSExtOrTrunc(Result, dl, VT);
6930
6931 if (Invert)
6932 Result = DAG.getNOT(dl, Result, VT);
6933
6934 return Result;
6935}
6936
6937static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) {
6938 SDValue LHS = Op.getOperand(0);
6939 SDValue RHS = Op.getOperand(1);
6940 SDValue Carry = Op.getOperand(2);
6941 SDValue Cond = Op.getOperand(3);
6942 SDLoc DL(Op);
6943
6944 assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
6945
6946 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
6947 // have to invert the carry first.
6948 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
6949 DAG.getConstant(1, DL, MVT::i32), Carry);
6950 // This converts the boolean value carry into the carry flag.
6951 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
6952
6953 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
6954 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
6955
6956 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
6957 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
6958 SDValue ARMcc = DAG.getConstant(
6959 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
6960 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
6961 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
6962 Cmp.getValue(1), SDValue());
6963 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
6964 CCR, Chain.getValue(1));
6965}
6966
6967/// isVMOVModifiedImm - Check if the specified splat value corresponds to a
6968/// valid vector constant for a NEON or MVE instruction with a "modified
6969/// immediate" operand (e.g., VMOV). If so, return the encoded value.
6970static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
6971 unsigned SplatBitSize, SelectionDAG &DAG,
6972 const SDLoc &dl, EVT &VT, EVT VectorVT,
6973 VMOVModImmType type) {
6974 unsigned OpCmode, Imm;
6975 bool is128Bits = VectorVT.is128BitVector();
6976
6977 // SplatBitSize is set to the smallest size that splats the vector, so a
6978 // zero vector will always have SplatBitSize == 8. However, NEON modified
6979 // immediate instructions other than VMOV do not support the 8-bit encoding
6980 // of a zero vector, and the default encoding of zero is supposed to be the
6981 // 32-bit version.
6982 if (SplatBits == 0)
6983 SplatBitSize = 32;
6984
6985 switch (SplatBitSize) {
6986 case 8:
6987 if (type != VMOVModImm)
6988 return SDValue();
6989 // Any 1-byte value is OK. Op=0, Cmode=1110.
6990 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
6991 OpCmode = 0xe;
6992 Imm = SplatBits;
6993 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
6994 break;
6995
6996 case 16:
6997 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
6998 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
6999 if ((SplatBits & ~0xff) == 0) {
7000 // Value = 0x00nn: Op=x, Cmode=100x.
7001 OpCmode = 0x8;
7002 Imm = SplatBits;
7003 break;
7004 }
7005 if ((SplatBits & ~0xff00) == 0) {
7006 // Value = 0xnn00: Op=x, Cmode=101x.
7007 OpCmode = 0xa;
7008 Imm = SplatBits >> 8;
7009 break;
7010 }
7011 return SDValue();
7012
7013 case 32:
7014 // NEON's 32-bit VMOV supports splat values where:
7015 // * only one byte is nonzero, or
7016 // * the least significant byte is 0xff and the second byte is nonzero, or
7017 // * the least significant 2 bytes are 0xff and the third is nonzero.
7018 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
7019 if ((SplatBits & ~0xff) == 0) {
7020 // Value = 0x000000nn: Op=x, Cmode=000x.
7021 OpCmode = 0;
7022 Imm = SplatBits;
7023 break;
7024 }
7025 if ((SplatBits & ~0xff00) == 0) {
7026 // Value = 0x0000nn00: Op=x, Cmode=001x.
7027 OpCmode = 0x2;
7028 Imm = SplatBits >> 8;
7029 break;
7030 }
7031 if ((SplatBits & ~0xff0000) == 0) {
7032 // Value = 0x00nn0000: Op=x, Cmode=010x.
7033 OpCmode = 0x4;
7034 Imm = SplatBits >> 16;
7035 break;
7036 }
7037 if ((SplatBits & ~0xff000000) == 0) {
7038 // Value = 0xnn000000: Op=x, Cmode=011x.
7039 OpCmode = 0x6;
7040 Imm = SplatBits >> 24;
7041 break;
7042 }
7043
7044 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
7045 if (type == OtherModImm) return SDValue();
7046
7047 if ((SplatBits & ~0xffff) == 0 &&
7048 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
7049 // Value = 0x0000nnff: Op=x, Cmode=1100.
7050 OpCmode = 0xc;
7051 Imm = SplatBits >> 8;
7052 break;
7053 }
7054
7055 // cmode == 0b1101 is not supported for MVE VMVN
7056 if (type == MVEVMVNModImm)
7057 return SDValue();
7058
7059 if ((SplatBits & ~0xffffff) == 0 &&
7060 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
7061 // Value = 0x00nnffff: Op=x, Cmode=1101.
7062 OpCmode = 0xd;
7063 Imm = SplatBits >> 16;
7064 break;
7065 }
7066
7067 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
7068 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
7069 // VMOV.I32. A (very) minor optimization would be to replicate the value
7070 // and fall through here to test for a valid 64-bit splat. But, then the
7071 // caller would also need to check and handle the change in size.
7072 return SDValue();
7073
7074 case 64: {
7075 if (type != VMOVModImm)
7076 return SDValue();
7077 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
7078 uint64_t BitMask = 0xff;
7079 unsigned ImmMask = 1;
7080 Imm = 0;
7081 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
7082 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
7083 Imm |= ImmMask;
7084 } else if ((SplatBits & BitMask) != 0) {
7085 return SDValue();
7086 }
7087 BitMask <<= 8;
7088 ImmMask <<= 1;
7089 }
7090
7091 if (DAG.getDataLayout().isBigEndian()) {
7092 // Reverse the order of elements within the vector.
7093 unsigned BytesPerElem = VectorVT.getScalarSizeInBits() / 8;
7094 unsigned Mask = (1 << BytesPerElem) - 1;
7095 unsigned NumElems = 8 / BytesPerElem;
7096 unsigned NewImm = 0;
7097 for (unsigned ElemNum = 0; ElemNum < NumElems; ++ElemNum) {
7098 unsigned Elem = ((Imm >> ElemNum * BytesPerElem) & Mask);
7099 NewImm |= Elem << (NumElems - ElemNum - 1) * BytesPerElem;
7100 }
7101 Imm = NewImm;
7102 }
7103
7104 // Op=1, Cmode=1110.
7105 OpCmode = 0x1e;
7106 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
7107 break;
7108 }
7109
7110 default:
7111 llvm_unreachable("unexpected size for isVMOVModifiedImm");
7112 }
7113
7114 unsigned EncodedVal = ARM_AM::createVMOVModImm(OpCmode, Imm);
7115 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
7116}
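// Encoding examples (illustrative, assumed splat values):
//   splat 0x53 over v16i8               -> Op=0, Cmode=1110, Imm=0x53
//   splat 0x4100 over v8i16             -> Op=x, Cmode=101x, Imm=0x41
//   splat 0x00ff00ff00ff00ff over v2i64 -> Op=1, Cmode=1110, Imm=0b01010101
// The OpCmode/Imm pair is packed into the returned target constant by
// ARM_AM::createVMOVModImm.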
7117
7118SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
7119 const ARMSubtarget *ST) const {
7120 EVT VT = Op.getValueType();
7121 bool IsDouble = (VT == MVT::f64);
7122 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
7123 const APFloat &FPVal = CFP->getValueAPF();
7124
7125 // Prevent floating-point constants from using literal loads
7126 // when execute-only is enabled.
7127 if (ST->genExecuteOnly()) {
7128 // We shouldn't trigger this for v6m execute-only
7129 assert((!ST->isThumb1Only() || ST->hasV8MBaselineOps()) &&
7130 "Unexpected architecture");
7131
7132 // If we can represent the constant as an immediate, don't lower it
7133 if (isFPImmLegal(FPVal, VT))
7134 return Op;
7135 // Otherwise, construct as integer, and move to float register
7136 APInt INTVal = FPVal.bitcastToAPInt();
7137 SDLoc DL(CFP);
7138 switch (VT.getSimpleVT().SimpleTy) {
7139 default:
7140 llvm_unreachable("Unknown floating point type!");
7141 break;
7142 case MVT::f64: {
7143 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
7144 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
7145 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
7146 }
7147 case MVT::f32:
7148 return DAG.getNode(ARMISD::VMOVSR, DL, VT,
7149 DAG.getConstant(INTVal, DL, MVT::i32));
7150 }
7151 }
7152
7153 if (!ST->hasVFP3Base())
7154 return SDValue();
7155
7156 // Use the default (constant pool) lowering for double constants when we have
7157 // an SP-only FPU
7158 if (IsDouble && !Subtarget->hasFP64())
7159 return SDValue();
7160
7161 // Try splatting with a VMOV.f32...
7162 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
7163
7164 if (ImmVal != -1) {
7165 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
7166 // We have code in place to select a valid ConstantFP already, no need to
7167 // do any mangling.
7168 return Op;
7169 }
7170
7171 // It's a float and we are trying to use NEON operations where
7172 // possible. Lower it to a splat followed by an extract.
7173 SDLoc DL(Op);
7174 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
7175 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
7176 NewVal);
7177 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
7178 DAG.getConstant(0, DL, MVT::i32));
7179 }
7180
7181 // The rest of our options are NEON only, make sure that's allowed before
7182 // proceeding..
7183 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
7184 return SDValue();
7185
7186 EVT VMovVT;
7187 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
7188
7189 // It wouldn't really be worth bothering for doubles except for one very
7190 // important value, which does happen to match: 0.0. So make sure we don't do
7191 // anything stupid.
7192 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
7193 return SDValue();
7194
7195 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
7196 SDValue NewVal = isVMOVModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
7197 VMovVT, VT, VMOVModImm);
7198 if (NewVal != SDValue()) {
7199 SDLoc DL(Op);
7200 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
7201 NewVal);
7202 if (IsDouble)
7203 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7204
7205 // It's a float: cast and extract a vector element.
7206 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7207 VecConstant);
7208 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7209 DAG.getConstant(0, DL, MVT::i32));
7210 }
7211
7212 // Finally, try a VMVN.i32
7213 NewVal = isVMOVModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
7214 VT, VMVNModImm);
7215 if (NewVal != SDValue()) {
7216 SDLoc DL(Op);
7217 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
7218
7219 if (IsDouble)
7220 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
7221
7222 // It's a float: cast and extract a vector element.
7223 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
7224 VecConstant);
7225 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
7226 DAG.getConstant(0, DL, MVT::i32));
7227 }
7228
7229 return SDValue();
7230}
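// Illustrative cases (assumed subtarget configurations): with VFP3 and no
// NEON-for-single-precision, f32 1.0 is accepted by ARM_AM::getFP32Imm and is
// returned unchanged (a plain VMOV.F32 immediate); with NEON used for
// single-precision FP, f32 0.0 is not a legal VFP immediate, so it is built as
// a v2i32 VMOVIMM of zero, bitcast to v2f32, and lane 0 is extracted.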
7231
7232// Check if a VEXT instruction can handle the shuffle mask when the
7233// vector sources of the shuffle are the same.
7234static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
7235 unsigned NumElts = VT.getVectorNumElements();
7236
7237 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7238 if (M[0] < 0)
7239 return false;
7240
7241 Imm = M[0];
7242
7243 // If this is a VEXT shuffle, the immediate value is the index of the first
7244 // element. The other shuffle indices must be the successive elements after
7245 // the first one.
7246 unsigned ExpectedElt = Imm;
7247 for (unsigned i = 1; i < NumElts; ++i) {
7248 // Increment the expected index. If it wraps around, just follow it
7249 // back to index zero and keep going.
7250 ++ExpectedElt;
7251 if (ExpectedElt == NumElts)
7252 ExpectedElt = 0;
7253
7254 if (M[i] < 0) continue; // ignore UNDEF indices
7255 if (ExpectedElt != static_cast<unsigned>(M[i]))
7256 return false;
7257 }
7258
7259 return true;
7260}
7261
7262static bool isVEXTMask(ArrayRef<int> M, EVT VT,
7263 bool &ReverseVEXT, unsigned &Imm) {
7264 unsigned NumElts = VT.getVectorNumElements();
7265 ReverseVEXT = false;
7266
7267 // Assume that the first shuffle index is not UNDEF. Fail if it is.
7268 if (M[0] < 0)
7269 return false;
7270
7271 Imm = M[0];
7272
7273 // If this is a VEXT shuffle, the immediate value is the index of the first
7274 // element. The other shuffle indices must be the successive elements after
7275 // the first one.
7276 unsigned ExpectedElt = Imm;
7277 for (unsigned i = 1; i < NumElts; ++i) {
7278 // Increment the expected index. If it wraps around, it may still be
7279 // a VEXT but the source vectors must be swapped.
7280 ExpectedElt += 1;
7281 if (ExpectedElt == NumElts * 2) {
7282 ExpectedElt = 0;
7283 ReverseVEXT = true;
7284 }
7285
7286 if (M[i] < 0) continue; // ignore UNDEF indices
7287 if (ExpectedElt != static_cast<unsigned>(M[i]))
7288 return false;
7289 }
7290
7291 // Adjust the index value if the source operands will be swapped.
7292 if (ReverseVEXT)
7293 Imm -= NumElts;
7294
7295 return true;
7296}
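// Mask example (illustrative): for v8i8, <3,4,5,6,7,8,9,10> is a VEXT of the
// two sources with Imm = 3; <13,14,15,0,1,2,3,4> wraps past 2*NumElts, so
// ReverseVEXT is set, the sources are swapped and Imm is adjusted to
// 13 - 8 = 5.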
7297
7298static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
7299 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
7300 // range, then 0 is placed into the resulting vector. So pretty much any mask
7301 // of 8 elements can work here.
7302 return VT == MVT::v8i8 && M.size() == 8;
7303}
7304
7305static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
7306 unsigned Index) {
7307 if (Mask.size() == Elements * 2)
7308 return Index / Elements;
7309 return Mask[Index] == 0 ? 0 : 1;
7310}
7311
7312// Checks whether the shuffle mask represents a vector transpose (VTRN) by
7313// checking that pairs of elements in the shuffle mask represent the same index
7314// in each vector, incrementing the expected index by 2 at each step.
7315// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
7316// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
7317// v2={e,f,g,h}
7318// WhichResult gives the offset for each element in the mask based on which
7319// of the two results it belongs to.
7320//
7321// The transpose can be represented either as:
7322// result1 = shufflevector v1, v2, result1_shuffle_mask
7323// result2 = shufflevector v1, v2, result2_shuffle_mask
7324// where v1/v2 and the shuffle masks have the same number of elements
7325// (here WhichResult (see below) indicates which result is being checked)
7326//
7327// or as:
7328// results = shufflevector v1, v2, shuffle_mask
7329// where both results are returned in one vector and the shuffle mask has twice
7330// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
7331// want to check the low half and high half of the shuffle mask as if it were
7332// the other case
7333static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7334 unsigned EltSz = VT.getScalarSizeInBits();
7335 if (EltSz == 64)
7336 return false;
7337
7338 unsigned NumElts = VT.getVectorNumElements();
7339 if (M.size() != NumElts && M.size() != NumElts*2)
7340 return false;
7341
7342 // If the mask is twice as long as the input vector then we need to check the
7343 // upper and lower parts of the mask with a matching value for WhichResult
7344 // FIXME: A mask with only even values will be rejected in case the first
7345 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
7346 // M[0] is used to determine WhichResult
7347 for (unsigned i = 0; i < M.size(); i += NumElts) {
7348 WhichResult = SelectPairHalf(NumElts, M, i);
7349 for (unsigned j = 0; j < NumElts; j += 2) {
7350 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7351 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
7352 return false;
7353 }
7354 }
7355
7356 if (M.size() == NumElts*2)
7357 WhichResult = 0;
7358
7359 return true;
7360}
7361
7362/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
7363/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7364/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
7365static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7366 unsigned EltSz = VT.getScalarSizeInBits();
7367 if (EltSz == 64)
7368 return false;
7369
7370 unsigned NumElts = VT.getVectorNumElements();
7371 if (M.size() != NumElts && M.size() != NumElts*2)
7372 return false;
7373
7374 for (unsigned i = 0; i < M.size(); i += NumElts) {
7375 WhichResult = SelectPairHalf(NumElts, M, i);
7376 for (unsigned j = 0; j < NumElts; j += 2) {
7377 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
7378 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
7379 return false;
7380 }
7381 }
7382
7383 if (M.size() == NumElts*2)
7384 WhichResult = 0;
7385
7386 return true;
7387}
7388
7389// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
7390// that the mask elements are either all even and in steps of size 2 or all odd
7391// and in steps of size 2.
7392// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
7393// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
7394// v2={e,f,g,h}
7395// Requires checks similar to those of isVTRNMask with
7396// respect to how the results are returned.
7397static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7398 unsigned EltSz = VT.getScalarSizeInBits();
7399 if (EltSz == 64)
7400 return false;
7401
7402 unsigned NumElts = VT.getVectorNumElements();
7403 if (M.size() != NumElts && M.size() != NumElts*2)
7404 return false;
7405
7406 for (unsigned i = 0; i < M.size(); i += NumElts) {
7407 WhichResult = SelectPairHalf(NumElts, M, i);
7408 for (unsigned j = 0; j < NumElts; ++j) {
7409 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
7410 return false;
7411 }
7412 }
7413
7414 if (M.size() == NumElts*2)
7415 WhichResult = 0;
7416
7417 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7418 if (VT.is64BitVector() && EltSz == 32)
7419 return false;
7420
7421 return true;
7422}
7423
7424/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
7425/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7426/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
7427static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7428 unsigned EltSz = VT.getScalarSizeInBits();
7429 if (EltSz == 64)
7430 return false;
7431
7432 unsigned NumElts = VT.getVectorNumElements();
7433 if (M.size() != NumElts && M.size() != NumElts*2)
7434 return false;
7435
7436 unsigned Half = NumElts / 2;
7437 for (unsigned i = 0; i < M.size(); i += NumElts) {
7438 WhichResult = SelectPairHalf(NumElts, M, i);
7439 for (unsigned j = 0; j < NumElts; j += Half) {
7440 unsigned Idx = WhichResult;
7441 for (unsigned k = 0; k < Half; ++k) {
7442 int MIdx = M[i + j + k];
7443 if (MIdx >= 0 && (unsigned) MIdx != Idx)
7444 return false;
7445 Idx += 2;
7446 }
7447 }
7448 }
7449
7450 if (M.size() == NumElts*2)
7451 WhichResult = 0;
7452
7453 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7454 if (VT.is64BitVector() && EltSz == 32)
7455 return false;
7456
7457 return true;
7458}
7459
7460// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
7461// that pairs of elements of the shufflemask represent the same index in each
7462// vector incrementing sequentially through the vectors.
7463// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
7464// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
7465// v2={e,f,g,h}
7466// Requires checks similar to those of isVTRNMask with respect to how the
7467// results are returned.
7468static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
7469 unsigned EltSz = VT.getScalarSizeInBits();
7470 if (EltSz == 64)
7471 return false;
7472
7473 unsigned NumElts = VT.getVectorNumElements();
7474 if (M.size() != NumElts && M.size() != NumElts*2)
7475 return false;
7476
7477 for (unsigned i = 0; i < M.size(); i += NumElts) {
7478 WhichResult = SelectPairHalf(NumElts, M, i);
7479 unsigned Idx = WhichResult * NumElts / 2;
7480 for (unsigned j = 0; j < NumElts; j += 2) {
7481 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7482 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
7483 return false;
7484 Idx += 1;
7485 }
7486 }
7487
7488 if (M.size() == NumElts*2)
7489 WhichResult = 0;
7490
7491 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7492 if (VT.is64BitVector() && EltSz == 32)
7493 return false;
7494
7495 return true;
7496}
7497
7498/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
7499/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
7500/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
7501static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
7502 unsigned EltSz = VT.getScalarSizeInBits();
7503 if (EltSz == 64)
7504 return false;
7505
7506 unsigned NumElts = VT.getVectorNumElements();
7507 if (M.size() != NumElts && M.size() != NumElts*2)
7508 return false;
7509
7510 for (unsigned i = 0; i < M.size(); i += NumElts) {
7511 WhichResult = SelectPairHalf(NumElts, M, i);
7512 unsigned Idx = WhichResult * NumElts / 2;
7513 for (unsigned j = 0; j < NumElts; j += 2) {
7514 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
7515 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
7516 return false;
7517 Idx += 1;
7518 }
7519 }
7520
7521 if (M.size() == NumElts*2)
7522 WhichResult = 0;
7523
7524 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
7525 if (VT.is64BitVector() && EltSz == 32)
7526 return false;
7527
7528 return true;
7529}
7530
7531/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
7532/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
7533static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
7534 unsigned &WhichResult,
7535 bool &isV_UNDEF) {
7536 isV_UNDEF = false;
7537 if (isVTRNMask(ShuffleMask, VT, WhichResult))
7538 return ARMISD::VTRN;
7539 if (isVUZPMask(ShuffleMask, VT, WhichResult))
7540 return ARMISD::VUZP;
7541 if (isVZIPMask(ShuffleMask, VT, WhichResult))
7542 return ARMISD::VZIP;
7543
7544 isV_UNDEF = true;
7545 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
7546 return ARMISD::VTRN;
7547 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7548 return ARMISD::VUZP;
7549 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
7550 return ARMISD::VZIP;
7551
7552 return 0;
7553}
7554
7555/// \return true if this is a reverse operation on a vector.
7556static bool isReverseMask(ArrayRef<int> M, EVT VT) {
7557 unsigned NumElts = VT.getVectorNumElements();
7558 // Make sure the mask has the right size.
7559 if (NumElts != M.size())
7560 return false;
7561
7562 // Look for <15, ..., 3, -1, 1, 0>.
7563 for (unsigned i = 0; i != NumElts; ++i)
7564 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
7565 return false;
7566
7567 return true;
7568}
7569
7570static bool isTruncMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7571 unsigned NumElts = VT.getVectorNumElements();
7572 // Make sure the mask has the right size.
7573 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7574 return false;
7575
7576 // Half-width truncation patterns (e.g. v4i32 -> v8i16):
7577 // !Top && SingleSource: <0, 2, 4, 6, 0, 2, 4, 6>
7578 // !Top && !SingleSource: <0, 2, 4, 6, 8, 10, 12, 14>
7579 // Top && SingleSource: <1, 3, 5, 7, 1, 3, 5, 7>
7580 // Top && !SingleSource: <1, 3, 5, 7, 9, 11, 13, 15>
7581 int Ofs = Top ? 1 : 0;
7582 int Upper = SingleSource ? 0 : NumElts;
7583 for (int i = 0, e = NumElts / 2; i != e; ++i) {
7584 if (M[i] >= 0 && M[i] != (i * 2) + Ofs)
7585 return false;
7586 if (M[i + e] >= 0 && M[i + e] != (i * 2) + Ofs + Upper)
7587 return false;
7588 }
7589 return true;
7590}
7591
7592static bool isVMOVNMask(ArrayRef<int> M, EVT VT, bool Top, bool SingleSource) {
7593 unsigned NumElts = VT.getVectorNumElements();
7594 // Make sure the mask has the right size.
7595 if (NumElts != M.size() || (VT != MVT::v8i16 && VT != MVT::v16i8))
7596 return false;
7597
7598 // If Top
7599 // Look for <0, N, 2, N+2, 4, N+4, ..>.
7600 // This inserts Input2 into Input1
7601 // else if not Top
7602 // Look for <0, N+1, 2, N+3, 4, N+5, ..>
7603 // This inserts Input1 into Input2
7604 unsigned Offset = Top ? 0 : 1;
7605 unsigned N = SingleSource ? 0 : NumElts;
7606 for (unsigned i = 0; i < NumElts; i += 2) {
7607 if (M[i] >= 0 && M[i] != (int)i)
7608 return false;
7609 if (M[i + 1] >= 0 && M[i + 1] != (int)(N + i + Offset))
7610 return false;
7611 }
7612
7613 return true;
7614}
7615
7616static bool isVMOVNTruncMask(ArrayRef<int> M, EVT ToVT, bool rev) {
7617 unsigned NumElts = ToVT.getVectorNumElements();
7618 if (NumElts != M.size())
7619 return false;
7620
7621 // Test whether the Trunc can be converted to a VMOVN with this shuffle. We are
7622 // looking for patterns of:
7623 // !rev: 0 N/2 1 N/2+1 2 N/2+2 ...
7624 // rev: N/2 0 N/2+1 1 N/2+2 2 ...
7625
7626 unsigned Off0 = rev ? NumElts / 2 : 0;
7627 unsigned Off1 = rev ? 0 : NumElts / 2;
7628 for (unsigned i = 0; i < NumElts; i += 2) {
7629 if (M[i] >= 0 && M[i] != (int)(Off0 + i / 2))
7630 return false;
7631 if (M[i + 1] >= 0 && M[i + 1] != (int)(Off1 + i / 2))
7632 return false;
7633 }
7634
7635 return true;
7636}
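// Mask examples (illustrative): with ToVT = v8i16, <0,4,1,5,2,6,3,7> matches
// the !rev pattern (Off0 = 0, Off1 = 4) and <4,0,5,1,6,2,7,3> matches rev.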
7637
7638// Reconstruct an MVE VCVT from a BuildVector of scalar fptrunc, all extracted
7639// from a pair of inputs. For example:
7640// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7641// FP_ROUND(EXTRACT_ELT(Y, 0),
7642// FP_ROUND(EXTRACT_ELT(X, 1),
7643// FP_ROUND(EXTRACT_ELT(Y, 1), ...)
7644static SDValue LowerBuildVectorOfFPTrunc(SDValue BV, SelectionDAG &DAG,
7645 const ARMSubtarget *ST) {
7646 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7647 if (!ST->hasMVEFloatOps())
7648 return SDValue();
7649
7650 SDLoc dl(BV);
7651 EVT VT = BV.getValueType();
7652 if (VT != MVT::v8f16)
7653 return SDValue();
7654
7655 // We are looking for a buildvector of fptrunc elements, where all the
7656 // elements are extracted from two sources in an interleaved fashion. Check
7657 // that the first two items are valid and extract some info from them (they
7658 // are checked properly in the loop below).
7659 if (BV.getOperand(0).getOpcode() != ISD::FP_ROUND ||
7660 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7661 !isa<ConstantSDNode>(BV.getOperand(0).getOperand(0).getOperand(1)))
7662 return SDValue();
7663 if (BV.getOperand(1).getOpcode() != ISD::FP_ROUND ||
7664 BV.getOperand(1).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7665 !isa<ConstantSDNode>(BV.getOperand(1).getOperand(0).getOperand(1)))
7666 return SDValue();
7667 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7668 SDValue Op1 = BV.getOperand(1).getOperand(0).getOperand(0);
7669 if (Op0.getValueType() != MVT::v4f32 || Op1.getValueType() != MVT::v4f32)
7670 return SDValue();
7671
7672 // Check all the values in the BuildVector line up with our expectations.
7673 for (unsigned i = 1; i < 4; i++) {
7674 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7675 return Trunc.getOpcode() == ISD::FP_ROUND &&
7676 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7677 Trunc.getOperand(0).getOperand(0) == Op &&
7678 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7679 };
7680 if (!Check(BV.getOperand(i * 2 + 0), Op0, i))
7681 return SDValue();
7682 if (!Check(BV.getOperand(i * 2 + 1), Op1, i))
7683 return SDValue();
7684 }
7685
7686 SDValue N1 = DAG.getNode(ARMISD::VCVTN, dl, VT, DAG.getUNDEF(VT), Op0,
7687 DAG.getConstant(0, dl, MVT::i32));
7688 return DAG.getNode(ARMISD::VCVTN, dl, VT, N1, Op1,
7689 DAG.getConstant(1, dl, MVT::i32));
7690}
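// The first VCVTN above (lane selector 0) writes Op0's narrowed values and
// the second (selector 1) writes Op1's, together producing the interleaved
// v8f16 shown in the comment before this function.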
7691
7692// Reconstruct an MVE VCVT from a BuildVector of scalar fpext, all extracted
7693// from a single input on alternating lanes. For example:
7694// BUILDVECTOR(FP_ROUND(EXTRACT_ELT(X, 0),
7695// FP_ROUND(EXTRACT_ELT(X, 2),
7696// FP_ROUND(EXTRACT_ELT(X, 4), ...)
7697static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG,
7698 const ARMSubtarget *ST) {
7699 assert(BV.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
7700 if (!ST->hasMVEFloatOps())
7701 return SDValue();
7702
7703 SDLoc dl(BV);
7704 EVT VT = BV.getValueType();
7705 if (VT != MVT::v4f32)
7706 return SDValue();
7707
7708 // We are looking for a buildvector of fpext elements, where all the
7709 // elements are alternating lanes from a single source. For example <0,2,4,6>
7710 // or <1,3,5,7>. Check the first two items are valid enough and extract some
7711 // info from them (they are checked properly in the loop below).
7712 if (BV.getOperand(0).getOpcode() != ISD::FP_EXTEND ||
7713 BV.getOperand(0).getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
7714 return SDValue();
7715 SDValue Op0 = BV.getOperand(0).getOperand(0).getOperand(0);
7716 int Offset = BV.getOperand(0).getOperand(0).getConstantOperandVal(1);
7717 if (Op0.getValueType() != MVT::v8f16 || (Offset != 0 && Offset != 1))
7718 return SDValue();
7719
7720 // Check all the values in the BuildVector line up with our expectations.
7721 for (unsigned i = 1; i < 4; i++) {
7722 auto Check = [](SDValue Trunc, SDValue Op, unsigned Idx) {
7723 return Trunc.getOpcode() == ISD::FP_EXTEND &&
7724 Trunc.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7725 Trunc.getOperand(0).getOperand(0) == Op &&
7726 Trunc.getOperand(0).getConstantOperandVal(1) == Idx;
7727 };
7728 if (!Check(BV.getOperand(i), Op0, 2 * i + Offset))
7729 return SDValue();
7730 }
7731
7732 return DAG.getNode(ARMISD::VCVTL, dl, VT, Op0,
7733 DAG.getConstant(Offset, dl, MVT::i32));
7734}
7735
7736// If N is an integer constant that can be moved into a register in one
7737// instruction, return an SDValue of such a constant (will become a MOV
7738// instruction). Otherwise return null.
7739static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
7740 const ARMSubtarget *ST, const SDLoc &dl) {
7741 uint64_t Val;
7742 if (!isa<ConstantSDNode>(N))
7743 return SDValue();
7744 Val = N->getAsZExtVal();
7745
7746 if (ST->isThumb1Only()) {
7747 if (Val <= 255 || ~Val <= 255)
7748 return DAG.getConstant(Val, dl, MVT::i32);
7749 } else {
7750 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
7751 return DAG.getConstant(Val, dl, MVT::i32);
7752 }
7753 return SDValue();
7754}
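// Illustrative values (assumed): on Thumb1, Val = 200 (<= 255) and
// Val = 0xffffff37 (~Val = 200 <= 255) are accepted; on ARM or Thumb2,
// 0x00ff0000 is accepted because it is a valid rotated 8-bit immediate
// (ARM_AM::getSOImmVal(Val) != -1).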
7755
7756static SDValue LowerBUILD_VECTOR_i1(SDValue Op, SelectionDAG &DAG,
7757 const ARMSubtarget *ST) {
7758 SDLoc dl(Op);
7759 EVT VT = Op.getValueType();
7760
7761 assert(ST->hasMVEIntegerOps() && "LowerBUILD_VECTOR_i1 called without MVE!");
7762
7763 unsigned NumElts = VT.getVectorNumElements();
7764 unsigned BoolMask;
7765 unsigned BitsPerBool;
7766 if (NumElts == 2) {
7767 BitsPerBool = 8;
7768 BoolMask = 0xff;
7769 } else if (NumElts == 4) {
7770 BitsPerBool = 4;
7771 BoolMask = 0xf;
7772 } else if (NumElts == 8) {
7773 BitsPerBool = 2;
7774 BoolMask = 0x3;
7775 } else if (NumElts == 16) {
7776 BitsPerBool = 1;
7777 BoolMask = 0x1;
7778 } else
7779 return SDValue();
7780
7781 // If this is a single value copied into all lanes (a splat), we can just sign
7782 // extend that single value
7783 SDValue FirstOp = Op.getOperand(0);
7784 if (!isa<ConstantSDNode>(FirstOp) &&
7785 llvm::all_of(llvm::drop_begin(Op->ops()), [&FirstOp](const SDUse &U) {
7786 return U.get().isUndef() || U.get() == FirstOp;
7787 })) {
7788 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32, FirstOp,
7789 DAG.getValueType(MVT::i1));
7790 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), Ext);
7791 }
7792
7793 // First create base with bits set where known
7794 unsigned Bits32 = 0;
7795 for (unsigned i = 0; i < NumElts; ++i) {
7796 SDValue V = Op.getOperand(i);
7797 if (!isa<ConstantSDNode>(V) && !V.isUndef())
7798 continue;
7799 bool BitSet = V.isUndef() ? false : V->getAsZExtVal();
7800 if (BitSet)
7801 Bits32 |= BoolMask << (i * BitsPerBool);
7802 }
7803
7804 // Add in unknown nodes
7805 SDValue Base = DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
7806 DAG.getConstant(Bits32, dl, MVT::i32));
7807 for (unsigned i = 0; i < NumElts; ++i) {
7808 SDValue V = Op.getOperand(i);
7809 if (isa<ConstantSDNode>(V) || V.isUndef())
7810 continue;
7811 Base = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Base, V,
7812 DAG.getConstant(i, dl, MVT::i32));
7813 }
7814
7815 return Base;
7816}
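// Packing example (illustrative): a v4i1 predicate occupies 4 bits per lane
// of the 16-bit predicate value, so the constant vector <1,0,1,1> produces
// Bits32 = 0xff0f (BoolMask 0xf in lanes 0, 2 and 3) before being cast back
// to v4i1 via ARMISD::PREDICATE_CAST.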
7817
7818static SDValue LowerBUILD_VECTORToVIDUP(SDValue Op, SelectionDAG &DAG,
7819 const ARMSubtarget *ST) {
7820 if (!ST->hasMVEIntegerOps())
7821 return SDValue();
7822
7823 // We are looking for a buildvector where each element is Op[0] + i*N
7824 EVT VT = Op.getValueType();
7825 SDValue Op0 = Op.getOperand(0);
7826 unsigned NumElts = VT.getVectorNumElements();
7827
7828 // Get the increment value from operand 1
7829 SDValue Op1 = Op.getOperand(1);
7830 if (Op1.getOpcode() != ISD::ADD || Op1.getOperand(0) != Op0 ||
7831 !isa<ConstantSDNode>(Op1.getOperand(1)))
7832 return SDValue();
7833 unsigned N = Op1.getConstantOperandVal(1);
7834 if (N != 1 && N != 2 && N != 4 && N != 8)
7835 return SDValue();
7836
7837 // Check that each other operand matches
7838 for (unsigned I = 2; I < NumElts; I++) {
7839 SDValue OpI = Op.getOperand(I);
7840 if (OpI.getOpcode() != ISD::ADD || OpI.getOperand(0) != Op0 ||
7841 !isa<ConstantSDNode>(OpI.getOperand(1)) ||
7842 OpI.getConstantOperandVal(1) != I * N)
7843 return SDValue();
7844 }
7845
7846 SDLoc DL(Op);
7847 return DAG.getNode(ARMISD::VIDUP, DL, DAG.getVTList(VT, MVT::i32), Op0,
7848 DAG.getConstant(N, DL, MVT::i32));
7849}
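// Illustrative match (assumed operands): BUILD_VECTOR x, x+2, x+4, x+6 of
// type v4i32 becomes VIDUP(x, 2); an increment outside {1, 2, 4, 8} or any
// element that is not x + i*N falls back to the generic lowering.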
7850
7851// Returns true if the operation N can be treated as qr instruction variant at
7852// operand Op.
7853static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op) {
7854 switch (N->getOpcode()) {
7855 case ISD::ADD:
7856 case ISD::MUL:
7857 case ISD::SADDSAT:
7858 case ISD::UADDSAT:
7859 return true;
7860 case ISD::SUB:
7861 case ISD::SSUBSAT:
7862 case ISD::USUBSAT:
7863 return N->getOperand(1).getNode() == Op;
7864 case ISD::INTRINSIC_WO_CHAIN:
7865 switch (N->getConstantOperandVal(0)) {
7866 case Intrinsic::arm_mve_add_predicated:
7867 case Intrinsic::arm_mve_mul_predicated:
7868 case Intrinsic::arm_mve_qadd_predicated:
7869 case Intrinsic::arm_mve_vhadd:
7870 case Intrinsic::arm_mve_hadd_predicated:
7871 case Intrinsic::arm_mve_vqdmulh:
7872 case Intrinsic::arm_mve_qdmulh_predicated:
7873 case Intrinsic::arm_mve_vqrdmulh:
7874 case Intrinsic::arm_mve_qrdmulh_predicated:
7875 case Intrinsic::arm_mve_vqdmull:
7876 case Intrinsic::arm_mve_vqdmull_predicated:
7877 return true;
7878 case Intrinsic::arm_mve_sub_predicated:
7879 case Intrinsic::arm_mve_qsub_predicated:
7880 case Intrinsic::arm_mve_vhsub:
7881 case Intrinsic::arm_mve_hsub_predicated:
7882 return N->getOperand(2).getNode() == Op;
7883 default:
7884 return false;
7885 }
7886 default:
7887 return false;
7888 }
7889}
7890
7891// If this is a case we can't handle, return null and let the default
7892// expansion code take care of it.
7893SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
7894 const ARMSubtarget *ST) const {
7895 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
7896 SDLoc dl(Op);
7897 EVT VT = Op.getValueType();
7898
7899 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
7900 return LowerBUILD_VECTOR_i1(Op, DAG, ST);
7901
7902 if (SDValue R = LowerBUILD_VECTORToVIDUP(Op, DAG, ST))
7903 return R;
7904
7905 APInt SplatBits, SplatUndef;
7906 unsigned SplatBitSize;
7907 bool HasAnyUndefs;
7908 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
7909 if (SplatUndef.isAllOnes())
7910 return DAG.getUNDEF(VT);
7911
7912 // If all the users of this constant splat are qr instruction variants,
7913 // generate a vdup of the constant.
7914 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == SplatBitSize &&
7915 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32) &&
7916 all_of(BVN->uses(),
7917 [BVN](const SDNode *U) { return IsQRMVEInstruction(U, BVN); })) {
7918 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7919 : SplatBitSize == 16 ? MVT::v8i16
7920 : MVT::v16i8;
7921 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7922 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7923 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7924 }
7925
7926 if ((ST->hasNEON() && SplatBitSize <= 64) ||
7927 (ST->hasMVEIntegerOps() && SplatBitSize <= 64)) {
7928 // Check if an immediate VMOV works.
7929 EVT VmovVT;
7930 SDValue Val =
7931 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
7932 SplatBitSize, DAG, dl, VmovVT, VT, VMOVModImm);
7933
7934 if (Val.getNode()) {
7935 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
7936 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7937 }
7938
7939 // Try an immediate VMVN.
7940 uint64_t NegatedImm = (~SplatBits).getZExtValue();
7941 Val = isVMOVModifiedImm(
7942 NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, DAG, dl, VmovVT,
7943 VT, ST->hasMVEIntegerOps() ? MVEVMVNModImm : VMVNModImm);
7944 if (Val.getNode()) {
7945 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
7946 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
7947 }
7948
7949 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
7950 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
7951 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
7952 if (ImmVal != -1) {
7953 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
7954 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
7955 }
7956 }
7957
7958 // If we are under MVE, generate a VDUP(constant), bitcast to the original
7959 // type.
7960 if (ST->hasMVEIntegerOps() &&
7961 (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32)) {
7962 EVT DupVT = SplatBitSize == 32 ? MVT::v4i32
7963 : SplatBitSize == 16 ? MVT::v8i16
7964 : MVT::v16i8;
7965 SDValue Const = DAG.getConstant(SplatBits.getZExtValue(), dl, MVT::i32);
7966 SDValue VDup = DAG.getNode(ARMISD::VDUP, dl, DupVT, Const);
7967 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, VDup);
7968 }
7969 }
7970 }
7971
7972 // Scan through the operands to see if only one value is used.
7973 //
7974 // As an optimisation, even if more than one value is used it may be more
7975 // profitable to splat with one value and then change some lanes.
7976 //
7977 // Heuristically we decide to do this if the vector has a "dominant" value,
7978 // defined as splatted to more than half of the lanes.
7979 unsigned NumElts = VT.getVectorNumElements();
7980 bool isOnlyLowElement = true;
7981 bool usesOnlyOneValue = true;
7982 bool hasDominantValue = false;
7983 bool isConstant = true;
7984
7985 // Map of the number of times a particular SDValue appears in the
7986 // element list.
7987 DenseMap<SDValue, unsigned> ValueCounts;
7988 SDValue Value;
7989 for (unsigned i = 0; i < NumElts; ++i) {
7990 SDValue V = Op.getOperand(i);
7991 if (V.isUndef())
7992 continue;
7993 if (i > 0)
7994 isOnlyLowElement = false;
7995 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
7996 isConstant = false;
7997
7998 ValueCounts.insert(std::make_pair(V, 0));
7999 unsigned &Count = ValueCounts[V];
8000
8001 // Is this value dominant? (takes up more than half of the lanes)
8002 if (++Count > (NumElts / 2)) {
8003 hasDominantValue = true;
8004 Value = V;
8005 }
8006 }
8007 if (ValueCounts.size() != 1)
8008 usesOnlyOneValue = false;
8009 if (!Value.getNode() && !ValueCounts.empty())
8010 Value = ValueCounts.begin()->first;
8011
8012 if (ValueCounts.empty())
8013 return DAG.getUNDEF(VT);
8014
8015 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
8016 // Keep going if we are hitting this case.
8017 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
8018 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
8019
8020 unsigned EltSize = VT.getScalarSizeInBits();
8021
8022 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
8023 // i32 and try again.
8024 if (hasDominantValue && EltSize <= 32) {
8025 if (!isConstant) {
8026 SDValue N;
8027
8028 // If we are VDUPing a value that comes directly from a vector, that will
8029 // cause an unnecessary move to and from a GPR, where instead we could
8030 // just use VDUPLANE. We can only do this if the lane being extracted
8031 // is at a constant index, as the VDUP from lane instructions only have
8032 // constant-index forms.
8033 ConstantSDNode *constIndex;
8034 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8035 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
8036 // We need to create a new undef vector to use for the VDUPLANE if the
8037 // size of the vector from which we get the value is different than the
8038 // size of the vector that we need to create. We will insert the element
8039 // such that the register coalescer will remove unnecessary copies.
8040 if (VT != Value->getOperand(0).getValueType()) {
8041 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
8042 VT.getVectorNumElements();
8043 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8044 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
8045 Value, DAG.getConstant(index, dl, MVT::i32)),
8046 DAG.getConstant(index, dl, MVT::i32));
8047 } else
8048 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8049 Value->getOperand(0), Value->getOperand(1));
8050 } else
8051 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
8052
8053 if (!usesOnlyOneValue) {
8054 // The dominant value was splatted as 'N', but we now have to insert
8055 // all differing elements.
8056 for (unsigned I = 0; I < NumElts; ++I) {
8057 if (Op.getOperand(I) == Value)
8058 continue;
8059 SmallVector<SDValue, 3> Ops;
8060 Ops.push_back(N);
8061 Ops.push_back(Op.getOperand(I));
8062 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
8063 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
8064 }
8065 }
8066 return N;
8067 }
8068 if (VT.getVectorElementType().isFloatingPoint()) {
8069 SmallVector<SDValue, 8> Ops;
8070 MVT FVT = VT.getVectorElementType().getSimpleVT();
8071 assert(FVT == MVT::f32 || FVT == MVT::f16);
8072 MVT IVT = (FVT == MVT::f32) ? MVT::i32 : MVT::i16;
8073 for (unsigned i = 0; i < NumElts; ++i)
8074 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, IVT,
8075 Op.getOperand(i)));
8076 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), IVT, NumElts);
8077 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
8078 Val = LowerBUILD_VECTOR(Val, DAG, ST);
8079 if (Val.getNode())
8080 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8081 }
8082 if (usesOnlyOneValue) {
8083 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
8084 if (isConstant && Val.getNode())
8085 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
8086 }
8087 }
8088
8089 // If all elements are constants and the case above didn't get hit, fall back
8090 // to the default expansion, which will generate a load from the constant
8091 // pool.
8092 if (isConstant)
8093 return SDValue();
8094
8095 // Reconstruct the BUILDVECTOR to one of the legal shuffles (such as vext and
8096 // vmovn). Empirical tests suggest this is rarely worth it for vectors of
8097 // length <= 2.
8098 if (NumElts >= 4)
8099 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
8100 return shuffle;
8101
8102 // Attempt to turn a buildvector of scalar fptrunc's or fpext's back into
8103 // VCVT's
8104 if (SDValue VCVT = LowerBuildVectorOfFPTrunc(Op, DAG, Subtarget))
8105 return VCVT;
8106 if (SDValue VCVT = LowerBuildVectorOfFPExt(Op, DAG, Subtarget))
8107 return VCVT;
8108
8109 if (ST->hasNEON() && VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
8110 // If we haven't found an efficient lowering, try splitting a 128-bit vector
8111 // into two 64-bit vectors; we might discover a better way to lower it.
8112 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
8113 EVT ExtVT = VT.getVectorElementType();
8114 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
8115 SDValue Lower = DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[0], NumElts / 2));
8116 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
8117 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
8118 SDValue Upper =
8119 DAG.getBuildVector(HVT, dl, ArrayRef(&Ops[NumElts / 2], NumElts / 2));
8120 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
8121 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
8122 if (Lower && Upper)
8123 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
8124 }
8125
8126 // Vectors with 32- or 64-bit elements can be built by directly assigning
8127 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
8128 // will be legalized.
8129 if (EltSize >= 32) {
8130 // Do the expansion with floating-point types, since that is what the VFP
8131 // registers are defined to use, and since i64 is not legal.
8132 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8133 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8134 SmallVector<SDValue, 8> Ops;
8135 for (unsigned i = 0; i < NumElts; ++i)
8136 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
8137 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
8138 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
8139 }
8140
8141 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
8142 // know the default expansion would otherwise fall back on something even
8143 // worse. For a vector with one or two non-undef values, that's
8144 // scalar_to_vector for the elements followed by a shuffle (provided the
8145 // shuffle is valid for the target) and materialization element by element
8146 // on the stack followed by a load for everything else.
8147 if (!isConstant && !usesOnlyOneValue) {
8148 SDValue Vec = DAG.getUNDEF(VT);
8149 for (unsigned i = 0 ; i < NumElts; ++i) {
8150 SDValue V = Op.getOperand(i);
8151 if (V.isUndef())
8152 continue;
8153 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
8154 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
8155 }
8156 return Vec;
8157 }
8158
8159 return SDValue();
8160}
8161
8162// Gather data to see if the operation can be modelled as a
8163// shuffle in combination with VEXTs.
8164SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
8165 SelectionDAG &DAG) const {
8166 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
8167 SDLoc dl(Op);
8168 EVT VT = Op.getValueType();
8169 unsigned NumElts = VT.getVectorNumElements();
8170
8171 struct ShuffleSourceInfo {
8172 SDValue Vec;
8173 unsigned MinElt = std::numeric_limits<unsigned>::max();
8174 unsigned MaxElt = 0;
8175
8176 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
8177 // be compatible with the shuffle we intend to construct. As a result
8178 // ShuffleVec will be some sliding window into the original Vec.
8179 SDValue ShuffleVec;
8180
8181 // Code should guarantee that element i in Vec starts at element "WindowBase
8182 // + i * WindowScale in ShuffleVec".
8183 int WindowBase = 0;
8184 int WindowScale = 1;
8185
8186 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
8187
8188 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
8189 };
8190
8191 // First gather all vectors used as an immediate source for this BUILD_VECTOR
8192 // node.
8193 SmallVector<ShuffleSourceInfo, 2> Sources;
8194 for (unsigned i = 0; i < NumElts; ++i) {
8195 SDValue V = Op.getOperand(i);
8196 if (V.isUndef())
8197 continue;
8198 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
8199 // A shuffle can only come from building a vector from various
8200 // elements of other vectors.
8201 return SDValue();
8202 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
8203 // Furthermore, shuffles require a constant mask, whereas extractelts
8204 // accept variable indices.
8205 return SDValue();
8206 }
8207
8208 // Add this element source to the list if it's not already there.
8209 SDValue SourceVec = V.getOperand(0);
8210 auto Source = llvm::find(Sources, SourceVec);
8211 if (Source == Sources.end())
8212 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
8213
8214 // Update the minimum and maximum lane number seen.
8215 unsigned EltNo = V.getConstantOperandVal(1);
8216 Source->MinElt = std::min(Source->MinElt, EltNo);
8217 Source->MaxElt = std::max(Source->MaxElt, EltNo);
8218 }
8219
8220 // Currently only do something sane when at most two source vectors
8221 // are involved.
8222 if (Sources.size() > 2)
8223 return SDValue();
8224
8225 // Find out the smallest element size among result and two sources, and use
8226 // it as element size to build the shuffle_vector.
8227 EVT SmallestEltTy = VT.getVectorElementType();
8228 for (auto &Source : Sources) {
8229 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
8230 if (SrcEltTy.bitsLT(SmallestEltTy))
8231 SmallestEltTy = SrcEltTy;
8232 }
8233 unsigned ResMultiplier =
8234 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
8235 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
8236 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
8237
8238 // If the source vector is too wide or too narrow, we may nevertheless be able
8239 // to construct a compatible shuffle either by concatenating it with UNDEF or
8240 // extracting a suitable range of elements.
8241 for (auto &Src : Sources) {
8242 EVT SrcVT = Src.ShuffleVec.getValueType();
8243
8244 uint64_t SrcVTSize = SrcVT.getFixedSizeInBits();
8245 uint64_t VTSize = VT.getFixedSizeInBits();
8246 if (SrcVTSize == VTSize)
8247 continue;
8248
8249 // This stage of the search produces a source with the same element type as
8250 // the original, but with a total width matching the BUILD_VECTOR output.
8251 EVT EltVT = SrcVT.getVectorElementType();
8252 unsigned NumSrcElts = VTSize / EltVT.getFixedSizeInBits();
8253 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
8254
8255 if (SrcVTSize < VTSize) {
8256 if (2 * SrcVTSize != VTSize)
8257 return SDValue();
8258 // We can pad out the smaller vector for free, so if it's part of a
8259 // shuffle...
8260 Src.ShuffleVec =
8261 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
8262 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
8263 continue;
8264 }
8265
8266 if (SrcVTSize != 2 * VTSize)
8267 return SDValue();
8268
8269 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
8270 // Span too large for a VEXT to cope
8271 return SDValue();
8272 }
8273
8274 if (Src.MinElt >= NumSrcElts) {
8275 // The extraction can just take the second half
8276 Src.ShuffleVec =
8277 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8278 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8279 Src.WindowBase = -NumSrcElts;
8280 } else if (Src.MaxElt < NumSrcElts) {
8281 // The extraction can just take the first half
8282 Src.ShuffleVec =
8283 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8284 DAG.getConstant(0, dl, MVT::i32));
8285 } else {
8286 // An actual VEXT is needed
8287 SDValue VEXTSrc1 =
8288 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8289 DAG.getConstant(0, dl, MVT::i32));
8290 SDValue VEXTSrc2 =
8291 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
8292 DAG.getConstant(NumSrcElts, dl, MVT::i32));
8293
8294 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
8295 VEXTSrc2,
8296 DAG.getConstant(Src.MinElt, dl, MVT::i32));
8297 Src.WindowBase = -Src.MinElt;
8298 }
8299 }
8300
8301 // Another possible incompatibility occurs from the vector element types. We
8302 // can fix this by bitcasting the source vectors to the same type we intend
8303 // for the shuffle.
8304 for (auto &Src : Sources) {
8305 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
8306 if (SrcEltTy == SmallestEltTy)
8307 continue;
8308 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
8309 Src.ShuffleVec = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, ShuffleVT, Src.ShuffleVec);
8310 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
8311 Src.WindowBase *= Src.WindowScale;
8312 }
8313
8314 // Final check before we try to actually produce a shuffle.
8315 LLVM_DEBUG(for (auto Src
8316 : Sources)
8317 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
8318
8319 // The stars all align, our next step is to produce the mask for the shuffle.
8320 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
8321 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
8322 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
8323 SDValue Entry = Op.getOperand(i);
8324 if (Entry.isUndef())
8325 continue;
8326
8327 auto Src = llvm::find(Sources, Entry.getOperand(0));
8328 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
8329
8330 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
8331 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
8332 // segment.
8333 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
8334 int BitsDefined = std::min(OrigEltTy.getScalarSizeInBits(),
8335 VT.getScalarSizeInBits());
8336 int LanesDefined = BitsDefined / BitsPerShuffleLane;
8337
8338 // This source is expected to fill ResMultiplier lanes of the final shuffle,
8339 // starting at the appropriate offset.
8340 int *LaneMask = &Mask[i * ResMultiplier];
8341
8342 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
8343 ExtractBase += NumElts * (Src - Sources.begin());
8344 for (int j = 0; j < LanesDefined; ++j)
8345 LaneMask[j] = ExtractBase + j;
8346 }
8347
8348
8349 // We can't handle more than two sources. This should have already
8350 // been checked before this point.
8351 assert(Sources.size() <= 2 && "Too many sources!");
8352
8353 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
8354 for (unsigned i = 0; i < Sources.size(); ++i)
8355 ShuffleOps[i] = Sources[i].ShuffleVec;
8356
8357 SDValue Shuffle = buildLegalVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
8358 ShuffleOps[1], Mask, DAG);
8359 if (!Shuffle)
8360 return SDValue();
8361 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Shuffle);
8362}
8363
8364enum ShuffleOpCodes {
8365 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8366 OP_VREV,
8367 OP_VDUP0,
8368 OP_VDUP1,
8369 OP_VDUP2,
8370 OP_VDUP3,
8371 OP_VEXT1,
8372 OP_VEXT2,
8373 OP_VEXT3,
8374 OP_VUZPL, // VUZP, left result
8375 OP_VUZPR, // VUZP, right result
8376 OP_VZIPL, // VZIP, left result
8377 OP_VZIPR, // VZIP, right result
8378 OP_VTRNL, // VTRN, left result
8379 OP_VTRNR // VTRN, right result
8380};
8381
8382static bool isLegalMVEShuffleOp(unsigned PFEntry) {
8383 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8384 switch (OpNum) {
8385 case OP_COPY:
8386 case OP_VREV:
8387 case OP_VDUP0:
8388 case OP_VDUP1:
8389 case OP_VDUP2:
8390 case OP_VDUP3:
8391 return true;
8392 }
8393 return false;
8394}
8395
8396/// isShuffleMaskLegal - Targets can use this to indicate that they only
8397/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
8398/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
8399/// are assumed to be legal.
8400bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
8401 if (VT.getVectorNumElements() == 4 &&
8402 (VT.is128BitVector() || VT.is64BitVector())) {
8403 unsigned PFIndexes[4];
8404 for (unsigned i = 0; i != 4; ++i) {
8405 if (M[i] < 0)
8406 PFIndexes[i] = 8;
8407 else
8408 PFIndexes[i] = M[i];
8409 }
8410
8411 // Compute the index in the perfect shuffle table.
8412 unsigned PFTableIndex =
8413 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8414 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8415 unsigned Cost = (PFEntry >> 30);
8416
8417 if (Cost <= 4 && (Subtarget->hasNEON() || isLegalMVEShuffleOp(PFEntry)))
8418 return true;
8419 }
8420
8421 bool ReverseVEXT, isV_UNDEF;
8422 unsigned Imm, WhichResult;
8423
8424 unsigned EltSize = VT.getScalarSizeInBits();
8425 if (EltSize >= 32 ||
8427 ShuffleVectorInst::isIdentityMask(M, M.size()) ||
8428 isVREVMask(M, VT, 64) ||
8429 isVREVMask(M, VT, 32) ||
8430 isVREVMask(M, VT, 16))
8431 return true;
8432 else if (Subtarget->hasNEON() &&
8433 (isVEXTMask(M, VT, ReverseVEXT, Imm) ||
8434 isVTBLMask(M, VT) ||
8435 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF)))
8436 return true;
8437 else if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8438 isReverseMask(M, VT))
8439 return true;
8440 else if (Subtarget->hasMVEIntegerOps() &&
8441 (isVMOVNMask(M, VT, true, false) ||
8442 isVMOVNMask(M, VT, false, false) || isVMOVNMask(M, VT, true, true)))
8443 return true;
8444 else if (Subtarget->hasMVEIntegerOps() &&
8445 (isTruncMask(M, VT, false, false) ||
8446 isTruncMask(M, VT, false, true) ||
8447 isTruncMask(M, VT, true, false) || isTruncMask(M, VT, true, true)))
8448 return true;
8449 else
8450 return false;
8451}
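// Worked example of the perfect-shuffle lookup above: the mask <0,1,2,3>
// maps to PFIndexes {0,1,2,3} and table index 0*729 + 1*81 + 2*9 + 3 = 102,
// while an undef lane is encoded as 8, so <u,u,u,3> gives
// 8*729 + 8*81 + 8*9 + 3 = 6555. The top two bits of the selected entry are
// the cost compared against 4.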
8452
8453/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8454/// the specified operations to build the shuffle.
8455static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
8456 SDValue RHS, SelectionDAG &DAG,
8457 const SDLoc &dl) {
8458 unsigned OpNum = (PFEntry >> 26) & 0x0F;
8459 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8460 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8461
8462 if (OpNum == OP_COPY) {
8463 if (LHSID == (1*9+2)*9+3) return LHS;
8464 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8465 return RHS;
8466 }
8467
8468 SDValue OpLHS, OpRHS;
8469 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
8470 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
8471 EVT VT = OpLHS.getValueType();
8472
8473 switch (OpNum) {
8474 default: llvm_unreachable("Unknown shuffle opcode!");
8475 case OP_VREV:
8476 // VREV divides the vector in half and swaps within the half.
8477 if (VT.getScalarSizeInBits() == 32)
8478 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
8479 // vrev <4 x i16> -> VREV32
8480 if (VT.getScalarSizeInBits() == 16)
8481 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
8482 // vrev <4 x i8> -> VREV16
8483 assert(VT.getScalarSizeInBits() == 8);
8484 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
8485 case OP_VDUP0:
8486 case OP_VDUP1:
8487 case OP_VDUP2:
8488 case OP_VDUP3:
8489 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
8490 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
8491 case OP_VEXT1:
8492 case OP_VEXT2:
8493 case OP_VEXT3:
8494 return DAG.getNode(ARMISD::VEXT, dl, VT,
8495 OpLHS, OpRHS,
8496 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
8497 case OP_VUZPL:
8498 case OP_VUZPR:
8499 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
8500 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
8501 case OP_VZIPL:
8502 case OP_VZIPR:
8503 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
8504 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
8505 case OP_VTRNL:
8506 case OP_VTRNR:
8507 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
8508 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
8509 }
8510}
8511
8512static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
8513 ArrayRef<int> ShuffleMask,
8514 SelectionDAG &DAG) {
8515 // Check to see if we can use the VTBL instruction.
8516 SDValue V1 = Op.getOperand(0);
8517 SDValue V2 = Op.getOperand(1);
8518 SDLoc DL(Op);
8519
8520 SmallVector<SDValue, 8> VTBLMask;
8521 for (int I : ShuffleMask)
8522 VTBLMask.push_back(DAG.getConstant(I, DL, MVT::i32));
8523
8524 if (V2.getNode()->isUndef())
8525 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
8526 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8527
8528 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
8529 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
8530}
8531
8532static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
8533 SDLoc DL(Op);
8534 EVT VT = Op.getValueType();
8535
8536 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8537 "Expect an v8i16/v16i8 type");
8538 SDValue OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, Op.getOperand(0));
8539 // For a v16i8 type: After the VREV, we have got <7, ..., 0, 15, ..., 8>. Now,
8540 // extract the first 8 bytes into the top double word and the last 8 bytes
8541 // into the bottom double word, through a new vector shuffle that will be
8542 // turned into a VEXT on Neon, or a couple of VMOVDs on MVE.
8543 std::vector<int> NewMask;
8544 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8545 NewMask.push_back(VT.getVectorNumElements() / 2 + i);
8546 for (unsigned i = 0; i < VT.getVectorNumElements() / 2; i++)
8547 NewMask.push_back(i);
8548 return DAG.getVectorShuffle(VT, DL, OpLHS, OpLHS, NewMask);
8549}
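// Walk-through for v16i8: VREV64 turns <a0,...,a15> into <a7,...,a0,a15,...,a8>,
// and the mask built above, <8,...,15,0,...,7>, then swaps the two double
// words, yielding the fully reversed vector <a15,...,a0>.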
8550
8551static EVT getVectorTyFromPredicateVector(EVT VT) {
8552 switch (VT.getSimpleVT().SimpleTy) {
8553 case MVT::v2i1:
8554 return MVT::v2f64;
8555 case MVT::v4i1:
8556 return MVT::v4i32;
8557 case MVT::v8i1:
8558 return MVT::v8i16;
8559 case MVT::v16i1:
8560 return MVT::v16i8;
8561 default:
8562 llvm_unreachable("Unexpected vector predicate type");
8563 }
8564}
8565
8566static SDValue PromoteMVEPredVector(SDLoc dl, SDValue Pred, EVT VT,
8567 SelectionDAG &DAG) {
8568 // Converting from boolean predicates to integers involves creating a vector
8569 // of all ones or all zeroes and selecting the lanes based upon the real
8570 // predicate.
8571 SDValue AllOnes =
8572 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0xff), dl, MVT::i32);
8573 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllOnes);
8574
8575 SDValue AllZeroes =
8576 DAG.getTargetConstant(ARM_AM::createVMOVModImm(0xe, 0x0), dl, MVT::i32);
8577 AllZeroes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v16i8, AllZeroes);
8578
8579 // Get full vector type from predicate type
8580 EVT NewVT = getVectorTyFromPredicateVector(VT);
8581
8582 SDValue RecastV1;
8583 // If the real predicate is an v8i1 or v4i1 (not v16i1) then we need to recast
8584 // this to a v16i1. This cannot be done with an ordinary bitcast because the
8585 // sizes are not the same. We have to use a MVE specific PREDICATE_CAST node,
8586 // since we know in hardware the sizes are really the same.
8587 if (VT != MVT::v16i1)
8588 RecastV1 = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Pred);
8589 else
8590 RecastV1 = Pred;
8591
8592 // Select either all ones or zeroes depending upon the real predicate bits.
8593 SDValue PredAsVector =
8594 DAG.getNode(ISD::VSELECT, dl, MVT::v16i8, RecastV1, AllOnes, AllZeroes);
8595
8596 // Recast our new predicate-as-integer v16i8 vector into something
8597 // appropriate for the shuffle, i.e. v4i32 for a real v4i1 predicate.
8598 return DAG.getNode(ISD::BITCAST, dl, NewVT, PredAsVector);
8599}
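// Sketch of the effect, assuming the usual MVE predicate layout (a v4i1
// predicate occupies four VPR bits per lane): recasting v4i1 to v16i1 marks
// four byte lanes per original lane, the VSELECT then produces 0xff or 0x00
// in each of those bytes, and the final bitcast to v4i32 yields 0xffffffff
// or 0x0 per original predicate lane.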
8600
8601static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG,
8602 const ARMSubtarget *ST) {
8603 EVT VT = Op.getValueType();
8604 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8605 ArrayRef<int> ShuffleMask = SVN->getMask();
8606
8607 assert(ST->hasMVEIntegerOps() &&
8608 "No support for vector shuffle of boolean predicates");
8609
8610 SDValue V1 = Op.getOperand(0);
8611 SDValue V2 = Op.getOperand(1);
8612 SDLoc dl(Op);
8613 if (isReverseMask(ShuffleMask, VT)) {
8614 SDValue cast = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, V1);
8615 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, cast);
8616 SDValue srl = DAG.getNode(ISD::SRL, dl, MVT::i32, rbit,
8617 DAG.getConstant(16, dl, MVT::i32));
8618 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, srl);
8619 }
8620
8621 // Until we can come up with optimised cases for every single vector
8622 // shuffle in existence we have chosen the least painful strategy. This is
8623 // to essentially promote the boolean predicate to an 8-bit integer, where
8624 // each predicate represents a byte. Then we fall back on a normal integer
8625 // vector shuffle and convert the result back into a predicate vector. In
8626 // many cases the generated code might be even better than scalar code
8627 // operating on bits. Just imagine trying to shuffle 8 arbitrary 2-bit
8628 // fields in a register into 8 other arbitrary 2-bit fields!
8629 SDValue PredAsVector1 = PromoteMVEPredVector(dl, V1, VT, DAG);
8630 EVT NewVT = PredAsVector1.getValueType();
8631 SDValue PredAsVector2 = V2.isUndef() ? DAG.getUNDEF(NewVT)
8632 : PromoteMVEPredVector(dl, V2, VT, DAG);
8633 assert(PredAsVector2.getValueType() == NewVT &&
8634 "Expected identical vector type in expanded i1 shuffle!");
8635
8636 // Do the shuffle!
8637 SDValue Shuffled = DAG.getVectorShuffle(NewVT, dl, PredAsVector1,
8638 PredAsVector2, ShuffleMask);
8639
8640 // Now return the result of comparing the shuffled vector with zero,
8641 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1. For a v2i1
8642 // we convert to a v4i1 compare to fill in the two halves of the i64 as i32s.
8643 if (VT == MVT::v2i1) {
8644 SDValue BC = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Shuffled);
8645 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, BC,
8646 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8647 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
8648 }
8649 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
8650 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
8651}
8652
8653static SDValue LowerVECTOR_SHUFFLEUsingMovs(SDValue Op,
8654 ArrayRef<int> ShuffleMask,
8655 SelectionDAG &DAG) {
8656 // Attempt to lower the vector shuffle using as many whole register movs as
8657 // possible. This is useful for types smaller than 32bits, which would
8658 // often otherwise become a series of GPR movs.
8659 SDLoc dl(Op);
8660 EVT VT = Op.getValueType();
8661 if (VT.getScalarSizeInBits() >= 32)
8662 return SDValue();
8663
8664 assert((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
8665 "Unexpected vector type");
8666 int NumElts = VT.getVectorNumElements();
8667 int QuarterSize = NumElts / 4;
8668 // The four final parts of the vector, as i32's
8669 SDValue Parts[4];
8670
8671 // Look for full lane vmovs like <0,1,2,3> or <u,5,6,7> etc, (but not
8672 // <u,u,u,u>), returning the vmov lane index
8673 auto getMovIdx = [](ArrayRef<int> ShuffleMask, int Start, int Length) {
8674 // Detect which mov lane this would be from the first non-undef element.
8675 int MovIdx = -1;
8676 for (int i = 0; i < Length; i++) {
8677 if (ShuffleMask[Start + i] >= 0) {
8678 if (ShuffleMask[Start + i] % Length != i)
8679 return -1;
8680 MovIdx = ShuffleMask[Start + i] / Length;
8681 break;
8682 }
8683 }
8684 // If all items are undef, leave this for other combines
8685 if (MovIdx == -1)
8686 return -1;
8687 // Check the remaining values are the correct part of the same mov
8688 for (int i = 1; i < Length; i++) {
8689 if (ShuffleMask[Start + i] >= 0 &&
8690 (ShuffleMask[Start + i] / Length != MovIdx ||
8691 ShuffleMask[Start + i] % Length != i))
8692 return -1;
8693 }
8694 return MovIdx;
8695 };
8696
8697 for (int Part = 0; Part < 4; ++Part) {
8698 // Does this part look like a mov
8699 int Elt = getMovIdx(ShuffleMask, Part * QuarterSize, QuarterSize);
8700 if (Elt != -1) {
8701 SDValue Input = Op->getOperand(0);
8702 if (Elt >= 4) {
8703 Input = Op->getOperand(1);
8704 Elt -= 4;
8705 }
8706 SDValue BitCast = DAG.getBitcast(MVT::v4f32, Input);
8707 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, BitCast,
8708 DAG.getConstant(Elt, dl, MVT::i32));
8709 }
8710 }
8711
8712 // Nothing interesting found, just return
8713 if (!Parts[0] && !Parts[1] && !Parts[2] && !Parts[3])
8714 return SDValue();
8715
8716 // The other parts need to be built with the old shuffle vector, cast to a
8717 // v4i32 and extract_vector_elts
8718 if (!Parts[0] || !Parts[1] || !Parts[2] || !Parts[3]) {
8719 SmallVector<int, 16> NewShuffleMask;
8720 for (int Part = 0; Part < 4; ++Part)
8721 for (int i = 0; i < QuarterSize; i++)
8722 NewShuffleMask.push_back(
8723 Parts[Part] ? -1 : ShuffleMask[Part * QuarterSize + i]);
8724 SDValue NewShuffle = DAG.getVectorShuffle(
8725 VT, dl, Op->getOperand(0), Op->getOperand(1), NewShuffleMask);
8726 SDValue BitCast = DAG.getBitcast(MVT::v4f32, NewShuffle);
8727
8728 for (int Part = 0; Part < 4; ++Part)
8729 if (!Parts[Part])
8730 Parts[Part] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32,
8731 BitCast, DAG.getConstant(Part, dl, MVT::i32));
8732 }
8733 // Build a vector out of the various parts and bitcast it back to the original
8734 // type.
8735 SDValue NewVec = DAG.getNode(ARMISD::BUILD_VECTOR, dl, MVT::v4f32, Parts);
8736 return DAG.getBitcast(VT, NewVec);
8737}
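// Worked example for a v8i16 shuffle with mask <2,3,u,u,8,9,14,15>: with
// QuarterSize = 2, part 0 is a whole-lane mov of f32 lane 1 of the first
// input, parts 2 and 3 come from f32 lanes 0 and 3 of the second input, and
// the all-undef part 1 is filled from the reduced shuffle built above.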
8738
8739static SDValue LowerVECTOR_SHUFFLEUsingOneOff(SDValue Op,
8740 ArrayRef<int> ShuffleMask,
8741 SelectionDAG &DAG) {
8742 SDValue V1 = Op.getOperand(0);
8743 SDValue V2 = Op.getOperand(1);
8744 EVT VT = Op.getValueType();
8745 unsigned NumElts = VT.getVectorNumElements();
8746
8747 // A One-Off Identity mask is one that is mostly an identity mask from a
8748 // single source but contains a single element out-of-place, either from a
8749 // different vector or from another position in the same vector. As opposed to
8750 // lowering this via a ARMISD::BUILD_VECTOR we can generate an extract/insert
8751 // pair directly.
8752 auto isOneOffIdentityMask = [](ArrayRef<int> Mask, EVT VT, int BaseOffset,
8753 int &OffElement) {
8754 OffElement = -1;
8755 int NonUndef = 0;
8756 for (int i = 0, NumMaskElts = Mask.size(); i < NumMaskElts; ++i) {
8757 if (Mask[i] == -1)
8758 continue;
8759 NonUndef++;
8760 if (Mask[i] != i + BaseOffset) {
8761 if (OffElement == -1)
8762 OffElement = i;
8763 else
8764 return false;
8765 }
8766 }
8767 return NonUndef > 2 && OffElement != -1;
8768 };
8769 int OffElement;
8770 SDValue VInput;
8771 if (isOneOffIdentityMask(ShuffleMask, VT, 0, OffElement))
8772 VInput = V1;
8773 else if (isOneOffIdentityMask(ShuffleMask, VT, NumElts, OffElement))
8774 VInput = V2;
8775 else
8776 return SDValue();
8777
8778 SDLoc dl(Op);
8779 EVT SVT = VT.getScalarType() == MVT::i8 || VT.getScalarType() == MVT::i16
8780 ? MVT::i32
8781 : VT.getScalarType();
8782 SDValue Elt = DAG.getNode(
8783 ISD::EXTRACT_VECTOR_ELT, dl, SVT,
8784 ShuffleMask[OffElement] < (int)NumElts ? V1 : V2,
8785 DAG.getVectorIdxConstant(ShuffleMask[OffElement] % NumElts, dl));
8786 return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, VInput, Elt,
8787 DAG.getVectorIdxConstant(OffElement % NumElts, dl));
8788}
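// Worked example: a v4i32 shuffle of V1,V2 with mask <0,1,6,3> is an
// identity on V1 except for lane 2, so it lowers to an EXTRACT_VECTOR_ELT of
// lane 2 of V2 followed by an INSERT_VECTOR_ELT into lane 2 of V1 instead of
// a full two-input shuffle.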
8789
8790static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
8791 const ARMSubtarget *ST) {
8792 SDValue V1 = Op.getOperand(0);
8793 SDValue V2 = Op.getOperand(1);
8794 SDLoc dl(Op);
8795 EVT VT = Op.getValueType();
8796 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
8797 unsigned EltSize = VT.getScalarSizeInBits();
8798
8799 if (ST->hasMVEIntegerOps() && EltSize == 1)
8800 return LowerVECTOR_SHUFFLE_i1(Op, DAG, ST);
8801
8802 // Convert shuffles that are directly supported on NEON to target-specific
8803 // DAG nodes, instead of keeping them as shuffles and matching them again
8804 // during code selection. This is more efficient and avoids the possibility
8805 // of inconsistencies between legalization and selection.
8806 // FIXME: floating-point vectors should be canonicalized to integer vectors
8807 // of the same size so that they get CSEd properly.
8808 ArrayRef<int> ShuffleMask = SVN->getMask();
8809
8810 if (EltSize <= 32) {
8811 if (SVN->isSplat()) {
8812 int Lane = SVN->getSplatIndex();
8813 // If this is undef splat, generate it via "just" vdup, if possible.
8814 if (Lane == -1) Lane = 0;
8815
8816 // Test if V1 is a SCALAR_TO_VECTOR.
8817 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
8818 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8819 }
8820 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
8821 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
8822 // reaches it).
8823 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
8824 !isa<ConstantSDNode>(V1.getOperand(0))) {
8825 bool IsScalarToVector = true;
8826 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
8827 if (!V1.getOperand(i).isUndef()) {
8828 IsScalarToVector = false;
8829 break;
8830 }
8831 if (IsScalarToVector)
8832 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
8833 }
8834 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
8835 DAG.getConstant(Lane, dl, MVT::i32));
8836 }
8837
8838 bool ReverseVEXT = false;
8839 unsigned Imm = 0;
8840 if (ST->hasNEON() && isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
8841 if (ReverseVEXT)
8842 std::swap(V1, V2);
8843 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
8844 DAG.getConstant(Imm, dl, MVT::i32));
8845 }
8846
8847 if (isVREVMask(ShuffleMask, VT, 64))
8848 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
8849 if (isVREVMask(ShuffleMask, VT, 32))
8850 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
8851 if (isVREVMask(ShuffleMask, VT, 16))
8852 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
8853
8854 if (ST->hasNEON() && V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
8855 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
8856 DAG.getConstant(Imm, dl, MVT::i32));
8857 }
8858
8859 // Check for Neon shuffles that modify both input vectors in place.
8860 // If both results are used, i.e., if there are two shuffles with the same
8861 // source operands and with masks corresponding to both results of one of
8862 // these operations, DAG memoization will ensure that a single node is
8863 // used for both shuffles.
8864 unsigned WhichResult = 0;
8865 bool isV_UNDEF = false;
8866 if (ST->hasNEON()) {
8867 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8868 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
8869 if (isV_UNDEF)
8870 V2 = V1;
8871 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
8872 .getValue(WhichResult);
8873 }
8874 }
8875 if (ST->hasMVEIntegerOps()) {
8876 if (isVMOVNMask(ShuffleMask, VT, false, false))
8877 return DAG.getNode(ARMISD::VMOVN, dl, VT, V2, V1,
8878 DAG.getConstant(0, dl, MVT::i32));
8879 if (isVMOVNMask(ShuffleMask, VT, true, false))
8880 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V2,
8881 DAG.getConstant(1, dl, MVT::i32));
8882 if (isVMOVNMask(ShuffleMask, VT, true, true))
8883 return DAG.getNode(ARMISD::VMOVN, dl, VT, V1, V1,
8884 DAG.getConstant(1, dl, MVT::i32));
8885 }
8886
8887 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
8888 // shuffles that produce a result larger than their operands with:
8889 // shuffle(concat(v1, undef), concat(v2, undef))
8890 // ->
8891 // shuffle(concat(v1, v2), undef)
8892 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
8893 //
8894 // This is useful in the general case, but there are special cases where
8895 // native shuffles produce larger results: the two-result ops.
8896 //
8897 // Look through the concat when lowering them:
8898 // shuffle(concat(v1, v2), undef)
8899 // ->
8900 // concat(VZIP(v1, v2):0, :1)
8901 //
8902 if (ST->hasNEON() && V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
8903 SDValue SubV1 = V1->getOperand(0);
8904 SDValue SubV2 = V1->getOperand(1);
8905 EVT SubVT = SubV1.getValueType();
8906
8907 // We expect these to have been canonicalized to -1.
8908 assert(llvm::all_of(ShuffleMask, [&](int i) {
8909 return i < (int)VT.getVectorNumElements();
8910 }) && "Unexpected shuffle index into UNDEF operand!");
8911
8912 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
8913 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
8914 if (isV_UNDEF)
8915 SubV2 = SubV1;
8916 assert((WhichResult == 0) &&
8917 "In-place shuffle of concat can only have one result!");
8918 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
8919 SubV1, SubV2);
8920 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
8921 Res.getValue(1));
8922 }
8923 }
8924 }
8925
8926 if (ST->hasMVEIntegerOps() && EltSize <= 32) {
8927 if (SDValue V = LowerVECTOR_SHUFFLEUsingOneOff(Op, ShuffleMask, DAG))
8928 return V;
8929
8930 for (bool Top : {false, true}) {
8931 for (bool SingleSource : {false, true}) {
8932 if (isTruncMask(ShuffleMask, VT, Top, SingleSource)) {
8933 MVT FromSVT = MVT::getIntegerVT(EltSize * 2);
8934 MVT FromVT = MVT::getVectorVT(FromSVT, ShuffleMask.size() / 2);
8935 SDValue Lo = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT, V1);
8936 SDValue Hi = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, FromVT,
8937 SingleSource ? V1 : V2);
8938 if (Top) {
8939 SDValue Amt = DAG.getConstant(EltSize, dl, FromVT);
8940 Lo = DAG.getNode(ISD::SRL, dl, FromVT, Lo, Amt);
8941 Hi = DAG.getNode(ISD::SRL, dl, FromVT, Hi, Amt);
8942 }
8943 return DAG.getNode(ARMISD::MVETRUNC, dl, VT, Lo, Hi);
8944 }
8945 }
8946 }
8947 }
8948
8949 // If the shuffle is not directly supported and it has 4 elements, use
8950 // the PerfectShuffle-generated table to synthesize it from other shuffles.
8951 unsigned NumElts = VT.getVectorNumElements();
8952 if (NumElts == 4) {
8953 unsigned PFIndexes[4];
8954 for (unsigned i = 0; i != 4; ++i) {
8955 if (ShuffleMask[i] < 0)
8956 PFIndexes[i] = 8;
8957 else
8958 PFIndexes[i] = ShuffleMask[i];
8959 }
8960
8961 // Compute the index in the perfect shuffle table.
8962 unsigned PFTableIndex =
8963 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
8964 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
8965 unsigned Cost = (PFEntry >> 30);
8966
8967 if (Cost <= 4) {
8968 if (ST->hasNEON())
8969 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8970 else if (isLegalMVEShuffleOp(PFEntry)) {
8971 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
8972 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
8973 unsigned PFEntryLHS = PerfectShuffleTable[LHSID];
8974 unsigned PFEntryRHS = PerfectShuffleTable[RHSID];
8975 if (isLegalMVEShuffleOp(PFEntryLHS) && isLegalMVEShuffleOp(PFEntryRHS))
8976 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
8977 }
8978 }
8979 }
8980
8981 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
8982 if (EltSize >= 32) {
8983 // Do the expansion with floating-point types, since that is what the VFP
8984 // registers are defined to use, and since i64 is not legal.
8985 EVT EltVT = EVT::getFloatingPointVT(EltSize);
8986 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
8987 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
8988 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
8989 SmallVector<SDValue, 8> Ops;
8990 for (unsigned i = 0; i < NumElts; ++i) {
8991 if (ShuffleMask[i] < 0)
8992 Ops.push_back(DAG.getUNDEF(EltVT));
8993 else
8994 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
8995 ShuffleMask[i] < (int)NumElts ? V1 : V2,
8996 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
8997 dl, MVT::i32)));
8998 }
8999 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
9000 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
9001 }
9002
9003 if ((VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v16i8) &&
9004 isReverseMask(ShuffleMask, VT))
9005 return LowerReverse_VECTOR_SHUFFLE(Op, DAG);
9006
9007 if (ST->hasNEON() && VT == MVT::v8i8)
9008 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
9009 return NewOp;
9010
9011 if (ST->hasMVEIntegerOps())
9012 if (SDValue NewOp = LowerVECTOR_SHUFFLEUsingMovs(Op, ShuffleMask, DAG))
9013 return NewOp;
9014
9015 return SDValue();
9016}
9017
9018static SDValue LowerINSERT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9019 const ARMSubtarget *ST) {
9020 EVT VecVT = Op.getOperand(0).getValueType();
9021 SDLoc dl(Op);
9022
9023 assert(ST->hasMVEIntegerOps() &&
9024 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9025
9026 SDValue Conv =
9027 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9028 unsigned Lane = Op.getConstantOperandVal(2);
9029 unsigned LaneWidth =
9030 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9031 unsigned Mask = ((1 << LaneWidth) - 1) << Lane * LaneWidth;
9032 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::i32,
9033 Op.getOperand(1), DAG.getValueType(MVT::i1));
9034 SDValue BFI = DAG.getNode(ARMISD::BFI, dl, MVT::i32, Conv, Ext,
9035 DAG.getConstant(~Mask, dl, MVT::i32));
9036 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, Op.getValueType(), BFI);
9037}
9038
9039SDValue ARMTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
9040 SelectionDAG &DAG) const {
9041 // INSERT_VECTOR_ELT is legal only for immediate indexes.
9042 SDValue Lane = Op.getOperand(2);
9043 if (!isa<ConstantSDNode>(Lane))
9044 return SDValue();
9045
9046 SDValue Elt = Op.getOperand(1);
9047 EVT EltVT = Elt.getValueType();
9048
9049 if (Subtarget->hasMVEIntegerOps() &&
9050 Op.getValueType().getScalarSizeInBits() == 1)
9051 return LowerINSERT_VECTOR_ELT_i1(Op, DAG, Subtarget);
9052
9053 if (getTypeAction(*DAG.getContext(), EltVT) ==
9054 TargetLowering::TypePromoteInteger) {
9055 // INSERT_VECTOR_ELT doesn't want f16 operands promoting to f32,
9056 // but the type system will try to do that if we don't intervene.
9057 // Reinterpret any such vector-element insertion as one with the
9058 // corresponding integer types.
9059
9060 SDLoc dl(Op);
9061
9062 EVT IEltVT = MVT::getIntegerVT(EltVT.getScalarSizeInBits());
9063 assert(getTypeAction(*DAG.getContext(), IEltVT) !=
9064 TargetLowering::TypePromoteInteger);
9065
9066 SDValue VecIn = Op.getOperand(0);
9067 EVT VecVT = VecIn.getValueType();
9068 EVT IVecVT = EVT::getVectorVT(*DAG.getContext(), IEltVT,
9069 VecVT.getVectorNumElements());
9070
9071 SDValue IElt = DAG.getNode(ISD::BITCAST, dl, IEltVT, Elt);
9072 SDValue IVecIn = DAG.getNode(ISD::BITCAST, dl, IVecVT, VecIn);
9073 SDValue IVecOut = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, IVecVT,
9074 IVecIn, IElt, Lane);
9075 return DAG.getNode(ISD::BITCAST, dl, VecVT, IVecOut);
9076 }
9077
9078 return Op;
9079}
9080
9081static SDValue LowerEXTRACT_VECTOR_ELT_i1(SDValue Op, SelectionDAG &DAG,
9082 const ARMSubtarget *ST) {
9083 EVT VecVT = Op.getOperand(0).getValueType();
9084 SDLoc dl(Op);
9085
9086 assert(ST->hasMVEIntegerOps() &&
9087 "LowerINSERT_VECTOR_ELT_i1 called without MVE!");
9088
9089 SDValue Conv =
9090 DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Op->getOperand(0));
9091 unsigned Lane = Op.getConstantOperandVal(1);
9092 unsigned LaneWidth =
9093 getVectorTyFromPredicateVector(VecVT).getScalarSizeInBits() / 8;
9094 SDValue Shift = DAG.getNode(ISD::SRL, dl, MVT::i32, Conv,
9095 DAG.getConstant(Lane * LaneWidth, dl, MVT::i32));
9096 return Shift;
9097}
9098
9099static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG,
9100 const ARMSubtarget *ST) {
9101 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
9102 SDValue Lane = Op.getOperand(1);
9103 if (!isa<ConstantSDNode>(Lane))
9104 return SDValue();
9105
9106 SDValue Vec = Op.getOperand(0);
9107 EVT VT = Vec.getValueType();
9108
9109 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9110 return LowerEXTRACT_VECTOR_ELT_i1(Op, DAG, ST);
9111
9112 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
9113 SDLoc dl(Op);
9114 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
9115 }
9116
9117 return Op;
9118}
9119
9120static SDValue LowerCONCAT_VECTORS_i1(SDValue Op, SelectionDAG &DAG,
9121 const ARMSubtarget *ST) {
9122 SDLoc dl(Op);
9123 assert(Op.getValueType().getScalarSizeInBits() == 1 &&
9124 "Unexpected custom CONCAT_VECTORS lowering");
9125 assert(isPowerOf2_32(Op.getNumOperands()) &&
9126 "Unexpected custom CONCAT_VECTORS lowering");
9127 assert(ST->hasMVEIntegerOps() &&
9128 "CONCAT_VECTORS lowering only supported for MVE");
9129
9130 auto ConcatPair = [&](SDValue V1, SDValue V2) {
9131 EVT Op1VT = V1.getValueType();
9132 EVT Op2VT = V2.getValueType();
9133 assert(Op1VT == Op2VT && "Operand types don't match!");
9134 assert((Op1VT == MVT::v2i1 || Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) &&
9135 "Unexpected i1 concat operations!");
9136 EVT VT = Op1VT.getDoubleNumVectorElementsVT(*DAG.getContext());
9137
9138 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9139 SDValue NewV2 = PromoteMVEPredVector(dl, V2, Op2VT, DAG);
9140
9141 // We now have Op1 + Op2 promoted to vectors of integers, where v8i1 gets
9142 // promoted to v8i16, etc.
9143 MVT ElType =
9144 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9145 unsigned NumElts = 2 * Op1VT.getVectorNumElements();
9146
9147 EVT ConcatVT = MVT::getVectorVT(ElType, NumElts);
9148 if (Op1VT == MVT::v4i1 || Op1VT == MVT::v8i1) {
9149 // Use MVETRUNC to truncate the combined NewV1::NewV2 into the smaller
9150 // ConcatVT.
9151 SDValue ConVec =
9152 DAG.getNode(ARMISD::MVETRUNC, dl, ConcatVT, NewV1, NewV2);
9153 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9154 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9155 }
9156
9157 // Extract the vector elements from Op1 and Op2 one by one and truncate them
9158 // to be the right size for the destination. For example, if Op1 is v4i1
9159 // then the promoted vector is v4i32. The result of concatenation gives a
9160 // v8i1, which when promoted is v8i16. That means each i32 element from Op1
9161 // needs truncating to i16 and inserting in the result.
9162 auto ExtractInto = [&DAG, &dl](SDValue NewV, SDValue ConVec, unsigned &j) {
9163 EVT NewVT = NewV.getValueType();
9164 EVT ConcatVT = ConVec.getValueType();
9165 unsigned ExtScale = 1;
9166 if (NewVT == MVT::v2f64) {
9167 NewV = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, NewV);
9168 ExtScale = 2;
9169 }
9170 for (unsigned i = 0, e = NewVT.getVectorNumElements(); i < e; i++, j++) {
9171 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV,
9172 DAG.getIntPtrConstant(i * ExtScale, dl));
9173 ConVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ConcatVT, ConVec, Elt,
9174 DAG.getConstant(j, dl, MVT::i32));
9175 }
9176 return ConVec;
9177 };
9178 unsigned j = 0;
9179 SDValue ConVec = DAG.getNode(ISD::UNDEF, dl, ConcatVT);
9180 ConVec = ExtractInto(NewV1, ConVec, j);
9181 ConVec = ExtractInto(NewV2, ConVec, j);
9182
9183 // Now return the result of comparing the subvector with zero, which will
9184 // generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9185 return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
9186 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9187 };
9188
9189 // Concat each pair of subvectors and pack into the lower half of the array.
9190 SmallVector<SDValue> ConcatOps(Op->op_begin(), Op->op_end());
9191 while (ConcatOps.size() > 1) {
9192 for (unsigned I = 0, E = ConcatOps.size(); I != E; I += 2) {
9193 SDValue V1 = ConcatOps[I];
9194 SDValue V2 = ConcatOps[I + 1];
9195 ConcatOps[I / 2] = ConcatPair(V1, V2);
9196 }
9197 ConcatOps.resize(ConcatOps.size() / 2);
9198 }
9199 return ConcatOps[0];
9200}
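// The loop above concatenates the operands pairwise as a tree: for example,
// four v4i1 operands become two v8i1 values on the first pass and a single
// v16i1 result on the second, each pair going through the
// promote/truncate/VCMPZ sequence in ConcatPair.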
9201
9203 const ARMSubtarget *ST) {
9204 EVT VT = Op->getValueType(0);
9205 if (ST->hasMVEIntegerOps() && VT.getScalarSizeInBits() == 1)
9206 return LowerCONCAT_VECTORS_i1(Op, DAG, ST);
9207
9208 // The only time a CONCAT_VECTORS operation can have legal types is when
9209 // two 64-bit vectors are concatenated to a 128-bit vector.
9210 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
9211 "unexpected CONCAT_VECTORS");
9212 SDLoc dl(Op);
9213 SDValue Val = DAG.getUNDEF(MVT::v2f64);
9214 SDValue Op0 = Op.getOperand(0);
9215 SDValue Op1 = Op.getOperand(1);
9216 if (!Op0.isUndef())
9217 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9218 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
9219 DAG.getIntPtrConstant(0, dl));
9220 if (!Op1.isUndef())
9221 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
9222 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
9223 DAG.getIntPtrConstant(1, dl));
9224 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
9225}
9226
9228 const ARMSubtarget *ST) {
9229 SDValue V1 = Op.getOperand(0);
9230 SDValue V2 = Op.getOperand(1);
9231 SDLoc dl(Op);
9232 EVT VT = Op.getValueType();
9233 EVT Op1VT = V1.getValueType();
9234 unsigned NumElts = VT.getVectorNumElements();
9235 unsigned Index = V2->getAsZExtVal();
9236
9237 assert(VT.getScalarSizeInBits() == 1 &&
9238 "Unexpected custom EXTRACT_SUBVECTOR lowering");
9239 assert(ST->hasMVEIntegerOps() &&
9240 "EXTRACT_SUBVECTOR lowering only supported for MVE");
9241
9242 SDValue NewV1 = PromoteMVEPredVector(dl, V1, Op1VT, DAG);
9243
9244 // We now have Op1 promoted to a vector of integers, where v8i1 gets
9245 // promoted to v8i16, etc.
9246
9247 MVT ElType =
9248 getVectorTyFromPredicateVector(VT).getScalarType().getSimpleVT();
9249 if (NumElts == 2) {
9250 EVT SubVT = MVT::v4i32;
9251 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9252 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j += 2) {
9253 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9254 DAG.getIntPtrConstant(i, dl));
9255 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9256 DAG.getConstant(j, dl, MVT::i32));
9257 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9258 DAG.getConstant(j + 1, dl, MVT::i32));
9259 }
9260 SDValue Cmp = DAG.getNode(ARMISD::VCMPZ, dl, MVT::v4i1, SubVec,
9261 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9262 return DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v2i1, Cmp);
9263 }
9264
9265 EVT SubVT = MVT::getVectorVT(ElType, NumElts);
9266 SDValue SubVec = DAG.getNode(ISD::UNDEF, dl, SubVT);
9267 for (unsigned i = Index, j = 0; i < (Index + NumElts); i++, j++) {
9268 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, NewV1,
9269 DAG.getIntPtrConstant(i, dl));
9270 SubVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, SubVT, SubVec, Elt,
9271 DAG.getConstant(j, dl, MVT::i32));
9272 }
9273
9274 // Now return the result of comparing the subvector with zero,
9275 // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
9276 return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec,
9277 DAG.getConstant(ARMCC::NE, dl, MVT::i32));
9278}
9279
9280// Turn a truncate into a predicate (an i1 vector) into icmp(and(x, 1), 0).
9281static SDValue LowerTruncatei1(SDNode *N, SelectionDAG &DAG,
9282 const ARMSubtarget *ST) {
9283 assert(ST->hasMVEIntegerOps() && "Expected MVE!");
9284 EVT VT = N->getValueType(0);
9285 assert((VT == MVT::v16i1 || VT == MVT::v8i1 || VT == MVT::v4i1) &&
9286 "Expected a vector i1 type!");
9287 SDValue Op = N->getOperand(0);
9288 EVT FromVT = Op.getValueType();
9289 SDLoc DL(N);
9290
9291 SDValue And =
9292 DAG.getNode(ISD::AND, DL, FromVT, Op, DAG.getConstant(1, DL, FromVT));
9293 return DAG.getNode(ISD::SETCC, DL, VT, And, DAG.getConstant(0, DL, FromVT),
9294 DAG.getCondCode(ISD::SETNE));
9295}
9296
9297static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG,
9298 const ARMSubtarget *Subtarget) {
9299 if (!Subtarget->hasMVEIntegerOps())
9300 return SDValue();
9301
9302 EVT ToVT = N->getValueType(0);
9303 if (ToVT.getScalarType() == MVT::i1)
9304 return LowerTruncatei1(N, DAG, Subtarget);
9305
9306 // MVE does not have a single instruction to perform the truncation of a v4i32
9307 // into the lower half of a v8i16, in the same way that a NEON vmovn would.
9308 // Most of the instructions in MVE follow the 'Beats' system, where moving
9309 // values from different lanes is usually something that the instructions
9310 // avoid.
9311 //
9312 // Instead it has top/bottom instructions such as VMOVLT/B and VMOVNT/B,
9313 // which take the top/bottom half of a larger lane and extend it (or do the
9314 // opposite, truncating into the top/bottom lane from a larger lane). Note
9315 // that because of the way we widen lanes, a v4i16 is really a v4i32 using the
9316 // bottom 16bits from each vector lane. This works really well with T/B
9317 // instructions, but that doesn't extend to v8i32->v8i16 where the lanes need
9318 // to move order.
9319 //
9320 // But truncates and sext/zext are always going to be fairly common from llvm.
9321 // We have several options for how to deal with them:
9322 // - Wherever possible combine them into an instruction that makes them
9323 // "free". This includes loads/stores, which can perform the trunc as part
9324 // of the memory operation. Or certain shuffles that can be turned into
9325 // VMOVN/VMOVL.
9326 // - Lane Interleaving to transform blocks surrounded by ext/trunc. So
9327 // trunc(mul(sext(a), sext(b))) may become
9328 // VMOVNT(VMUL(VMOVLB(a), VMOVLB(b)), VMUL(VMOVLT(a), VMOVLT(b))). (Which in
9329 // this case can use VMULL). This is performed in the
9330 // MVELaneInterleavingPass.
9331 // - Otherwise we have an option. By default we would expand the
9332 // zext/sext/trunc into a series of lane extract/inserts going via GPR
9333 // registers. One for each vector lane in the vector. This can obviously be
9334 // very expensive.
9335 // - The other option is to use the fact that loads/store can extend/truncate
9336 // to turn a trunc into two truncating stack stores and a stack reload. This
9337 // becomes 3 back-to-back memory operations, but at least that is less than
9338 // all the insert/extracts.
9339 //
9340 // In order to do the last, we convert certain trunc's into MVETRUNC, which
9341 // are either optimized where they can be, or eventually lowered into stack
9342 // stores/loads. This prevents us from splitting a v8i16 trunc into two stores
9343 // too early, where other instructions would be better, and stops us from
9344 // having to reconstruct multiple buildvector shuffles into loads/stores.
9345 if (ToVT != MVT::v8i16 && ToVT != MVT::v16i8)
9346 return SDValue();
9347 EVT FromVT = N->getOperand(0).getValueType();
9348 if (FromVT != MVT::v8i32 && FromVT != MVT::v16i16)
9349 return SDValue();
9350
9351 SDValue Lo, Hi;
9352 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
9353 SDLoc DL(N);
9354 return DAG.getNode(ARMISD::MVETRUNC, DL, ToVT, Lo, Hi);
9355}
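// Example of the path above: a truncate of v8i32 to v8i16 is split into two
// v4i32 halves and emitted as ARMISD::MVETRUNC(Lo, Hi), which later passes
// either fold into cheaper forms or lower via the stack as described in the
// comment block.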
9356
9357static SDValue LowerVectorExtend(SDNode *N, SelectionDAG &DAG,
9358 const ARMSubtarget *Subtarget) {
9359 if (!Subtarget->hasMVEIntegerOps())
9360 return SDValue();
9361
9362 // See LowerTruncate above for an explanation of MVEEXT/MVETRUNC.
9363
9364 EVT ToVT = N->getValueType(0);
9365 if (ToVT != MVT::v16i32 && ToVT != MVT::v8i32 && ToVT != MVT::v16i16)
9366 return SDValue();
9367 SDValue Op = N->getOperand(0);
9368 EVT FromVT = Op.getValueType();
9369 if (FromVT != MVT::v8i16 && FromVT != MVT::v16i8)
9370 return SDValue();
9371
9372 SDLoc DL(N);
9373 EVT ExtVT = ToVT.getHalfNumVectorElementsVT(*DAG.getContext());
9374 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8)
9375 ExtVT = MVT::v8i16;
9376
9377 unsigned Opcode =
9378 N->getOpcode() == ISD::SIGN_EXTEND ? ARMISD::MVESEXT : ARMISD::MVEZEXT;
9379 SDValue Ext = DAG.getNode(Opcode, DL, DAG.getVTList(ExtVT, ExtVT), Op);
9380 SDValue Ext1 = Ext.getValue(1);
9381
9382 if (ToVT.getScalarType() == MVT::i32 && FromVT.getScalarType() == MVT::i8) {
9383 Ext = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext);
9384 Ext1 = DAG.getNode(N->getOpcode(), DL, MVT::v8i32, Ext1);
9385 }
9386
9387 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Ext, Ext1);
9388}
9389
9390/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
9391/// element has been zero/sign-extended, depending on the isSigned parameter,
9392/// from an integer type half its size.
9393static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
9394 bool isSigned) {
9395 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
9396 EVT VT = N->getValueType(0);
9397 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
9398 SDNode *BVN = N->getOperand(0).getNode();
9399 if (BVN->getValueType(0) != MVT::v4i32 ||
9400 BVN->getOpcode() != ISD::BUILD_VECTOR)
9401 return false;
9402 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9403 unsigned HiElt = 1 - LoElt;
9404 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
9405 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
9406 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
9407 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
9408 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
9409 return false;
9410 if (isSigned) {
9411 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
9412 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
9413 return true;
9414 } else {
9415 if (Hi0->isZero() && Hi1->isZero())
9416 return true;
9417 }
9418 return false;
9419 }
9420
9421 if (N->getOpcode() != ISD::BUILD_VECTOR)
9422 return false;
9423
9424 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
9425 SDNode *Elt = N->getOperand(i).getNode();
9426 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
9427 unsigned EltSize = VT.getScalarSizeInBits();
9428 unsigned HalfSize = EltSize / 2;
9429 if (isSigned) {
9430 if (!isIntN(HalfSize, C->getSExtValue()))
9431 return false;
9432 } else {
9433 if (!isUIntN(HalfSize, C->getZExtValue()))
9434 return false;
9435 }
9436 continue;
9437 }
9438 return false;
9439 }
9440
9441 return true;
9442}
9443
9444/// isSignExtended - Check if a node is a vector value that is sign-extended
9445/// or a constant BUILD_VECTOR with sign-extended elements.
9446static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
9447 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
9448 return true;
9449 if (isExtendedBUILD_VECTOR(N, DAG, true))
9450 return true;
9451 return false;
9452}
9453
9454/// isZeroExtended - Check if a node is a vector value that is zero-extended (or
9455/// any-extended) or a constant BUILD_VECTOR with zero-extended elements.
9456static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
9457 if (N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND ||
9458 ISD::isZEXTLoad(N))
9459 return true;
9460 if (isExtendedBUILD_VECTOR(N, DAG, false))
9461 return true;
9462 return false;
9463}
9464
9465static EVT getExtensionTo64Bits(const EVT &OrigVT) {
9466 if (OrigVT.getSizeInBits() >= 64)
9467 return OrigVT;
9468
9469 assert(OrigVT.isSimple() && "Expecting a simple value type");
9470
9471 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
9472 switch (OrigSimpleTy) {
9473 default: llvm_unreachable("Unexpected Vector Type");
9474 case MVT::v2i8:
9475 case MVT::v2i16:
9476 return MVT::v2i32;
9477 case MVT::v4i8:
9478 return MVT::v4i16;
9479 }
9480}
9481
9482/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
9483/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
9484/// We insert the required extension here to get the vector to fill a D register.
9485static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
9486 const EVT &OrigTy,
9487 const EVT &ExtTy,
9488 unsigned ExtOpcode) {
9489 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
9490 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
9491 // 64-bits we need to insert a new extension so that it will be 64-bits.
9492 assert(ExtTy.is128BitVector() && "Unexpected extension size");
9493 if (OrigTy.getSizeInBits() >= 64)
9494 return N;
9495
9496 // Must extend size to at least 64 bits to be used as an operand for VMULL.
9497 EVT NewVT = getExtensionTo64Bits(OrigTy);
9498
9499 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
9500}
9501
9502/// SkipLoadExtensionForVMULL - return a load of the original vector size that
9503/// does not do any sign/zero extension. If the original vector is less
9504/// than 64 bits, an appropriate extension will be added after the load to
9505/// reach a total size of 64 bits. We have to add the extension separately
9506/// because ARM does not have a sign/zero extending load for vectors.
9507static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG &DAG) {
9508 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
9509
9510 // The load already has the right type.
9511 if (ExtendedTy == LD->getMemoryVT())
9512 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
9513 LD->getBasePtr(), LD->getPointerInfo(), LD->getAlign(),
9514 LD->getMemOperand()->getFlags());
9515
9516 // We need to create a zextload/sextload. We cannot just create a load
9517 // followed by a zext/zext node because LowerMUL is also run during normal
9518 // operation legalization where we can't create illegal types.
9519 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
9520 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
9521 LD->getMemoryVT(), LD->getAlign(),
9522 LD->getMemOperand()->getFlags());
9523}
9524
9525/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
9526/// ANY_EXTEND, extending load, or BUILD_VECTOR with extended elements, return
9527/// the unextended value. The unextended vector should be 64 bits so that it can
9528/// be used as an operand to a VMULL instruction. If the original vector size
9529/// before extension is less than 64 bits we add an extension to resize
9530/// the vector to 64 bits.
9531static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
9532 if (N->getOpcode() == ISD::SIGN_EXTEND ||
9533 N->getOpcode() == ISD::ZERO_EXTEND || N->getOpcode() == ISD::ANY_EXTEND)
9534 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
9535 N->getOperand(0)->getValueType(0),
9536 N->getValueType(0),
9537 N->getOpcode());
9538
9539 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
9540 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
9541 "Expected extending load");
9542
9543 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
9544 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
9545 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
9546 SDValue extLoad =
9547 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
9548 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
9549
9550 return newLoad;
9551 }
9552
9553 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
9554 // have been legalized as a BITCAST from v4i32.
9555 if (N->getOpcode() == ISD::BITCAST) {
9556 SDNode *BVN = N->getOperand(0).getNode();
9557 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
9558 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
9559 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
9560 return DAG.getBuildVector(
9561 MVT::v2i32, SDLoc(N),
9562 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
9563 }
9564 // Construct a new BUILD_VECTOR with elements truncated to half the size.
9565 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
9566 EVT VT = N->getValueType(0);
9567 unsigned EltSize = VT.getScalarSizeInBits() / 2;
9568 unsigned NumElts = VT.getVectorNumElements();
9569 MVT TruncVT = MVT::getIntegerVT(EltSize);
9570 SmallVector<SDValue, 8> Ops;
9571 SDLoc dl(N);
9572 for (unsigned i = 0; i != NumElts; ++i) {
9573 const APInt &CInt = N->getConstantOperandAPInt(i);
9574 // Element types smaller than 32 bits are not legal, so use i32 elements.
9575 // The values are implicitly truncated so sext vs. zext doesn't matter.
9576 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
9577 }
9578 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
9579}
9580
9581static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
9582 unsigned Opcode = N->getOpcode();
9583 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9584 SDNode *N0 = N->getOperand(0).getNode();
9585 SDNode *N1 = N->getOperand(1).getNode();
9586 return N0->hasOneUse() && N1->hasOneUse() &&
9587 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
9588 }
9589 return false;
9590}
9591
9592static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
9593 unsigned Opcode = N->getOpcode();
9594 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
9595 SDNode *N0 = N->getOperand(0).getNode();
9596 SDNode *N1 = N->getOperand(1).getNode();
9597 return N0->hasOneUse() && N1->hasOneUse() &&
9598 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
9599 }
9600 return false;
9601}
9602
9603static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
9604 // Multiplications are only custom-lowered for 128-bit vectors so that
9605 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
9606 EVT VT = Op.getValueType();
9607 assert(VT.is128BitVector() && VT.isInteger() &&
9608 "unexpected type for custom-lowering ISD::MUL");
9609 SDNode *N0 = Op.getOperand(0).getNode();
9610 SDNode *N1 = Op.getOperand(1).getNode();
9611 unsigned NewOpc = 0;
9612 bool isMLA = false;
9613 bool isN0SExt = isSignExtended(N0, DAG);
9614 bool isN1SExt = isSignExtended(N1, DAG);
9615 if (isN0SExt && isN1SExt)
9616 NewOpc = ARMISD::VMULLs;
9617 else {
9618 bool isN0ZExt = isZeroExtended(N0, DAG);
9619 bool isN1ZExt = isZeroExtended(N1, DAG);
9620 if (isN0ZExt && isN1ZExt)
9621 NewOpc = ARMISD::VMULLu;
9622 else if (isN1SExt || isN1ZExt) {
9623 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
9624 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
9625 if (isN1SExt && isAddSubSExt(N0, DAG)) {
9626 NewOpc = ARMISD::VMULLs;
9627 isMLA = true;
9628 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
9629 NewOpc = ARMISD::VMULLu;
9630 isMLA = true;
9631 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
9632 std::swap(N0, N1);
9633 NewOpc = ARMISD::VMULLu;
9634 isMLA = true;
9635 }
9636 }
9637
9638 if (!NewOpc) {
9639 if (VT == MVT::v2i64)
9640 // Fall through to expand this. It is not legal.
9641 return SDValue();
9642 else
9643 // Other vector multiplications are legal.
9644 return Op;
9645 }
9646 }
9647
9648 // Legalize to a VMULL instruction.
9649 SDLoc DL(Op);
9650 SDValue Op0;
9651 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
9652 if (!isMLA) {
9653 Op0 = SkipExtensionForVMULL(N0, DAG);
9654 assert(Op0.getValueType().is64BitVector() &&
9655 Op1.getValueType().is64BitVector() &&
9656 "unexpected types for extended operands to VMULL");
9657 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
9658 }
9659
9660 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
9661 // isel lowering to take advantage of no-stall back to back vmul + vmla.
9662 // vmull q0, d4, d6
9663 // vmlal q0, d5, d6
9664 // is faster than
9665 // vaddl q0, d4, d5
9666 // vmovl q1, d6
9667 // vmul q0, q0, q1
9668 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
9669 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
9670 EVT Op1VT = Op1.getValueType();
9671 return DAG.getNode(N0->getOpcode(), DL, VT,
9672 DAG.getNode(NewOpc, DL, VT,
9673 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
9674 DAG.getNode(NewOpc, DL, VT,
9675 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
9676}
9677
9678static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
9679 SelectionDAG &DAG) {
9680 // TODO: Should this propagate fast-math-flags?
9681
9682 // Convert to float
9683 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
9684 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
9685 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
9686 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
9687 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
9688 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
9689 // Get reciprocal estimate.
9690 // float4 recip = vrecpeq_f32(yf);
9691 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9692 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9693 Y);
9694 // Because char has a smaller range than uchar, we can actually get away
9695 // without any newton steps. This requires that we use a weird bias
9696 // of 0xb000, however (again, this has been exhaustively tested).
9697 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
9698 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
9699 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
9700 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
9701 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
9702 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
9703 // Convert back to short.
9704 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
9705 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
9706 return X;
9707}
9708
9709static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
9710 SelectionDAG &DAG) {
9711 // TODO: Should this propagate fast-math-flags?
9712
9713 SDValue N2;
9714 // Convert to float.
9715 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
9716 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
9717 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
9718 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
9719 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9720 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9721
9722 // Use reciprocal estimate and one refinement step.
9723 // float4 recip = vrecpeq_f32(yf);
9724 // recip *= vrecpsq_f32(yf, recip);
9725 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9726 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9727 N1);
9728 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9729 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9730 N1, N2);
9731 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9732 // Because short has a smaller range than ushort, we can actually get away
9733 // with only a single newton step. This requires that we use a weird bias
9734 // of 89, however (again, this has been exhaustively tested).
9735 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
9736 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9737 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9738 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
9739 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9740 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9741 // Convert back to integer and return.
9742 // return vmovn_s32(vcvt_s32_f32(result));
9743 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9744 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9745 return N0;
9746}
9747
9748static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG,
9749 const ARMSubtarget *ST) {
9750 EVT VT = Op.getValueType();
9751 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9752 "unexpected type for custom-lowering ISD::SDIV");
9753
9754 SDLoc dl(Op);
9755 SDValue N0 = Op.getOperand(0);
9756 SDValue N1 = Op.getOperand(1);
9757 SDValue N2, N3;
9758
9759 if (VT == MVT::v8i8) {
9760 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
9761 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
9762
9763 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9764 DAG.getIntPtrConstant(4, dl));
9765 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9766 DAG.getIntPtrConstant(4, dl));
9767 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9768 DAG.getIntPtrConstant(0, dl));
9769 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9770 DAG.getIntPtrConstant(0, dl));
9771
9772 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
9773 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
9774
9775 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9776 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9777
9778 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
9779 return N0;
9780 }
9781 return LowerSDIV_v4i16(N0, N1, dl, DAG);
9782}
9783
9784static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG,
9785 const ARMSubtarget *ST) {
9786 // TODO: Should this propagate fast-math-flags?
9787 EVT VT = Op.getValueType();
9788 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
9789 "unexpected type for custom-lowering ISD::UDIV");
9790
9791 SDLoc dl(Op);
9792 SDValue N0 = Op.getOperand(0);
9793 SDValue N1 = Op.getOperand(1);
9794 SDValue N2, N3;
9795
9796 if (VT == MVT::v8i8) {
9797 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
9798 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
9799
9800 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9801 DAG.getIntPtrConstant(4, dl));
9802 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9803 DAG.getIntPtrConstant(4, dl));
9804 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
9805 DAG.getIntPtrConstant(0, dl));
9806 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
9807 DAG.getIntPtrConstant(0, dl));
9808
9809 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
9810 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
9811
9812 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
9813 N0 = LowerCONCAT_VECTORS(N0, DAG, ST);
9814
9815 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
9816 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
9817 MVT::i32),
9818 N0);
9819 return N0;
9820 }
9821
9822 // v4i16 sdiv ... Convert to float.
9823 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
9824 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
9825 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
9826 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
9827 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
9828 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
9829
9830 // Use reciprocal estimate and two refinement steps.
9831 // float4 recip = vrecpeq_f32(yf);
9832 // recip *= vrecpsq_f32(yf, recip);
9833 // recip *= vrecpsq_f32(yf, recip);
9834 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9835 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
9836 BN1);
9837 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9838 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9839 BN1, N2);
9840 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9841 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
9842 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
9843 BN1, N2);
9844 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
9845 // Simply multiplying by the reciprocal estimate can leave us a few ulps
9846 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
9847 // and that it will never cause us to return an answer too large).
9848 // float4 result = as_float4(as_int4(xf*recip) + 2);
9849 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
9850 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
9851 N1 = DAG.getConstant(2, dl, MVT::v4i32);
9852 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
9853 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
9854 // Convert back to integer and return.
9855 // return vmovn_u32(vcvt_s32_f32(result));
9856 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
9857 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
9858 return N0;
9859}
9860
9861static SDValue LowerUADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) {
9862 SDNode *N = Op.getNode();
9863 EVT VT = N->getValueType(0);
9864 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
9865
9866 SDValue Carry = Op.getOperand(2);
9867
9868 SDLoc DL(Op);
9869
9870 SDValue Result;
9871 if (Op.getOpcode() == ISD::UADDO_CARRY) {
9872 // This converts the boolean value carry into the carry flag.
9873 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9874
9875 // Do the addition proper using the carry flag we wanted.
9876 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
9877 Op.getOperand(1), Carry);
9878
9879 // Now convert the carry flag into a boolean value.
9880 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9881 } else {
9882 // ARMISD::SUBE expects a carry not a borrow like ISD::USUBO_CARRY so we
9883 // have to invert the carry first.
9884 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9885 DAG.getConstant(1, DL, MVT::i32), Carry);
9886 // This converts the boolean value carry into the carry flag.
9887 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
9888
9889 // Do the subtraction proper using the carry flag we wanted.
9890 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
9891 Op.getOperand(1), Carry);
9892
9893 // Now convert the carry flag into a boolean value.
9894 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
9895 // But the carry returned by ARMISD::SUBE is not a borrow as expected
9896 // by ISD::USUBO_CARRY, so compute 1 - C.
9897 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
9898 DAG.getConstant(1, DL, MVT::i32), Carry);
9899 }
9900
9901 // Return both values.
9902 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
9903}
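// For example, for (usubo_carry x, y, b) the code above computes
//   C    = 1 - b                 ; incoming borrow converted to an ARM carry
//   Res  = ARMISD::SUBE(x, y, C) ; i.e. x - y - (1 - C) = x - y - b
//   Bout = 1 - C'                ; outgoing ARM carry converted back to a borrow
// where C' is the carry flag produced by the SUBE node.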
9904
9905SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
9906 assert(Subtarget->isTargetDarwin());
9907
9908 // For iOS, we want to call an alternative entry point: __sincos_stret,
9909 // whose return values are passed via sret.
9910 SDLoc dl(Op);
9911 SDValue Arg = Op.getOperand(0);
9912 EVT ArgVT = Arg.getValueType();
9913 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
9914 auto PtrVT = getPointerTy(DAG.getDataLayout());
9915
9916 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
9917 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
9918
9919 // Pair of floats / doubles used to pass the result.
9920 Type *RetTy = StructType::get(ArgTy, ArgTy);
9921 auto &DL = DAG.getDataLayout();
9922
9922
9923 ArgListTy Args;
9924 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
9925 SDValue SRet;
9926 if (ShouldUseSRet) {
9927 // Create stack object for sret.
9928 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
9929 const Align StackAlign = DL.getPrefTypeAlign(RetTy);
9930 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
9931 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
9932
9933 ArgListEntry Entry;
9934 Entry.Node = SRet;
9935 Entry.Ty = PointerType::getUnqual(RetTy->getContext());
9936 Entry.IsSExt = false;
9937 Entry.IsZExt = false;
9938 Entry.IsSRet = true;
9939 Args.push_back(Entry);
9940 RetTy = Type::getVoidTy(*DAG.getContext());
9941 }
9942
9943 ArgListEntry Entry;
9944 Entry.Node = Arg;
9945 Entry.Ty = ArgTy;
9946 Entry.IsSExt = false;
9947 Entry.IsZExt = false;
9948 Args.push_back(Entry);
9949
9950 RTLIB::Libcall LC =
9951 (ArgVT == MVT::f64) ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
9952 const char *LibcallName = getLibcallName(LC);
9953 CallingConv::ID CC = getLibcallCallingConv(LC);
9954 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
9955
9956 ARMTargetLowering::CallLoweringInfo CLI(DAG);
9957 CLI.setDebugLoc(dl)
9958 .setChain(DAG.getEntryNode())
9959 .setCallee(CC, RetTy, Callee, std::move(Args))
9960 .setDiscardResult(ShouldUseSRet);
9961 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
9962
9963 if (!ShouldUseSRet)
9964 return CallResult.first;
9965
9966 SDValue LoadSin =
9967 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
9968
9969 // Address of cos field.
9970 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
9971 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
9972 SDValue LoadCos =
9973 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
9974
9975 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
9976 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
9977 LoadSin.getValue(0), LoadCos.getValue(0));
9978}
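// In effect, under the APCS ABI the call above behaves roughly like
//   struct { T s, c; } r; __sincos_stret(&r, x); sin = r.s; cos = r.c;
// (T being float or double), while under AAPCS the pair is returned directly
// in registers and no sret slot or follow-up loads are emitted.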
9979
9980SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
9981 bool Signed,
9982 SDValue &Chain) const {
9983 EVT VT = Op.getValueType();
9984 assert((VT == MVT::i32 || VT == MVT::i64) &&
9985 "unexpected type for custom lowering DIV");
9986 SDLoc dl(Op);
9987
9988 const auto &DL = DAG.getDataLayout();
9989 const auto &TLI = DAG.getTargetLoweringInfo();
9990
9991 const char *Name = nullptr;
9992 if (Signed)
9993 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
9994 else
9995 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
9996
9997 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
9998
9999 ARMTargetLowering::ArgListTy Args;
10000
10001 for (auto AI : {1, 0}) {
10002 ArgListEntry Arg;
10003 Arg.Node = Op.getOperand(AI);
10004 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
10005 Args.push_back(Arg);
10006 }
10007
10008 CallLoweringInfo CLI(DAG);
10009 CLI.setDebugLoc(dl)
10010 .setChain(Chain)
10012 ES, std::move(Args));
10013
10014 return LowerCallTo(CLI).first;
10015}
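// Note the {1, 0} operand order above: the divisor is pushed as the first
// argument, which is the order the Windows __rt_*div helpers expect (divisor,
// then dividend), and the call is chained after the WIN__DBZCHK
// divide-by-zero check passed in by the caller.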
10016
10017// This is a code size optimisation: return the original SDIV node to
10018// DAGCombiner when we don't want to expand SDIV into a sequence of
10019// instructions, and an empty node otherwise, which will cause the SDIV to be
10020// expanded in DAGCombiner.
10021SDValue
10022ARMTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
10023 SelectionDAG &DAG,
10024 SmallVectorImpl<SDNode *> &Created) const {
10025 // TODO: Support SREM
10026 if (N->getOpcode() != ISD::SDIV)
10027 return SDValue();
10028
10029 const auto &ST = DAG.getSubtarget<ARMSubtarget>();
10030 const bool MinSize = ST.hasMinSize();
10031 const bool HasDivide = ST.isThumb() ? ST.hasDivideInThumbMode()
10032 : ST.hasDivideInARMMode();
10033
10034 // Don't touch vector types; rewriting this may lead to scalarizing
10035 // the int divs.
10036 if (N->getOperand(0).getValueType().isVector())
10037 return SDValue();
10038
10039 // Bail if MinSize is not set, and also for both ARM and Thumb mode we need
10040 // hwdiv support for this to be really profitable.
10041 if (!(MinSize && HasDivide))
10042 return SDValue();
10043
10044 // ARM mode is a bit simpler than Thumb: we can handle large power
10045 // of 2 immediates with 1 mov instruction; no further checks required,
10046 // just return the sdiv node.
10047 if (!ST.isThumb())
10048 return SDValue(N, 0);
10049
10050 // In Thumb mode, immediates larger than 128 need a wide 4-byte MOV,
10051 // and thus lose the code size benefit of a 2-byte MOVS.
10052 // TargetTransformInfo and 'getIntImmCodeSizeCost' could be helpful here,
10053 // but since that check is exactly what this code does, it's not worth the
10054 // trouble to get TTI.
10054 if (Divisor.sgt(128))
10055 return SDValue();
10056
10057 return SDValue(N, 0);
10058}
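// For example, at minimum code size on a Thumb-2 target with hardware divide,
// "x / 8" is kept as a MOVS #8 + SDIV pair rather than being rewritten into
// the generic shift-and-add power-of-two expansion, which is larger here.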
10059
10060SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
10061 bool Signed) const {
10062 assert(Op.getValueType() == MVT::i32 &&
10063 "unexpected type for custom lowering DIV");
10064 SDLoc dl(Op);
10065
10066 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
10067 DAG.getEntryNode(), Op.getOperand(1));
10068
10069 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10070}
10071
10072static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
10073 SDLoc DL(N);
10074 SDValue Op = N->getOperand(1);
10075 if (N->getValueType(0) == MVT::i32)
10076 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
10077 SDValue Lo, Hi;
10078 std::tie(Lo, Hi) = DAG.SplitScalar(Op, DL, MVT::i32, MVT::i32);
10079 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
10080 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
10081}
10082
10083void ARMTargetLowering::ExpandDIV_Windows(
10084 SDValue Op, SelectionDAG &DAG, bool Signed,
10085 SmallVectorImpl<SDValue> &Results) const {
10086 const auto &DL = DAG.getDataLayout();
10087 const auto &TLI = DAG.getTargetLoweringInfo();
10088
10089 assert(Op.getValueType() == MVT::i64 &&
10090 "unexpected type for custom lowering DIV");
10091 SDLoc dl(Op);
10092
10093 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
10094
10095 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
10096
10097 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
10098 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
10099 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
10100 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
10101
10102 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lower, Upper));
10103}
10104
10105static SDValue LowerPredicateLoad(SDValue Op, SelectionDAG &DAG) {
10106 LoadSDNode *LD = cast<LoadSDNode>(Op.getNode());
10107 EVT MemVT = LD->getMemoryVT();
10108 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10109 MemVT == MVT::v16i1) &&
10110 "Expected a predicate type!");
10111 assert(MemVT == Op.getValueType());
10112 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
10113 "Expected a non-extending load");
10114 assert(LD->isUnindexed() && "Expected an unindexed load");
10115
10116 // The basic MVE VLDR on a v2i1/v4i1/v8i1 actually loads the entire 16-bit
10117 // predicate, with the "v4i1" bits spread out over the 16 bits loaded. We
10118 // need to make sure that 8/4/2 bits are actually loaded into the correct
10119 // place, which means loading the value and then shuffling the values into
10120 // the bottom bits of the predicate.
10121 // Equally, VLDR for a v16i1 will actually load 32 bits (so will be incorrect
10122 // for BE).
10123 // For BE, the rest of LLVM assumes the reverse order of a natural
10124 // VMSR(load), so the loaded value needs to be bit-reversed here.
10125
10126 SDLoc dl(Op);
10127 SDValue Load = DAG.getExtLoad(
10128 ISD::EXTLOAD, dl, MVT::i32, LD->getChain(), LD->getBasePtr(),
10129 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10130 LD->getMemOperand());
10131 SDValue Val = Load;
10132 if (DAG.getDataLayout().isBigEndian())
10133 Val = DAG.getNode(ISD::SRL, dl, MVT::i32,
10134 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, Load),
10135 DAG.getConstant(32 - MemVT.getSizeInBits(), dl, MVT::i32));
10136 SDValue Pred = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::v16i1, Val);
10137 if (MemVT != MVT::v16i1)
10138 Pred = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MemVT, Pred,
10139 DAG.getConstant(0, dl, MVT::i32));
10140 return DAG.getMergeValues({Pred, Load.getValue(1)}, dl);
10141}
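// For example, a v4i1 predicate load becomes: an i32 extending load of the
// predicate bits (bit-reversed and shifted on big-endian), a PREDICATE_CAST
// to v16i1, and an EXTRACT_SUBVECTOR of the low v4i1 lanes.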
10142
10143void ARMTargetLowering::LowerLOAD(SDNode *N, SmallVectorImpl<SDValue> &Results,
10144 SelectionDAG &DAG) const {
10145 LoadSDNode *LD = cast<LoadSDNode>(N);
10146 EVT MemVT = LD->getMemoryVT();
10147 assert(LD->isUnindexed() && "Loads should be unindexed at this point.");
10148
10149 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10150 !Subtarget->isThumb1Only() && LD->isVolatile() &&
10151 LD->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10152 SDLoc dl(N);
10153 SDValue Result = DAG.getMemIntrinsicNode(
10154 ARMISD::LDRD, dl, DAG.getVTList({MVT::i32, MVT::i32, MVT::Other}),
10155 {LD->getChain(), LD->getBasePtr()}, MemVT, LD->getMemOperand());
10156 SDValue Lo = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 0 : 1);
10157 SDValue Hi = Result.getValue(DAG.getDataLayout().isLittleEndian() ? 1 : 0);
10158 SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
10159 Results.append({Pair, Result.getValue(2)});
10160 }
10161}
10162
10163static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG) {
10164 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10165 EVT MemVT = ST->getMemoryVT();
10166 assert((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10167 MemVT == MVT::v16i1) &&
10168 "Expected a predicate type!");
10169 assert(MemVT == ST->getValue().getValueType());
10170 assert(!ST->isTruncatingStore() && "Expected a non-extending store");
10171 assert(ST->isUnindexed() && "Expected an unindexed store");
10172
10173 // Only store the v2i1 or v4i1 or v8i1 worth of bits, via a buildvector with
10174 // top bits unset and a scalar store.
10175 SDLoc dl(Op);
10176 SDValue Build = ST->getValue();
10177 if (MemVT != MVT::v16i1) {
10178 SmallVector<SDValue, 16> Ops;
10179 for (unsigned I = 0; I < MemVT.getVectorNumElements(); I++) {
10180 unsigned Elt = DAG.getDataLayout().isBigEndian()
10181 ? MemVT.getVectorNumElements() - I - 1
10182 : I;
10183 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Build,
10184 DAG.getConstant(Elt, dl, MVT::i32)));
10185 }
10186 for (unsigned I = MemVT.getVectorNumElements(); I < 16; I++)
10187 Ops.push_back(DAG.getUNDEF(MVT::i32));
10188 Build = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i1, Ops);
10189 }
10190 SDValue GRP = DAG.getNode(ARMISD::PREDICATE_CAST, dl, MVT::i32, Build);
10191 if (MemVT == MVT::v16i1 && DAG.getDataLayout().isBigEndian())
10192 GRP = DAG.getNode(ISD::SRL, dl, MVT::i32,
10193 DAG.getNode(ISD::BITREVERSE, dl, MVT::i32, GRP),
10194 DAG.getConstant(16, dl, MVT::i32));
10195 return DAG.getTruncStore(
10196 ST->getChain(), dl, GRP, ST->getBasePtr(),
10197 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()),
10198 ST->getMemOperand());
10199}
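// For example, a v4i1 predicate store builds a v16i1 with the 4 live lanes in
// the low bits (reversed on big-endian) and undef above, casts it to i32 with
// PREDICATE_CAST, and emits a truncating scalar store of just the low bits.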
10200
10201static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG,
10202 const ARMSubtarget *Subtarget) {
10203 StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
10204 EVT MemVT = ST->getMemoryVT();
10205 assert(ST->isUnindexed() && "Stores should be unindexed at this point.");
10206
10207 if (MemVT == MVT::i64 && Subtarget->hasV5TEOps() &&
10208 !Subtarget->isThumb1Only() && ST->isVolatile() &&
10209 ST->getAlign() >= Subtarget->getDualLoadStoreAlignment()) {
10210 SDNode *N = Op.getNode();
10211 SDLoc dl(N);
10212
10213 SDValue Lo = DAG.getNode(
10214 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10215 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 0 : 1, dl,
10216 MVT::i32));
10217 SDValue Hi = DAG.getNode(
10218 ISD::EXTRACT_ELEMENT, dl, MVT::i32, ST->getValue(),
10219 DAG.getTargetConstant(DAG.getDataLayout().isLittleEndian() ? 1 : 0, dl,
10220 MVT::i32));
10221
10222 return DAG.getMemIntrinsicNode(ARMISD::STRD, dl, DAG.getVTList(MVT::Other),
10223 {ST->getChain(), Lo, Hi, ST->getBasePtr()},
10224 MemVT, ST->getMemOperand());
10225 } else if (Subtarget->hasMVEIntegerOps() &&
10226 ((MemVT == MVT::v2i1 || MemVT == MVT::v4i1 || MemVT == MVT::v8i1 ||
10227 MemVT == MVT::v16i1))) {
10228 return LowerPredicateStore(Op, DAG);
10229 }
10230
10231 return SDValue();
10232}
10233
10234static bool isZeroVector(SDValue N) {
10235 return (ISD::isBuildVectorAllZeros(N.getNode()) ||
10236 (N->getOpcode() == ARMISD::VMOVIMM &&
10237 isNullConstant(N->getOperand(0))));
10238}
10239
10240static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
10241 MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
10242 MVT VT = Op.getSimpleValueType();
10243 SDValue Mask = N->getMask();
10244 SDValue PassThru = N->getPassThru();
10245 SDLoc dl(Op);
10246
10247 if (isZeroVector(PassThru))
10248 return Op;
10249
10250 // MVE Masked loads use zero as the passthru value. Here we convert undef to
10251 // zero too, and other values are lowered to a select.
10252 SDValue ZeroVec = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
10253 DAG.getTargetConstant(0, dl, MVT::i32));
10254 SDValue NewLoad = DAG.getMaskedLoad(
10255 VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, ZeroVec,
10256 N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
10257 N->getExtensionType(), N->isExpandingLoad());
10258 SDValue Combo = NewLoad;
10259 bool PassThruIsCastZero = (PassThru.getOpcode() == ISD::BITCAST ||
10260 PassThru.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
10261 isZeroVector(PassThru->getOperand(0));
10262 if (!PassThru.isUndef() && !PassThruIsCastZero)
10263 Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
10264 return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
10265}
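// For example, a masked load whose passthru is neither undef nor zero becomes
// a zero-passthru masked load followed by VSELECT(mask, loaded, passthru);
// zero and undef passthrus are handled by the MVE masked load directly.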
10266
10267static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG,
10268 const ARMSubtarget *ST) {
10269 if (!ST->hasMVEIntegerOps())
10270 return SDValue();
10271
10272 SDLoc dl(Op);
10273 unsigned BaseOpcode = 0;
10274 switch (Op->getOpcode()) {
10275 default: llvm_unreachable("Expected VECREDUCE opcode");
10276 case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
10277 case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
10278 case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
10279 case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
10280 case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
10281 case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
10282 case ISD::VECREDUCE_FMAX: BaseOpcode = ISD::FMAXNUM; break;
10283 case ISD::VECREDUCE_FMIN: BaseOpcode = ISD::FMINNUM; break;
10284 }
10285
10286 SDValue Op0 = Op->getOperand(0);
10287 EVT VT = Op0.getValueType();
10288 EVT EltVT = VT.getVectorElementType();
10289 unsigned NumElts = VT.getVectorNumElements();
10290 unsigned NumActiveLanes = NumElts;
10291
10292 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10293 NumActiveLanes == 2) &&
10294 "Only expected a power 2 vector size");
10295
10296 // Use Mul(X, Rev(X)) until 4 items remain. Going down to 4 vector elements
10297 // allows us to easily extract vector elements from the lanes.
10298 while (NumActiveLanes > 4) {
10299 unsigned RevOpcode = NumActiveLanes == 16 ? ARMISD::VREV16 : ARMISD::VREV32;
10300 SDValue Rev = DAG.getNode(RevOpcode, dl, VT, Op0);
10301 Op0 = DAG.getNode(BaseOpcode, dl, VT, Op0, Rev);
10302 NumActiveLanes /= 2;
10303 }
10304
10305 SDValue Res;
10306 if (NumActiveLanes == 4) {
10307 // The remaining 4 elements are combined pairwise using the base operation
10308 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10309 DAG.getConstant(0 * NumElts / 4, dl, MVT::i32));
10310 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10311 DAG.getConstant(1 * NumElts / 4, dl, MVT::i32));
10312 SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10313 DAG.getConstant(2 * NumElts / 4, dl, MVT::i32));
10314 SDValue Ext3 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10315 DAG.getConstant(3 * NumElts / 4, dl, MVT::i32));
10316 SDValue Res0 = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10317 SDValue Res1 = DAG.getNode(BaseOpcode, dl, EltVT, Ext2, Ext3, Op->getFlags());
10318 Res = DAG.getNode(BaseOpcode, dl, EltVT, Res0, Res1, Op->getFlags());
10319 } else {
10320 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10321 DAG.getConstant(0, dl, MVT::i32));
10322 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10323 DAG.getConstant(1, dl, MVT::i32));
10324 Res = DAG.getNode(BaseOpcode, dl, EltVT, Ext0, Ext1, Op->getFlags());
10325 }
10326
10327 // Result type may be wider than element type.
10328 if (EltVT != Op->getValueType(0))
10329 Res = DAG.getNode(ISD::ANY_EXTEND, dl, Op->getValueType(0), Res);
10330 return Res;
10331}
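// For example, VECREDUCE_MUL of a v8i16 performs one VREV32 + MUL step to go
// from 8 to 4 active lanes, then extracts four of the remaining lanes and
// combines them in a small tree; the same pattern applies to AND/OR/XOR and
// to the floating-point reductions routed here.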
10332
10333static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG,
10334 const ARMSubtarget *ST) {
10335 if (!ST->hasMVEFloatOps())
10336 return SDValue();
10337 return LowerVecReduce(Op, DAG, ST);
10338}
10339
10340static SDValue LowerVecReduceMinMax(SDValue Op, SelectionDAG &DAG,
10341 const ARMSubtarget *ST) {
10342 if (!ST->hasNEON())
10343 return SDValue();
10344
10345 SDLoc dl(Op);
10346 SDValue Op0 = Op->getOperand(0);
10347 EVT VT = Op0.getValueType();
10348 EVT EltVT = VT.getVectorElementType();
10349
10350 unsigned PairwiseIntrinsic = 0;
10351 switch (Op->getOpcode()) {
10352 default:
10353 llvm_unreachable("Expected VECREDUCE opcode");
10354 case ISD::VECREDUCE_UMIN:
10355 PairwiseIntrinsic = Intrinsic::arm_neon_vpminu;
10356 break;
10357 case ISD::VECREDUCE_UMAX:
10358 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxu;
10359 break;
10360 case ISD::VECREDUCE_SMIN:
10361 PairwiseIntrinsic = Intrinsic::arm_neon_vpmins;
10362 break;
10363 case ISD::VECREDUCE_SMAX:
10364 PairwiseIntrinsic = Intrinsic::arm_neon_vpmaxs;
10365 break;
10366 }
10367 SDValue PairwiseOp = DAG.getConstant(PairwiseIntrinsic, dl, MVT::i32);
10368
10369 unsigned NumElts = VT.getVectorNumElements();
10370 unsigned NumActiveLanes = NumElts;
10371
10372 assert((NumActiveLanes == 16 || NumActiveLanes == 8 || NumActiveLanes == 4 ||
10373 NumActiveLanes == 2) &&
10374 "Only expected a power 2 vector size");
10375
10376 // Split 128-bit vectors, since vpmin/max takes 2 64-bit vectors.
10377 if (VT.is128BitVector()) {
10378 SDValue Lo, Hi;
10379 std::tie(Lo, Hi) = DAG.SplitVector(Op0, dl);
10380 VT = Lo.getValueType();
10381 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Lo, Hi});
10382 NumActiveLanes /= 2;
10383 }
10384
10385 // Use pairwise reductions until one lane remains
10386 while (NumActiveLanes > 1) {
10387 Op0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, {PairwiseOp, Op0, Op0});
10388 NumActiveLanes /= 2;
10389 }
10390
10391 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
10392 DAG.getConstant(0, dl, MVT::i32));
10393
10394 // Result type may be wider than element type.
10395 if (EltVT != Op.getValueType()) {
10396 unsigned Extend = 0;
10397 switch (Op->getOpcode()) {
10398 default:
10399 llvm_unreachable("Expected VECREDUCE opcode");
10400 case ISD::VECREDUCE_UMIN:
10401 case ISD::VECREDUCE_UMAX:
10402 Extend = ISD::ZERO_EXTEND;
10403 break;
10404 case ISD::VECREDUCE_SMIN:
10405 case ISD::VECREDUCE_SMAX:
10406 Extend = ISD::SIGN_EXTEND;
10407 break;
10408 }
10409 Res = DAG.getNode(Extend, dl, Op.getValueType(), Res);
10410 }
10411 return Res;
10412}
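// For example, VECREDUCE_UMAX of a v4i32 splits the input into two 64-bit
// halves, does VPMAX.U32(lo, hi), repeats VPMAX.U32 of the result against
// itself, and extracts lane 0, extending if the scalar result type is wider
// than the element type.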
10413
10414static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
10415 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getSuccessOrdering()))
10416 // Acquire/Release load/store is not legal for targets without a dmb or
10417 // equivalent available.
10418 return SDValue();
10419
10420 // Monotonic load/store is legal for all targets.
10421 return Op;
10422}
10423
10424static void ReplaceREADCYCLECOUNTER(SDNode *N,
10425 SmallVectorImpl<SDValue> &Results,
10426 SelectionDAG &DAG,
10427 const ARMSubtarget *Subtarget) {
10428 SDLoc DL(N);
10429 // Under Power Management extensions, the cycle-count is:
10430 // mrc p15, #0, <Rt>, c9, c13, #0
10431 SDValue Ops[] = { N->getOperand(0), // Chain
10432 DAG.getTargetConstant(Intrinsic::arm_mrc, DL, MVT::i32),
10433 DAG.getTargetConstant(15, DL, MVT::i32),
10434 DAG.getTargetConstant(0, DL, MVT::i32),
10435 DAG.getTargetConstant(9, DL, MVT::i32),
10436 DAG.getTargetConstant(13, DL, MVT::i32),
10437 DAG.getTargetConstant(0, DL, MVT::i32)
10438 };
10439
10440 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
10441 DAG.getVTList(MVT::i32, MVT::Other), Ops);
10442 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
10443 DAG.getConstant(0, DL, MVT::i32)));
10444 Results.push_back(Cycles32.getValue(1));
10445}
10446
10447static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
10448 SDLoc dl(V.getNode());
10449 auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i32, MVT::i32);
10450 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10451 if (isBigEndian)
10452 std::swap (VLo, VHi);
10453 SDValue RegClass =
10454 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
10455 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
10456 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
10457 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
10458 return SDValue(
10459 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
10460}
10461
10462static void ReplaceCMP_SWAP_64Results(SDNode *N,
10463 SmallVectorImpl<SDValue> &Results,
10464 SelectionDAG &DAG) {
10465 assert(N->getValueType(0) == MVT::i64 &&
10466 "AtomicCmpSwap on types less than 64 should be legal");
10467 SDValue Ops[] = {N->getOperand(1),
10468 createGPRPairNode(DAG, N->getOperand(2)),
10469 createGPRPairNode(DAG, N->getOperand(3)),
10470 N->getOperand(0)};
10471 SDNode *CmpSwap = DAG.getMachineNode(
10472 ARM::CMP_SWAP_64, SDLoc(N),
10473 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
10474
10475 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
10476 DAG.setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
10477
10478 bool isBigEndian = DAG.getDataLayout().isBigEndian();
10479
10480 SDValue Lo =
10481 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
10482 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10483 SDValue Hi =
10484 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
10485 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0));
10486 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi));
10487 Results.push_back(SDValue(CmpSwap, 2));
10488}
10489
10490SDValue ARMTargetLowering::LowerFSETCC(SDValue Op, SelectionDAG &DAG) const {
10491 SDLoc dl(Op);
10492 EVT VT = Op.getValueType();
10493 SDValue Chain = Op.getOperand(0);
10494 SDValue LHS = Op.getOperand(1);
10495 SDValue RHS = Op.getOperand(2);
10496 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
10497 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
10498
10499 // If we don't have instructions of this float type then soften to a libcall
10500 // and use SETCC instead.
10501 if (isUnsupportedFloatingType(LHS.getValueType())) {
10502 softenSetCCOperands(
10503 DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS, Chain, IsSignaling);
10504 if (!RHS.getNode()) {
10505 RHS = DAG.getConstant(0, dl, LHS.getValueType());
10506 CC = ISD::SETNE;
10507 }
10508 SDValue Result = DAG.getNode(ISD::SETCC, dl, VT, LHS, RHS,
10509 DAG.getCondCode(CC));
10510 return DAG.getMergeValues({Result, Chain}, dl);
10511 }
10512
10513 ARMCC::CondCodes CondCode, CondCode2;
10514 FPCCToARMCC(CC, CondCode, CondCode2);
10515
10516 // FIXME: Chain is not handled correctly here. Currently the FPSCR is implicit
10517 // in CMPFP and CMPFPE, but instead it should be made explicit by these
10518 // instructions using a chain instead of glue. This would also fix the problem
10519 // here (and also in LowerSELECT_CC) where we generate two comparisons when
10520 // CondCode2 != AL.
10521 SDValue True = DAG.getConstant(1, dl, VT);
10522 SDValue False = DAG.getConstant(0, dl, VT);
10523 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
10524 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
10525 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10526 SDValue Result = getCMOV(dl, VT, False, True, ARMcc, CCR, Cmp, DAG);
10527 if (CondCode2 != ARMCC::AL) {
10528 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
10529 Cmp = getVFPCmp(LHS, RHS, DAG, dl, IsSignaling);
10530 Result = getCMOV(dl, VT, Result, True, ARMcc, CCR, Cmp, DAG);
10531 }
10532 return DAG.getMergeValues({Result, Chain}, dl);
10533}
10534
10535SDValue ARMTargetLowering::LowerSPONENTRY(SDValue Op, SelectionDAG &DAG) const {
10536 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
10537
10538 EVT VT = getPointerTy(DAG.getDataLayout());
10539 SDLoc DL(Op);
10540 int FI = MFI.CreateFixedObject(4, 0, false);
10541 return DAG.getFrameIndex(FI, VT);
10542}
10543
10544SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
10545 LLVM_DEBUG(dbgs() << "Lowering node: "; Op.dump());
10546 switch (Op.getOpcode()) {
10547 default: llvm_unreachable("Don't know how to custom lower this!");
10548 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
10549 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
10550 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
10551 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
10552 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
10553 case ISD::SELECT: return LowerSELECT(Op, DAG);
10554 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
10555 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
10556 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
10557 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
10558 case ISD::VASTART: return LowerVASTART(Op, DAG);
10559 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
10560 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
10561 case ISD::SINT_TO_FP:
10562 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
10565 case ISD::FP_TO_SINT:
10566 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
10567 case ISD::FP_TO_SINT_SAT:
10568 case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG, Subtarget);
10569 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
10570 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
10571 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
10572 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
10573 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
10574 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
10575 case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG, Subtarget);
10576 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
10577 Subtarget);
10578 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG, Subtarget);
10579 case ISD::SHL:
10580 case ISD::SRL:
10581 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
10582 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
10583 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
10584 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
10585 case ISD::SRL_PARTS:
10586 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
10587 case ISD::CTTZ:
10588 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
10589 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
10590 case ISD::SETCC: return LowerVSETCC(Op, DAG, Subtarget);
10591 case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
10592 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
10593 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
10594 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
10595 case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG, Subtarget);
10596 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
10597 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG, Subtarget);
10598 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG, Subtarget);
10599 case ISD::TRUNCATE: return LowerTruncate(Op.getNode(), DAG, Subtarget);
10600 case ISD::SIGN_EXTEND:
10601 case ISD::ZERO_EXTEND: return LowerVectorExtend(Op.getNode(), DAG, Subtarget);
10602 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
10603 case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
10604 case ISD::SET_FPMODE:
10605 return LowerSET_FPMODE(Op, DAG);
10606 case ISD::RESET_FPMODE:
10607 return LowerRESET_FPMODE(Op, DAG);
10608 case ISD::MUL: return LowerMUL(Op, DAG);
10609 case ISD::SDIV:
10610 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10611 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
10612 return LowerSDIV(Op, DAG, Subtarget);
10613 case ISD::UDIV:
10614 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
10615 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
10616 return LowerUDIV(Op, DAG, Subtarget);
10617 case ISD::UADDO_CARRY:
10618 case ISD::USUBO_CARRY:
10619 return LowerUADDSUBO_CARRY(Op, DAG);
10620 case ISD::SADDO:
10621 case ISD::SSUBO:
10622 return LowerSignedALUO(Op, DAG);
10623 case ISD::UADDO:
10624 case ISD::USUBO:
10625 return LowerUnsignedALUO(Op, DAG);
10626 case ISD::SADDSAT:
10627 case ISD::SSUBSAT:
10628 case ISD::UADDSAT:
10629 case ISD::USUBSAT:
10630 return LowerADDSUBSAT(Op, DAG, Subtarget);
10631 case ISD::LOAD:
10632 return LowerPredicateLoad(Op, DAG);
10633 case ISD::STORE:
10634 return LowerSTORE(Op, DAG, Subtarget);
10635 case ISD::MLOAD:
10636 return LowerMLOAD(Op, DAG);
10637 case ISD::VECREDUCE_MUL:
10638 case ISD::VECREDUCE_AND:
10639 case ISD::VECREDUCE_OR:
10640 case ISD::VECREDUCE_XOR:
10641 return LowerVecReduce(Op, DAG, Subtarget);
10642 case ISD::VECREDUCE_FADD:
10643 case ISD::VECREDUCE_FMUL:
10644 case ISD::VECREDUCE_FMIN:
10645 case ISD::VECREDUCE_FMAX:
10646 return LowerVecReduceF(Op, DAG, Subtarget);
10647 case ISD::VECREDUCE_UMIN:
10648 case ISD::VECREDUCE_UMAX:
10649 case ISD::VECREDUCE_SMIN:
10650 case ISD::VECREDUCE_SMAX:
10651 return LowerVecReduceMinMax(Op, DAG, Subtarget);
10652 case ISD::ATOMIC_LOAD:
10653 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
10654 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
10655 case ISD::SDIVREM:
10656 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
10657 case ISD::DYNAMIC_STACKALLOC:
10658 if (Subtarget->isTargetWindows())
10659 return LowerDYNAMIC_STACKALLOC(Op, DAG);
10660 llvm_unreachable("Don't know how to custom lower this!");
10661 case ISD::STRICT_FP_ROUND:
10662 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
10663 case ISD::STRICT_FP_EXTEND:
10664 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
10665 case ISD::STRICT_FSETCC:
10666 case ISD::STRICT_FSETCCS: return LowerFSETCC(Op, DAG);
10667 case ISD::SPONENTRY:
10668 return LowerSPONENTRY(Op, DAG);
10669 case ARMISD::WIN__DBZCHK: return SDValue();
10670 }
10671}
10672
10673static void ReplaceLongIntrinsic(SDNode *N, SmallVectorImpl<SDValue> &Results,
10674 SelectionDAG &DAG) {
10675 unsigned IntNo = N->getConstantOperandVal(0);
10676 unsigned Opc = 0;
10677 if (IntNo == Intrinsic::arm_smlald)
10678 Opc = ARMISD::SMLALD;
10679 else if (IntNo == Intrinsic::arm_smlaldx)
10680 Opc = ARMISD::SMLALDX;
10681 else if (IntNo == Intrinsic::arm_smlsld)
10682 Opc = ARMISD::SMLSLD;
10683 else if (IntNo == Intrinsic::arm_smlsldx)
10684 Opc = ARMISD::SMLSLDX;
10685 else
10686 return;
10687
10688 SDLoc dl(N);
10689 SDValue Lo, Hi;
10690 std::tie(Lo, Hi) = DAG.SplitScalar(N->getOperand(3), dl, MVT::i32, MVT::i32);
10691
10692 SDValue LongMul = DAG.getNode(Opc, dl,
10693 DAG.getVTList(MVT::i32, MVT::i32),
10694 N->getOperand(1), N->getOperand(2),
10695 Lo, Hi);
10696 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
10697 LongMul.getValue(0), LongMul.getValue(1)));
10698}
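// For example, llvm.arm.smlald(a, b, i64 acc) is replaced by an ARMISD::SMLALD
// node taking acc split into i32 lo/hi halves, and the two i32 results are
// re-paired into the original i64 with BUILD_PAIR.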
10699
10700/// ReplaceNodeResults - Replace the results of a node with an illegal result
10701/// type with new values built out of custom code.
10702void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
10703 SmallVectorImpl<SDValue> &Results,
10704 SelectionDAG &DAG) const {
10705 SDValue Res;
10706 switch (N->getOpcode()) {
10707 default:
10708 llvm_unreachable("Don't know how to custom expand this!");
10709 case ISD::READ_REGISTER:
10710 ExpandREAD_REGISTER(N, Results, DAG);
10711 break;
10712 case ISD::BITCAST:
10713 Res = ExpandBITCAST(N, DAG, Subtarget);
10714 break;
10715 case ISD::SRL:
10716 case ISD::SRA:
10717 case ISD::SHL:
10718 Res = Expand64BitShift(N, DAG, Subtarget);
10719 break;
10720 case ISD::SREM:
10721 case ISD::UREM:
10722 Res = LowerREM(N, DAG);
10723 break;
10724 case ISD::SDIVREM:
10725 case ISD::UDIVREM:
10726 Res = LowerDivRem(SDValue(N, 0), DAG);
10727 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
10728 Results.push_back(Res.getValue(0));
10729 Results.push_back(Res.getValue(1));
10730 return;
10731 case ISD::SADDSAT:
10732 case ISD::SSUBSAT:
10733 case ISD::UADDSAT:
10734 case ISD::USUBSAT:
10735 Res = LowerADDSUBSAT(SDValue(N, 0), DAG, Subtarget);
10736 break;
10737 case ISD::READCYCLECOUNTER:
10738 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
10739 return;
10740 case ISD::UDIV:
10741 case ISD::SDIV:
10742 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
10743 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
10744 Results);
10745 case ISD::ATOMIC_CMP_SWAP:
10746 ReplaceCMP_SWAP_64Results(N, Results, DAG);
10747 return;
10748 case ISD::INTRINSIC_WO_CHAIN:
10749 return ReplaceLongIntrinsic(N, Results, DAG);
10750 case ISD::LOAD:
10751 LowerLOAD(N, Results, DAG);
10752 break;
10753 case ISD::TRUNCATE:
10754 Res = LowerTruncate(N, DAG, Subtarget);
10755 break;
10756 case ISD::SIGN_EXTEND:
10757 case ISD::ZERO_EXTEND:
10758 Res = LowerVectorExtend(N, DAG, Subtarget);
10759 break;
10760 case ISD::FP_TO_SINT_SAT:
10761 case ISD::FP_TO_UINT_SAT:
10762 Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
10763 break;
10764 }
10765 if (Res.getNode())
10766 Results.push_back(Res);
10767}
10768
10769//===----------------------------------------------------------------------===//
10770// ARM Scheduler Hooks
10771//===----------------------------------------------------------------------===//
10772
10773/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
10774/// registers the function context.
10775void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
10776 MachineBasicBlock *MBB,
10777 MachineBasicBlock *DispatchBB,
10778 int FI) const {
10779 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
10780 "ROPI/RWPI not currently supported with SjLj");
10781 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10782 DebugLoc dl = MI.getDebugLoc();
10783 MachineFunction *MF = MBB->getParent();
10784 MachineRegisterInfo *MRI = &MF->getRegInfo();
10785 MachineConstantPool *MCP = MF->getConstantPool();
10786 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
10787 const Function &F = MF->getFunction();
10788
10789 bool isThumb = Subtarget->isThumb();
10790 bool isThumb2 = Subtarget->isThumb2();
10791
10792 unsigned PCLabelId = AFI->createPICLabelUId();
10793 unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
10794 ARMConstantPoolValue *CPV =
10795 ARMConstantPoolMBB::Create(F.getContext(), DispatchBB, PCLabelId, PCAdj);
10796 unsigned CPI = MCP->getConstantPoolIndex(CPV, Align(4));
10797
10798 const TargetRegisterClass *TRC = isThumb ? &ARM::tGPRRegClass
10799 : &ARM::GPRRegClass;
10800
10801 // Grab constant pool and fixed stack memory operands.
10802 MachineMemOperand *CPMMO =
10805
10806 MachineMemOperand *FIMMOSt =
10809
10810 // Load the address of the dispatch MBB into the jump buffer.
10811 if (isThumb2) {
10812 // Incoming value: jbuf
10813 // ldr.n r5, LCPI1_1
10814 // orr r5, r5, #1
10815 // add r5, pc
10816 // str r5, [$jbuf, #+4] ; &jbuf[1]
10817 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10818 BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
10820 .addMemOperand(CPMMO)
10822 // Set the low bit because of thumb mode.
10823 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10824 BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
10825 .addReg(NewVReg1, RegState::Kill)
10826 .addImm(0x01)
10828 .add(condCodeOp());
10829 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10830 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
10831 .addReg(NewVReg2, RegState::Kill)
10832 .addImm(PCLabelId);
10833 BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
10834 .addReg(NewVReg3, RegState::Kill)
10835 .addFrameIndex(FI)
10836 .addImm(36) // &jbuf[1] :: pc
10837 .addMemOperand(FIMMOSt)
10839 } else if (isThumb) {
10840 // Incoming value: jbuf
10841 // ldr.n r1, LCPI1_4
10842 // add r1, pc
10843 // mov r2, #1
10844 // orrs r1, r2
10845 // add r2, $jbuf, #+4 ; &jbuf[1]
10846 // str r1, [r2]
10847 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10848 BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
10850 .addMemOperand(CPMMO)
10852 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10853 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
10854 .addReg(NewVReg1, RegState::Kill)
10855 .addImm(PCLabelId);
10856 // Set the low bit because of thumb mode.
10857 Register NewVReg3 = MRI->createVirtualRegister(TRC);
10858 BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
10859 .addReg(ARM::CPSR, RegState::Define)
10860 .addImm(1)
10862 Register NewVReg4 = MRI->createVirtualRegister(TRC);
10863 BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
10864 .addReg(ARM::CPSR, RegState::Define)
10865 .addReg(NewVReg2, RegState::Kill)
10866 .addReg(NewVReg3, RegState::Kill)
10868 Register NewVReg5 = MRI->createVirtualRegister(TRC);
10869 BuildMI(*MBB, MI, dl, TII->get(ARM::tADDframe), NewVReg5)
10870 .addFrameIndex(FI)
10871 .addImm(36); // &jbuf[1] :: pc
10872 BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
10873 .addReg(NewVReg4, RegState::Kill)
10874 .addReg(NewVReg5, RegState::Kill)
10875 .addImm(0)
10876 .addMemOperand(FIMMOSt)
10878 } else {
10879 // Incoming value: jbuf
10880 // ldr r1, LCPI1_1
10881 // add r1, pc, r1
10882 // str r1, [$jbuf, #+4] ; &jbuf[1]
10883 Register NewVReg1 = MRI->createVirtualRegister(TRC);
10884 BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1)
10886 .addImm(0)
10887 .addMemOperand(CPMMO)
10889 Register NewVReg2 = MRI->createVirtualRegister(TRC);
10890 BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
10891 .addReg(NewVReg1, RegState::Kill)
10892 .addImm(PCLabelId)
10894 BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
10895 .addReg(NewVReg2, RegState::Kill)
10896 .addFrameIndex(FI)
10897 .addImm(36) // &jbuf[1] :: pc
10898 .addMemOperand(FIMMOSt)
10900 }
10901}
10902
10903void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
10904 MachineBasicBlock *MBB) const {
10905 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
10906 DebugLoc dl = MI.getDebugLoc();
10907 MachineFunction *MF = MBB->getParent();
10908 MachineRegisterInfo *MRI = &MF->getRegInfo();
10909 MachineFrameInfo &MFI = MF->getFrameInfo();
10910 int FI = MFI.getFunctionContextIndex();
10911
10912 const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
10913 : &ARM::GPRnopcRegClass;
10914
10915 // Get a mapping of the call site numbers to all of the landing pads they're
10916 // associated with.
10917 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2>> CallSiteNumToLPad;
10918 unsigned MaxCSNum = 0;
10919 for (MachineBasicBlock &BB : *MF) {
10920 if (!BB.isEHPad())
10921 continue;
10922
10923 // FIXME: We should assert that the EH_LABEL is the first MI in the landing
10924 // pad.
10925 for (MachineInstr &II : BB) {
10926 if (!II.isEHLabel())
10927 continue;
10928
10929 MCSymbol *Sym = II.getOperand(0).getMCSymbol();
10930 if (!MF->hasCallSiteLandingPad(Sym)) continue;
10931
10932 SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
10933 for (unsigned Idx : CallSiteIdxs) {
10934 CallSiteNumToLPad[Idx].push_back(&BB);
10935 MaxCSNum = std::max(MaxCSNum, Idx);
10936 }
10937 break;
10938 }
10939 }
10940
10941 // Get an ordered list of the machine basic blocks for the jump table.
10942 std::vector<MachineBasicBlock*> LPadList;
10943 SmallPtrSet<MachineBasicBlock*, 32> InvokeBBs;
10944 LPadList.reserve(CallSiteNumToLPad.size());
10945 for (unsigned I = 1; I <= MaxCSNum; ++I) {
10946 SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
10947 for (MachineBasicBlock *MBB : MBBList) {
10948 LPadList.push_back(MBB);
10949 InvokeBBs.insert(MBB->pred_begin(), MBB->pred_end());
10950 }
10951 }
10952
10953 assert(!LPadList.empty() &&
10954 "No landing pad destinations for the dispatch jump table!");
10955
10956 // Create the jump table and associated information.
10957 MachineJumpTableInfo *JTI =
10958 MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
10959 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
10960
10961 // Create the MBBs for the dispatch code.
10962
10963 // Shove the dispatch's address into the return slot in the function context.
10964 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
10965 DispatchBB->setIsEHPad();
10966
10967 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
10968 unsigned trap_opcode;
10969 if (Subtarget->isThumb())
10970 trap_opcode = ARM::tTRAP;
10971 else
10972 trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
10973
10974 BuildMI(TrapBB, dl, TII->get(trap_opcode));
10975 DispatchBB->addSuccessor(TrapBB);
10976
10977 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
10978 DispatchBB->addSuccessor(DispContBB);
10979
10980 // Insert the MBBs.
10981 MF->insert(MF->end(), DispatchBB);
10982 MF->insert(MF->end(), DispContBB);
10983 MF->insert(MF->end(), TrapBB);
10984
10985 // Insert code into the entry block that creates and registers the function
10986 // context.
10987 SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
10988
10989 MachineMemOperand *FIMMOLd = MF->getMachineMemOperand(
10992
10993 MachineInstrBuilder MIB;
10994 MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
10995
10996 const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
10997 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
10998
10999 // Add a register mask with no preserved registers. This results in all
11000 // registers being marked as clobbered. This can't work if the dispatch block
11001 // is in a Thumb1 function and is linked with ARM code which uses the FP
11002 // registers, as there is no way to preserve the FP registers in Thumb1 mode.
11004
11005 bool IsPositionIndependent = isPositionIndependent();
11006 unsigned NumLPads = LPadList.size();
11007 if (Subtarget->isThumb2()) {
11008 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11009 BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
11010 .addFrameIndex(FI)
11011 .addImm(4)
11012 .addMemOperand(FIMMOLd)
11014
11015 if (NumLPads < 256) {
11016 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
11017 .addReg(NewVReg1)
11018 .addImm(LPadList.size())
11020 } else {
11021 Register VReg1 = MRI->createVirtualRegister(TRC);
11022 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
11023 .addImm(NumLPads & 0xFFFF)
11025
11026 unsigned VReg2 = VReg1;
11027 if ((NumLPads & 0xFFFF0000) != 0) {
11028 VReg2 = MRI->createVirtualRegister(TRC);
11029 BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
11030 .addReg(VReg1)
11031 .addImm(NumLPads >> 16)
11033 }
11034
11035 BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
11036 .addReg(NewVReg1)
11037 .addReg(VReg2)
11039 }
11040
11041 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
11042 .addMBB(TrapBB)
11044 .addReg(ARM::CPSR);
11045
11046 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11047 BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT), NewVReg3)
11048 .addJumpTableIndex(MJTI)
11050
11051 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11052 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
11053 .addReg(NewVReg3, RegState::Kill)
11054 .addReg(NewVReg1)
11057 .add(condCodeOp());
11058
11059 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
11060 .addReg(NewVReg4, RegState::Kill)
11061 .addReg(NewVReg1)
11062 .addJumpTableIndex(MJTI);
11063 } else if (Subtarget->isThumb()) {
11064 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11065 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
11066 .addFrameIndex(FI)
11067 .addImm(1)
11068 .addMemOperand(FIMMOLd)
11070
11071 if (NumLPads < 256) {
11072 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
11073 .addReg(NewVReg1)
11074 .addImm(NumLPads)
11076 } else {
11077 MachineConstantPool *ConstantPool = MF->getConstantPool();
11078 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11079 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11080
11081 // MachineConstantPool wants an explicit alignment.
11082 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11083 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11084
11085 Register VReg1 = MRI->createVirtualRegister(TRC);
11086 BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
11087 .addReg(VReg1, RegState::Define)
11090 BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
11091 .addReg(NewVReg1)
11092 .addReg(VReg1)
11094 }
11095
11096 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
11097 .addMBB(TrapBB)
11099 .addReg(ARM::CPSR);
11100
11101 Register NewVReg2 = MRI->createVirtualRegister(TRC);
11102 BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
11103 .addReg(ARM::CPSR, RegState::Define)
11104 .addReg(NewVReg1)
11105 .addImm(2)
11107
11108 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11109 BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
11110 .addJumpTableIndex(MJTI)
11112
11113 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11114 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
11115 .addReg(ARM::CPSR, RegState::Define)
11116 .addReg(NewVReg2, RegState::Kill)
11117 .addReg(NewVReg3)
11119
11120 MachineMemOperand *JTMMOLd =
11121 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11123
11124 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11125 BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
11126 .addReg(NewVReg4, RegState::Kill)
11127 .addImm(0)
11128 .addMemOperand(JTMMOLd)
11130
11131 unsigned NewVReg6 = NewVReg5;
11132 if (IsPositionIndependent) {
11133 NewVReg6 = MRI->createVirtualRegister(TRC);
11134 BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
11135 .addReg(ARM::CPSR, RegState::Define)
11136 .addReg(NewVReg5, RegState::Kill)
11137 .addReg(NewVReg3)
11139 }
11140
11141 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
11142 .addReg(NewVReg6, RegState::Kill)
11143 .addJumpTableIndex(MJTI);
11144 } else {
11145 Register NewVReg1 = MRI->createVirtualRegister(TRC);
11146 BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
11147 .addFrameIndex(FI)
11148 .addImm(4)
11149 .addMemOperand(FIMMOLd)
11151
11152 if (NumLPads < 256) {
11153 BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
11154 .addReg(NewVReg1)
11155 .addImm(NumLPads)
11157 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
11158 Register VReg1 = MRI->createVirtualRegister(TRC);
11159 BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
11160 .addImm(NumLPads & 0xFFFF)
11162
11163 unsigned VReg2 = VReg1;
11164 if ((NumLPads & 0xFFFF0000) != 0) {
11165 VReg2 = MRI->createVirtualRegister(TRC);
11166 BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
11167 .addReg(VReg1)
11168 .addImm(NumLPads >> 16)
11170 }
11171
11172 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11173 .addReg(NewVReg1)
11174 .addReg(VReg2)
11176 } else {
11177 MachineConstantPool *ConstantPool = MF->getConstantPool();
11178 Type *Int32Ty = Type::getInt32Ty(MF->getFunction().getContext());
11179 const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
11180
11181 // MachineConstantPool wants an explicit alignment.
11182 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11183 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11184
11185 Register VReg1 = MRI->createVirtualRegister(TRC);
11186 BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
11187 .addReg(VReg1, RegState::Define)
11189 .addImm(0)
11191 BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
11192 .addReg(NewVReg1)
11193 .addReg(VReg1, RegState::Kill)
11195 }
11196
11197 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
11198 .addMBB(TrapBB)
11200 .addReg(ARM::CPSR);
11201
11202 Register NewVReg3 = MRI->createVirtualRegister(TRC);
11203 BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
11204 .addReg(NewVReg1)
11207 .add(condCodeOp());
11208 Register NewVReg4 = MRI->createVirtualRegister(TRC);
11209 BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
11210 .addJumpTableIndex(MJTI)
11212
11213 MachineMemOperand *JTMMOLd =
11214 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(*MF),
11216 Register NewVReg5 = MRI->createVirtualRegister(TRC);
11217 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
11218 .addReg(NewVReg3, RegState::Kill)
11219 .addReg(NewVReg4)
11220 .addImm(0)
11221 .addMemOperand(JTMMOLd)
11223
11224 if (IsPositionIndependent) {
11225 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
11226 .addReg(NewVReg5, RegState::Kill)
11227 .addReg(NewVReg4)
11228 .addJumpTableIndex(MJTI);
11229 } else {
11230 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
11231 .addReg(NewVReg5, RegState::Kill)
11232 .addJumpTableIndex(MJTI);
11233 }
11234 }
11235
11236 // Add the jump table entries as successors to the MBB.
11237 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
11238 for (MachineBasicBlock *CurMBB : LPadList) {
11239 if (SeenMBBs.insert(CurMBB).second)
11240 DispContBB->addSuccessor(CurMBB);
11241 }
11242
11243 // N.B. the order the invoke BBs are processed in doesn't matter here.
11244 const MCPhysReg *SavedRegs = RI.getCalleeSavedRegs(MF);
11245 SmallVector<MachineBasicBlock*, 64> MBBLPads;
11246 for (MachineBasicBlock *BB : InvokeBBs) {
11247
11248 // Remove the landing pad successor from the invoke block and replace it
11249 // with the new dispatch block.
11250 SmallVector<MachineBasicBlock*, 4> Successors(BB->successors());
11251 while (!Successors.empty()) {
11252 MachineBasicBlock *SMBB = Successors.pop_back_val();
11253 if (SMBB->isEHPad()) {
11254 BB->removeSuccessor(SMBB);
11255 MBBLPads.push_back(SMBB);
11256 }
11257 }
11258
11259 BB->addSuccessor(DispatchBB, BranchProbability::getZero());
11260 BB->normalizeSuccProbs();
11261
11262 // Find the invoke call and mark all of the callee-saved registers as
11263 // 'implicit defined' so that they're spilled. This prevents code from
11264 // moving instructions to before the EH block, where they will never be
11265 // executed.
11266 for (MachineBasicBlock::reverse_iterator
11267 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
11268 if (!II->isCall()) continue;
11269
11271 for (MachineInstr::mop_iterator
11272 OI = II->operands_begin(), OE = II->operands_end();
11273 OI != OE; ++OI) {
11274 if (!OI->isReg()) continue;
11275 DefRegs[OI->getReg()] = true;
11276 }
11277
11278 MachineInstrBuilder MIB(*MF, &*II);
11279
11280 for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
11281 unsigned Reg = SavedRegs[i];
11282 if (Subtarget->isThumb2() &&
11283 !ARM::tGPRRegClass.contains(Reg) &&
11284 !ARM::hGPRRegClass.contains(Reg))
11285 continue;
11286 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
11287 continue;
11288 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
11289 continue;
11290 if (!DefRegs[Reg])
11292 }
11293
11294 break;
11295 }
11296 }
11297
11298 // Mark all former landing pads as non-landing pads. The dispatch is the only
11299 // landing pad now.
11300 for (MachineBasicBlock *MBBLPad : MBBLPads)
11301 MBBLPad->setIsEHPad(false);
11302
11303 // The instruction is gone now.
11304 MI.eraseFromParent();
11305}
11306
11307static
11308MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
11309 for (MachineBasicBlock *S : MBB->successors())
11310 if (S != Succ)
11311 return S;
11312 llvm_unreachable("Expecting a BB with two successors!");
11313}
11314
11315 /// Return the load opcode for a given load size. If the load size is >= 8,
11316 /// a NEON opcode will be returned.
11317static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) {
11318 if (LdSize >= 8)
11319 return LdSize == 16 ? ARM::VLD1q32wb_fixed
11320 : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0;
11321 if (IsThumb1)
11322 return LdSize == 4 ? ARM::tLDRi
11323 : LdSize == 2 ? ARM::tLDRHi
11324 : LdSize == 1 ? ARM::tLDRBi : 0;
11325 if (IsThumb2)
11326 return LdSize == 4 ? ARM::t2LDR_POST
11327 : LdSize == 2 ? ARM::t2LDRH_POST
11328 : LdSize == 1 ? ARM::t2LDRB_POST : 0;
11329 return LdSize == 4 ? ARM::LDR_POST_IMM
11330 : LdSize == 2 ? ARM::LDRH_POST
11331 : LdSize == 1 ? ARM::LDRB_POST_IMM : 0;
11332}
11333
11334 /// Return the store opcode for a given store size. If the store size is >= 8,
11335 /// a NEON opcode will be returned.
11336static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) {
11337 if (StSize >= 8)
11338 return StSize == 16 ? ARM::VST1q32wb_fixed
11339 : StSize == 8 ? ARM::VST1d32wb_fixed : 0;
11340 if (IsThumb1)
11341 return StSize == 4 ? ARM::tSTRi
11342 : StSize == 2 ? ARM::tSTRHi
11343 : StSize == 1 ? ARM::tSTRBi : 0;
11344 if (IsThumb2)
11345 return StSize == 4 ? ARM::t2STR_POST
11346 : StSize == 2 ? ARM::t2STRH_POST
11347 : StSize == 1 ? ARM::t2STRB_POST : 0;
11348 return StSize == 4 ? ARM::STR_POST_IMM
11349 : StSize == 2 ? ARM::STRH_POST
11350 : StSize == 1 ? ARM::STRB_POST_IMM : 0;
11351}
11352
11353/// Emit a post-increment load operation with given size. The instructions
11354/// will be added to BB at Pos.
11355static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11356 const TargetInstrInfo *TII, const DebugLoc &dl,
11357 unsigned LdSize, unsigned Data, unsigned AddrIn,
11358 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11359 unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2);
11360 assert(LdOpc != 0 && "Should have a load opcode");
11361 if (LdSize >= 8) {
11362 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11363 .addReg(AddrOut, RegState::Define)
11364 .addReg(AddrIn)
11365 .addImm(0)
11367 } else if (IsThumb1) {
11368 // load + update AddrIn
11369 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11370 .addReg(AddrIn)
11371 .addImm(0)
11373 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11374 .add(t1CondCodeOp())
11375 .addReg(AddrIn)
11376 .addImm(LdSize)
11378 } else if (IsThumb2) {
11379 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11380 .addReg(AddrOut, RegState::Define)
11381 .addReg(AddrIn)
11382 .addImm(LdSize)
11384 } else { // arm
11385 BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data)
11386 .addReg(AddrOut, RegState::Define)
11387 .addReg(AddrIn)
11388 .addReg(0)
11389 .addImm(LdSize)
11391 }
11392}
11393
11394/// Emit a post-increment store operation with given size. The instructions
11395/// will be added to BB at Pos.
11396static void emitPostSt(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos,
11397 const TargetInstrInfo *TII, const DebugLoc &dl,
11398 unsigned StSize, unsigned Data, unsigned AddrIn,
11399 unsigned AddrOut, bool IsThumb1, bool IsThumb2) {
11400 unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2);
11401 assert(StOpc != 0 && "Should have a store opcode");
11402 if (StSize >= 8) {
11403 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11404 .addReg(AddrIn)
11405 .addImm(0)
11406 .addReg(Data)
11408 } else if (IsThumb1) {
11409 // store + update AddrIn
11410 BuildMI(*BB, Pos, dl, TII->get(StOpc))
11411 .addReg(Data)
11412 .addReg(AddrIn)
11413 .addImm(0)
11415 BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut)
11416 .add(t1CondCodeOp())
11417 .addReg(AddrIn)
11418 .addImm(StSize)
11420 } else if (IsThumb2) {
11421 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11422 .addReg(Data)
11423 .addReg(AddrIn)
11424 .addImm(StSize)
11426 } else { // arm
11427 BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut)
11428 .addReg(Data)
11429 .addReg(AddrIn)
11430 .addReg(0)
11431 .addImm(StSize)
11433 }
11434}
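// For example, a 4-byte copy unit uses t2LDR_POST/t2STR_POST on Thumb-2 and
// LDR_POST_IMM/STR_POST_IMM on ARM, while Thumb-1 has no post-increment forms
// and instead pairs a plain tLDRi/tSTRi with an explicit tADDi8 that advances
// the address register.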
11435
11436MachineBasicBlock *
11437ARMTargetLowering::EmitStructByval(MachineInstr &MI,
11438 MachineBasicBlock *BB) const {
11439 // This pseudo instruction has 4 operands: dst, src, size, alignment.
11440 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
11441 // Otherwise, we will generate unrolled scalar copies.
11442 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11443 const BasicBlock *LLVM_BB = BB->getBasicBlock();
11444 MachineFunction::iterator It = ++BB->getIterator();
11445
11446 Register dest = MI.getOperand(0).getReg();
11447 Register src = MI.getOperand(1).getReg();
11448 unsigned SizeVal = MI.getOperand(2).getImm();
11449 unsigned Alignment = MI.getOperand(3).getImm();
11450 DebugLoc dl = MI.getDebugLoc();
11451
11452 MachineFunction *MF = BB->getParent();
11453 MachineRegisterInfo &MRI = MF->getRegInfo();
11454 unsigned UnitSize = 0;
11455 const TargetRegisterClass *TRC = nullptr;
11456 const TargetRegisterClass *VecTRC = nullptr;
11457
11458 bool IsThumb1 = Subtarget->isThumb1Only();
11459 bool IsThumb2 = Subtarget->isThumb2();
11460 bool IsThumb = Subtarget->isThumb();
11461
11462 if (Alignment & 1) {
11463 UnitSize = 1;
11464 } else if (Alignment & 2) {
11465 UnitSize = 2;
11466 } else {
11467 // Check whether we can use NEON instructions.
11468 if (!MF->getFunction().hasFnAttribute(Attribute::NoImplicitFloat) &&
11469 Subtarget->hasNEON()) {
11470 if ((Alignment % 16 == 0) && SizeVal >= 16)
11471 UnitSize = 16;
11472 else if ((Alignment % 8 == 0) && SizeVal >= 8)
11473 UnitSize = 8;
11474 }
11475 // Can't use NEON instructions.
11476 if (UnitSize == 0)
11477 UnitSize = 4;
11478 }
11479
11480 // Select the correct opcode and register class for unit size load/store
11481 bool IsNeon = UnitSize >= 8;
11482 TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
11483 if (IsNeon)
11484 VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
11485 : UnitSize == 8 ? &ARM::DPRRegClass
11486 : nullptr;
11487
11488 unsigned BytesLeft = SizeVal % UnitSize;
11489 unsigned LoopSize = SizeVal - BytesLeft;
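 // Illustrative arithmetic (assumed values, not from the source): SizeVal = 37
 // with 8-byte alignment and NEON available gives UnitSize = 8, so
 // BytesLeft = 37 % 8 = 5 and LoopSize = 32; the 5 trailing bytes are copied
 // by the byte-sized epilogue below.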
11490
11491 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
11492 // Use LDR and STR to copy.
11493 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
11494 // [destOut] = STR_POST(scratch, destIn, UnitSize)
11495 unsigned srcIn = src;
11496 unsigned destIn = dest;
11497 for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
11498 Register srcOut = MRI.createVirtualRegister(TRC);
11499 Register destOut = MRI.createVirtualRegister(TRC);
11500 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11501 emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut,
11502 IsThumb1, IsThumb2);
11503 emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut,
11504 IsThumb1, IsThumb2);
11505 srcIn = srcOut;
11506 destIn = destOut;
11507 }
11508
11509 // Handle the leftover bytes with LDRB and STRB.
11510 // [scratch, srcOut] = LDRB_POST(srcIn, 1)
11511 // [destOut] = STRB_POST(scratch, destIn, 1)
11512 for (unsigned i = 0; i < BytesLeft; i++) {
11513 Register srcOut = MRI.createVirtualRegister(TRC);
11514 Register destOut = MRI.createVirtualRegister(TRC);
11515 Register scratch = MRI.createVirtualRegister(TRC);
11516 emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut,
11517 IsThumb1, IsThumb2);
11518 emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut,
11519 IsThumb1, IsThumb2);
11520 srcIn = srcOut;
11521 destIn = destOut;
11522 }
11523 MI.eraseFromParent(); // The instruction is gone now.
11524 return BB;
11525 }
11526
11527 // Expand the pseudo op to a loop.
11528 // thisMBB:
11529 // ...
11530 // movw varEnd, # --> with thumb2
11531 // movt varEnd, #
11532 // ldrcp varEnd, idx --> without thumb2
11533 // fallthrough --> loopMBB
11534 // loopMBB:
11535 // PHI varPhi, varEnd, varLoop
11536 // PHI srcPhi, src, srcLoop
11537 // PHI destPhi, dst, destLoop
11538 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11539 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11540 // subs varLoop, varPhi, #UnitSize
11541 // bne loopMBB
11542 // fallthrough --> exitMBB
11543 // exitMBB:
11544 // epilogue to handle left-over bytes
11545 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11546 // [destOut] = STRB_POST(scratch, destLoop, 1)
11547 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11548 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
11549 MF->insert(It, loopMBB);
11550 MF->insert(It, exitMBB);
11551
11552 // Set the call frame size on entry to the new basic blocks.
11553 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
11554 loopMBB->setCallFrameSize(CallFrameSize);
11555 exitMBB->setCallFrameSize(CallFrameSize);
11556
11557 // Transfer the remainder of BB and its successor edges to exitMBB.
11558 exitMBB->splice(exitMBB->begin(), BB,
11559 std::next(MachineBasicBlock::iterator(MI)), BB->end());
11561
11562 // Load an immediate to varEnd.
11563 Register varEnd = MRI.createVirtualRegister(TRC);
11564 if (Subtarget->useMovt()) {
11565 BuildMI(BB, dl, TII->get(IsThumb ? ARM::t2MOVi32imm : ARM::MOVi32imm),
11566 varEnd)
11567 .addImm(LoopSize);
11568 } else if (Subtarget->genExecuteOnly()) {
11569 assert(IsThumb && "Non-thumb expected to have used movt");
11570 BuildMI(BB, dl, TII->get(ARM::tMOVi32imm), varEnd).addImm(LoopSize);
11571 } else {
11574 const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
11575
11576 // MachineConstantPool wants an explicit alignment.
11577 Align Alignment = MF->getDataLayout().getPrefTypeAlign(Int32Ty);
11578 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Alignment);
11579 MachineMemOperand *CPMMO =
11582
11583 if (IsThumb)
11584 BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci))
11585 .addReg(varEnd, RegState::Define)
11588 .addMemOperand(CPMMO);
11589 else
11590 BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp))
11591 .addReg(varEnd, RegState::Define)
11593 .addImm(0)
11595 .addMemOperand(CPMMO);
11596 }
11597 BB->addSuccessor(loopMBB);
11598
11599 // Generate the loop body:
11600 // varPhi = PHI(varLoop, varEnd)
11601 // srcPhi = PHI(srcLoop, src)
11602 // destPhi = PHI(destLoop, dst)
11603 MachineBasicBlock *entryBB = BB;
11604 BB = loopMBB;
11605 Register varLoop = MRI.createVirtualRegister(TRC);
11606 Register varPhi = MRI.createVirtualRegister(TRC);
11607 Register srcLoop = MRI.createVirtualRegister(TRC);
11608 Register srcPhi = MRI.createVirtualRegister(TRC);
11609 Register destLoop = MRI.createVirtualRegister(TRC);
11610 Register destPhi = MRI.createVirtualRegister(TRC);
11611
11612 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
11613 .addReg(varLoop).addMBB(loopMBB)
11614 .addReg(varEnd).addMBB(entryBB);
11615 BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
11616 .addReg(srcLoop).addMBB(loopMBB)
11617 .addReg(src).addMBB(entryBB);
11618 BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
11619 .addReg(destLoop).addMBB(loopMBB)
11620 .addReg(dest).addMBB(entryBB);
11621
11622 // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
11623 // [destLoop] = STR_POST(scratch, destPhi, UnitSize)
11624 Register scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC);
11625 emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop,
11626 IsThumb1, IsThumb2);
11627 emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop,
11628 IsThumb1, IsThumb2);
11629
11630 // Decrement loop variable by UnitSize.
11631 if (IsThumb1) {
11632 BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop)
11633 .add(t1CondCodeOp())
11634 .addReg(varPhi)
11635 .addImm(UnitSize)
11637 } else {
11639 BuildMI(*BB, BB->end(), dl,
11640 TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
11641 MIB.addReg(varPhi)
11642 .addImm(UnitSize)
11644 .add(condCodeOp());
11645 MIB->getOperand(5).setReg(ARM::CPSR);
11646 MIB->getOperand(5).setIsDef(true);
11647 }
11648 BuildMI(*BB, BB->end(), dl,
11649 TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc))
11650 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
11651
11652 // loopMBB can loop back to loopMBB or fall through to exitMBB.
11653 BB->addSuccessor(loopMBB);
11654 BB->addSuccessor(exitMBB);
11655
11656 // Add epilogue to handle BytesLeft.
11657 BB = exitMBB;
11658 auto StartOfExit = exitMBB->begin();
11659
11660 // [scratch, srcOut] = LDRB_POST(srcLoop, 1)
11661 // [destOut] = STRB_POST(scratch, destLoop, 1)
11662 unsigned srcIn = srcLoop;
11663 unsigned destIn = destLoop;
11664 for (unsigned i = 0; i < BytesLeft; i++) {
11665 Register srcOut = MRI.createVirtualRegister(TRC);
11666 Register destOut = MRI.createVirtualRegister(TRC);
11667 Register scratch = MRI.createVirtualRegister(TRC);
11668 emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut,
11669 IsThumb1, IsThumb2);
11670 emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut,
11671 IsThumb1, IsThumb2);
11672 srcIn = srcOut;
11673 destIn = destOut;
11674 }
11675
11676 MI.eraseFromParent(); // The instruction is gone now.
11677 return BB;
11678}
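// Rough shape of the expanded copy loop (illustrative only; ARM mode with
// UnitSize = 4, and the actual opcodes depend on the subtarget):
//   movw/movt rEnd, #LoopSize
// loop:
//   ldr  rTmp, [rSrc], #4
//   str  rTmp, [rDst], #4
//   subs rEnd, rEnd, #4
//   bne  loop
// exit:
//   ; BytesLeft copied with byte-sized LDRB_POST/STRB_POST pairs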
11679
11680MachineBasicBlock *
11681ARMTargetLowering::EmitLowered__chkstk(MachineInstr &MI,
11682 MachineBasicBlock *MBB) const {
11684 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
11685 DebugLoc DL = MI.getDebugLoc();
11686
11687 assert(Subtarget->isTargetWindows() &&
11688 "__chkstk is only supported on Windows");
11689 assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
11690
11691 // __chkstk takes the number of words to allocate on the stack in R4, and
11692 // returns the stack adjustment in number of bytes in R4. This will not
11693 // clobber any other registers (other than the obvious lr).
11694 //
11695 // Although, technically, IP should be considered a register which may be
11696 // clobbered, the call itself will not touch it. Windows on ARM is a pure
11697 // thumb-2 environment, so there is no interworking required. As a result, we
11698 // do not expect a veneer to be emitted by the linker, clobbering IP.
11699 //
11700 // Each module receives its own copy of __chkstk, so no import thunk is
11701 // required, again, ensuring that IP is not clobbered.
11702 //
11703 // Finally, although some linkers may theoretically provide a trampoline for
11704 // out of range calls (which is quite common due to a 32M range limitation of
11705 // branches for Thumb), we can generate the long-call version via
11706 // -mcmodel=large, alleviating the need for the trampoline which may clobber
11707 // IP.
11708
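 // Illustrative example of the contract described above (assumed numbers): to
 // allocate 4096 bytes, R4 holds 4096 / 4 = 1024 words on entry to __chkstk;
 // the call returns 4096 bytes in R4, and the t2SUBrr emitted below performs
 // sp = sp - r4.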
11709 switch (TM.getCodeModel()) {
11710 case CodeModel::Tiny:
11711 llvm_unreachable("Tiny code model not available on ARM.");
11712 case CodeModel::Small:
11713 case CodeModel::Medium:
11714 case CodeModel::Kernel:
11715 BuildMI(*MBB, MI, DL, TII.get(ARM::tBL))
11717 .addExternalSymbol("__chkstk")
11720 .addReg(ARM::R12,
11722 .addReg(ARM::CPSR,
11724 break;
11725 case CodeModel::Large: {
11727 Register Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11728
11729 BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg)
11730 .addExternalSymbol("__chkstk");
11733 .addReg(Reg, RegState::Kill)
11736 .addReg(ARM::R12,
11738 .addReg(ARM::CPSR,
11740 break;
11741 }
11742 }
11743
11744 BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), ARM::SP)
11745 .addReg(ARM::SP, RegState::Kill)
11746 .addReg(ARM::R4, RegState::Kill)
11749 .add(condCodeOp());
11750
11751 MI.eraseFromParent();
11752 return MBB;
11753}
11754
11755MachineBasicBlock *
11756ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
11757 MachineBasicBlock *MBB) const {
11758 DebugLoc DL = MI.getDebugLoc();
11759 MachineFunction *MF = MBB->getParent();
11760 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11761
11763 MF->insert(++MBB->getIterator(), ContBB);
11764 ContBB->splice(ContBB->begin(), MBB,
11765 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11767 MBB->addSuccessor(ContBB);
11768
11770 BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
11771 MF->push_back(TrapBB);
11772 MBB->addSuccessor(TrapBB);
11773
11774 BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
11775 .addReg(MI.getOperand(0).getReg())
11776 .addImm(0)
11778 BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
11779 .addMBB(TrapBB)
11781 .addReg(ARM::CPSR);
11782
11783 MI.eraseFromParent();
11784 return ContBB;
11785}
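// Rough shape of the emitted check (illustrative): the divisor is compared
// against zero and a conditional branch diverts to the trap block:
//   cmp  rDivisor, #0
//   beq  .LTrapBB      ; __brkdiv0 raises the divide-by-zero trap
//   ...                ; otherwise fall through to ContBB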
11786
11787// The CPSR operand of SelectItr might be missing a kill marker
11788// because there were multiple uses of CPSR, and ISel didn't know
11789// which to mark. Figure out whether SelectItr should have had a
11790// kill marker, and set it if it should. Returns the correct kill
11791// marker value.
11792static bool checkAndUpdateCPSRKill(MachineBasicBlock::iterator SelectItr,
11793 MachineBasicBlock* BB,
11794 const TargetRegisterInfo* TRI) {
11795 // Scan forward through BB for a use/def of CPSR.
11796 MachineBasicBlock::iterator miI(std::next(SelectItr));
11797 for (MachineBasicBlock::iterator miE = BB->end(); miI != miE; ++miI) {
11798 const MachineInstr& mi = *miI;
11799 if (mi.readsRegister(ARM::CPSR))
11800 return false;
11801 if (mi.definesRegister(ARM::CPSR))
11802 break; // Should have kill-flag - update below.
11803 }
11804
11805 // If we hit the end of the block, check whether CPSR is live into a
11806 // successor.
11807 if (miI == BB->end()) {
11808 for (MachineBasicBlock *Succ : BB->successors())
11809 if (Succ->isLiveIn(ARM::CPSR))
11810 return false;
11811 }
11812
11813 // We found a def, or hit the end of the basic block and CPSR wasn't live
11814 // out. SelectMI should have a kill flag on CPSR.
11815 SelectItr->addRegisterKilled(ARM::CPSR, TRI);
11816 return true;
11817}
11818
11819/// Adds logic in loop entry MBB to calculate loop iteration count and adds
11820/// t2WhileLoopSetup and t2WhileLoopStart to generate a WLS loop.
11821static Register genTPEntry(MachineBasicBlock *TpEntry,
11822 MachineBasicBlock *TpLoopBody,
11823 MachineBasicBlock *TpExit, Register OpSizeReg,
11824 const TargetInstrInfo *TII, DebugLoc Dl,
11825 MachineRegisterInfo &MRI) {
11826 // Calculates loop iteration count = ceil(n/16) = (n + 15) >> 4.
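 // For example (illustrative): n = 100 gives (100 + 15) >> 4 = 7 iterations,
 // which covers 7 * 16 = 112 >= 100 bytes.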
11827 Register AddDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11828 BuildMI(TpEntry, Dl, TII->get(ARM::t2ADDri), AddDestReg)
11829 .addUse(OpSizeReg)
11830 .addImm(15)
11832 .addReg(0);
11833
11834 Register LsrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11835 BuildMI(TpEntry, Dl, TII->get(ARM::t2LSRri), LsrDestReg)
11836 .addUse(AddDestReg, RegState::Kill)
11837 .addImm(4)
11839 .addReg(0);
11840
11841 Register TotalIterationsReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11842 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopSetup), TotalIterationsReg)
11843 .addUse(LsrDestReg, RegState::Kill);
11844
11845 BuildMI(TpEntry, Dl, TII->get(ARM::t2WhileLoopStart))
11846 .addUse(TotalIterationsReg)
11847 .addMBB(TpExit);
11848
11849 BuildMI(TpEntry, Dl, TII->get(ARM::t2B))
11850 .addMBB(TpLoopBody)
11852
11853 return TotalIterationsReg;
11854}
11855
11856/// Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and
11857/// t2DoLoopEnd. These are used by later passes to generate tail predicated
11858/// loops.
11859static void genTPLoopBody(MachineBasicBlock *TpLoopBody,
11860 MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit,
11861 const TargetInstrInfo *TII, DebugLoc Dl,
11862 MachineRegisterInfo &MRI, Register OpSrcReg,
11863 Register OpDestReg, Register ElementCountReg,
11864 Register TotalIterationsReg, bool IsMemcpy) {
11865 // First insert 4 PHI nodes for: Current pointer to Src (if memcpy), Dest
11866 // array, loop iteration counter, predication counter.
11867
11868 Register SrcPhiReg, CurrSrcReg;
11869 if (IsMemcpy) {
11870 // Current position in the src array
11871 SrcPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11872 CurrSrcReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11873 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), SrcPhiReg)
11874 .addUse(OpSrcReg)
11875 .addMBB(TpEntry)
11876 .addUse(CurrSrcReg)
11877 .addMBB(TpLoopBody);
11878 }
11879
11880 // Current position in the dest array
11881 Register DestPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11882 Register CurrDestReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11883 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), DestPhiReg)
11884 .addUse(OpDestReg)
11885 .addMBB(TpEntry)
11886 .addUse(CurrDestReg)
11887 .addMBB(TpLoopBody);
11888
11889 // Current loop counter
11890 Register LoopCounterPhiReg = MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11891 Register RemainingLoopIterationsReg =
11892 MRI.createVirtualRegister(&ARM::GPRlrRegClass);
11893 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), LoopCounterPhiReg)
11894 .addUse(TotalIterationsReg)
11895 .addMBB(TpEntry)
11896 .addUse(RemainingLoopIterationsReg)
11897 .addMBB(TpLoopBody);
11898
11899 // Predication counter
11900 Register PredCounterPhiReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11901 Register RemainingElementsReg = MRI.createVirtualRegister(&ARM::rGPRRegClass);
11902 BuildMI(TpLoopBody, Dl, TII->get(ARM::PHI), PredCounterPhiReg)
11903 .addUse(ElementCountReg)
11904 .addMBB(TpEntry)
11905 .addUse(RemainingElementsReg)
11906 .addMBB(TpLoopBody);
11907
11908 // Pass predication counter to VCTP
11909 Register VccrReg = MRI.createVirtualRegister(&ARM::VCCRRegClass);
11910 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VCTP8), VccrReg)
11911 .addUse(PredCounterPhiReg)
11913 .addReg(0)
11914 .addReg(0);
11915
11916 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2SUBri), RemainingElementsReg)
11917 .addUse(PredCounterPhiReg)
11918 .addImm(16)
11920 .addReg(0);
11921
11922 // VLDRB (only if memcpy) and VSTRB instructions, predicated using VPR
11923 Register SrcValueReg;
11924 if (IsMemcpy) {
11925 SrcValueReg = MRI.createVirtualRegister(&ARM::MQPRRegClass);
11926 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VLDRBU8_post))
11927 .addDef(CurrSrcReg)
11928 .addDef(SrcValueReg)
11929 .addReg(SrcPhiReg)
11930 .addImm(16)
11932 .addUse(VccrReg)
11933 .addReg(0);
11934 } else
11935 SrcValueReg = OpSrcReg;
11936
11937 BuildMI(TpLoopBody, Dl, TII->get(ARM::MVE_VSTRBU8_post))
11938 .addDef(CurrDestReg)
11939 .addUse(SrcValueReg)
11940 .addReg(DestPhiReg)
11941 .addImm(16)
11943 .addUse(VccrReg)
11944 .addReg(0);
11945
11946 // Add the pseudoInstrs for decrementing the loop counter and marking the
11947 // end: t2DoLoopDec and t2DoLoopEnd
11948 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopDec), RemainingLoopIterationsReg)
11949 .addUse(LoopCounterPhiReg)
11950 .addImm(1);
11951
11952 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2LoopEnd))
11953 .addUse(RemainingLoopIterationsReg)
11954 .addMBB(TpLoopBody);
11955
11956 BuildMI(TpLoopBody, Dl, TII->get(ARM::t2B))
11957 .addMBB(TpExit)
11959}
11960
11961MachineBasicBlock *
11962ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
11963 MachineBasicBlock *BB) const {
11964 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
11965 DebugLoc dl = MI.getDebugLoc();
11966 bool isThumb2 = Subtarget->isThumb2();
11967 switch (MI.getOpcode()) {
11968 default: {
11969 MI.print(errs());
11970 llvm_unreachable("Unexpected instr type to insert");
11971 }
11972
11973 // Thumb1 post-indexed loads are really just single-register LDMs.
11974 case ARM::tLDR_postidx: {
11975 MachineOperand Def(MI.getOperand(1));
11976 BuildMI(*BB, MI, dl, TII->get(ARM::tLDMIA_UPD))
11977 .add(Def) // Rn_wb
11978 .add(MI.getOperand(2)) // Rn
11979 .add(MI.getOperand(3)) // PredImm
11980 .add(MI.getOperand(4)) // PredReg
11981 .add(MI.getOperand(0)) // Rt
11982 .cloneMemRefs(MI);
11983 MI.eraseFromParent();
11984 return BB;
11985 }
11986
11987 case ARM::MVE_MEMCPYLOOPINST:
11988 case ARM::MVE_MEMSETLOOPINST: {
11989
11990 // Transformation below expands MVE_MEMCPYLOOPINST/MVE_MEMSETLOOPINST Pseudo
11991 // into a Tail Predicated (TP) Loop. It adds the instructions to calculate
11992 // the iteration count = ceil(size_in_bytes/16) in the TP entry block and
11993 // adds the relevant instructions in the TP loop Body for generation of a
11994 // WLSTP loop.
11995
11996 // Below is relevant portion of the CFG after the transformation.
11997 // The Machine Basic Blocks are shown along with branch conditions (in
11998 // brackets). Note that TP entry/exit MBBs depict the entry/exit of this
11999 // portion of the CFG and may not necessarily be the entry/exit of the
12000 // function.
12001
12002 // (Relevant) CFG after transformation:
12003 // TP entry MBB
12004 // |
12005 // |-----------------|
12006 // (n <= 0) (n > 0)
12007 // | |
12008 // | TP loop Body MBB<--|
12009 // | | |
12010 // \ |___________|
12011 // \ /
12012 // TP exit MBB
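 // After the later low-overhead-loop passes consume the pseudos added below,
 // the memcpy case typically ends up looking roughly like (illustrative only):
 //   wlstp.8  lr, rSize, .Lexit
 // .Lbody:
 //   vldrb.u8 q0, [rSrc], #16
 //   vstrb.8  q0, [rDst], #16
 //   letp     lr, .Lbody
 // .Lexit: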
12013
12014 MachineFunction *MF = BB->getParent();
12015 MachineFunctionProperties &Properties = MF->getProperties();
12017
12018 Register OpDestReg = MI.getOperand(0).getReg();
12019 Register OpSrcReg = MI.getOperand(1).getReg();
12020 Register OpSizeReg = MI.getOperand(2).getReg();
12021
12022 // Allocate the required MBBs and add to parent function.
12023 MachineBasicBlock *TpEntry = BB;
12024 MachineBasicBlock *TpLoopBody = MF->CreateMachineBasicBlock();
12025 MachineBasicBlock *TpExit;
12026
12027 MF->push_back(TpLoopBody);
12028
12029 // If any instructions are present in the current block after
12030 // MVE_MEMCPYLOOPINST or MVE_MEMSETLOOPINST, split the current block and
12031 // move the instructions into the newly created exit block. If there are no
12032 // instructions, add an explicit branch to the FallThrough block and then
12033 // split.
12034 //
12035 // The split is required for two reasons:
12036 // 1) A terminator (t2WhileLoopStart) will be placed at that site.
12037 // 2) Since a TPLoopBody will be added later, any phis in successive blocks
12038 // need to be updated. splitAt() already handles this.
12039 TpExit = BB->splitAt(MI, false);
12040 if (TpExit == BB) {
12041 assert(BB->canFallThrough() && "Exit Block must be Fallthrough of the "
12042 "block containing memcpy/memset Pseudo");
12043 TpExit = BB->getFallThrough();
12044 BuildMI(BB, dl, TII->get(ARM::t2B))
12045 .addMBB(TpExit)
12047 TpExit = BB->splitAt(MI, false);
12048 }
12049
12050 // Add logic for iteration count
12051 Register TotalIterationsReg =
12052 genTPEntry(TpEntry, TpLoopBody, TpExit, OpSizeReg, TII, dl, MRI);
12053
12054 // Add the vectorized (and predicated) loads/store instructions
12055 bool IsMemcpy = MI.getOpcode() == ARM::MVE_MEMCPYLOOPINST;
12056 genTPLoopBody(TpLoopBody, TpEntry, TpExit, TII, dl, MRI, OpSrcReg,
12057 OpDestReg, OpSizeReg, TotalIterationsReg, IsMemcpy);
12058
12059 // Required to avoid conflict with the MachineVerifier during testing.
12061
12062 // Connect the blocks
12063 TpEntry->addSuccessor(TpLoopBody);
12064 TpLoopBody->addSuccessor(TpLoopBody);
12065 TpLoopBody->addSuccessor(TpExit);
12066
12067 // Reorder for a more natural layout
12068 TpLoopBody->moveAfter(TpEntry);
12069 TpExit->moveAfter(TpLoopBody);
12070
12071 // Finally, remove the memcpy Pseudo Instruction
12072 MI.eraseFromParent();
12073
12074 // Return the exit block as it may contain other instructions requiring a
12075 // custom inserter
12076 return TpExit;
12077 }
12078
12079 // The Thumb2 pre-indexed stores have the same MI operands; they just
12080 // define them differently in the .td files from the isel patterns, so
12081 // they need pseudos.
12082 case ARM::t2STR_preidx:
12083 MI.setDesc(TII->get(ARM::t2STR_PRE));
12084 return BB;
12085 case ARM::t2STRB_preidx:
12086 MI.setDesc(TII->get(ARM::t2STRB_PRE));
12087 return BB;
12088 case ARM::t2STRH_preidx:
12089 MI.setDesc(TII->get(ARM::t2STRH_PRE));
12090 return BB;
12091
12092 case ARM::STRi_preidx:
12093 case ARM::STRBi_preidx: {
12094 unsigned NewOpc = MI.getOpcode() == ARM::STRi_preidx ? ARM::STR_PRE_IMM
12095 : ARM::STRB_PRE_IMM;
12096 // Decode the offset.
12097 unsigned Offset = MI.getOperand(4).getImm();
12098 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
12100 if (isSub)
12101 Offset = -Offset;
12102
12103 MachineMemOperand *MMO = *MI.memoperands_begin();
12104 BuildMI(*BB, MI, dl, TII->get(NewOpc))
12105 .add(MI.getOperand(0)) // Rn_wb
12106 .add(MI.getOperand(1)) // Rt
12107 .add(MI.getOperand(2)) // Rn
12108 .addImm(Offset) // offset (skip GPR==zero_reg)
12109 .add(MI.getOperand(5)) // pred
12110 .add(MI.getOperand(6))
12111 .addMemOperand(MMO);
12112 MI.eraseFromParent();
12113 return BB;
12114 }
12115 case ARM::STRr_preidx:
12116 case ARM::STRBr_preidx:
12117 case ARM::STRH_preidx: {
12118 unsigned NewOpc;
12119 switch (MI.getOpcode()) {
12120 default: llvm_unreachable("unexpected opcode!");
12121 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
12122 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
12123 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
12124 }
12125 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
12126 for (const MachineOperand &MO : MI.operands())
12127 MIB.add(MO);
12128 MI.eraseFromParent();
12129 return BB;
12130 }
12131
12132 case ARM::tMOVCCr_pseudo: {
12133 // To "insert" a SELECT_CC instruction, we actually have to insert the
12134 // diamond control-flow pattern. The incoming instruction knows the
12135 // destination vreg to set, the condition code register to branch on, the
12136 // true/false values to select between, and a branch opcode to use.
12137 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12139
12140 // thisMBB:
12141 // ...
12142 // TrueVal = ...
12143 // cmpTY ccX, r1, r2
12144 // bCC copy1MBB
12145 // fallthrough --> copy0MBB
12146 MachineBasicBlock *thisMBB = BB;
12147 MachineFunction *F = BB->getParent();
12148 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12149 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12150 F->insert(It, copy0MBB);
12151 F->insert(It, sinkMBB);
12152
12153 // Set the call frame size on entry to the new basic blocks.
12154 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12155 copy0MBB->setCallFrameSize(CallFrameSize);
12156 sinkMBB->setCallFrameSize(CallFrameSize);
12157
12158 // Check whether CPSR is live past the tMOVCCr_pseudo.
12159 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
12160 if (!MI.killsRegister(ARM::CPSR) &&
12161 !checkAndUpdateCPSRKill(MI, thisMBB, TRI)) {
12162 copy0MBB->addLiveIn(ARM::CPSR);
12163 sinkMBB->addLiveIn(ARM::CPSR);
12164 }
12165
12166 // Transfer the remainder of BB and its successor edges to sinkMBB.
12167 sinkMBB->splice(sinkMBB->begin(), BB,
12168 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12170
12171 BB->addSuccessor(copy0MBB);
12172 BB->addSuccessor(sinkMBB);
12173
12174 BuildMI(BB, dl, TII->get(ARM::tBcc))
12175 .addMBB(sinkMBB)
12176 .addImm(MI.getOperand(3).getImm())
12177 .addReg(MI.getOperand(4).getReg());
12178
12179 // copy0MBB:
12180 // %FalseValue = ...
12181 // # fallthrough to sinkMBB
12182 BB = copy0MBB;
12183
12184 // Update machine-CFG edges
12185 BB->addSuccessor(sinkMBB);
12186
12187 // sinkMBB:
12188 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12189 // ...
12190 BB = sinkMBB;
12191 BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), MI.getOperand(0).getReg())
12192 .addReg(MI.getOperand(1).getReg())
12193 .addMBB(copy0MBB)
12194 .addReg(MI.getOperand(2).getReg())
12195 .addMBB(thisMBB);
12196
12197 MI.eraseFromParent(); // The pseudo instruction is gone now.
12198 return BB;
12199 }
12200
12201 case ARM::BCCi64:
12202 case ARM::BCCZi64: {
12203 // If there is an unconditional branch to the other successor, remove it.
12204 BB->erase(std::next(MachineBasicBlock::iterator(MI)), BB->end());
12205
12206 // Compare both parts that make up the double comparison separately for
12207 // equality.
12208 bool RHSisZero = MI.getOpcode() == ARM::BCCZi64;
12209
12210 Register LHS1 = MI.getOperand(1).getReg();
12211 Register LHS2 = MI.getOperand(2).getReg();
12212 if (RHSisZero) {
12213 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12214 .addReg(LHS1)
12215 .addImm(0)
12217 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12218 .addReg(LHS2).addImm(0)
12219 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12220 } else {
12221 Register RHS1 = MI.getOperand(3).getReg();
12222 Register RHS2 = MI.getOperand(4).getReg();
12223 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12224 .addReg(LHS1)
12225 .addReg(RHS1)
12227 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
12228 .addReg(LHS2).addReg(RHS2)
12229 .addImm(ARMCC::EQ).addReg(ARM::CPSR);
12230 }
12231
12232 MachineBasicBlock *destMBB = MI.getOperand(RHSisZero ? 3 : 5).getMBB();
12233 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
12234 if (MI.getOperand(0).getImm() == ARMCC::NE)
12235 std::swap(destMBB, exitMBB);
12236
12237 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
12238 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
12239 if (isThumb2)
12240 BuildMI(BB, dl, TII->get(ARM::t2B))
12241 .addMBB(exitMBB)
12243 else
12244 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
12245
12246 MI.eraseFromParent(); // The pseudo instruction is gone now.
12247 return BB;
12248 }
12249
12250 case ARM::Int_eh_sjlj_setjmp:
12251 case ARM::Int_eh_sjlj_setjmp_nofp:
12252 case ARM::tInt_eh_sjlj_setjmp:
12253 case ARM::t2Int_eh_sjlj_setjmp:
12254 case ARM::t2Int_eh_sjlj_setjmp_nofp:
12255 return BB;
12256
12257 case ARM::Int_eh_sjlj_setup_dispatch:
12258 EmitSjLjDispatchBlock(MI, BB);
12259 return BB;
12260
12261 case ARM::ABS:
12262 case ARM::t2ABS: {
12263 // To insert an ABS instruction, we have to insert the
12264 // diamond control-flow pattern. The incoming instruction knows the
12265 // source vreg to test against 0, the destination vreg to set,
12266 // the condition code register to branch on, the
12267 // true/false values to select between, and a branch opcode to use.
12268 // It transforms
12269 // V1 = ABS V0
12270 // into
12271 // V2 = MOVS V0
12272 // BCC (branch to SinkBB if V0 >= 0)
12273 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
12274 // SinkBB: V1 = PHI(V2, V3)
12275 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12277 MachineFunction *Fn = BB->getParent();
12278 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12279 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
12280 Fn->insert(BBI, RSBBB);
12281 Fn->insert(BBI, SinkBB);
12282
12283 Register ABSSrcReg = MI.getOperand(1).getReg();
12284 Register ABSDstReg = MI.getOperand(0).getReg();
12285 bool ABSSrcKIll = MI.getOperand(1).isKill();
12286 bool isThumb2 = Subtarget->isThumb2();
12288 // In Thumb mode S must not be specified if source register is the SP or
12289 // PC and if destination register is the SP, so restrict register class
12290 Register NewRsbDstReg = MRI.createVirtualRegister(
12291 isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
12292
12293 // Transfer the remainder of BB and its successor edges to sinkMBB.
12294 SinkBB->splice(SinkBB->begin(), BB,
12295 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12297
12298 BB->addSuccessor(RSBBB);
12299 BB->addSuccessor(SinkBB);
12300
12301 // fall through to SinkMBB
12302 RSBBB->addSuccessor(SinkBB);
12303
12304 // insert a cmp at the end of BB
12305 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
12306 .addReg(ABSSrcReg)
12307 .addImm(0)
12309
12310 // insert a bcc with opposite CC to ARMCC::MI at the end of BB
12311 BuildMI(BB, dl,
12312 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
12314
12315 // insert rsbri in RSBBB
12316 // Note: BCC and rsbri will be converted into predicated rsbmi
12317 // by if-conversion pass
12318 BuildMI(*RSBBB, RSBBB->begin(), dl,
12319 TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
12320 .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
12321 .addImm(0)
12323 .add(condCodeOp());
12324
12325 // insert PHI in SinkBB,
12326 // reuse ABSDstReg to not change uses of ABS instruction
12327 BuildMI(*SinkBB, SinkBB->begin(), dl,
12328 TII->get(ARM::PHI), ABSDstReg)
12329 .addReg(NewRsbDstReg).addMBB(RSBBB)
12330 .addReg(ABSSrcReg).addMBB(BB);
12331
12332 // remove ABS instruction
12333 MI.eraseFromParent();
12334
12335 // return last added BB
12336 return SinkBB;
12337 }
12338 case ARM::COPY_STRUCT_BYVAL_I32:
12339 ++NumLoopByVals;
12340 return EmitStructByval(MI, BB);
12341 case ARM::WIN__CHKSTK:
12342 return EmitLowered__chkstk(MI, BB);
12343 case ARM::WIN__DBZCHK:
12344 return EmitLowered__dbzchk(MI, BB);
12345 }
12346}
12347
12348/// Attaches vregs to MEMCPY that it will use as scratch registers
12349/// when it is expanded into LDM/STM. This is done as a post-isel lowering
12350/// instead of as a custom inserter because we need the use list from the SDNode.
12351static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget,
12352 MachineInstr &MI, const SDNode *Node) {
12353 bool isThumb1 = Subtarget->isThumb1Only();
12354
12355 DebugLoc DL = MI.getDebugLoc();
12356 MachineFunction *MF = MI.getParent()->getParent();
12358 MachineInstrBuilder MIB(*MF, MI);
12359
12360 // If the new dst/src is unused mark it as dead.
12361 if (!Node->hasAnyUseOfValue(0)) {
12362 MI.getOperand(0).setIsDead(true);
12363 }
12364 if (!Node->hasAnyUseOfValue(1)) {
12365 MI.getOperand(1).setIsDead(true);
12366 }
12367
12368 // The MEMCPY both defines and kills the scratch registers.
12369 for (unsigned I = 0; I != MI.getOperand(4).getImm(); ++I) {
12370 Register TmpReg = MRI.createVirtualRegister(isThumb1 ? &ARM::tGPRRegClass
12371 : &ARM::GPRRegClass);
12373 }
12374}
12375
12376void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
12377 SDNode *Node) const {
12378 if (MI.getOpcode() == ARM::MEMCPY) {
12379 attachMEMCPYScratchRegs(Subtarget, MI, Node);
12380 return;
12381 }
12382
12383 const MCInstrDesc *MCID = &MI.getDesc();
12384 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB,
12385 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional
12386 // operand is still set to noreg. If needed, set the optional operand's
12387 // register to CPSR, and remove the redundant implicit def.
12388 //
12389 // e.g. ADCS (..., implicit-def CPSR) -> ADC (... opt:def CPSR).
12390
12391 // Rename pseudo opcodes.
12392 unsigned NewOpc = convertAddSubFlagsOpcode(MI.getOpcode());
12393 unsigned ccOutIdx;
12394 if (NewOpc) {
12395 const ARMBaseInstrInfo *TII = Subtarget->getInstrInfo();
12396 MCID = &TII->get(NewOpc);
12397
12398 assert(MCID->getNumOperands() ==
12399 MI.getDesc().getNumOperands() + 5 - MI.getDesc().getSize()
12400 && "converted opcode should be the same except for cc_out"
12401 " (and, on Thumb1, pred)");
12402
12403 MI.setDesc(*MCID);
12404
12405 // Add the optional cc_out operand
12406 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
12407
12408 // On Thumb1, move all input operands to the end, then add the predicate
12409 if (Subtarget->isThumb1Only()) {
12410 for (unsigned c = MCID->getNumOperands() - 4; c--;) {
12411 MI.addOperand(MI.getOperand(1));
12412 MI.removeOperand(1);
12413 }
12414
12415 // Restore the ties
12416 for (unsigned i = MI.getNumOperands(); i--;) {
12417 const MachineOperand& op = MI.getOperand(i);
12418 if (op.isReg() && op.isUse()) {
12419 int DefIdx = MCID->getOperandConstraint(i, MCOI::TIED_TO);
12420 if (DefIdx != -1)
12421 MI.tieOperands(DefIdx, i);
12422 }
12423 }
12424
12426 MI.addOperand(MachineOperand::CreateReg(0, /*isDef=*/false));
12427 ccOutIdx = 1;
12428 } else
12429 ccOutIdx = MCID->getNumOperands() - 1;
12430 } else
12431 ccOutIdx = MCID->getNumOperands() - 1;
12432
12433 // Any ARM instruction that sets the 's' bit should specify an optional
12434 // "cc_out" operand in the last operand position.
12435 if (!MI.hasOptionalDef() || !MCID->operands()[ccOutIdx].isOptionalDef()) {
12436 assert(!NewOpc && "Optional cc_out operand required");
12437 return;
12438 }
12439 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
12440 // since we already have an optional CPSR def.
12441 bool definesCPSR = false;
12442 bool deadCPSR = false;
12443 for (unsigned i = MCID->getNumOperands(), e = MI.getNumOperands(); i != e;
12444 ++i) {
12445 const MachineOperand &MO = MI.getOperand(i);
12446 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
12447 definesCPSR = true;
12448 if (MO.isDead())
12449 deadCPSR = true;
12450 MI.removeOperand(i);
12451 break;
12452 }
12453 }
12454 if (!definesCPSR) {
12455 assert(!NewOpc && "Optional cc_out operand required");
12456 return;
12457 }
12458 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
12459 if (deadCPSR) {
12460 assert(!MI.getOperand(ccOutIdx).getReg() &&
12461 "expect uninitialized optional cc_out operand");
12462 // Thumb1 instructions must have the S bit even if the CPSR is dead.
12463 if (!Subtarget->isThumb1Only())
12464 return;
12465 }
12466
12467 // If this instruction was defined with an optional CPSR def and its dag node
12468 // had a live implicit CPSR def, then activate the optional CPSR def.
12469 MachineOperand &MO = MI.getOperand(ccOutIdx);
12470 MO.setReg(ARM::CPSR);
12471 MO.setIsDef(true);
12472}
12473
12474//===----------------------------------------------------------------------===//
12475// ARM Optimization Hooks
12476//===----------------------------------------------------------------------===//
12477
12478// Helper function that checks if N is a null or all ones constant.
12479static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
12480 return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
12481}
12482
12483// Return true if N is conditionally 0 or all ones.
12484// Detects these expressions where cc is an i1 value:
12485//
12486// (select cc 0, y) [AllOnes=0]
12487// (select cc y, 0) [AllOnes=0]
12488// (zext cc) [AllOnes=0]
12489// (sext cc) [AllOnes=0/1]
12490// (select cc -1, y) [AllOnes=1]
12491// (select cc y, -1) [AllOnes=1]
12492//
12493// Invert is set when N is the null/all ones constant when CC is false.
12494// OtherOp is set to the alternative value of N.
12495static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
12496 SDValue &CC, bool &Invert,
12497 SDValue &OtherOp,
12498 SelectionDAG &DAG) {
12499 switch (N->getOpcode()) {
12500 default: return false;
12501 case ISD::SELECT: {
12502 CC = N->getOperand(0);
12503 SDValue N1 = N->getOperand(1);
12504 SDValue N2 = N->getOperand(2);
12505 if (isZeroOrAllOnes(N1, AllOnes)) {
12506 Invert = false;
12507 OtherOp = N2;
12508 return true;
12509 }
12510 if (isZeroOrAllOnes(N2, AllOnes)) {
12511 Invert = true;
12512 OtherOp = N1;
12513 return true;
12514 }
12515 return false;
12516 }
12517 case ISD::ZERO_EXTEND:
12518 // (zext cc) can never be the all ones value.
12519 if (AllOnes)
12520 return false;
12521 [[fallthrough]];
12522 case ISD::SIGN_EXTEND: {
12523 SDLoc dl(N);
12524 EVT VT = N->getValueType(0);
12525 CC = N->getOperand(0);
12526 if (CC.getValueType() != MVT::i1 || CC.getOpcode() != ISD::SETCC)
12527 return false;
12528 Invert = !AllOnes;
12529 if (AllOnes)
12530 // When looking for an AllOnes constant, N is an sext, and the 'other'
12531 // value is 0.
12532 OtherOp = DAG.getConstant(0, dl, VT);
12533 else if (N->getOpcode() == ISD::ZERO_EXTEND)
12534 // When looking for a 0 constant, N can be zext or sext.
12535 OtherOp = DAG.getConstant(1, dl, VT);
12536 else
12537 OtherOp = DAG.getAllOnesConstant(dl, VT);
12538 return true;
12539 }
12540 }
12541}
12542
12543// Combine a constant select operand into its use:
12544//
12545// (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
12546// (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
12547// (and (select cc, -1, c), x) -> (select cc, x, (and x, c)) [AllOnes=1]
12548// (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
12549// (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
12550//
12551// The transform is rejected if the select doesn't have a constant operand that
12552// is null, or all ones when AllOnes is set.
12553//
12554// Also recognize sext/zext from i1:
12555//
12556// (add (zext cc), x) -> (select cc (add x, 1), x)
12557// (add (sext cc), x) -> (select cc (add x, -1), x)
12558//
12559// These transformations eventually create predicated instructions.
12560//
12561// @param N The node to transform.
12562// @param Slct The N operand that is a select.
12563// @param OtherOp The other N operand (x above).
12564// @param DCI Context.
12565// @param AllOnes Require the select constant to be all ones instead of null.
12566// @returns The new node, or SDValue() on failure.
12567static
12568SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
12569 TargetLowering::DAGCombinerInfo &DCI,
12570 bool AllOnes = false) {
12571 SelectionDAG &DAG = DCI.DAG;
12572 EVT VT = N->getValueType(0);
12573 SDValue NonConstantVal;
12574 SDValue CCOp;
12575 bool SwapSelectOps;
12576 if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
12577 NonConstantVal, DAG))
12578 return SDValue();
12579
12580 // Slct is now known to be the desired identity constant when CC is true.
12581 SDValue TrueVal = OtherOp;
12582 SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
12583 OtherOp, NonConstantVal);
12584 // Unless SwapSelectOps says CC should be false.
12585 if (SwapSelectOps)
12586 std::swap(TrueVal, FalseVal);
12587
12588 return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
12589 CCOp, TrueVal, FalseVal);
12590}
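// A concrete instance of the transform described above (illustrative, with
// c = 4):
//   (add (select cc, 0, 4), x) -> (select cc, x, (add x, 4))
// which later stages can turn into a predicated add instead of a select.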
12591
12592// Attempt combineSelectAndUse on each operand of a commutative operator N.
12593static
12596 SDValue N0 = N->getOperand(0);
12597 SDValue N1 = N->getOperand(1);
12598 if (N0.getNode()->hasOneUse())
12599 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
12600 return Result;
12601 if (N1.getNode()->hasOneUse())
12602 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
12603 return Result;
12604 return SDValue();
12605}
12606
12607static bool IsVUZPShuffleNode(SDNode *N) {
12608 // VUZP shuffle node.
12609 if (N->getOpcode() == ARMISD::VUZP)
12610 return true;
12611
12612 // "VUZP" on i32 is an alias for VTRN.
12613 if (N->getOpcode() == ARMISD::VTRN && N->getValueType(0) == MVT::v2i32)
12614 return true;
12615
12616 return false;
12617}
12618
12621 const ARMSubtarget *Subtarget) {
12622 // Look for ADD(VUZP.0, VUZP.1).
12623 if (!IsVUZPShuffleNode(N0.getNode()) || N0.getNode() != N1.getNode() ||
12624 N0 == N1)
12625 return SDValue();
12626
12627 // Make sure the ADD is a 64-bit add; there is no 128-bit VPADD.
12628 if (!N->getValueType(0).is64BitVector())
12629 return SDValue();
12630
12631 // Generate vpadd.
12632 SelectionDAG &DAG = DCI.DAG;
12633 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12634 SDLoc dl(N);
12635 SDNode *Unzip = N0.getNode();
12636 EVT VT = N->getValueType(0);
12637
12639 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpadd, dl,
12640 TLI.getPointerTy(DAG.getDataLayout())));
12641 Ops.push_back(Unzip->getOperand(0));
12642 Ops.push_back(Unzip->getOperand(1));
12643
12644 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12645}
12646
12649 const ARMSubtarget *Subtarget) {
12650 // Check for two extended operands.
12651 if (!(N0.getOpcode() == ISD::SIGN_EXTEND &&
12652 N1.getOpcode() == ISD::SIGN_EXTEND) &&
12653 !(N0.getOpcode() == ISD::ZERO_EXTEND &&
12654 N1.getOpcode() == ISD::ZERO_EXTEND))
12655 return SDValue();
12656
12657 SDValue N00 = N0.getOperand(0);
12658 SDValue N10 = N1.getOperand(0);
12659
12660 // Look for ADD(SEXT(VUZP.0), SEXT(VUZP.1))
12661 if (!IsVUZPShuffleNode(N00.getNode()) || N00.getNode() != N10.getNode() ||
12662 N00 == N10)
12663 return SDValue();
12664
12665 // We only recognize Q register paddl here; this can't be reached until
12666 // after type legalization.
12667 if (!N00.getValueType().is64BitVector() ||
12669 return SDValue();
12670
12671 // Generate vpaddl.
12672 SelectionDAG &DAG = DCI.DAG;
12673 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12674 SDLoc dl(N);
12675 EVT VT = N->getValueType(0);
12676
12678 // Form vpaddl.sN or vpaddl.uN depending on the kind of extension.
12679 unsigned Opcode;
12680 if (N0.getOpcode() == ISD::SIGN_EXTEND)
12681 Opcode = Intrinsic::arm_neon_vpaddls;
12682 else
12683 Opcode = Intrinsic::arm_neon_vpaddlu;
12684 Ops.push_back(DAG.getConstant(Opcode, dl,
12685 TLI.getPointerTy(DAG.getDataLayout())));
12686 EVT ElemTy = N00.getValueType().getVectorElementType();
12687 unsigned NumElts = VT.getVectorNumElements();
12688 EVT ConcatVT = EVT::getVectorVT(*DAG.getContext(), ElemTy, NumElts * 2);
12689 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), ConcatVT,
12690 N00.getOperand(0), N00.getOperand(1));
12691 Ops.push_back(Concat);
12692
12693 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, Ops);
12694}
12695
12696// FIXME: This function shouldn't be necessary; if we lower BUILD_VECTOR in
12697// an appropriate manner, we end up with ADD(VUZP(ZEXT(N))), which is
12698// much easier to match.
12699static SDValue
12702 const ARMSubtarget *Subtarget) {
12703 // Only perform this optimization after legalization, and only if NEON is
12704 // available. We also expect both operands to be BUILD_VECTORs.
12705 if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
12706 || N0.getOpcode() != ISD::BUILD_VECTOR
12707 || N1.getOpcode() != ISD::BUILD_VECTOR)
12708 return SDValue();
12709
12710 // Check output type since VPADDL operand elements can only be 8, 16, or 32.
12711 EVT VT = N->getValueType(0);
12712 if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
12713 return SDValue();
12714
12715 // Check that the vector operands are of the right form.
12716 // N0 and N1 are BUILD_VECTOR nodes with N number of EXTRACT_VECTOR
12717 // operands, where N is the size of the formed vector.
12718 // Each EXTRACT_VECTOR should have the same input vector and odd or even
12719 // index such that we have a pairwise add pattern.
12720
12721 // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
12723 return SDValue();
12724 SDValue Vec = N0->getOperand(0)->getOperand(0);
12725 SDNode *V = Vec.getNode();
12726 unsigned nextIndex = 0;
12727
12728 // For each operands to the ADD which are BUILD_VECTORs,
12729 // check to see if each of their operands are an EXTRACT_VECTOR with
12730 // the same vector and appropriate index.
12731 for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
12734
12735 SDValue ExtVec0 = N0->getOperand(i);
12736 SDValue ExtVec1 = N1->getOperand(i);
12737
12738 // First operand is the vector, verify it's the same.
12739 if (V != ExtVec0->getOperand(0).getNode() ||
12740 V != ExtVec1->getOperand(0).getNode())
12741 return SDValue();
12742
12743 // Second is the constant, verify it's correct.
12744 ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
12745 ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
12746
12747 // For the constant, we want to see all the even or all the odd.
12748 if (!C0 || !C1 || C0->getZExtValue() != nextIndex
12749 || C1->getZExtValue() != nextIndex+1)
12750 return SDValue();
12751
12752 // Increment index.
12753 nextIndex+=2;
12754 } else
12755 return SDValue();
12756 }
12757
12758 // Don't generate vpaddl+vmovn; we'll match it to vpadd later. Also make sure
12759 // we're using the entire input vector, otherwise there's a size/legality
12760 // mismatch somewhere.
12761 if (nextIndex != Vec.getValueType().getVectorNumElements() ||
12763 return SDValue();
12764
12765 // Create VPADDL node.
12766 SelectionDAG &DAG = DCI.DAG;
12767 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
12768
12769 SDLoc dl(N);
12770
12771 // Build operand list.
12773 Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls, dl,
12774 TLI.getPointerTy(DAG.getDataLayout())));
12775
12776 // Input is the vector.
12777 Ops.push_back(Vec);
12778
12779 // Get widened type and narrowed type.
12780 MVT widenType;
12781 unsigned numElem = VT.getVectorNumElements();
12782
12783 EVT inputLaneType = Vec.getValueType().getVectorElementType();
12784 switch (inputLaneType.getSimpleVT().SimpleTy) {
12785 case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
12786 case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
12787 case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
12788 default:
12789 llvm_unreachable("Invalid vector element type for padd optimization.");
12790 }
12791
12792 SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, widenType, Ops);
12793 unsigned ExtOp = VT.bitsGT(tmp.getValueType()) ? ISD::ANY_EXTEND : ISD::TRUNCATE;
12794 return DAG.getNode(ExtOp, dl, VT, tmp);
12795}
12796
12797static SDValue findMUL_LOHI(SDValue V) {
12798 if (V->getOpcode() == ISD::UMUL_LOHI ||
12799 V->getOpcode() == ISD::SMUL_LOHI)
12800 return V;
12801 return SDValue();
12802}
12803
12804static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode,
12806 const ARMSubtarget *Subtarget) {
12807 if (!Subtarget->hasBaseDSP())
12808 return SDValue();
12809
12810 // SMLALBB, SMLALBT, SMLALTB, SMLALTT multiply two 16-bit values and
12811 // accumulate the product into a 64-bit value. The 16-bit values will
12812 // be sign extended somehow or SRA'd into 32-bit values
12813 // (addc (adde (mul 16bit, 16bit), lo), hi)
12814 SDValue Mul = AddcNode->getOperand(0);
12815 SDValue Lo = AddcNode->getOperand(1);
12816 if (Mul.getOpcode() != ISD::MUL) {
12817 Lo = AddcNode->getOperand(0);
12818 Mul = AddcNode->getOperand(1);
12819 if (Mul.getOpcode() != ISD::MUL)
12820 return SDValue();
12821 }
12822
12823 SDValue SRA = AddeNode->getOperand(0);
12824 SDValue Hi = AddeNode->getOperand(1);
12825 if (SRA.getOpcode() != ISD::SRA) {
12826 SRA = AddeNode->getOperand(1);
12827 Hi = AddeNode->getOperand(0);
12828 if (SRA.getOpcode() != ISD::SRA)
12829 return SDValue();
12830 }
12831 if (auto Const = dyn_cast<ConstantSDNode>(SRA.getOperand(1))) {
12832 if (Const->getZExtValue() != 31)
12833 return SDValue();
12834 } else
12835 return SDValue();
12836
12837 if (SRA.getOperand(0) != Mul)
12838 return SDValue();
12839
12840 SelectionDAG &DAG = DCI.DAG;
12841 SDLoc dl(AddcNode);
12842 unsigned Opcode = 0;
12843 SDValue Op0;
12844 SDValue Op1;
12845
12846 if (isS16(Mul.getOperand(0), DAG) && isS16(Mul.getOperand(1), DAG)) {
12847 Opcode = ARMISD::SMLALBB;
12848 Op0 = Mul.getOperand(0);
12849 Op1 = Mul.getOperand(1);
12850 } else if (isS16(Mul.getOperand(0), DAG) && isSRA16(Mul.getOperand(1))) {
12851 Opcode = ARMISD::SMLALBT;
12852 Op0 = Mul.getOperand(0);
12853 Op1 = Mul.getOperand(1).getOperand(0);
12854 } else if (isSRA16(Mul.getOperand(0)) && isS16(Mul.getOperand(1), DAG)) {
12855 Opcode = ARMISD::SMLALTB;
12856 Op0 = Mul.getOperand(0).getOperand(0);
12857 Op1 = Mul.getOperand(1);
12858 } else if (isSRA16(Mul.getOperand(0)) && isSRA16(Mul.getOperand(1))) {
12859 Opcode = ARMISD::SMLALTT;
12860 Op0 = Mul->getOperand(0).getOperand(0);
12861 Op1 = Mul->getOperand(1).getOperand(0);
12862 }
12863
12864 if (!Op0 || !Op1)
12865 return SDValue();
12866
12867 SDValue SMLAL = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
12868 Op0, Op1, Lo, Hi);
12869 // Replace the ADDs' nodes uses by the MLA node's values.
12870 SDValue HiMLALResult(SMLAL.getNode(), 1);
12871 SDValue LoMLALResult(SMLAL.getNode(), 0);
12872
12873 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
12874 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
12875
12876 // Return original node to notify the driver to stop replacing.
12877 SDValue resNode(AddcNode, 0);
12878 return resNode;
12879}
12880
12881static SDValue AddCombineTo64bitMLAL(SDNode *AddeSubeNode,
12882 TargetLowering::DAGCombinerInfo &DCI,
12883 const ARMSubtarget *Subtarget) {
12884 // Look for multiply add opportunities.
12885 // The pattern is a ISD::UMUL_LOHI followed by two add nodes, where
12886 // each add node consumes a value from ISD::UMUL_LOHI and there is
12887 // a glue link from the first add to the second add.
12888 // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
12889 // a S/UMLAL instruction.
12890 // UMUL_LOHI
12891 // / :lo \ :hi
12892 // V \ [no multiline comment]
12893 // loAdd -> ADDC |
12894 // \ :carry /
12895 // V V
12896 // ADDE <- hiAdd
12897 //
12898 // In the special case where only the higher part of a signed result is used
12899 // and the add to the low part of the result of ISD::UMUL_LOHI adds or subtracts
12900 // a constant with the exact value of 0x80000000, we recognize we are dealing
12901 // with a "rounded multiply and add" (or subtract) and transform it into
12902 // either an ARMISD::SMMLAR or an ARMISD::SMMLSR, respectively.
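 // Roughly, the rounded case corresponds to source of the form (illustrative,
 // simplified):
 //   res = (int32_t)(((int64_t)a * b + ((int64_t)acc << 32) + 0x80000000) >> 32);
 // which maps onto SMMLAR (or SMMLSR when the product is subtracted).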
12903
12904 assert((AddeSubeNode->getOpcode() == ARMISD::ADDE ||
12905 AddeSubeNode->getOpcode() == ARMISD::SUBE) &&
12906 "Expect an ADDE or SUBE");
12907
12908 assert(AddeSubeNode->getNumOperands() == 3 &&
12909 AddeSubeNode->getOperand(2).getValueType() == MVT::i32 &&
12910 "ADDE node has the wrong inputs");
12911
12912 // Check that we are chained to the right ADDC or SUBC node.
12913 SDNode *AddcSubcNode = AddeSubeNode->getOperand(2).getNode();
12914 if ((AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12915 AddcSubcNode->getOpcode() != ARMISD::ADDC) ||
12916 (AddeSubeNode->getOpcode() == ARMISD::SUBE &&
12917 AddcSubcNode->getOpcode() != ARMISD::SUBC))
12918 return SDValue();
12919
12920 SDValue AddcSubcOp0 = AddcSubcNode->getOperand(0);
12921 SDValue AddcSubcOp1 = AddcSubcNode->getOperand(1);
12922
12923 // Check if the two operands are from the same mul_lohi node.
12924 if (AddcSubcOp0.getNode() == AddcSubcOp1.getNode())
12925 return SDValue();
12926
12927 assert(AddcSubcNode->getNumValues() == 2 &&
12928 AddcSubcNode->getValueType(0) == MVT::i32 &&
12929 "Expect ADDC with two result values. First: i32");
12930
12931 // Check that the ADDC adds the low result of the S/UMUL_LOHI. If not, it
12932 // may be an SMLAL which multiplies two 16-bit values.
12933 if (AddeSubeNode->getOpcode() == ARMISD::ADDE &&
12934 AddcSubcOp0->getOpcode() != ISD::UMUL_LOHI &&
12935 AddcSubcOp0->getOpcode() != ISD::SMUL_LOHI &&
12936 AddcSubcOp1->getOpcode() != ISD::UMUL_LOHI &&
12937 AddcSubcOp1->getOpcode() != ISD::SMUL_LOHI)
12938 return AddCombineTo64BitSMLAL16(AddcSubcNode, AddeSubeNode, DCI, Subtarget);
12939
12940 // Check for the triangle shape.
12941 SDValue AddeSubeOp0 = AddeSubeNode->getOperand(0);
12942 SDValue AddeSubeOp1 = AddeSubeNode->getOperand(1);
12943
12944 // Make sure that the ADDE/SUBE operands are not coming from the same node.
12945 if (AddeSubeOp0.getNode() == AddeSubeOp1.getNode())
12946 return SDValue();
12947
12948 // Find the MUL_LOHI node walking up ADDE/SUBE's operands.
12949 bool IsLeftOperandMUL = false;
12950 SDValue MULOp = findMUL_LOHI(AddeSubeOp0);
12951 if (MULOp == SDValue())
12952 MULOp = findMUL_LOHI(AddeSubeOp1);
12953 else
12954 IsLeftOperandMUL = true;
12955 if (MULOp == SDValue())
12956 return SDValue();
12957
12958 // Figure out the right opcode.
12959 unsigned Opc = MULOp->getOpcode();
12960 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
12961
12962 // Figure out the high and low input values to the MLAL node.
12963 SDValue *HiAddSub = nullptr;
12964 SDValue *LoMul = nullptr;
12965 SDValue *LowAddSub = nullptr;
12966
12967 // Ensure that ADDE/SUBE is from high result of ISD::xMUL_LOHI.
12968 if ((AddeSubeOp0 != MULOp.getValue(1)) && (AddeSubeOp1 != MULOp.getValue(1)))
12969 return SDValue();
12970
12971 if (IsLeftOperandMUL)
12972 HiAddSub = &AddeSubeOp1;
12973 else
12974 HiAddSub = &AddeSubeOp0;
12975
12976 // Ensure that LoMul and LowAddSub are taken from the correct ISD::SMUL_LOHI node
12977 // whose low result is fed to the ADDC/SUBC we are checking.
12978
12979 if (AddcSubcOp0 == MULOp.getValue(0)) {
12980 LoMul = &AddcSubcOp0;
12981 LowAddSub = &AddcSubcOp1;
12982 }
12983 if (AddcSubcOp1 == MULOp.getValue(0)) {
12984 LoMul = &AddcSubcOp1;
12985 LowAddSub = &AddcSubcOp0;
12986 }
12987
12988 if (!LoMul)
12989 return SDValue();
12990
12991 // If HiAddSub is the same node as ADDC/SUBC or is a predecessor of ADDC/SUBC
12992 // the replacement below will create a cycle.
12993 if (AddcSubcNode == HiAddSub->getNode() ||
12994 AddcSubcNode->isPredecessorOf(HiAddSub->getNode()))
12995 return SDValue();
12996
12997 // Create the merged node.
12998 SelectionDAG &DAG = DCI.DAG;
12999
13000 // Start building operand list.
13002 Ops.push_back(LoMul->getOperand(0));
13003 Ops.push_back(LoMul->getOperand(1));
13004
13005 // Check whether we can use SMMLAR, SMMLSR or SMMULR instead. For this to be
13006 // the case, we must be doing signed multiplication and only use the higher
13007 // part of the result of the MLAL; furthermore, the LowAddSub must be a constant
13008 // addition or subtraction with the value of 0x80000000.
13009 if (Subtarget->hasV6Ops() && Subtarget->hasDSP() && Subtarget->useMulOps() &&
13010 FinalOpc == ARMISD::SMLAL && !AddeSubeNode->hasAnyUseOfValue(1) &&
13011 LowAddSub->getNode()->getOpcode() == ISD::Constant &&
13012 static_cast<ConstantSDNode *>(LowAddSub->getNode())->getZExtValue() ==
13013 0x80000000) {
13014 Ops.push_back(*HiAddSub);
13015 if (AddcSubcNode->getOpcode() == ARMISD::SUBC) {
13016 FinalOpc = ARMISD::SMMLSR;
13017 } else {
13018 FinalOpc = ARMISD::SMMLAR;
13019 }
13020 SDValue NewNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode), MVT::i32, Ops);
13021 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), NewNode);
13022
13023 return SDValue(AddeSubeNode, 0);
13024 } else if (AddcSubcNode->getOpcode() == ARMISD::SUBC)
13025 // SMMLS is generated during instruction selection and the rest of this
13026 // function cannot handle the case where AddcSubcNode is a SUBC.
13027 return SDValue();
13028
13029 // Finish building the operand list for {U/S}MLAL
13030 Ops.push_back(*LowAddSub);
13031 Ops.push_back(*HiAddSub);
13032
13033 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcSubcNode),
13034 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13035
13036 // Replace the ADDs' nodes uses by the MLA node's values.
13037 SDValue HiMLALResult(MLALNode.getNode(), 1);
13038 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeSubeNode, 0), HiMLALResult);
13039
13040 SDValue LoMLALResult(MLALNode.getNode(), 0);
13041 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcSubcNode, 0), LoMLALResult);
13042
13043 // Return original node to notify the driver to stop replacing.
13044 return SDValue(AddeSubeNode, 0);
13045}
13046
13049 const ARMSubtarget *Subtarget) {
13050 // UMAAL is similar to UMLAL except that it adds two unsigned values.
13051 // While trying to combine for the other MLAL nodes, first search for the
13052 // chance to use UMAAL. Check if Addc uses a node which has already
13053 // been combined into a UMLAL. The other pattern is UMLAL using Addc/Adde
13054 // as the addend, and it's handled in PerformUMLALCombine.
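 // For reference (architectural behaviour, assumed here for clarity):
 //   UMAAL RdLo, RdHi, Rn, Rm computes RdHi:RdLo = Rn * Rm + RdLo + RdHi,
 // i.e. an unsigned 32x32->64 multiply plus two independent 32-bit addends.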
13055
13056 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13057 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13058
13059 // Check that we have a glued ADDC node.
13060 SDNode* AddcNode = AddeNode->getOperand(2).getNode();
13061 if (AddcNode->getOpcode() != ARMISD::ADDC)
13062 return SDValue();
13063
13064 // Find the converted UMAAL or quit if it doesn't exist.
13065 SDNode *UmlalNode = nullptr;
13066 SDValue AddHi;
13067 if (AddcNode->getOperand(0).getOpcode() == ARMISD::UMLAL) {
13068 UmlalNode = AddcNode->getOperand(0).getNode();
13069 AddHi = AddcNode->getOperand(1);
13070 } else if (AddcNode->getOperand(1).getOpcode() == ARMISD::UMLAL) {
13071 UmlalNode = AddcNode->getOperand(1).getNode();
13072 AddHi = AddcNode->getOperand(0);
13073 } else {
13074 return AddCombineTo64bitMLAL(AddeNode, DCI, Subtarget);
13075 }
13076
13077 // The ADDC should be glued to an ADDE node, which uses the same UMLAL as
13078 // the ADDC as well as Zero.
13079 if (!isNullConstant(UmlalNode->getOperand(3)))
13080 return SDValue();
13081
13082 if ((isNullConstant(AddeNode->getOperand(0)) &&
13083 AddeNode->getOperand(1).getNode() == UmlalNode) ||
13084 (AddeNode->getOperand(0).getNode() == UmlalNode &&
13085 isNullConstant(AddeNode->getOperand(1)))) {
13086 SelectionDAG &DAG = DCI.DAG;
13087 SDValue Ops[] = { UmlalNode->getOperand(0), UmlalNode->getOperand(1),
13088 UmlalNode->getOperand(2), AddHi };
13089 SDValue UMAAL = DAG.getNode(ARMISD::UMAAL, SDLoc(AddcNode),
13090 DAG.getVTList(MVT::i32, MVT::i32), Ops);
13091
13092 // Replace the ADDE and ADDC nodes' uses with the UMAAL node's values.
13093 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), SDValue(UMAAL.getNode(), 1));
13094 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), SDValue(UMAAL.getNode(), 0));
13095
13096 // Return original node to notify the driver to stop replacing.
13097 return SDValue(AddeNode, 0);
13098 }
13099 return SDValue();
13100}
13101
13102static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG,
13103 const ARMSubtarget *Subtarget) {
13104 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
13105 return SDValue();
13106
13107 // Check that we have a pair of ADDC and ADDE as operands.
13108 // Both addends of the ADDE must be zero.
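// In other words (illustrative): UMLAL(a, b, ADDC(x, y), ADDE(0, 0, carry))
// computes a * b + zext(x) + zext(y), since the ADDC/ADDE pair only widens
// x + y to 64 bits - which is exactly UMAAL(a, b, x, y).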
13109 SDNode* AddcNode = N->getOperand(2).getNode();
13110 SDNode* AddeNode = N->getOperand(3).getNode();
13111 if ((AddcNode->getOpcode() == ARMISD::ADDC) &&
13112 (AddeNode->getOpcode() == ARMISD::ADDE) &&
13113 isNullConstant(AddeNode->getOperand(0)) &&
13114 isNullConstant(AddeNode->getOperand(1)) &&
13115 (AddeNode->getOperand(2).getNode() == AddcNode))
13116 return DAG.getNode(ARMISD::UMAAL, SDLoc(N),
13117 DAG.getVTList(MVT::i32, MVT::i32),
13118 {N->getOperand(0), N->getOperand(1),
13119 AddcNode->getOperand(0), AddcNode->getOperand(1)});
13120 else
13121 return SDValue();
13122}
13123
13124static SDValue PerformAddcSubcCombine(SDNode *N,
13125 TargetLowering::DAGCombinerInfo &DCI,
13126 const ARMSubtarget *Subtarget) {
13127 SelectionDAG &DAG(DCI.DAG);
13128
13129 if (N->getOpcode() == ARMISD::SUBC && N->hasAnyUseOfValue(1)) {
13130 // (SUBC (ADDE 0, 0, C), 1) -> C
13131 SDValue LHS = N->getOperand(0);
13132 SDValue RHS = N->getOperand(1);
13133 if (LHS->getOpcode() == ARMISD::ADDE &&
13134 isNullConstant(LHS->getOperand(0)) &&
13135 isNullConstant(LHS->getOperand(1)) && isOneConstant(RHS)) {
13136 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
13137 }
13138 }
13139
13140 if (Subtarget->isThumb1Only()) {
13141 SDValue RHS = N->getOperand(1);
13142 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13143 int32_t imm = C->getSExtValue();
13144 if (imm < 0 && imm > std::numeric_limits<int>::min()) {
13145 SDLoc DL(N);
13146 RHS = DAG.getConstant(-imm, DL, MVT::i32);
13147 unsigned Opcode = (N->getOpcode() == ARMISD::ADDC) ? ARMISD::SUBC
13148 : ARMISD::ADDC;
13149 return DAG.getNode(Opcode, DL, N->getVTList(), N->getOperand(0), RHS);
13150 }
13151 }
13152 }
13153
13154 return SDValue();
13155}
13156
13157static SDValue PerformAddeSubeCombine(SDNode *N,
13158 TargetLowering::DAGCombinerInfo &DCI,
13159 const ARMSubtarget *Subtarget) {
13160 if (Subtarget->isThumb1Only()) {
13161 SelectionDAG &DAG = DCI.DAG;
13162 SDValue RHS = N->getOperand(1);
13163 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
13164 int64_t imm = C->getSExtValue();
13165 if (imm < 0) {
13166 SDLoc DL(N);
13167
13168 // The with-carry-in form matches bitwise not instead of the negation.
13169 // Effectively, the inverse interpretation of the carry flag already
13170 // accounts for part of the negation.
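// For example (illustrative): ADDE(x, -5, carry) becomes SUBE(x, 4, carry),
// since x - 4 - (1 - carry) == x + (-5) + carry and ~(-5) == 4.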
13171 RHS = DAG.getConstant(~imm, DL, MVT::i32);
13172
13173 unsigned Opcode = (N->getOpcode() == ARMISD::ADDE) ? ARMISD::SUBE
13174 : ARMISD::ADDE;
13175 return DAG.getNode(Opcode, DL, N->getVTList(),
13176 N->getOperand(0), RHS, N->getOperand(2));
13177 }
13178 }
13179 } else if (N->getOperand(1)->getOpcode() == ISD::SMUL_LOHI) {
13180 return AddCombineTo64bitMLAL(N, DCI, Subtarget);
13181 }
13182 return SDValue();
13183}
13184
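// The combine below folds select(setcc(x, vecreduce_min/max(v), cc), x,
// vecreduce_min/max(v)) into the MVE VMINV / VMAXV reductions, which take a
// running scalar in a GPR and min/max it against every lane of the vector.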
13185static SDValue PerformSELECTCombine(SDNode *N,
13186 TargetLowering::DAGCombinerInfo &DCI,
13187 const ARMSubtarget *Subtarget) {
13188 if (!Subtarget->hasMVEIntegerOps())
13189 return SDValue();
13190
13191 SDLoc dl(N);
13192 SDValue SetCC;
13193 SDValue LHS;
13194 SDValue RHS;
13195 ISD::CondCode CC;
13196 SDValue TrueVal;
13197 SDValue FalseVal;
13198
13199 if (N->getOpcode() == ISD::SELECT &&
13200 N->getOperand(0)->getOpcode() == ISD::SETCC) {
13201 SetCC = N->getOperand(0);
13202 LHS = SetCC->getOperand(0);
13203 RHS = SetCC->getOperand(1);
13204 CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
13205 TrueVal = N->getOperand(1);
13206 FalseVal = N->getOperand(2);
13207 } else if (N->getOpcode() == ISD::SELECT_CC) {
13208 LHS = N->getOperand(0);
13209 RHS = N->getOperand(1);
13210 CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
13211 TrueVal = N->getOperand(2);
13212 FalseVal = N->getOperand(3);
13213 } else {
13214 return SDValue();
13215 }
13216
13217 unsigned int Opcode = 0;
13218 if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMIN ||
13219 FalseVal->getOpcode() == ISD::VECREDUCE_UMIN) &&
13220 (CC == ISD::SETULT || CC == ISD::SETUGT)) {
13221 Opcode = ARMISD::VMINVu;
13222 if (CC == ISD::SETUGT)
13223 std::swap(TrueVal, FalseVal);
13224 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMIN ||
13225 FalseVal->getOpcode() == ISD::VECREDUCE_SMIN) &&
13226 (CC == ISD::SETLT || CC == ISD::SETGT)) {
13227 Opcode = ARMISD::VMINVs;
13228 if (CC == ISD::SETGT)
13229 std::swap(TrueVal, FalseVal);
13230 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_UMAX ||
13231 FalseVal->getOpcode() == ISD::VECREDUCE_UMAX) &&
13232 (CC == ISD::SETUGT || CC == ISD::SETULT)) {
13233 Opcode = ARMISD::VMAXVu;
13234 if (CC == ISD::SETULT)
13235 std::swap(TrueVal, FalseVal);
13236 } else if ((TrueVal->getOpcode() == ISD::VECREDUCE_SMAX ||
13237 FalseVal->getOpcode() == ISD::VECREDUCE_SMAX) &&
13238 (CC == ISD::SETGT || CC == ISD::SETLT)) {
13239 Opcode = ARMISD::VMAXVs;
13240 if (CC == ISD::SETLT)
13241 std::swap(TrueVal, FalseVal);
13242 } else
13243 return SDValue();
13244
13245 // Normalise to the right hand side being the vector reduction
13246 switch (TrueVal->getOpcode()) {
13247 case ISD::VECREDUCE_UMIN:
13248 case ISD::VECREDUCE_SMIN:
13249 case ISD::VECREDUCE_UMAX:
13250 case ISD::VECREDUCE_SMAX:
13251 std::swap(LHS, RHS);
13252 std::swap(TrueVal, FalseVal);
13253 break;
13254 }
13255
13256 EVT VectorType = FalseVal->getOperand(0).getValueType();
13257
13258 if (VectorType != MVT::v16i8 && VectorType != MVT::v8i16 &&
13259 VectorType != MVT::v4i32)
13260 return SDValue();
13261
13262 EVT VectorScalarType = VectorType.getVectorElementType();
13263
13264 // The values being selected must also be the ones being compared
13265 if (TrueVal != LHS || FalseVal != RHS)
13266 return SDValue();
13267
13268 EVT LeftType = LHS->getValueType(0);
13269 EVT RightType = RHS->getValueType(0);
13270
13271 // The types must match the reduced type too
13272 if (LeftType != VectorScalarType || RightType != VectorScalarType)
13273 return SDValue();
13274
13275 // Legalise the scalar to an i32
13276 if (VectorScalarType != MVT::i32)
13277 LHS = DCI.DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
13278
13279 // Generate the reduction as an i32 for legalisation purposes
13280 auto Reduction =
13281 DCI.DAG.getNode(Opcode, dl, MVT::i32, LHS, RHS->getOperand(0));
13282
13283 // The result isn't actually an i32 so truncate it back to its original type
13284 if (VectorScalarType != MVT::i32)
13285 Reduction = DCI.DAG.getNode(ISD::TRUNCATE, dl, VectorScalarType, Reduction);
13286
13287 return Reduction;
13288}
13289
13290// A special combine for the vqdmulh family of instructions. This is one of the
13291// potential set of patterns that could match this instruction. The base pattern
13292// you would expect is min(max(ashr(mul(mul(sext(x), 2), sext(y)), 16))).
13293// This matches the different min(max(ashr(mul(mul(sext(x), sext(y)), 2), 16))),
13294// which llvm will have optimized to min(ashr(mul(sext(x), sext(y)), 15))) as
13295// the max is unnecessary.
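// For example (illustrative, v8i16): smin(sra(mul(sext(x), sext(y)), 15),
// splat(32767)) computes the same lanes as VQDMULH.s16; the only input pair
// where the clamp actually fires is x == y == -32768, and VQDMULH saturates
// that lane to 32767 as well.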
13296static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
13297 EVT VT = N->getValueType(0);
13298 SDValue Shft;
13299 ConstantSDNode *Clamp;
13300
13301 if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
13302 return SDValue();
13303
13304 if (N->getOpcode() == ISD::SMIN) {
13305 Shft = N->getOperand(0);
13306 Clamp = isConstOrConstSplat(N->getOperand(1));
13307 } else if (N->getOpcode() == ISD::VSELECT) {
13308 // Detect a SMIN, which for an i64 node will be a vselect/setcc, not a smin.
13309 SDValue Cmp = N->getOperand(0);
13310 if (Cmp.getOpcode() != ISD::SETCC ||
13311 cast<CondCodeSDNode>(Cmp.getOperand(2))->get() != ISD::SETLT ||
13312 Cmp.getOperand(0) != N->getOperand(1) ||
13313 Cmp.getOperand(1) != N->getOperand(2))
13314 return SDValue();
13315 Shft = N->getOperand(1);
13316 Clamp = isConstOrConstSplat(N->getOperand(2));
13317 } else
13318 return SDValue();
13319
13320 if (!Clamp)
13321 return SDValue();
13322
13323 MVT ScalarType;
13324 int ShftAmt = 0;
13325 switch (Clamp->getSExtValue()) {
13326 case (1 << 7) - 1:
13327 ScalarType = MVT::i8;
13328 ShftAmt = 7;
13329 break;
13330 case (1 << 15) - 1:
13331 ScalarType = MVT::i16;
13332 ShftAmt = 15;
13333 break;
13334 case (1ULL << 31) - 1:
13335 ScalarType = MVT::i32;
13336 ShftAmt = 31;
13337 break;
13338 default:
13339 return SDValue();
13340 }
13341
13342 if (Shft.getOpcode() != ISD::SRA)
13343 return SDValue();
13344 ConstantSDNode *N1 = isConstOrConstSplat(Shft.getOperand(1));
13345 if (!N1 || N1->getSExtValue() != ShftAmt)
13346 return SDValue();
13347
13348 SDValue Mul = Shft.getOperand(0);
13349 if (Mul.getOpcode() != ISD::MUL)
13350 return SDValue();
13351
13352 SDValue Ext0 = Mul.getOperand(0);
13353 SDValue Ext1 = Mul.getOperand(1);
13354 if (Ext0.getOpcode() != ISD::SIGN_EXTEND ||
13355 Ext1.getOpcode() != ISD::SIGN_EXTEND)
13356 return SDValue();
13357 EVT VecVT = Ext0.getOperand(0).getValueType();
13358 if (!VecVT.isPow2VectorType() || VecVT.getVectorNumElements() == 1)
13359 return SDValue();
13360 if (Ext1.getOperand(0).getValueType() != VecVT ||
13361 VecVT.getScalarType() != ScalarType ||
13362 VT.getScalarSizeInBits() < ScalarType.getScalarSizeInBits() * 2)
13363 return SDValue();
13364
13365 SDLoc DL(Mul);
13366 unsigned LegalLanes = 128 / (ShftAmt + 1);
13367 EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
13368 // For types smaller than legal vectors extend to be legal and only use needed
13369 // lanes.
13370 if (VecVT.getSizeInBits() < 128) {
13371 EVT ExtVecVT =
13372 MVT::getVectorVT(MVT::getIntegerVT(128 / VecVT.getVectorNumElements()),
13373 VecVT.getVectorNumElements());
13374 SDValue Inp0 =
13375 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext0.getOperand(0));
13376 SDValue Inp1 =
13377 DAG.getNode(ISD::ANY_EXTEND, DL, ExtVecVT, Ext1.getOperand(0));
13378 Inp0 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp0);
13379 Inp1 = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, LegalVecVT, Inp1);
13380 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13381 SDValue Trunc = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, ExtVecVT, VQDMULH);
13382 Trunc = DAG.getNode(ISD::TRUNCATE, DL, VecVT, Trunc);
13383 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Trunc);
13384 }
13385
13386 // For larger types, split into legal sized chunks.
13387 assert(VecVT.getSizeInBits() % 128 == 0 && "Expected a power2 type");
13388 unsigned NumParts = VecVT.getSizeInBits() / 128;
13389 SmallVector<SDValue> Parts;
13390 for (unsigned I = 0; I < NumParts; ++I) {
13391 SDValue Inp0 =
13392 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext0.getOperand(0),
13393 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13394 SDValue Inp1 =
13395 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LegalVecVT, Ext1.getOperand(0),
13396 DAG.getVectorIdxConstant(I * LegalLanes, DL));
13397 SDValue VQDMULH = DAG.getNode(ARMISD::VQDMULH, DL, LegalVecVT, Inp0, Inp1);
13398 Parts.push_back(VQDMULH);
13399 }
13400 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT,
13401 DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
13402}
13403
13404static SDValue PerformVSELECTCombine(SDNode *N,
13405 TargetLowering::DAGCombinerInfo &DCI,
13406 const ARMSubtarget *Subtarget) {
13407 if (!Subtarget->hasMVEIntegerOps())
13408 return SDValue();
13409
13410 if (SDValue V = PerformVQDMULHCombine(N, DCI.DAG))
13411 return V;
13412
13413 // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
13414 //
13415 // We need to re-implement this optimization here as the implementation in the
13416 // Target-Independent DAGCombiner does not handle the kind of constant we make
13417 // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
13418 // good reason, allowing truncation there would break other targets).
13419 //
13420 // Currently, this is only done for MVE, as it's the only target that benefits
13421 // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
13422 if (N->getOperand(0).getOpcode() != ISD::XOR)
13423 return SDValue();
13424 SDValue XOR = N->getOperand(0);
13425
13426 // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
13427 // It is important to check with truncation allowed as the BUILD_VECTORs we
13428 // generate in those situations will truncate their operands.
13429 ConstantSDNode *Const =
13430 isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
13431 /*AllowTruncation*/ true);
13432 if (!Const || !Const->isOne())
13433 return SDValue();
13434
13435 // Rewrite into vselect(cond, rhs, lhs).
13436 SDValue Cond = XOR->getOperand(0);
13437 SDValue LHS = N->getOperand(1);
13438 SDValue RHS = N->getOperand(2);
13439 EVT Type = N->getValueType(0);
13440 return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
13441}
13442
13443// Convert vsetcc([0,1,2,..], splat(n), ult) -> vctp n
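// For example (illustrative): setcc ult <v4i32 0,1,2,3>, splat(n) yields the
// lane mask {0<n, 1<n, 2<n, 3<n}, which is exactly the predicate the MVE
// VCTP32 instruction produces for an element count of n.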
13444static SDValue PerformVSetCCToVCTPCombine(SDNode *N,
13445 TargetLowering::DAGCombinerInfo &DCI,
13446 const ARMSubtarget *Subtarget) {
13447 SDValue Op0 = N->getOperand(0);
13448 SDValue Op1 = N->getOperand(1);
13449 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13450 EVT VT = N->getValueType(0);
13451
13452 if (!Subtarget->hasMVEIntegerOps() ||
13453 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
13454 return SDValue();
13455
13456 if (CC == ISD::SETUGE) {
13457 std::swap(Op0, Op1);
13458 CC = ISD::SETULT;
13459 }
13460
13461 if (CC != ISD::SETULT || VT.getScalarSizeInBits() != 1 ||
13462 Op0.getOpcode() != ISD::BUILD_VECTOR)
13463 return SDValue();
13464
13465 // Check first operand is BuildVector of 0,1,2,...
13466 for (unsigned I = 0; I < VT.getVectorNumElements(); I++) {
13467 if (!Op0.getOperand(I).isUndef() &&
13468 !(isa<ConstantSDNode>(Op0.getOperand(I)) &&
13469 Op0.getConstantOperandVal(I) == I))
13470 return SDValue();
13471 }
13472
13473 // The second is a Splat of Op1S
13474 SDValue Op1S = DCI.DAG.getSplatValue(Op1);
13475 if (!Op1S)
13476 return SDValue();
13477
13478 unsigned Opc;
13479 switch (VT.getVectorNumElements()) {
13480 case 2:
13481 Opc = Intrinsic::arm_mve_vctp64;
13482 break;
13483 case 4:
13484 Opc = Intrinsic::arm_mve_vctp32;
13485 break;
13486 case 8:
13487 Opc = Intrinsic::arm_mve_vctp16;
13488 break;
13489 case 16:
13490 Opc = Intrinsic::arm_mve_vctp8;
13491 break;
13492 default:
13493 return SDValue();
13494 }
13495
13496 SDLoc DL(N);
13497 return DCI.DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
13498 DCI.DAG.getConstant(Opc, DL, MVT::i32),
13499 DCI.DAG.getZExtOrTrunc(Op1S, DL, MVT::i32));
13500}
13501
13502static SDValue PerformABSCombine(SDNode *N,
13503 TargetLowering::DAGCombinerInfo &DCI,
13504 const ARMSubtarget *Subtarget) {
13505 SelectionDAG &DAG = DCI.DAG;
13506 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13507
13508 if (TLI.isOperationLegal(N->getOpcode(), N->getValueType(0)))
13509 return SDValue();
13510
13511 return TLI.expandABS(N, DAG);
13512}
13513
13514/// PerformADDECombine - Target-specific dag combine transform from
13515/// ARMISD::ADDC, ARMISD::ADDE, and ISD::MUL_LOHI to MLAL or
13516/// ARMISD::ADDC, ARMISD::ADDE and ARMISD::UMLAL to ARMISD::UMAAL
13517static SDValue PerformADDECombine(SDNode *N,
13518 TargetLowering::DAGCombinerInfo &DCI,
13519 const ARMSubtarget *Subtarget) {
13520 // Only ARM and Thumb2 support UMLAL/SMLAL.
13521 if (Subtarget->isThumb1Only())
13522 return PerformAddeSubeCombine(N, DCI, Subtarget);
13523
13524 // Only perform the checks after legalize when the pattern is available.
13525 if (DCI.isBeforeLegalize()) return SDValue();
13526
13527 return AddCombineTo64bitUMAAL(N, DCI, Subtarget);
13528}
13529
13530/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
13531/// operands N0 and N1. This is a helper for PerformADDCombine that is
13532/// called with the default operands, and if that fails, with commuted
13533/// operands.
13534static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
13535 TargetLowering::DAGCombinerInfo &DCI,
13536 const ARMSubtarget *Subtarget){
13537 // Attempt to create vpadd for this add.
13538 if (SDValue Result = AddCombineToVPADD(N, N0, N1, DCI, Subtarget))
13539 return Result;
13540
13541 // Attempt to create vpaddl for this add.
13542 if (SDValue Result = AddCombineVUZPToVPADDL(N, N0, N1, DCI, Subtarget))
13543 return Result;
13544 if (SDValue Result = AddCombineBUILD_VECTORToVPADDL(N, N0, N1, DCI,
13545 Subtarget))
13546 return Result;
13547
13548 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c))
13549 if (N0.getNode()->hasOneUse())
13550 if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI))
13551 return Result;
13552 return SDValue();
13553}
13554
13555static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG) {
13556 EVT VT = N->getValueType(0);
13557 SDValue N0 = N->getOperand(0);
13558 SDValue N1 = N->getOperand(1);
13559 SDLoc dl(N);
13560
13561 auto IsVecReduce = [](SDValue Op) {
13562 switch (Op.getOpcode()) {
13563 case ISD::VECREDUCE_ADD:
13564 case ARMISD::VADDVs:
13565 case ARMISD::VADDVu:
13566 case ARMISD::VMLAVs:
13567 case ARMISD::VMLAVu:
13568 return true;
13569 }
13570 return false;
13571 };
13572
13573 auto DistrubuteAddAddVecReduce = [&](SDValue N0, SDValue N1) {
13574 // Distribute add(X, add(vecreduce(Y), vecreduce(Z))) ->
13575 // add(add(X, vecreduce(Y)), vecreduce(Z))
13576 // to make better use of vaddva style instructions.
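// For example (illustrative): add(x, add(vaddv(a), vaddv(b))) becomes
// add(add(x, vaddv(a)), vaddv(b)), so both reductions can be selected as
// accumulating VADDVA instructions into the running scalar total.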
13577 if (VT == MVT::i32 && N1.getOpcode() == ISD::ADD && !IsVecReduce(N0) &&
13578 IsVecReduce(N1.getOperand(0)) && IsVecReduce(N1.getOperand(1)) &&
13579 !isa<ConstantSDNode>(N0) && N1->hasOneUse()) {
13580 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0, N1.getOperand(0));
13581 return DAG.getNode(ISD::ADD, dl, VT, Add0, N1.getOperand(1));
13582 }
13583 // And turn add(add(A, reduce(B)), add(C, reduce(D))) ->
13584 // add(add(add(A, C), reduce(B)), reduce(D))
13585 if (VT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
13586 N1.getOpcode() == ISD::ADD && N0->hasOneUse() && N1->hasOneUse()) {
13587 unsigned N0RedOp = 0;
13588 if (!IsVecReduce(N0.getOperand(N0RedOp))) {
13589 N0RedOp = 1;
13590 if (!IsVecReduce(N0.getOperand(N0RedOp)))
13591 return SDValue();
13592 }
13593
13594 unsigned N1RedOp = 0;
13595 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13596 N1RedOp = 1;
13597 if (!IsVecReduce(N1.getOperand(N1RedOp)))
13598 return SDValue();
13599
13600 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, N0.getOperand(1 - N0RedOp),
13601 N1.getOperand(1 - N1RedOp));
13602 SDValue Add1 =
13603 DAG.getNode(ISD::ADD, dl, VT, Add0, N0.getOperand(N0RedOp));
13604 return DAG.getNode(ISD::ADD, dl, VT, Add1, N1.getOperand(N1RedOp));
13605 }
13606 return SDValue();
13607 };
13608 if (SDValue R = DistrubuteAddAddVecReduce(N0, N1))
13609 return R;
13610 if (SDValue R = DistrubuteAddAddVecReduce(N1, N0))
13611 return R;
13612
13613 // Distribute add(vecreduce(load(Y)), vecreduce(load(Z)))
13614 // Or add(add(X, vecreduce(load(Y))), vecreduce(load(Z)))
13615 // by ascending load offsets. This can help cores prefetch if the order of
13616 // loads is more predictable.
13617 auto DistrubuteVecReduceLoad = [&](SDValue N0, SDValue N1, bool IsForward) {
13618 // Check if two reductions are known to load data where one is before/after
13619 // another. Return negative if N0 loads data before N1, positive if N1 is
13620 // before N0, and 0 if nothing is known.
13621 auto IsKnownOrderedLoad = [&](SDValue N0, SDValue N1) {
13622 // Look through to the first operand of a MUL, for the VMLA case.
13623 // Currently only looks at the first operand, in the hope they are equal.
13624 if (N0.getOpcode() == ISD::MUL)
13625 N0 = N0.getOperand(0);
13626 if (N1.getOpcode() == ISD::MUL)
13627 N1 = N1.getOperand(0);
13628
13629 // Return true if the two operands are loads to the same object and the
13630 // offset of the first is known to be less than the offset of the second.
13631 LoadSDNode *Load0 = dyn_cast<LoadSDNode>(N0);
13632 LoadSDNode *Load1 = dyn_cast<LoadSDNode>(N1);
13633 if (!Load0 || !Load1 || Load0->getChain() != Load1->getChain() ||
13634 !Load0->isSimple() || !Load1->isSimple() || Load0->isIndexed() ||
13635 Load1->isIndexed())
13636 return 0;
13637
13638 auto BaseLocDecomp0 = BaseIndexOffset::match(Load0, DAG);
13639 auto BaseLocDecomp1 = BaseIndexOffset::match(Load1, DAG);
13640
13641 if (!BaseLocDecomp0.getBase() ||
13642 BaseLocDecomp0.getBase() != BaseLocDecomp1.getBase() ||
13643 !BaseLocDecomp0.hasValidOffset() || !BaseLocDecomp1.hasValidOffset())
13644 return 0;
13645 if (BaseLocDecomp0.getOffset() < BaseLocDecomp1.getOffset())
13646 return -1;
13647 if (BaseLocDecomp0.getOffset() > BaseLocDecomp1.getOffset())
13648 return 1;
13649 return 0;
13650 };
13651
13652 SDValue X;
13653 if (N0.getOpcode() == ISD::ADD && N0->hasOneUse()) {
13654 if (IsVecReduce(N0.getOperand(0)) && IsVecReduce(N0.getOperand(1))) {
13655 int IsBefore = IsKnownOrderedLoad(N0.getOperand(0).getOperand(0),
13656 N0.getOperand(1).getOperand(0));
13657 if (IsBefore < 0) {
13658 X = N0.getOperand(0);
13659 N0 = N0.getOperand(1);
13660 } else if (IsBefore > 0) {
13661 X = N0.getOperand(1);
13662 N0 = N0.getOperand(0);
13663 } else
13664 return SDValue();
13665 } else if (IsVecReduce(N0.getOperand(0))) {
13666 X = N0.getOperand(1);
13667 N0 = N0.getOperand(0);
13668 } else if (IsVecReduce(N0.getOperand(1))) {
13669 X = N0.getOperand(0);
13670 N0 = N0.getOperand(1);
13671 } else
13672 return SDValue();
13673 } else if (IsForward && IsVecReduce(N0) && IsVecReduce(N1) &&
13674 IsKnownOrderedLoad(N0.getOperand(0), N1.getOperand(0)) < 0) {
13675 // Note this is backwards from what you would expect. We create
13676 // add(reduce(load + 16), reduce(load + 0)) so that the
13677 // add(reduce(load + 16), X) is combined into VADDVA(X, load + 16), leaving
13678 // the X as VADDV(load + 0).
13679 return DAG.getNode(ISD::ADD, dl, VT, N1, N0);
13680 } else
13681 return SDValue();
13682
13683 if (!IsVecReduce(N0) || !IsVecReduce(N1))
13684 return SDValue();
13685
13686 if (IsKnownOrderedLoad(N1.getOperand(0), N0.getOperand(0)) >= 0)
13687 return SDValue();
13688
13689 // Switch from add(add(X, N0), N1) to add(add(X, N1), N0)
13690 SDValue Add0 = DAG.getNode(ISD::ADD, dl, VT, X, N1);
13691 return DAG.getNode(ISD::ADD, dl, VT, Add0, N0);
13692 };
13693 if (SDValue R = DistrubuteVecReduceLoad(N0, N1, true))
13694 return R;
13695 if (SDValue R = DistrubuteVecReduceLoad(N1, N0, false))
13696 return R;
13697 return SDValue();
13698}
13699
13700static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG,
13701 const ARMSubtarget *Subtarget) {
13702 if (!Subtarget->hasMVEIntegerOps())
13703 return SDValue();
13704
13705 if (SDValue R = TryDistrubutionADDVecReduce(N, DAG))
13706 return R;
13707
13708 EVT VT = N->getValueType(0);
13709 SDValue N0 = N->getOperand(0);
13710 SDValue N1 = N->getOperand(1);
13711 SDLoc dl(N);
13712
13713 if (VT != MVT::i64)
13714 return SDValue();
13715
13716 // We are looking for a i64 add of a VADDLVx. Due to these being i64's, this
13717 // will look like:
13718 // t1: i32,i32 = ARMISD::VADDLVs x
13719 // t2: i64 = build_pair t1, t1:1
13720 // t3: i64 = add t2, y
13721 // Otherwise we try to push the add up above VADDLVAx, to potentially allow
13722 // the add to be simplified separately.
13723 // We also need to check for sext / zext and commutative adds.
13724 auto MakeVecReduce = [&](unsigned Opcode, unsigned OpcodeA, SDValue NA,
13725 SDValue NB) {
13726 if (NB->getOpcode() != ISD::BUILD_PAIR)
13727 return SDValue();
13728 SDValue VecRed = NB->getOperand(0);
13729 if ((VecRed->getOpcode() != Opcode && VecRed->getOpcode() != OpcodeA) ||
13730 VecRed.getResNo() != 0 ||
13731 NB->getOperand(1) != SDValue(VecRed.getNode(), 1))
13732 return SDValue();
13733
13734 if (VecRed->getOpcode() == OpcodeA) {
13735 // add(NA, VADDLVA(Inp), Y) -> VADDLVA(add(NA, Inp), Y)
13736 SDValue Inp = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64,
13737 VecRed.getOperand(0), VecRed.getOperand(1));
13738 NA = DAG.getNode(ISD::ADD, dl, MVT::i64, Inp, NA);
13739 }
13740
13741 SmallVector<SDValue, 4> Ops(2);
13742 std::tie(Ops[0], Ops[1]) = DAG.SplitScalar(NA, dl, MVT::i32, MVT::i32);
13743
13744 unsigned S = VecRed->getOpcode() == OpcodeA ? 2 : 0;
13745 for (unsigned I = S, E = VecRed.getNumOperands(); I < E; I++)
13746 Ops.push_back(VecRed->getOperand(I));
13747 SDValue Red =
13748 DAG.getNode(OpcodeA, dl, DAG.getVTList({MVT::i32, MVT::i32}), Ops);
13749 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Red,
13750 SDValue(Red.getNode(), 1));
13751 };
13752
13753 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N0, N1))
13754 return M;
13755 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N0, N1))
13756 return M;
13757 if (SDValue M = MakeVecReduce(ARMISD::VADDLVs, ARMISD::VADDLVAs, N1, N0))
13758 return M;
13759 if (SDValue M = MakeVecReduce(ARMISD::VADDLVu, ARMISD::VADDLVAu, N1, N0))
13760 return M;
13761 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N0, N1))
13762 return M;
13763 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N0, N1))
13764 return M;
13765 if (SDValue M = MakeVecReduce(ARMISD::VADDLVps, ARMISD::VADDLVAps, N1, N0))
13766 return M;
13767 if (SDValue M = MakeVecReduce(ARMISD::VADDLVpu, ARMISD::VADDLVApu, N1, N0))
13768 return M;
13769 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N0, N1))
13770 return M;
13771 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N0, N1))
13772 return M;
13773 if (SDValue M = MakeVecReduce(ARMISD::VMLALVs, ARMISD::VMLALVAs, N1, N0))
13774 return M;
13775 if (SDValue M = MakeVecReduce(ARMISD::VMLALVu, ARMISD::VMLALVAu, N1, N0))
13776 return M;
13777 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N0, N1))
13778 return M;
13779 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N0, N1))
13780 return M;
13781 if (SDValue M = MakeVecReduce(ARMISD::VMLALVps, ARMISD::VMLALVAps, N1, N0))
13782 return M;
13783 if (SDValue M = MakeVecReduce(ARMISD::VMLALVpu, ARMISD::VMLALVApu, N1, N0))
13784 return M;
13785 return SDValue();
13786}
13787
13788bool
13789ARMTargetLowering::isDesirableToCommuteWithShift(const SDNode *N,
13790 CombineLevel Level) const {
13791 assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA ||
13792 N->getOpcode() == ISD::SRL) &&
13793 "Expected shift op");
13794
13795 if (Level == BeforeLegalizeTypes)
13796 return true;
13797
13798 if (N->getOpcode() != ISD::SHL)
13799 return true;
13800
13801 if (Subtarget->isThumb1Only()) {
13802 // Avoid making expensive immediates by commuting shifts. (This logic
13803 // only applies to Thumb1 because ARM and Thumb2 immediates can be shifted
13804 // for free.)
13805 if (N->getOpcode() != ISD::SHL)
13806 return true;
13807 SDValue N1 = N->getOperand(0);
13808 if (N1->getOpcode() != ISD::ADD && N1->getOpcode() != ISD::AND &&
13809 N1->getOpcode() != ISD::OR && N1->getOpcode() != ISD::XOR)
13810 return true;
13811 if (auto *Const = dyn_cast<ConstantSDNode>(N1->getOperand(1))) {
13812 if (Const->getAPIntValue().ult(256))
13813 return false;
13814 if (N1->getOpcode() == ISD::ADD && Const->getAPIntValue().slt(0) &&
13815 Const->getAPIntValue().sgt(-256))
13816 return false;
13817 }
13818 return true;
13819 }
13820
13821 // Turn off commute-with-shift transform after legalization, so it doesn't
13822 // conflict with PerformSHLSimplify. (We could try to detect when
13823 // PerformSHLSimplify would trigger more precisely, but it isn't
13824 // really necessary.)
13825 return false;
13826}
13827
13828bool ARMTargetLowering::isDesirableToCommuteXorWithShift(
13829 const SDNode *N) const {
13830 assert(N->getOpcode() == ISD::XOR &&
13831 (N->getOperand(0).getOpcode() == ISD::SHL ||
13832 N->getOperand(0).getOpcode() == ISD::SRL) &&
13833 "Expected XOR(SHIFT) pattern");
13834
13835 // Only commute if the entire NOT mask is a hidden shifted mask.
13836 auto *XorC = dyn_cast<ConstantSDNode>(N->getOperand(1));
13837 auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(0).getOperand(1));
13838 if (XorC && ShiftC) {
13839 unsigned MaskIdx, MaskLen;
13840 if (XorC->getAPIntValue().isShiftedMask(MaskIdx, MaskLen)) {
13841 unsigned ShiftAmt = ShiftC->getZExtValue();
13842 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
13843 if (N->getOperand(0).getOpcode() == ISD::SHL)
13844 return MaskIdx == ShiftAmt && MaskLen == (BitWidth - ShiftAmt);
13845 return MaskIdx == 0 && MaskLen == (BitWidth - ShiftAmt);
13846 }
13847 }
13848
13849 return false;
13850}
13851
13852bool ARMTargetLowering::shouldFoldConstantShiftPairToMask(
13853 const SDNode *N, CombineLevel Level) const {
13854 assert(((N->getOpcode() == ISD::SHL &&
13855 N->getOperand(0).getOpcode() == ISD::SRL) ||
13856 (N->getOpcode() == ISD::SRL &&
13857 N->getOperand(0).getOpcode() == ISD::SHL)) &&
13858 "Expected shift-shift mask");
13859
13860 if (!Subtarget->isThumb1Only())
13861 return true;
13862
13863 if (Level == BeforeLegalizeTypes)
13864 return true;
13865
13866 return false;
13867}
13868
13869bool ARMTargetLowering::shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
13870 EVT VT) const {
13871 return Subtarget->hasMVEIntegerOps() && isTypeLegal(VT);
13872}
13873
13875 if (!Subtarget->hasNEON()) {
13876 if (Subtarget->isThumb1Only())
13877 return VT.getScalarSizeInBits() <= 32;
13878 return true;
13879 }
13880 return VT.isScalarInteger();
13881}
13882
13883bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
13884 EVT VT) const {
13885 if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
13886 return false;
13887
13888 switch (FPVT.getSimpleVT().SimpleTy) {
13889 case MVT::f16:
13890 return Subtarget->hasVFP2Base();
13891 case MVT::f32:
13892 return Subtarget->hasVFP2Base();
13893 case MVT::f64:
13894 return Subtarget->hasFP64();
13895 case MVT::v4f32:
13896 case MVT::v8f16:
13897 return Subtarget->hasMVEFloatOps();
13898 default:
13899 return false;
13900 }
13901}
13902
13903static SDValue PerformSHLSimplify(SDNode *N,
13904 TargetLowering::DAGCombinerInfo &DCI,
13905 const ARMSubtarget *ST) {
13906 // Allow the generic combiner to identify potential bswaps.
13907 if (DCI.isBeforeLegalize())
13908 return SDValue();
13909
13910 // DAG combiner will fold:
13911 // (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
13912 // (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
13913 // Other code patterns that can also be modified have the following form:
13914 // b + ((a << 1) | 510)
13915 // b + ((a << 1) & 510)
13916 // b + ((a << 1) ^ 510)
13917 // b + ((a << 1) + 510)
13918
13919 // Many instructions can perform the shift for free, but it requires both
13920 // the operands to be registers. If c1 << c2 is too large, a mov immediate
13921 // instruction will be needed. So, unfold back to the original pattern if:
13922 // - c1 and c2 are small enough that they don't require mov imms.
13923 // - the user(s) of the node can perform an shl
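// For example (illustrative): (add r0, (add (shl r1, 2), 28)) is rewritten to
// (add r0, (shl (add r1, 7), 2)), so the outer add can fold the shift as a
// shifted-register operand (roughly: add tmp, r1, #7; add r0, r0, tmp, lsl #2).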
13924
13925 // No shifted operands for 16-bit instructions.
13926 if (ST->isThumb() && ST->isThumb1Only())
13927 return SDValue();
13928
13929 // Check that all the users could perform the shl themselves.
13930 for (auto *U : N->uses()) {
13931 switch(U->getOpcode()) {
13932 default:
13933 return SDValue();
13934 case ISD::SUB:
13935 case ISD::ADD:
13936 case ISD::AND:
13937 case ISD::OR:
13938 case ISD::XOR:
13939 case ISD::SETCC:
13940 case ARMISD::CMP:
13941 // Check that the user isn't already using a constant because there
13942 // aren't any instructions that support an immediate operand and a
13943 // shifted operand.
13944 if (isa<ConstantSDNode>(U->getOperand(0)) ||
13945 isa<ConstantSDNode>(U->getOperand(1)))
13946 return SDValue();
13947
13948 // Check that it's not already using a shift.
13949 if (U->getOperand(0).getOpcode() == ISD::SHL ||
13950 U->getOperand(1).getOpcode() == ISD::SHL)
13951 return SDValue();
13952 break;
13953 }
13954 }
13955
13956 if (N->getOpcode() != ISD::ADD && N->getOpcode() != ISD::OR &&
13957 N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND)
13958 return SDValue();
13959
13960 if (N->getOperand(0).getOpcode() != ISD::SHL)
13961 return SDValue();
13962
13963 SDValue SHL = N->getOperand(0);
13964
13965 auto *C1ShlC2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
13966 auto *C2 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
13967 if (!C1ShlC2 || !C2)
13968 return SDValue();
13969
13970 APInt C2Int = C2->getAPIntValue();
13971 APInt C1Int = C1ShlC2->getAPIntValue();
13972 unsigned C2Width = C2Int.getBitWidth();
13973 if (C2Int.uge(C2Width))
13974 return SDValue();
13975 uint64_t C2Value = C2Int.getZExtValue();
13976
13977 // Check that performing a lshr will not lose any information.
13978 APInt Mask = APInt::getHighBitsSet(C2Width, C2Width - C2Value);
13979 if ((C1Int & Mask) != C1Int)
13980 return SDValue();
13981
13982 // Shift the first constant.
13983 C1Int.lshrInPlace(C2Int);
13984
13985 // The immediates are encoded as an 8-bit value that can be rotated.
13986 auto LargeImm = [](const APInt &Imm) {
13987 unsigned Zeros = Imm.countl_zero() + Imm.countr_zero();
13988 return Imm.getBitWidth() - Zeros > 8;
13989 };
13990
13991 if (LargeImm(C1Int) || LargeImm(C2Int))
13992 return SDValue();
13993
13994 SelectionDAG &DAG = DCI.DAG;
13995 SDLoc dl(N);
13996 SDValue X = SHL.getOperand(0);
13997 SDValue BinOp = DAG.getNode(N->getOpcode(), dl, MVT::i32, X,
13998 DAG.getConstant(C1Int, dl, MVT::i32));
13999 // Shift left to compensate for the lshr of C1Int.
14000 SDValue Res = DAG.getNode(ISD::SHL, dl, MVT::i32, BinOp, SHL.getOperand(1));
14001
14002 LLVM_DEBUG(dbgs() << "Simplify shl use:\n"; SHL.getOperand(0).dump();
14003 SHL.dump(); N->dump());
14004 LLVM_DEBUG(dbgs() << "Into:\n"; X.dump(); BinOp.dump(); Res.dump());
14005 return Res;
14006}
14007
14008
14009/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
14010///
14011static SDValue PerformADDCombine(SDNode *N,
14012 TargetLowering::DAGCombinerInfo &DCI,
14013 const ARMSubtarget *Subtarget) {
14014 SDValue N0 = N->getOperand(0);
14015 SDValue N1 = N->getOperand(1);
14016
14017 // Only works one way, because it needs an immediate operand.
14018 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14019 return Result;
14020
14021 if (SDValue Result = PerformADDVecReduce(N, DCI.DAG, Subtarget))
14022 return Result;
14023
14024 // First try with the default operand order.
14025 if (SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget))
14026 return Result;
14027
14028 // If that didn't work, try again with the operands commuted.
14029 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
14030}
14031
14032// Combine (sub 0, (csinc X, Y, CC)) -> (csinv -X, Y, CC)
14033// providing -X is as cheap as X (currently, just a constant).
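// For reference (illustrative note): CSINC returns X when the condition holds
// and Y + 1 otherwise, while CSINV returns ~Y in the false case. Since
// -(Y + 1) == ~Y, negating a CSINC whose true operand is a constant is the
// CSINV of the negated constant.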
14034static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
14035 if (N->getValueType(0) != MVT::i32 || !isNullConstant(N->getOperand(0)))
14036 return SDValue();
14037 SDValue CSINC = N->getOperand(1);
14038 if (CSINC.getOpcode() != ARMISD::CSINC || !CSINC.hasOneUse())
14039 return SDValue();
14040
14041 ConstantSDNode *X = dyn_cast<ConstantSDNode>(CSINC.getOperand(0));
14042 if (!X)
14043 return SDValue();
14044
14045 return DAG.getNode(ARMISD::CSINV, SDLoc(N), MVT::i32,
14046 DAG.getNode(ISD::SUB, SDLoc(N), MVT::i32, N->getOperand(0),
14047 CSINC.getOperand(0)),
14048 CSINC.getOperand(1), CSINC.getOperand(2),
14049 CSINC.getOperand(3));
14050}
14051
14052/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
14053///
14054static SDValue PerformSUBCombine(SDNode *N,
14055 TargetLowering::DAGCombinerInfo &DCI,
14056 const ARMSubtarget *Subtarget) {
14057 SDValue N0 = N->getOperand(0);
14058 SDValue N1 = N->getOperand(1);
14059
14060 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
14061 if (N1.getNode()->hasOneUse())
14062 if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI))
14063 return Result;
14064
14065 if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
14066 return R;
14067
14068 if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
14069 return SDValue();
14070
14071 // Fold (sub (ARMvmovImm 0), (ARMvdup x)) -> (ARMvdup (sub 0, x))
14072 // so that we can readily pattern match more mve instructions which can use
14073 // a scalar operand.
14074 SDValue VDup = N->getOperand(1);
14075 if (VDup->getOpcode() != ARMISD::VDUP)
14076 return SDValue();
14077
14078 SDValue VMov = N->getOperand(0);
14079 if (VMov->getOpcode() == ISD::BITCAST)
14080 VMov = VMov->getOperand(0);
14081
14082 if (VMov->getOpcode() != ARMISD::VMOVIMM || !isZeroVector(VMov))
14083 return SDValue();
14084
14085 SDLoc dl(N);
14086 SDValue Negate = DCI.DAG.getNode(ISD::SUB, dl, MVT::i32,
14087 DCI.DAG.getConstant(0, dl, MVT::i32),
14088 VDup->getOperand(0));
14089 return DCI.DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0), Negate);
14090}
14091
14092/// PerformVMULCombine
14093/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
14094/// special multiplier accumulator forwarding.
14095/// vmul d3, d0, d2
14096/// vmla d3, d1, d2
14097/// is faster than
14098/// vadd d3, d0, d1
14099/// vmul d3, d3, d2
14100// However, for (A + B) * (A + B),
14101// vadd d2, d0, d1
14102// vmul d3, d0, d2
14103// vmla d3, d1, d2
14104// is slower than
14105// vadd d2, d0, d1
14106// vmul d3, d2, d2
14107static SDValue PerformVMULCombine(SDNode *N,
14108 TargetLowering::DAGCombinerInfo &DCI,
14109 const ARMSubtarget *Subtarget) {
14110 if (!Subtarget->hasVMLxForwarding())
14111 return SDValue();
14112
14113 SelectionDAG &DAG = DCI.DAG;
14114 SDValue N0 = N->getOperand(0);
14115 SDValue N1 = N->getOperand(1);
14116 unsigned Opcode = N0.getOpcode();
14117 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14118 Opcode != ISD::FADD && Opcode != ISD::FSUB) {
14119 Opcode = N1.getOpcode();
14120 if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
14121 Opcode != ISD::FADD && Opcode != ISD::FSUB)
14122 return SDValue();
14123 std::swap(N0, N1);
14124 }
14125
14126 if (N0 == N1)
14127 return SDValue();
14128
14129 EVT VT = N->getValueType(0);
14130 SDLoc DL(N);
14131 SDValue N00 = N0->getOperand(0);
14132 SDValue N01 = N0->getOperand(1);
14133 return DAG.getNode(Opcode, DL, VT,
14134 DAG.getNode(ISD::MUL, DL, VT, N00, N1),
14135 DAG.getNode(ISD::MUL, DL, VT, N01, N1));
14136}
14137
14139 const ARMSubtarget *Subtarget) {
14140 EVT VT = N->getValueType(0);
14141 if (VT != MVT::v2i64)
14142 return SDValue();
14143
14144 SDValue N0 = N->getOperand(0);
14145 SDValue N1 = N->getOperand(1);
14146
14147 auto IsSignExt = [&](SDValue Op) {
14148 if (Op->getOpcode() != ISD::SIGN_EXTEND_INREG)
14149 return SDValue();
14150 EVT VT = cast<VTSDNode>(Op->getOperand(1))->getVT();
14151 if (VT.getScalarSizeInBits() == 32)
14152 return Op->getOperand(0);
14153 return SDValue();
14154 };
14155 auto IsZeroExt = [&](SDValue Op) {
14156 // Zero extends are a little more awkward. At the point we are matching
14157 // this, we are looking for an AND with a (-1, 0, -1, 0) buildvector mask.
14158 // That might be before or after a bitcast, depending on how the and is
14159 // placed. Because this has to look through bitcasts, it is currently only
14160 // supported on LE.
14161 if (!Subtarget->isLittle())
14162 return SDValue();
14163
14164 SDValue And = Op;
14165 if (And->getOpcode() == ISD::BITCAST)
14166 And = And->getOperand(0);
14167 if (And->getOpcode() != ISD::AND)
14168 return SDValue();
14169 SDValue Mask = And->getOperand(1);
14170 if (Mask->getOpcode() == ISD::BITCAST)
14171 Mask = Mask->getOperand(0);
14172
14173 if (Mask->getOpcode() != ISD::BUILD_VECTOR ||
14174 Mask.getValueType() != MVT::v4i32)
14175 return SDValue();
14176 if (isAllOnesConstant(Mask->getOperand(0)) &&
14177 isNullConstant(Mask->getOperand(1)) &&
14178 isAllOnesConstant(Mask->getOperand(2)) &&
14179 isNullConstant(Mask->getOperand(3)))
14180 return And->getOperand(0);
14181 return SDValue();
14182 };
14183
14184 SDLoc dl(N);
14185 if (SDValue Op0 = IsSignExt(N0)) {
14186 if (SDValue Op1 = IsSignExt(N1)) {
14187 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14188 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14189 return DAG.getNode(ARMISD::VMULLs, dl, VT, New0a, New1a);
14190 }
14191 }
14192 if (SDValue Op0 = IsZeroExt(N0)) {
14193 if (SDValue Op1 = IsZeroExt(N1)) {
14194 SDValue New0a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op0);
14195 SDValue New1a = DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v4i32, Op1);
14196 return DAG.getNode(ARMISD::VMULLu, dl, VT, New0a, New1a);
14197 }
14198 }
14199
14200 return SDValue();
14201}
14202
14203static SDValue PerformMULCombine(SDNode *N,
14204 TargetLowering::DAGCombinerInfo &DCI,
14205 const ARMSubtarget *Subtarget) {
14206 SelectionDAG &DAG = DCI.DAG;
14207
14208 EVT VT = N->getValueType(0);
14209 if (Subtarget->hasMVEIntegerOps() && VT == MVT::v2i64)
14210 return PerformMVEVMULLCombine(N, DAG, Subtarget);
14211
14212 if (Subtarget->isThumb1Only())
14213 return SDValue();
14214
14215 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14216 return SDValue();
14217
14218 if (VT.is64BitVector() || VT.is128BitVector())
14219 return PerformVMULCombine(N, DCI, Subtarget);
14220 if (VT != MVT::i32)
14221 return SDValue();
14222
14223 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14224 if (!C)
14225 return SDValue();
14226
14227 int64_t MulAmt = C->getSExtValue();
14228 unsigned ShiftAmt = llvm::countr_zero<uint64_t>(MulAmt);
14229
14230 ShiftAmt = ShiftAmt & (32 - 1);
14231 SDValue V = N->getOperand(0);
14232 SDLoc DL(N);
14233
14234 SDValue Res;
14235 MulAmt >>= ShiftAmt;
14236
14237 if (MulAmt >= 0) {
14238 if (llvm::has_single_bit<uint32_t>(MulAmt - 1)) {
14239 // (mul x, 2^N + 1) => (add (shl x, N), x)
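// For example (illustrative): mul x, 9 becomes add(x, shl(x, 3)); with the
// trailing shift applied below, mul x, 36 (= 9 << 2) becomes
// shl(add(x, shl(x, 3)), 2).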
14240 Res = DAG.getNode(ISD::ADD, DL, VT,
14241 V,
14242 DAG.getNode(ISD::SHL, DL, VT,
14243 V,
14244 DAG.getConstant(Log2_32(MulAmt - 1), DL,
14245 MVT::i32)));
14246 } else if (llvm::has_single_bit<uint32_t>(MulAmt + 1)) {
14247 // (mul x, 2^N - 1) => (sub (shl x, N), x)
14248 Res = DAG.getNode(ISD::SUB, DL, VT,
14249 DAG.getNode(ISD::SHL, DL, VT,
14250 V,
14251 DAG.getConstant(Log2_32(MulAmt + 1), DL,
14252 MVT::i32)),
14253 V);
14254 } else
14255 return SDValue();
14256 } else {
14257 uint64_t MulAmtAbs = -MulAmt;
14258 if (llvm::has_single_bit<uint32_t>(MulAmtAbs + 1)) {
14259 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
14260 Res = DAG.getNode(ISD::SUB, DL, VT,
14261 V,
14262 DAG.getNode(ISD::SHL, DL, VT,
14263 V,
14264 DAG.getConstant(Log2_32(MulAmtAbs + 1), DL,
14265 MVT::i32)));
14266 } else if (llvm::has_single_bit<uint32_t>(MulAmtAbs - 1)) {
14267 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
14268 Res = DAG.getNode(ISD::ADD, DL, VT,
14269 V,
14270 DAG.getNode(ISD::SHL, DL, VT,
14271 V,
14272 DAG.getConstant(Log2_32(MulAmtAbs - 1), DL,
14273 MVT::i32)));
14274 Res = DAG.getNode(ISD::SUB, DL, VT,
14275 DAG.getConstant(0, DL, MVT::i32), Res);
14276 } else
14277 return SDValue();
14278 }
14279
14280 if (ShiftAmt != 0)
14281 Res = DAG.getNode(ISD::SHL, DL, VT,
14282 Res, DAG.getConstant(ShiftAmt, DL, MVT::i32));
14283
14284 // Do not add new nodes to DAG combiner worklist.
14285 DCI.CombineTo(N, Res, false);
14286 return SDValue();
14287}
14288
14289static SDValue CombineANDShift(SDNode *N,
14290 TargetLowering::DAGCombinerInfo &DCI,
14291 const ARMSubtarget *Subtarget) {
14292 // Allow DAGCombine to pattern-match before we touch the canonical form.
14293 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
14294 return SDValue();
14295
14296 if (N->getValueType(0) != MVT::i32)
14297 return SDValue();
14298
14299 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
14300 if (!N1C)
14301 return SDValue();
14302
14303 uint32_t C1 = (uint32_t)N1C->getZExtValue();
14304 // Don't transform uxtb/uxth.
14305 if (C1 == 255 || C1 == 65535)
14306 return SDValue();
14307
14308 SDNode *N0 = N->getOperand(0).getNode();
14309 if (!N0->hasOneUse())
14310 return SDValue();
14311
14312 if (N0->getOpcode() != ISD::SHL && N0->getOpcode() != ISD::SRL)
14313 return SDValue();
14314
14315 bool LeftShift = N0->getOpcode() == ISD::SHL;
14316
14317 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0->getOperand(1));
14318 if (!N01C)
14319 return SDValue();
14320
14321 uint32_t C2 = (uint32_t)N01C->getZExtValue();
14322 if (!C2 || C2 >= 32)
14323 return SDValue();
14324
14325 // Clear irrelevant bits in the mask.
14326 if (LeftShift)
14327 C1 &= (-1U << C2);
14328 else
14329 C1 &= (-1U >> C2);
14330
14331 SelectionDAG &DAG = DCI.DAG;
14332 SDLoc DL(N);
14333
14334 // We have a pattern of the form "(and (shl x, c2) c1)" or
14335 // "(and (srl x, c2) c1)", where c1 is a shifted mask. Try to
14336 // transform to a pair of shifts, to save materializing c1.
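// For example (illustrative): (and (srl x, 1), 0x7fffff) becomes
// (srl (shl x, 8), 9), avoiding materializing the 0x7fffff mask, which does
// not fit a Thumb1 8-bit immediate.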
14337
14338 // First pattern: right shift, then mask off leading bits.
14339 // FIXME: Use demanded bits?
14340 if (!LeftShift && isMask_32(C1)) {
14341 uint32_t C3 = llvm::countl_zero(C1);
14342 if (C2 < C3) {
14343 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14344 DAG.getConstant(C3 - C2, DL, MVT::i32));
14345 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14346 DAG.getConstant(C3, DL, MVT::i32));
14347 }
14348 }
14349
14350 // First pattern, reversed: left shift, then mask off trailing bits.
14351 if (LeftShift && isMask_32(~C1)) {
14352 uint32_t C3 = llvm::countr_zero(C1);
14353 if (C2 < C3) {
14354 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14355 DAG.getConstant(C3 - C2, DL, MVT::i32));
14356 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14357 DAG.getConstant(C3, DL, MVT::i32));
14358 }
14359 }
14360
14361 // Second pattern: left shift, then mask off leading bits.
14362 // FIXME: Use demanded bits?
14363 if (LeftShift && isShiftedMask_32(C1)) {
14364 uint32_t Trailing = llvm::countr_zero(C1);
14365 uint32_t C3 = llvm::countl_zero(C1);
14366 if (Trailing == C2 && C2 + C3 < 32) {
14367 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
14368 DAG.getConstant(C2 + C3, DL, MVT::i32));
14369 return DAG.getNode(ISD::SRL, DL, MVT::i32, SHL,
14370 DAG.getConstant(C3, DL, MVT::i32));
14371 }
14372 }
14373
14374 // Second pattern, reversed: right shift, then mask off trailing bits.
14375 // FIXME: Handle other patterns of known/demanded bits.
14376 if (!LeftShift && isShiftedMask_32(C1)) {
14377 uint32_t Leading = llvm::countl_zero(C1);
14378 uint32_t C3 = llvm::countr_zero(C1);
14379 if (Leading == C2 && C2 + C3 < 32) {
14380 SDValue SHL = DAG.getNode(ISD::SRL, DL, MVT::i32, N0->getOperand(0),
14381 DAG.getConstant(C2 + C3, DL, MVT::i32));
14382 return DAG.getNode(ISD::SHL, DL, MVT::i32, SHL,
14383 DAG.getConstant(C3, DL, MVT::i32));
14384 }
14385 }
14386
14387 // FIXME: Transform "(and (shl x, c2) c1)" ->
14388 // "(shl (and x, c1>>c2), c2)" if "c1 >> c2" is a cheaper immediate than
14389 // c1.
14390 return SDValue();
14391}
14392
14393static SDValue PerformANDCombine(SDNode *N,
14394 TargetLowering::DAGCombinerInfo &DCI,
14395 const ARMSubtarget *Subtarget) {
14396 // Attempt to use immediate-form VBIC
14397 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14398 SDLoc dl(N);
14399 EVT VT = N->getValueType(0);
14400 SelectionDAG &DAG = DCI.DAG;
14401
14402 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT) || VT == MVT::v2i1 ||
14403 VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1)
14404 return SDValue();
14405
14406 APInt SplatBits, SplatUndef;
14407 unsigned SplatBitSize;
14408 bool HasAnyUndefs;
14409 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14410 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14411 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14412 SplatBitSize == 64) {
14413 EVT VbicVT;
14414 SDValue Val = isVMOVModifiedImm((~SplatBits).getZExtValue(),
14415 SplatUndef.getZExtValue(), SplatBitSize,
14416 DAG, dl, VbicVT, VT, OtherModImm);
14417 if (Val.getNode()) {
14418 SDValue Input =
14419 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
14420 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
14421 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
14422 }
14423 }
14424 }
14425
14426 if (!Subtarget->isThumb1Only()) {
14427 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))
14428 if (SDValue Result = combineSelectAndUseCommutative(N, true, DCI))
14429 return Result;
14430
14431 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14432 return Result;
14433 }
14434
14435 if (Subtarget->isThumb1Only())
14436 if (SDValue Result = CombineANDShift(N, DCI, Subtarget))
14437 return Result;
14438
14439 return SDValue();
14440}
14441
14442// Try combining OR nodes to SMULWB, SMULWT.
14443static SDValue PerformORCombineToSMULWBT(SDNode *OR,
14444 TargetLowering::DAGCombinerInfo &DCI,
14445 const ARMSubtarget *Subtarget) {
14446 if (!Subtarget->hasV6Ops() ||
14447 (Subtarget->isThumb() &&
14448 (!Subtarget->hasThumb2() || !Subtarget->hasDSP())))
14449 return SDValue();
14450
14451 SDValue SRL = OR->getOperand(0);
14452 SDValue SHL = OR->getOperand(1);
14453
14454 if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
14455 SRL = OR->getOperand(1);
14456 SHL = OR->getOperand(0);
14457 }
14458 if (!isSRL16(SRL) || !isSHL16(SHL))
14459 return SDValue();
14460
14461 // The first operands to the shifts need to be the two results from the
14462 // same smul_lohi node.
14463 if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
14464 SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
14465 return SDValue();
14466
14467 SDNode *SMULLOHI = SRL.getOperand(0).getNode();
14468 if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
14469 SHL.getOperand(0) != SDValue(SMULLOHI, 1))
14470 return SDValue();
14471
14472 // Now we have:
14473 // (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16))
14474 // For SMULW[B|T], smul_lohi takes a 32-bit and a 16-bit argument.
14475 // For SMULWB the 16-bit value will have been sign extended somehow.
14476 // For SMULWT only the SRA is required.
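// For reference (illustrative note): SMULWB r0, r1, r2 computes
// (r1 * sext(r2[15:0])) >> 16, and SMULWT uses the top halfword of r2, so the
// OR of the two shifted halves of this smul_lohi reconstructs bits [47:16] of
// the 48-bit product in a single instruction.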
14477 // Check both sides of SMUL_LOHI
14478 SDValue OpS16 = SMULLOHI->getOperand(0);
14479 SDValue OpS32 = SMULLOHI->getOperand(1);
14480
14481 SelectionDAG &DAG = DCI.DAG;
14482 if (!isS16(OpS16, DAG) && !isSRA16(OpS16)) {
14483 OpS16 = OpS32;
14484 OpS32 = SMULLOHI->getOperand(0);
14485 }
14486
14487 SDLoc dl(OR);
14488 unsigned Opcode = 0;
14489 if (isS16(OpS16, DAG))
14490 Opcode = ARMISD::SMULWB;
14491 else if (isSRA16(OpS16)) {
14492 Opcode = ARMISD::SMULWT;
14493 OpS16 = OpS16->getOperand(0);
14494 }
14495 else
14496 return SDValue();
14497
14498 SDValue Res = DAG.getNode(Opcode, dl, MVT::i32, OpS32, OpS16);
14499 DAG.ReplaceAllUsesOfValueWith(SDValue(OR, 0), Res);
14500 return SDValue(OR, 0);
14501}
14502
14503static SDValue PerformORCombineToBFI(SDNode *N,
14504 TargetLowering::DAGCombinerInfo &DCI,
14505 const ARMSubtarget *Subtarget) {
14506 // BFI is only available on V6T2+
14507 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
14508 return SDValue();
14509
14510 EVT VT = N->getValueType(0);
14511 SDValue N0 = N->getOperand(0);
14512 SDValue N1 = N->getOperand(1);
14513 SelectionDAG &DAG = DCI.DAG;
14514 SDLoc DL(N);
14515 // 1) or (and A, mask), val => ARMbfi A, val, mask
14516 // iff (val & mask) == val
14517 //
14518 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14519 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
14520 // && mask == ~mask2
14521 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
14522 // && ~mask == mask2
14523 // (i.e., copy a bitfield value into another bitfield of the same width)
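// For example (illustrative): (or (and A, 0xffff00ff), 0x00002a00) satisfies
// case (1): the constant 0x2a is inserted into bits [15:8] of A with a single
// bitfield insert instead of an AND/OR pair.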
14524
14525 if (VT != MVT::i32)
14526 return SDValue();
14527
14528 SDValue N00 = N0.getOperand(0);
14529
14530 // The value and the mask need to be constants so we can verify this is
14531 // actually a bitfield set. If the mask is 0xffff, we can do better
14532 // via a movt instruction, so don't use BFI in that case.
14533 SDValue MaskOp = N0.getOperand(1);
14534 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
14535 if (!MaskC)
14536 return SDValue();
14537 unsigned Mask = MaskC->getZExtValue();
14538 if (Mask == 0xffff)
14539 return SDValue();
14540 SDValue Res;
14541 // Case (1): or (and A, mask), val => ARMbfi A, val, mask
14542 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
14543 if (N1C) {
14544 unsigned Val = N1C->getZExtValue();
14545 if ((Val & ~Mask) != Val)
14546 return SDValue();
14547
14548 if (ARM::isBitFieldInvertedMask(Mask)) {
14549 Val >>= llvm::countr_zero(~Mask);
14550
14551 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
14552 DAG.getConstant(Val, DL, MVT::i32),
14553 DAG.getConstant(Mask, DL, MVT::i32));
14554
14555 DCI.CombineTo(N, Res, false);
14556 // Return value from the original node to inform the combiner that N is
14557 // now dead.
14558 return SDValue(N, 0);
14559 }
14560 } else if (N1.getOpcode() == ISD::AND) {
14561 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
14562 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14563 if (!N11C)
14564 return SDValue();
14565 unsigned Mask2 = N11C->getZExtValue();
14566
14567 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern
14568 // as is to match.
14569 if (ARM::isBitFieldInvertedMask(Mask) &&
14570 (Mask == ~Mask2)) {
14571 // The pack halfword instruction works better for masks that fit it,
14572 // so use that when it's available.
14573 if (Subtarget->hasDSP() &&
14574 (Mask == 0xffff || Mask == 0xffff0000))
14575 return SDValue();
14576 // 2a
14577 unsigned amt = llvm::countr_zero(Mask2);
14578 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
14579 DAG.getConstant(amt, DL, MVT::i32));
14580 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
14581 DAG.getConstant(Mask, DL, MVT::i32));
14582 DCI.CombineTo(N, Res, false);
14583 // Return value from the original node to inform the combiner that N is
14584 // now dead.
14585 return SDValue(N, 0);
14586 } else if (ARM::isBitFieldInvertedMask(~Mask) &&
14587 (~Mask == Mask2)) {
14588 // The pack halfword instruction works better for masks that fit it,
14589 // so use that when it's available.
14590 if (Subtarget->hasDSP() &&
14591 (Mask2 == 0xffff || Mask2 == 0xffff0000))
14592 return SDValue();
14593 // 2b
14594 unsigned lsb = llvm::countr_zero(Mask);
14595 Res = DAG.getNode(ISD::SRL, DL, VT, N00,
14596 DAG.getConstant(lsb, DL, MVT::i32));
14597 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
14598 DAG.getConstant(Mask2, DL, MVT::i32));
14599 DCI.CombineTo(N, Res, false);
14600 // Return value from the original node to inform the combiner that N is
14601 // now dead.
14602 return SDValue(N, 0);
14603 }
14604 }
14605
14606 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
14607 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
14608 ARM::isBitFieldInvertedMask(~Mask)) {
14609 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
14610 // where lsb(mask) == #shamt and masked bits of B are known zero.
14611 SDValue ShAmt = N00.getOperand(1);
14612 unsigned ShAmtC = ShAmt->getAsZExtVal();
14613 unsigned LSB = llvm::countr_zero(Mask);
14614 if (ShAmtC != LSB)
14615 return SDValue();
14616
14617 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
14618 DAG.getConstant(~Mask, DL, MVT::i32));
14619
14620 DCI.CombineTo(N, Res, false);
14621 // Return value from the original node to inform the combiner that N is
14622 // now dead.
14623 return SDValue(N, 0);
14624 }
14625
14626 return SDValue();
14627}
14628
14629static bool isValidMVECond(unsigned CC, bool IsFloat) {
14630 switch (CC) {
14631 case ARMCC::EQ:
14632 case ARMCC::NE:
14633 case ARMCC::LE:
14634 case ARMCC::GT:
14635 case ARMCC::GE:
14636 case ARMCC::LT:
14637 return true;
14638 case ARMCC::HS:
14639 case ARMCC::HI:
14640 return !IsFloat;
14641 default:
14642 return false;
14643 };
14644}
14645
14646static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
14647 if (N->getOpcode() == ARMISD::VCMP)
14648 return (ARMCC::CondCodes)N->getConstantOperandVal(2);
14649 else if (N->getOpcode() == ARMISD::VCMPZ)
14650 return (ARMCC::CondCodes)N->getConstantOperandVal(1);
14651 else
14652 llvm_unreachable("Not a VCMP/VCMPZ!");
14653}
14654
14655static bool CanInvertMVEVCMP(SDValue N) {
14656 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
14657 return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
14658}
14659
14660static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG,
14661 const ARMSubtarget *Subtarget) {
14662 // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
14663 // together with predicates
14664 EVT VT = N->getValueType(0);
14665 SDLoc DL(N);
14666 SDValue N0 = N->getOperand(0);
14667 SDValue N1 = N->getOperand(1);
14668
14669 auto IsFreelyInvertable = [&](SDValue V) {
14670 if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
14671 return CanInvertMVEVCMP(V);
14672 return false;
14673 };
14674
14675 // At least one operand must be freely invertable.
14676 if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
14677 return SDValue();
14678
14679 SDValue NewN0 = DAG.getLogicalNOT(DL, N0, VT);
14680 SDValue NewN1 = DAG.getLogicalNOT(DL, N1, VT);
14681 SDValue And = DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
14682 return DAG.getLogicalNOT(DL, And, VT);
14683}
14684
14685/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
14686static SDValue PerformORCombine(SDNode *N,
14687 TargetLowering::DAGCombinerInfo &DCI,
14688 const ARMSubtarget *Subtarget) {
14689 // Attempt to use immediate-form VORR
14690 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
14691 SDLoc dl(N);
14692 EVT VT = N->getValueType(0);
14693 SelectionDAG &DAG = DCI.DAG;
14694
14695 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14696 return SDValue();
14697
14698 if (Subtarget->hasMVEIntegerOps() && (VT == MVT::v2i1 || VT == MVT::v4i1 ||
14699 VT == MVT::v8i1 || VT == MVT::v16i1))
14700 return PerformORCombine_i1(N, DAG, Subtarget);
14701
14702 APInt SplatBits, SplatUndef;
14703 unsigned SplatBitSize;
14704 bool HasAnyUndefs;
14705 if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
14706 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
14707 if (SplatBitSize == 8 || SplatBitSize == 16 || SplatBitSize == 32 ||
14708 SplatBitSize == 64) {
14709 EVT VorrVT;
14710 SDValue Val =
14711 isVMOVModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
14712 SplatBitSize, DAG, dl, VorrVT, VT, OtherModImm);
14713 if (Val.getNode()) {
14714 SDValue Input =
14715 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
14716 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
14717 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
14718 }
14719 }
14720 }
14721
14722 if (!Subtarget->isThumb1Only()) {
14723 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c))
14724 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14725 return Result;
14726 if (SDValue Result = PerformORCombineToSMULWBT(N, DCI, Subtarget))
14727 return Result;
14728 }
14729
14730 SDValue N0 = N->getOperand(0);
14731 SDValue N1 = N->getOperand(1);
14732
14733 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
14734 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
14736
14737 // The code below optimizes (or (and X, Y), Z).
14738 // The AND operand needs to have a single user to make these optimizations
14739 // profitable.
14740 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
14741 return SDValue();
14742
14743 APInt SplatUndef;
14744 unsigned SplatBitSize;
14745 bool HasAnyUndefs;
14746
14747 APInt SplatBits0, SplatBits1;
14748 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
14749 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
14750 // Ensure that the second operands of both ANDs are constants
14751 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
14752 HasAnyUndefs) && !HasAnyUndefs) {
14753 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
14754 HasAnyUndefs) && !HasAnyUndefs) {
14755 // Ensure that the bit widths of the constants are the same and that
14756 // the splat arguments are logical inverses as per the pattern we
14757 // are trying to simplify.
14758 if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() &&
14759 SplatBits0 == ~SplatBits1) {
14760 // Canonicalize the vector type to make instruction selection
14761 // simpler.
14762 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
14763 SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
14764 N0->getOperand(1),
14765 N0->getOperand(0),
14766 N1->getOperand(0));
14767 return DAG.getNode(ISD::BITCAST, dl, VT, Result);
14768 }
14769 }
14770 }
14771 }
14772
14773 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
14774 // reasonable.
14775 if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
14776 if (SDValue Res = PerformORCombineToBFI(N, DCI, Subtarget))
14777 return Res;
14778 }
14779
14780 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14781 return Result;
14782
14783 return SDValue();
14784}
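// Illustrative sketch (not part of the original file): the
// (or (and B, A), (and C, ~A)) pattern folded to VBSP above is the classic
// bit-select identity. A scalar sketch with a hypothetical name:
#include <cstdint>
static inline uint32_t BitSelect(uint32_t A, uint32_t B, uint32_t C) {
  // Per bit: take B where A is 1, take C where A is 0.
  return (B & A) | (C & ~A);
}
// e.g. BitSelect(0x0000FFFF, 0x12345678, 0x9ABCDEF0) == 0x9ABC5678.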
14785
14788 const ARMSubtarget *Subtarget) {
14789 EVT VT = N->getValueType(0);
14790 SelectionDAG &DAG = DCI.DAG;
14791
14792 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
14793 return SDValue();
14794
14795 if (!Subtarget->isThumb1Only()) {
14796 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c))
14797 if (SDValue Result = combineSelectAndUseCommutative(N, false, DCI))
14798 return Result;
14799
14800 if (SDValue Result = PerformSHLSimplify(N, DCI, Subtarget))
14801 return Result;
14802 }
14803
14804 if (Subtarget->hasMVEIntegerOps()) {
14805 // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
14806 SDValue N0 = N->getOperand(0);
14807 SDValue N1 = N->getOperand(1);
14808 const TargetLowering *TLI = Subtarget->getTargetLowering();
14809 if (TLI->isConstTrueVal(N1) &&
14810 (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
14811 if (CanInvertMVEVCMP(N0)) {
14812 SDLoc DL(N0);
14813 ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
14814
14815 SmallVector<SDValue, 4> Ops;
14816 Ops.push_back(N0->getOperand(0));
14817 if (N0->getOpcode() == ARMISD::VCMP)
14818 Ops.push_back(N0->getOperand(1));
14819 Ops.push_back(DAG.getConstant(CC, DL, MVT::i32));
14820 return DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
14821 }
14822 }
14823 }
14824
14825 return SDValue();
14826}
14827
14828// ParseBFI - given a BFI instruction in N, extract the "from" value (Rn) and return it,
14829// and fill in FromMask and ToMask with (consecutive) bits in "from" to be extracted and
14830// their position in "to" (Rd).
14831static SDValue ParseBFI(SDNode *N, APInt &ToMask, APInt &FromMask) {
14832 assert(N->getOpcode() == ARMISD::BFI);
14833
14834 SDValue From = N->getOperand(1);
14835 ToMask = ~N->getConstantOperandAPInt(2);
14836 FromMask = APInt::getLowBitsSet(ToMask.getBitWidth(), ToMask.popcount());
14837
14838 // If the Base came from a SHR #C, we can deduce that it is really testing bit
14839 // #C in the base of the SHR.
14840 if (From->getOpcode() == ISD::SRL &&
14841 isa<ConstantSDNode>(From->getOperand(1))) {
14842 APInt Shift = From->getConstantOperandAPInt(1);
14843 assert(Shift.getLimitedValue() < 32 && "Shift too large!");
14844 FromMask <<= Shift.getLimitedValue(31);
14845 From = From->getOperand(0);
14846 }
14847
14848 return From;
14849}
14850
14851// If A and B contain one contiguous set of bits, does A | B == A . B?
14852//
14853// Neither A nor B may be zero.
14854static bool BitsProperlyConcatenate(const APInt &A, const APInt &B) {
14855 unsigned LastActiveBitInA = A.countr_zero();
14856 unsigned FirstActiveBitInB = B.getBitWidth() - B.countl_zero() - 1;
14857 return LastActiveBitInA - 1 == FirstActiveBitInB;
14858}
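// Illustrative sketch (not part of the original file): the same check for
// plain 32-bit masks, with a made-up name and a worked example.
#include <bit>
#include <cstdint>
static inline bool ConcatenatesProperly(uint32_t A, uint32_t B) {
  // The lowest set bit of A must sit directly above the highest set bit of B.
  int LowestInA = std::countr_zero(A);        // A = 0b1100 -> 2
  int HighestInB = 31 - std::countl_zero(B);  // B = 0b0011 -> 1
  return LowestInA - 1 == HighestInB;         // 2 - 1 == 1 -> true
}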
14859
14860static SDValue FindBFIToCombineWith(SDNode *N) {
14861 // We have a BFI in N. Find a BFI it can combine with, if one exists.
14862 APInt ToMask, FromMask;
14863 SDValue From = ParseBFI(N, ToMask, FromMask);
14864 SDValue To = N->getOperand(0);
14865
14866 SDValue V = To;
14867 if (V.getOpcode() != ARMISD::BFI)
14868 return SDValue();
14869
14870 APInt NewToMask, NewFromMask;
14871 SDValue NewFrom = ParseBFI(V.getNode(), NewToMask, NewFromMask);
14872 if (NewFrom != From)
14873 return SDValue();
14874
14875 // Do the written bits conflict with any we've seen so far?
14876 if ((NewToMask & ToMask).getBoolValue())
14877 // Conflicting bits.
14878 return SDValue();
14879
14880 // Are the new bits contiguous when combined with the old bits?
14881 if (BitsProperlyConcatenate(ToMask, NewToMask) &&
14882 BitsProperlyConcatenate(FromMask, NewFromMask))
14883 return V;
14884 if (BitsProperlyConcatenate(NewToMask, ToMask) &&
14885 BitsProperlyConcatenate(NewFromMask, FromMask))
14886 return V;
14887
14888 return SDValue();
14889}
14890
14892 SDValue N0 = N->getOperand(0);
14893 SDValue N1 = N->getOperand(1);
14894
14895 if (N1.getOpcode() == ISD::AND) {
14896 // (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
14897 // the bits being cleared by the AND are not demanded by the BFI.
14898 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
14899 if (!N11C)
14900 return SDValue();
14901 unsigned InvMask = N->getConstantOperandVal(2);
14902 unsigned LSB = llvm::countr_zero(~InvMask);
14903 unsigned Width = llvm::bit_width<unsigned>(~InvMask) - LSB;
14904 assert(Width <
14905 static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
14906 "undefined behavior");
14907 unsigned Mask = (1u << Width) - 1;
14908 unsigned Mask2 = N11C->getZExtValue();
14909 if ((Mask & (~Mask2)) == 0)
14910 return DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0),
14911 N->getOperand(0), N1.getOperand(0), N->getOperand(2));
14912 return SDValue();
14913 }
14914
14915 // Look for another BFI to combine with.
14916 if (SDValue CombineBFI = FindBFIToCombineWith(N)) {
14917 // We've found a BFI.
14918 APInt ToMask1, FromMask1;
14919 SDValue From1 = ParseBFI(N, ToMask1, FromMask1);
14920
14921 APInt ToMask2, FromMask2;
14922 SDValue From2 = ParseBFI(CombineBFI.getNode(), ToMask2, FromMask2);
14923 assert(From1 == From2);
14924 (void)From2;
14925
14926 // Create a new BFI, combining the two together.
14927 APInt NewFromMask = FromMask1 | FromMask2;
14928 APInt NewToMask = ToMask1 | ToMask2;
14929
14930 EVT VT = N->getValueType(0);
14931 SDLoc dl(N);
14932
14933 if (NewFromMask[0] == 0)
14934 From1 = DAG.getNode(ISD::SRL, dl, VT, From1,
14935 DAG.getConstant(NewFromMask.countr_zero(), dl, VT));
14936 return DAG.getNode(ARMISD::BFI, dl, VT, CombineBFI.getOperand(0), From1,
14937 DAG.getConstant(~NewToMask, dl, VT));
14938 }
14939
14940 // Reassociate BFI(BFI (A, B, M1), C, M2) to BFI(BFI (A, C, M2), B, M1) so
14941 // that lower bit insertions are performed first, provided that M1 and M2
14942 // do not overlap. This can allow multiple BFI instructions to be combined
14943 // together by the other folds above.
14944 if (N->getOperand(0).getOpcode() == ARMISD::BFI) {
14945 APInt ToMask1 = ~N->getConstantOperandAPInt(2);
14946 APInt ToMask2 = ~N0.getConstantOperandAPInt(2);
14947
14948 if (!N0.hasOneUse() || (ToMask1 & ToMask2) != 0 ||
14949 ToMask1.countl_zero() < ToMask2.countl_zero())
14950 return SDValue();
14951
14952 EVT VT = N->getValueType(0);
14953 SDLoc dl(N);
14954 SDValue BFI1 = DAG.getNode(ARMISD::BFI, dl, VT, N0.getOperand(0),
14955 N->getOperand(1), N->getOperand(2));
14956 return DAG.getNode(ARMISD::BFI, dl, VT, BFI1, N0.getOperand(1),
14957 N0.getOperand(2));
14958 }
14959
14960 return SDValue();
14961}
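// Illustrative sketch (not part of the original file): a concrete instance of
// the BFI-merging fold above, with made-up masks. Given
//   t1 = ARMISD::BFI A,  (srl X, 4), ~0x000000F0  ; bits [7:4] of X -> [7:4]
//   t2 = ARMISD::BFI t1, X,          ~0x0000000F  ; bits [3:0] of X -> [3:0]
// the inserted fields are contiguous in both X and the destination, so t2 can
// be rewritten as a single
//   ARMISD::BFI A, X, ~0x000000FF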
14962
14963// Check that N is CMPZ(CSINC(0, 0, CC, X)),
14964// or CMPZ(CMOV(1, 0, CC, $cpsr, X))
14965// return X if valid.
14966static SDValue IsCMPZCSINC(SDNode *Cmp, ARMCC::CondCodes &CC) {
14967 if (Cmp->getOpcode() != ARMISD::CMPZ || !isNullConstant(Cmp->getOperand(1)))
14968 return SDValue();
14969 SDValue CSInc = Cmp->getOperand(0);
14970
14971 // Ignore any `And 1` nodes that may not yet have been removed. We are
14972 // looking for a value that produces 1/0, so these have no effect on the
14973 // code.
14974 while (CSInc.getOpcode() == ISD::AND &&
14975 isa<ConstantSDNode>(CSInc.getOperand(1)) &&
14976 CSInc.getConstantOperandVal(1) == 1 && CSInc->hasOneUse())
14977 CSInc = CSInc.getOperand(0);
14978
14979 if (CSInc.getOpcode() == ARMISD::CSINC &&
14980 isNullConstant(CSInc.getOperand(0)) &&
14981 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14982 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14983 return CSInc.getOperand(3);
14984 }
14985 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(0)) &&
14986 isNullConstant(CSInc.getOperand(1)) && CSInc->hasOneUse()) {
14987 CC = (ARMCC::CondCodes)CSInc.getConstantOperandVal(2);
14988 return CSInc.getOperand(4);
14989 }
14990 if (CSInc.getOpcode() == ARMISD::CMOV && isOneConstant(CSInc.getOperand(1)) &&
14991 isNullConstant(CSInc.getOperand(0)) && CSInc->hasOneUse()) {
14992 CC = ARMCC::getOppositeCondition(
14993 (ARMCC::CondCodes)CSInc.getConstantOperandVal(2));
14994 return CSInc.getOperand(4);
14995 }
14996 return SDValue();
14997}
14998
15000 // Given CMPZ(CSINC(C, 0, 0, EQ), 0), we can just use C directly. As in
15001 // t92: glue = ARMISD::CMPZ t74, 0
15002 // t93: i32 = ARMISD::CSINC 0, 0, 1, t92
15003 // t96: glue = ARMISD::CMPZ t93, 0
15004 // t114: i32 = ARMISD::CSINV 0, 0, 0, t96
15005 ARMCC::CondCodes Cond;
15006 if (SDValue C = IsCMPZCSINC(N, Cond))
15007 if (Cond == ARMCC::EQ)
15008 return C;
15009 return SDValue();
15010}
15011
15013 // Fold away an unnecessary CMPZ/CSINC
15014 // CSXYZ A, B, C1 (CMPZ (CSINC 0, 0, C2, D), 0) ->
15015 // if C1==EQ -> CSXYZ A, B, C2, D
15016 // if C1==NE -> CSXYZ A, B, NOT(C2), D
15017 ARMCC::CondCodes Cond;
15018 if (SDValue C = IsCMPZCSINC(N->getOperand(3).getNode(), Cond)) {
15019 if (N->getConstantOperandVal(2) == ARMCC::EQ)
15020 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15021 N->getOperand(1),
15022 DAG.getConstant(Cond, SDLoc(N), MVT::i32), C);
15023 if (N->getConstantOperandVal(2) == ARMCC::NE)
15024 return DAG.getNode(
15025 N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
15026 N->getOperand(1),
15027 DAG.getConstant(ARMCC::getOppositeCondition(Cond), SDLoc(N), MVT::i32), C);
15028 }
15029 return SDValue();
15030}
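// Illustrative sketch (not part of the original file): a concrete shape of
// the fold above. In
//   t1 = ARMISD::CMPZ x, y            ; flags
//   t2 = ARMISD::CSINC 0, 0, C2, t1   ; materialise C2(t1) as 0/1
//   t3 = ARMISD::CMPZ t2, 0
//   t4 = CSxyz a, b, EQ, t3
// t2/t3 only re-test a condition already held in t1, so t4 can use t1
// directly:
//   t4 = CSxyz a, b, C2, t1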
15031
15032/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
15033/// ARMISD::VMOVRRD.
15034static SDValue PerformVMOVRRDCombine(SDNode *N,
15035 TargetLowering::DAGCombinerInfo &DCI,
15036 const ARMSubtarget *Subtarget) {
15037 // vmovrrd(vmovdrr x, y) -> x,y
15038 SDValue InDouble = N->getOperand(0);
15039 if (InDouble.getOpcode() == ARMISD::VMOVDRR && Subtarget->hasFP64())
15040 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
15041
15042 // vmovrrd(load f64) -> (load i32), (load i32)
15043 SDNode *InNode = InDouble.getNode();
15044 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
15045 InNode->getValueType(0) == MVT::f64 &&
15046 InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
15047 !cast<LoadSDNode>(InNode)->isVolatile()) {
15048 // TODO: Should this be done for non-FrameIndex operands?
15049 LoadSDNode *LD = cast<LoadSDNode>(InNode);
15050
15051 SelectionDAG &DAG = DCI.DAG;
15052 SDLoc DL(LD);
15053 SDValue BasePtr = LD->getBasePtr();
15054 SDValue NewLD1 =
15055 DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(),
15056 LD->getAlign(), LD->getMemOperand()->getFlags());
15057
15058 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
15059 DAG.getConstant(4, DL, MVT::i32));
15060
15061 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, LD->getChain(), OffsetPtr,
15062 LD->getPointerInfo().getWithOffset(4),
15063 commonAlignment(LD->getAlign(), 4),
15064 LD->getMemOperand()->getFlags());
15065
15066 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
15067 if (DCI.DAG.getDataLayout().isBigEndian())
15068 std::swap (NewLD1, NewLD2);
15069 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
15070 return Result;
15071 }
15072
15073 // VMOVRRD(extract(..(build_vector(a, b, c, d)))) -> a,b or c,d
15074 // VMOVRRD(extract(insert_vector(insert_vector(.., a, l1), b, l2))) -> a,b
15075 if (InDouble.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15076 isa<ConstantSDNode>(InDouble.getOperand(1))) {
15077 SDValue BV = InDouble.getOperand(0);
15078 // Look up through any nop bitcasts and vector_reg_casts. bitcasts may
15079 // change lane order under big endian.
15080 bool BVSwap = BV.getOpcode() == ISD::BITCAST;
15081 while (
15082 (BV.getOpcode() == ISD::BITCAST ||
15083 BV.getOpcode() == ARMISD::VECTOR_REG_CAST) &&
15084 (BV.getValueType() == MVT::v2f64 || BV.getValueType() == MVT::v2i64)) {
15085 BVSwap = BV.getOpcode() == ISD::BITCAST;
15086 BV = BV.getOperand(0);
15087 }
15088 if (BV.getValueType() != MVT::v4i32)
15089 return SDValue();
15090
15091 // Handle buildvectors, pulling out the correct lane depending on
15092 // endianness.
15093 unsigned Offset = InDouble.getConstantOperandVal(1) == 1 ? 2 : 0;
15094 if (BV.getOpcode() == ISD::BUILD_VECTOR) {
15095 SDValue Op0 = BV.getOperand(Offset);
15096 SDValue Op1 = BV.getOperand(Offset + 1);
15097 if (!Subtarget->isLittle() && BVSwap)
15098 std::swap(Op0, Op1);
15099
15100 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15101 }
15102
15103 // A chain of insert_vectors, grabbing the correct value of the chain of
15104 // inserts.
15105 SDValue Op0, Op1;
15106 while (BV.getOpcode() == ISD::INSERT_VECTOR_ELT) {
15107 if (isa<ConstantSDNode>(BV.getOperand(2))) {
15108 if (BV.getConstantOperandVal(2) == Offset)
15109 Op0 = BV.getOperand(1);
15110 if (BV.getConstantOperandVal(2) == Offset + 1)
15111 Op1 = BV.getOperand(1);
15112 }
15113 BV = BV.getOperand(0);
15114 }
15115 if (!Subtarget->isLittle() && BVSwap)
15116 std::swap(Op0, Op1);
15117 if (Op0 && Op1)
15118 return DCI.DAG.getMergeValues({Op0, Op1}, SDLoc(N));
15119 }
15120
15121 return SDValue();
15122}
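// Illustrative sketch (not part of the original file): for the f64-load case
// above, a frame slot holding a double is re-read as two i32 halves,
//   t1     = load f64, FrameIndex<FI>
//   lo, hi = ARMISD::VMOVRRD t1
// becomes
//   lo = load i32, FI       ; offset 0
//   hi = load i32, FI + 4   ; offset 4
// with the two results swapped on big-endian targets.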
15123
15124/// PerformVMOVDRRCombine - Target-specific dag combine xforms for
15125/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands.
15126static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
15127 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
15128 SDValue Op0 = N->getOperand(0);
15129 SDValue Op1 = N->getOperand(1);
15130 if (Op0.getOpcode() == ISD::BITCAST)
15131 Op0 = Op0.getOperand(0);
15132 if (Op1.getOpcode() == ISD::BITCAST)
15133 Op1 = Op1.getOperand(0);
15134 if (Op0.getOpcode() == ARMISD::VMOVRRD &&
15135 Op0.getNode() == Op1.getNode() &&
15136 Op0.getResNo() == 0 && Op1.getResNo() == 1)
15137 return DAG.getNode(ISD::BITCAST, SDLoc(N),
15138 N->getValueType(0), Op0.getOperand(0));
15139 return SDValue();
15140}
15141
15144 SDValue Op0 = N->getOperand(0);
15145
15146 // VMOVhr (VMOVrh (X)) -> X
15147 if (Op0->getOpcode() == ARMISD::VMOVrh)
15148 return Op0->getOperand(0);
15149
15150 // FullFP16: half values are passed in S-registers, and we don't
15151 // need any of the bitcast and moves:
15152 //
15153 // t2: f32,ch1,gl1? = CopyFromReg ch, Register:f32 %0, gl?
15154 // t5: i32 = bitcast t2
15155 // t18: f16 = ARMISD::VMOVhr t5
15156 // =>
15157 // tN: f16,ch2,gl2? = CopyFromReg ch, Register::f32 %0, gl?
15158 if (Op0->getOpcode() == ISD::BITCAST) {
15159 SDValue Copy = Op0->getOperand(0);
15160 if (Copy.getValueType() == MVT::f32 &&
15161 Copy->getOpcode() == ISD::CopyFromReg) {
15162 bool HasGlue = Copy->getNumOperands() == 3;
15163 SDValue Ops[] = {Copy->getOperand(0), Copy->getOperand(1),
15164 HasGlue ? Copy->getOperand(2) : SDValue()};
15165 EVT OutTys[] = {N->getValueType(0), MVT::Other, MVT::Glue};
15166 SDValue NewCopy =
15167 DCI.DAG.getNode(ISD::CopyFromReg, SDLoc(Copy),
15168 DCI.DAG.getVTList(ArrayRef(OutTys, HasGlue ? 3 : 2)),
15169 ArrayRef(Ops, HasGlue ? 3 : 2));
15170
15171 // Update Users, Chains, and Potential Glue.
15172 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), NewCopy.getValue(0));
15173 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(1), NewCopy.getValue(1));
15174 if (HasGlue)
15175 DCI.DAG.ReplaceAllUsesOfValueWith(Copy.getValue(2),
15176 NewCopy.getValue(2));
15177
15178 return NewCopy;
15179 }
15180 }
15181
15182 // fold (VMOVhr (load x)) -> (load (f16*)x)
15183 if (LoadSDNode *LN0 = dyn_cast<LoadSDNode>(Op0)) {
15184 if (LN0->hasOneUse() && LN0->isUnindexed() &&
15185 LN0->getMemoryVT() == MVT::i16) {
15186 SDValue Load =
15187 DCI.DAG.getLoad(N->getValueType(0), SDLoc(N), LN0->getChain(),
15188 LN0->getBasePtr(), LN0->getMemOperand());
15189 DCI.DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15190 DCI.DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Load.getValue(1));
15191 return Load;
15192 }
15193 }
15194
15195 // Only the bottom 16 bits of the source register are used.
15196 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15197 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15198 if (TLI.SimplifyDemandedBits(Op0, DemandedMask, DCI))
15199 return SDValue(N, 0);
15200
15201 return SDValue();
15202}
15203
15205 SDValue N0 = N->getOperand(0);
15206 EVT VT = N->getValueType(0);
15207
15208 // fold (VMOVrh (fpconst x)) -> const x
15209 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0)) {
15210 APFloat V = C->getValueAPF();
15211 return DAG.getConstant(V.bitcastToAPInt().getZExtValue(), SDLoc(N), VT);
15212 }
15213
15214 // fold (VMOVrh (load x)) -> (zextload (i16*)x)
15215 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
15216 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
15217
15218 SDValue Load =
15219 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(),
15220 LN0->getBasePtr(), MVT::i16, LN0->getMemOperand());
15221 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Load.getValue(0));
15222 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
15223 return Load;
15224 }
15225
15226 // Fold VMOVrh(extract(x, n)) -> vgetlaneu(x, n)
15227 if (N0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15228 isa<ConstantSDNode>(N0->getOperand(1)))
15229 return DAG.getNode(ARMISD::VGETLANEu, SDLoc(N), VT, N0->getOperand(0),
15230 N0->getOperand(1));
15231
15232 return SDValue();
15233}
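// Illustrative sketch (not part of the original file): the constant fold
// above just exposes the raw IEEE-754 half encoding, e.g.
//   VMOVrh (f16 1.0)  -> i32 0x3C00
//   VMOVrh (f16 -2.0) -> i32 0xC000
// and the load fold turns "load f16; VMOVrh" into a single i16 zextload.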
15234
15235/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
15236/// are normal, non-volatile loads. If so, it is profitable to bitcast an
15237/// i64 vector to have f64 elements, since the value can then be loaded
15238/// directly into a VFP register.
15239static bool hasNormalLoadOperand(SDNode *N) {
15240 unsigned NumElts = N->getValueType(0).getVectorNumElements();
15241 for (unsigned i = 0; i < NumElts; ++i) {
15242 SDNode *Elt = N->getOperand(i).getNode();
15243 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
15244 return true;
15245 }
15246 return false;
15247}
15248
15249/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
15250/// ISD::BUILD_VECTOR.
15251static SDValue PerformBUILD_VECTORCombine(SDNode *N,
15252 TargetLowering::DAGCombinerInfo &DCI,
15253 const ARMSubtarget *Subtarget) {
15254 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
15255 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value
15256 // into a pair of GPRs, which is fine when the value is used as a scalar,
15257 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
15258 SelectionDAG &DAG = DCI.DAG;
15259 if (N->getNumOperands() == 2)
15260 if (SDValue RV = PerformVMOVDRRCombine(N, DAG))
15261 return RV;
15262
15263 // Load i64 elements as f64 values so that type legalization does not split
15264 // them up into i32 values.
15265 EVT VT = N->getValueType(0);
15266 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
15267 return SDValue();
15268 SDLoc dl(N);
15270 unsigned NumElts = VT.getVectorNumElements();
15271 for (unsigned i = 0; i < NumElts; ++i) {
15272 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
15273 Ops.push_back(V);
15274 // Make the DAGCombiner fold the bitcast.
15275 DCI.AddToWorklist(V.getNode());
15276 }
15277 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
15278 SDValue BV = DAG.getBuildVector(FloatVT, dl, Ops);
15279 return DAG.getNode(ISD::BITCAST, dl, VT, BV);
15280}
15281
15282/// Target-specific dag combine xforms for ARMISD::BUILD_VECTOR.
15283static SDValue
15285 // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR.
15286 // At that time, we may have inserted bitcasts from integer to float.
15287 // If these bitcasts have survived DAGCombine, change the lowering of this
15288 // BUILD_VECTOR into something more vector friendly, i.e., something that
15289 // does not force the use of floating point types.
15290
15291 // Make sure we can change the type of the vector.
15292 // This is possible iff:
15293 // 1. The vector is only used in a bitcast to an integer type. I.e.,
15294 // 1.1. Vector is used only once.
15295 // 1.2. Use is a bit convert to an integer type.
15296 // 2. The size of its operands is 32 bits (64 bits is not legal).
15297 EVT VT = N->getValueType(0);
15298 EVT EltVT = VT.getVectorElementType();
15299
15300 // Check 1.1. and 2.
15301 if (EltVT.getSizeInBits() != 32 || !N->hasOneUse())
15302 return SDValue();
15303
15304 // By construction, the input type must be float.
15305 assert(EltVT == MVT::f32 && "Unexpected type!");
15306
15307 // Check 1.2.
15308 SDNode *Use = *N->use_begin();
15309 if (Use->getOpcode() != ISD::BITCAST ||
15310 Use->getValueType(0).isFloatingPoint())
15311 return SDValue();
15312
15313 // Check profitability.
15314 // The model is: if more than half of the relevant operands are bitcast from
15315 // i32, turn the build_vector into a sequence of insert_vector_elt.
15316 // Relevant operands are everything that is not statically
15317 // (i.e., at compile time) bitcasted.
15318 unsigned NumOfBitCastedElts = 0;
15319 unsigned NumElts = VT.getVectorNumElements();
15320 unsigned NumOfRelevantElts = NumElts;
15321 for (unsigned Idx = 0; Idx < NumElts; ++Idx) {
15322 SDValue Elt = N->getOperand(Idx);
15323 if (Elt->getOpcode() == ISD::BITCAST) {
15324 // Assume only bit cast to i32 will go away.
15325 if (Elt->getOperand(0).getValueType() == MVT::i32)
15326 ++NumOfBitCastedElts;
15327 } else if (Elt.isUndef() || isa<ConstantSDNode>(Elt))
15328 // Constants are statically casted, thus do not count them as
15329 // relevant operands.
15330 --NumOfRelevantElts;
15331 }
15332
15333 // Check if more than half of the elements require a non-free bitcast.
15334 if (NumOfBitCastedElts <= NumOfRelevantElts / 2)
15335 return SDValue();
15336
15337 SelectionDAG &DAG = DCI.DAG;
15338 // Create the new vector type.
15339 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
15340 // Check if the type is legal.
15341 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15342 if (!TLI.isTypeLegal(VecVT))
15343 return SDValue();
15344
15345 // Combine:
15346 // ARMISD::BUILD_VECTOR E1, E2, ..., EN.
15347 // => BITCAST INSERT_VECTOR_ELT
15348 // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1),
15349 // (BITCAST EN), N.
15350 SDValue Vec = DAG.getUNDEF(VecVT);
15351 SDLoc dl(N);
15352 for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) {
15353 SDValue V = N->getOperand(Idx);
15354 if (V.isUndef())
15355 continue;
15356 if (V.getOpcode() == ISD::BITCAST &&
15357 V->getOperand(0).getValueType() == MVT::i32)
15358 // Fold obvious case.
15359 V = V.getOperand(0);
15360 else {
15361 V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V);
15362 // Make the DAGCombiner fold the bitcasts.
15363 DCI.AddToWorklist(V.getNode());
15364 }
15365 SDValue LaneIdx = DAG.getConstant(Idx, dl, MVT::i32);
15366 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx);
15367 }
15368 Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec);
15369 // Make the DAGCombiner fold the bitcasts.
15370 DCI.AddToWorklist(Vec.getNode());
15371 return Vec;
15372}
15373
15374static SDValue
15376 EVT VT = N->getValueType(0);
15377 SDValue Op = N->getOperand(0);
15378 SDLoc dl(N);
15379
15380 // PREDICATE_CAST(PREDICATE_CAST(x)) == PREDICATE_CAST(x)
15381 if (Op->getOpcode() == ARMISD::PREDICATE_CAST) {
15382 // If the valuetypes are the same, we can remove the cast entirely.
15383 if (Op->getOperand(0).getValueType() == VT)
15384 return Op->getOperand(0);
15385 return DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15386 }
15387
15388 // Turn pred_cast(xor x, -1) into xor(pred_cast x, -1), in order to produce
15389 // more VPNOT which might get folded as else predicates.
15390 if (Op.getValueType() == MVT::i32 && isBitwiseNot(Op)) {
15391 SDValue X =
15392 DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT, Op->getOperand(0));
15393 SDValue C = DCI.DAG.getNode(ARMISD::PREDICATE_CAST, dl, VT,
15394 DCI.DAG.getConstant(65535, dl, MVT::i32));
15395 return DCI.DAG.getNode(ISD::XOR, dl, VT, X, C);
15396 }
15397
15398 // Only the bottom 16 bits of the source register are used.
15399 if (Op.getValueType() == MVT::i32) {
15400 APInt DemandedMask = APInt::getLowBitsSet(32, 16);
15401 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
15402 if (TLI.SimplifyDemandedBits(Op, DemandedMask, DCI))
15403 return SDValue(N, 0);
15404 }
15405 return SDValue();
15406}
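// Illustrative sketch (not part of the original file): only the low 16 bits
// of the i32 feed an MVE predicate, which is why the xor above uses 65535.
// A scalar sketch with a hypothetical name:
#include <cstdint>
static inline uint16_t PredicateBitsOfNot(uint32_t X) {
  // (~X) & 0xFFFF == ((X & 0xFFFF) ^ 0xFFFF): both forms give the same
  // 16 predicate bits.
  return static_cast<uint16_t>(~X);
}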
15407
15409 const ARMSubtarget *ST) {
15410 EVT VT = N->getValueType(0);
15411 SDValue Op = N->getOperand(0);
15412 SDLoc dl(N);
15413
15414 // Under Little endian, a VECTOR_REG_CAST is equivalent to a BITCAST
15415 if (ST->isLittle())
15416 return DAG.getNode(ISD::BITCAST, dl, VT, Op);
15417
15418 // VECTOR_REG_CAST undef -> undef
15419 if (Op.isUndef())
15420 return DAG.getUNDEF(VT);
15421
15422 // VECTOR_REG_CAST(VECTOR_REG_CAST(x)) == VECTOR_REG_CAST(x)
15423 if (Op->getOpcode() == ARMISD::VECTOR_REG_CAST) {
15424 // If the valuetypes are the same, we can remove the cast entirely.
15425 if (Op->getOperand(0).getValueType() == VT)
15426 return Op->getOperand(0);
15427 return DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, VT, Op->getOperand(0));
15428 }
15429
15430 return SDValue();
15431}
15432
15434 const ARMSubtarget *Subtarget) {
15435 if (!Subtarget->hasMVEIntegerOps())
15436 return SDValue();
15437
15438 EVT VT = N->getValueType(0);
15439 SDValue Op0 = N->getOperand(0);
15440 SDValue Op1 = N->getOperand(1);
15441 ARMCC::CondCodes Cond = (ARMCC::CondCodes)N->getConstantOperandVal(2);
15442 SDLoc dl(N);
15443
15444 // vcmp X, 0, cc -> vcmpz X, cc
15445 if (isZeroVector(Op1))
15446 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0, N->getOperand(2));
15447
15448 unsigned SwappedCond = getSwappedCondition(Cond);
15449 if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
15450 // vcmp 0, X, cc -> vcmpz X, reversed(cc)
15451 if (isZeroVector(Op0))
15452 return DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
15453 DAG.getConstant(SwappedCond, dl, MVT::i32));
15454 // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
15455 if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
15456 return DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
15457 DAG.getConstant(SwappedCond, dl, MVT::i32));
15458 }
15459
15460 return SDValue();
15461}
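// Illustrative sketch (not part of the original file): concrete instances of
// the rewrites above,
//   VCMP x, zerovec, eq   ->  VCMPZ x, eq
//   VCMP zerovec, x, gt   ->  VCMPZ x, lt          ; condition swapped
//   VCMP vdup(y), x, ge   ->  VCMP  x, vdup(y), le ; condition swapped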
15462
15463/// PerformInsertEltCombine - Target-specific dag combine xforms for
15464/// ISD::INSERT_VECTOR_ELT.
15467 // Bitcast an i64 load inserted into a vector to f64.
15468 // Otherwise, the i64 value will be legalized to a pair of i32 values.
15469 EVT VT = N->getValueType(0);
15470 SDNode *Elt = N->getOperand(1).getNode();
15471 if (VT.getVectorElementType() != MVT::i64 ||
15472 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
15473 return SDValue();
15474
15475 SelectionDAG &DAG = DCI.DAG;
15476 SDLoc dl(N);
15477 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
15478 VT.getVectorNumElements());
15479 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
15480 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
15481 // Make the DAGCombiner fold the bitcasts.
15482 DCI.AddToWorklist(Vec.getNode());
15483 DCI.AddToWorklist(V.getNode());
15484 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
15485 Vec, V, N->getOperand(2));
15486 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
15487}
15488
15489// Convert a pair of extracts from the same base vector to a VMOVRRD. Either
15490// directly or bitcast to an integer if the original is a float vector.
15491// extract(x, n); extract(x, n+1) -> VMOVRRD(extract v2f64 x, n/2)
15492// bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD(extract x, n/2)
15493static SDValue
15495 EVT VT = N->getValueType(0);
15496 SDLoc dl(N);
15497
15498 if (!DCI.isAfterLegalizeDAG() || VT != MVT::i32 ||
15499 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(MVT::f64))
15500 return SDValue();
15501
15502 SDValue Ext = SDValue(N, 0);
15503 if (Ext.getOpcode() == ISD::BITCAST &&
15504 Ext.getOperand(0).getValueType() == MVT::f32)
15505 Ext = Ext.getOperand(0);
15506 if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15507 !isa<ConstantSDNode>(Ext.getOperand(1)) ||
15508 Ext.getConstantOperandVal(1) % 2 != 0)
15509 return SDValue();
15510 if (Ext->use_size() == 1 &&
15511 (Ext->use_begin()->getOpcode() == ISD::SINT_TO_FP ||
15512 Ext->use_begin()->getOpcode() == ISD::UINT_TO_FP))
15513 return SDValue();
15514
15515 SDValue Op0 = Ext.getOperand(0);
15516 EVT VecVT = Op0.getValueType();
15517 unsigned ResNo = Op0.getResNo();
15518 unsigned Lane = Ext.getConstantOperandVal(1);
15519 if (VecVT.getVectorNumElements() != 4)
15520 return SDValue();
15521
15522 // Find another extract, of Lane + 1
15523 auto OtherIt = find_if(Op0->uses(), [&](SDNode *V) {
15524 return V->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
15525 isa<ConstantSDNode>(V->getOperand(1)) &&
15526 V->getConstantOperandVal(1) == Lane + 1 &&
15527 V->getOperand(0).getResNo() == ResNo;
15528 });
15529 if (OtherIt == Op0->uses().end())
15530 return SDValue();
15531
15532 // For float extracts, we need to be converting to a i32 for both vector
15533 // lanes.
15534 SDValue OtherExt(*OtherIt, 0);
15535 if (OtherExt.getValueType() != MVT::i32) {
15536 if (OtherExt->use_size() != 1 ||
15537 OtherExt->use_begin()->getOpcode() != ISD::BITCAST ||
15538 OtherExt->use_begin()->getValueType(0) != MVT::i32)
15539 return SDValue();
15540 OtherExt = SDValue(*OtherExt->use_begin(), 0);
15541 }
15542
15543 // Convert the type to a f64 and extract with a VMOVRRD.
15544 SDValue F64 = DCI.DAG.getNode(
15545 ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
15546 DCI.DAG.getNode(ARMISD::VECTOR_REG_CAST, dl, MVT::v2f64, Op0),
15547 DCI.DAG.getConstant(Ext.getConstantOperandVal(1) / 2, dl, MVT::i32));
15548 SDValue VMOVRRD =
15549 DCI.DAG.getNode(ARMISD::VMOVRRD, dl, {MVT::i32, MVT::i32}, F64);
15550
15551 DCI.CombineTo(OtherExt.getNode(), SDValue(VMOVRRD.getNode(), 1));
15552 return VMOVRRD;
15553}
15554
15557 const ARMSubtarget *ST) {
15558 SDValue Op0 = N->getOperand(0);
15559 EVT VT = N->getValueType(0);
15560 SDLoc dl(N);
15561
15562 // extract (vdup x) -> x
15563 if (Op0->getOpcode() == ARMISD::VDUP) {
15564 SDValue X = Op0->getOperand(0);
15565 if (VT == MVT::f16 && X.getValueType() == MVT::i32)
15566 return DCI.DAG.getNode(ARMISD::VMOVhr, dl, VT, X);
15567 if (VT == MVT::i32 && X.getValueType() == MVT::f16)
15568 return DCI.DAG.getNode(ARMISD::VMOVrh, dl, VT, X);
15569 if (VT == MVT::f32 && X.getValueType() == MVT::i32)
15570 return DCI.DAG.getNode(ISD::BITCAST, dl, VT, X);
15571
15572 while (X.getValueType() != VT && X->getOpcode() == ISD::BITCAST)
15573 X = X->getOperand(0);
15574 if (X.getValueType() == VT)
15575 return X;
15576 }
15577
15578 // extract ARM_BUILD_VECTOR -> x
15579 if (Op0->getOpcode() == ARMISD::BUILD_VECTOR &&
15580 isa<ConstantSDNode>(N->getOperand(1)) &&
15581 N->getConstantOperandVal(1) < Op0.getNumOperands()) {
15582 return Op0.getOperand(N->getConstantOperandVal(1));
15583 }
15584
15585 // extract(bitcast(BUILD_VECTOR(VMOVDRR(a, b), ..))) -> a or b
15586 if (Op0.getValueType() == MVT::v4i32 &&
15587 isa<ConstantSDNode>(N->getOperand(1)) &&
15588 Op0.getOpcode() == ISD::BITCAST &&
15589 Op0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
15590 Op0.getOperand(0).getValueType() == MVT::v2f64) {
15591 SDValue BV = Op0.getOperand(0);
15592 unsigned Offset = N->getConstantOperandVal(1);
15593 SDValue MOV = BV.getOperand(Offset < 2 ? 0 : 1);
15594 if (MOV.getOpcode() == ARMISD::VMOVDRR)
15595 return MOV.getOperand(ST->isLittle() ? Offset % 2 : 1 - Offset % 2);
15596 }
15597
15598 // extract x, n; extract x, n+1 -> VMOVRRD x
15599 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
15600 return R;
15601
15602 // extract (MVETrunc(x)) -> extract x
15603 if (Op0->getOpcode() == ARMISD::MVETRUNC) {
15604 unsigned Idx = N->getConstantOperandVal(1);
15605 unsigned Vec =
15607 unsigned SubIdx =
15609 return DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Op0.getOperand(Vec),
15610 DCI.DAG.getConstant(SubIdx, dl, MVT::i32));
15611 }
15612
15613 return SDValue();
15614}
15615
15617 SDValue Op = N->getOperand(0);
15618 EVT VT = N->getValueType(0);
15619
15620 // sext_inreg(VGETLANEu) -> VGETLANEs
15621 if (Op.getOpcode() == ARMISD::VGETLANEu &&
15622 cast<VTSDNode>(N->getOperand(1))->getVT() ==
15623 Op.getOperand(0).getValueType().getScalarType())
15624 return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0),
15625 Op.getOperand(1));
15626
15627 return SDValue();
15628}
15629
15630static SDValue
15632 SDValue Vec = N->getOperand(0);
15633 SDValue SubVec = N->getOperand(1);
15634 uint64_t IdxVal = N->getConstantOperandVal(2);
15635 EVT VecVT = Vec.getValueType();
15636 EVT SubVT = SubVec.getValueType();
15637
15638 // Only do this for legal fixed vector types.
15639 if (!VecVT.isFixedLengthVector() ||
15640 !DCI.DAG.getTargetLoweringInfo().isTypeLegal(VecVT) ||
15642 return SDValue();
15643
15644 // Ignore widening patterns.
15645 if (IdxVal == 0 && Vec.isUndef())
15646 return SDValue();
15647
15648 // Subvector must be half the width and an "aligned" insertion.
15649 unsigned NumSubElts = SubVT.getVectorNumElements();
15650 if ((SubVT.getSizeInBits() * 2) != VecVT.getSizeInBits() ||
15651 (IdxVal != 0 && IdxVal != NumSubElts))
15652 return SDValue();
15653
15654 // Fold insert_subvector -> concat_vectors
15655 // insert_subvector(Vec,Sub,lo) -> concat_vectors(Sub,extract(Vec,hi))
15656 // insert_subvector(Vec,Sub,hi) -> concat_vectors(extract(Vec,lo),Sub)
15657 SDLoc DL(N);
15658 SDValue Lo, Hi;
15659 if (IdxVal == 0) {
15660 Lo = SubVec;
15661 Hi = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15662 DCI.DAG.getVectorIdxConstant(NumSubElts, DL));
15663 } else {
15664 Lo = DCI.DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, Vec,
15665 DCI.DAG.getVectorIdxConstant(0, DL));
15666 Hi = SubVec;
15667 }
15668 return DCI.DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Lo, Hi);
15669}
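// Illustrative sketch (not part of the original file): with made-up types,
// the fold above becomes
//   insert_subvector (v8i16 V), (v4i16 S), 0
//     -> concat_vectors S, (extract_subvector V, 4)
//   insert_subvector (v8i16 V), (v4i16 S), 4
//     -> concat_vectors (extract_subvector V, 0), S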
15670
15671// shuffle(MVETrunc(x, y)) -> VMOVN(x, y)
15672static SDValue PerformShuffleVMOVNCombine(ShuffleVectorSDNode *N,
15673 SelectionDAG &DAG) {
15674 SDValue Trunc = N->getOperand(0);
15675 EVT VT = Trunc.getValueType();
15676 if (Trunc.getOpcode() != ARMISD::MVETRUNC || !N->getOperand(1).isUndef())
15677 return SDValue();
15678
15679 SDLoc DL(Trunc);
15680 if (isVMOVNTruncMask(N->getMask(), VT, false))
15681 return DAG.getNode(
15682 ARMISD::VMOVN, DL, VT,
15683 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15684 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15685 DAG.getConstant(1, DL, MVT::i32));
15686 else if (isVMOVNTruncMask(N->getMask(), VT, true))
15687 return DAG.getNode(
15688 ARMISD::VMOVN, DL, VT,
15689 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(1)),
15690 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, Trunc.getOperand(0)),
15691 DAG.getConstant(1, DL, MVT::i32));
15692 return SDValue();
15693}
15694
15695/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
15696/// ISD::VECTOR_SHUFFLE.
15697static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
15698 if (SDValue R = PerformShuffleVMOVNCombine(cast<ShuffleVectorSDNode>(N), DAG))
15699 return R;
15700
15701 // The LLVM shufflevector instruction does not require the shuffle mask
15702 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
15703 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the
15704 // operands do not match the mask length, they are extended by concatenating
15705 // them with undef vectors. That is probably the right thing for other
15706 // targets, but for NEON it is better to concatenate two double-register
15707 // size vector operands into a single quad-register size vector. Do that
15708 // transformation here:
15709 // shuffle(concat(v1, undef), concat(v2, undef)) ->
15710 // shuffle(concat(v1, v2), undef)
15711 SDValue Op0 = N->getOperand(0);
15712 SDValue Op1 = N->getOperand(1);
15713 if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
15714 Op1.getOpcode() != ISD::CONCAT_VECTORS ||
15715 Op0.getNumOperands() != 2 ||
15716 Op1.getNumOperands() != 2)
15717 return SDValue();
15718 SDValue Concat0Op1 = Op0.getOperand(1);
15719 SDValue Concat1Op1 = Op1.getOperand(1);
15720 if (!Concat0Op1.isUndef() || !Concat1Op1.isUndef())
15721 return SDValue();
15722 // Skip the transformation if any of the types are illegal.
15723 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
15724 EVT VT = N->getValueType(0);
15725 if (!TLI.isTypeLegal(VT) ||
15726 !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
15727 !TLI.isTypeLegal(Concat1Op1.getValueType()))
15728 return SDValue();
15729
15730 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
15731 Op0.getOperand(0), Op1.getOperand(0));
15732 // Translate the shuffle mask.
15733 SmallVector<int, 16> NewMask;
15734 unsigned NumElts = VT.getVectorNumElements();
15735 unsigned HalfElts = NumElts/2;
15736 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
15737 for (unsigned n = 0; n < NumElts; ++n) {
15738 int MaskElt = SVN->getMaskElt(n);
15739 int NewElt = -1;
15740 if (MaskElt < (int)HalfElts)
15741 NewElt = MaskElt;
15742 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
15743 NewElt = HalfElts + MaskElt - NumElts;
15744 NewMask.push_back(NewElt);
15745 }
15746 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat,
15747 DAG.getUNDEF(VT), NewMask);
15748}
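// Illustrative sketch (not part of the original file): the mask translation
// above in isolation, with hypothetical names. Lanes taken from the second
// concat are shifted down next to the first concat's lanes; anything that
// referred to the undef halves stays -1 (undef).
#include <vector>
static std::vector<int> RemapConcatShuffleMask(const std::vector<int> &Mask,
                                               unsigned NumElts) {
  unsigned HalfElts = NumElts / 2;
  std::vector<int> NewMask;
  for (int MaskElt : Mask) {
    int NewElt = -1;
    if (MaskElt >= 0 && MaskElt < (int)HalfElts)
      NewElt = MaskElt;
    else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
      NewElt = (int)HalfElts + MaskElt - (int)NumElts;
    NewMask.push_back(NewElt);
  }
  return NewMask;
}
// e.g. {0, 1, 4, 5} with NumElts == 4 becomes {0, 1, 2, 3}.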
15749
15750/// Load/store instruction that can be merged with a base address
15751/// update
15752struct BaseUpdateTarget {
15753 SDNode *N;
15754 bool isIntrinsic;
15755 bool isStore;
15756 unsigned AddrOpIdx;
15757};
15758
15759struct BaseUpdateUser {
15760 /// Instruction that updates a pointer
15761 SDNode *N;
15762 /// Pointer increment operand
15763 SDValue Inc;
15764 /// Pointer increment value if it is a constant, or 0 otherwise
15765 unsigned ConstInc;
15766};
15767
15768static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target,
15769 struct BaseUpdateUser &User,
15770 bool SimpleConstIncOnly,
15771 TargetLowering::DAGCombinerInfo &DCI) {
15772 SelectionDAG &DAG = DCI.DAG;
15773 SDNode *N = Target.N;
15774 MemSDNode *MemN = cast<MemSDNode>(N);
15775 SDLoc dl(N);
15776
15777 // Find the new opcode for the updating load/store.
15778 bool isLoadOp = true;
15779 bool isLaneOp = false;
15780 // Workaround for vst1x and vld1x intrinsics which do not have alignment
15781 // as an operand.
15782 bool hasAlignment = true;
15783 unsigned NewOpc = 0;
15784 unsigned NumVecs = 0;
15785 if (Target.isIntrinsic) {
15786 unsigned IntNo = N->getConstantOperandVal(1);
15787 switch (IntNo) {
15788 default:
15789 llvm_unreachable("unexpected intrinsic for Neon base update");
15790 case Intrinsic::arm_neon_vld1:
15791 NewOpc = ARMISD::VLD1_UPD;
15792 NumVecs = 1;
15793 break;
15794 case Intrinsic::arm_neon_vld2:
15795 NewOpc = ARMISD::VLD2_UPD;
15796 NumVecs = 2;
15797 break;
15798 case Intrinsic::arm_neon_vld3:
15799 NewOpc = ARMISD::VLD3_UPD;
15800 NumVecs = 3;
15801 break;
15802 case Intrinsic::arm_neon_vld4:
15803 NewOpc = ARMISD::VLD4_UPD;
15804 NumVecs = 4;
15805 break;
15806 case Intrinsic::arm_neon_vld1x2:
15807 NewOpc = ARMISD::VLD1x2_UPD;
15808 NumVecs = 2;
15809 hasAlignment = false;
15810 break;
15811 case Intrinsic::arm_neon_vld1x3:
15812 NewOpc = ARMISD::VLD1x3_UPD;
15813 NumVecs = 3;
15814 hasAlignment = false;
15815 break;
15816 case Intrinsic::arm_neon_vld1x4:
15817 NewOpc = ARMISD::VLD1x4_UPD;
15818 NumVecs = 4;
15819 hasAlignment = false;
15820 break;
15821 case Intrinsic::arm_neon_vld2dup:
15822 NewOpc = ARMISD::VLD2DUP_UPD;
15823 NumVecs = 2;
15824 break;
15825 case Intrinsic::arm_neon_vld3dup:
15826 NewOpc = ARMISD::VLD3DUP_UPD;
15827 NumVecs = 3;
15828 break;
15829 case Intrinsic::arm_neon_vld4dup:
15830 NewOpc = ARMISD::VLD4DUP_UPD;
15831 NumVecs = 4;
15832 break;
15833 case Intrinsic::arm_neon_vld2lane:
15834 NewOpc = ARMISD::VLD2LN_UPD;
15835 NumVecs = 2;
15836 isLaneOp = true;
15837 break;
15838 case Intrinsic::arm_neon_vld3lane:
15839 NewOpc = ARMISD::VLD3LN_UPD;
15840 NumVecs = 3;
15841 isLaneOp = true;
15842 break;
15843 case Intrinsic::arm_neon_vld4lane:
15844 NewOpc = ARMISD::VLD4LN_UPD;
15845 NumVecs = 4;
15846 isLaneOp = true;
15847 break;
15848 case Intrinsic::arm_neon_vst1:
15849 NewOpc = ARMISD::VST1_UPD;
15850 NumVecs = 1;
15851 isLoadOp = false;
15852 break;
15853 case Intrinsic::arm_neon_vst2:
15854 NewOpc = ARMISD::VST2_UPD;
15855 NumVecs = 2;
15856 isLoadOp = false;
15857 break;
15858 case Intrinsic::arm_neon_vst3:
15859 NewOpc = ARMISD::VST3_UPD;
15860 NumVecs = 3;
15861 isLoadOp = false;
15862 break;
15863 case Intrinsic::arm_neon_vst4:
15864 NewOpc = ARMISD::VST4_UPD;
15865 NumVecs = 4;
15866 isLoadOp = false;
15867 break;
15868 case Intrinsic::arm_neon_vst2lane:
15869 NewOpc = ARMISD::VST2LN_UPD;
15870 NumVecs = 2;
15871 isLoadOp = false;
15872 isLaneOp = true;
15873 break;
15874 case Intrinsic::arm_neon_vst3lane:
15875 NewOpc = ARMISD::VST3LN_UPD;
15876 NumVecs = 3;
15877 isLoadOp = false;
15878 isLaneOp = true;
15879 break;
15880 case Intrinsic::arm_neon_vst4lane:
15881 NewOpc = ARMISD::VST4LN_UPD;
15882 NumVecs = 4;
15883 isLoadOp = false;
15884 isLaneOp = true;
15885 break;
15886 case Intrinsic::arm_neon_vst1x2:
15887 NewOpc = ARMISD::VST1x2_UPD;
15888 NumVecs = 2;
15889 isLoadOp = false;
15890 hasAlignment = false;
15891 break;
15892 case Intrinsic::arm_neon_vst1x3:
15893 NewOpc = ARMISD::VST1x3_UPD;
15894 NumVecs = 3;
15895 isLoadOp = false;
15896 hasAlignment = false;
15897 break;
15898 case Intrinsic::arm_neon_vst1x4:
15899 NewOpc = ARMISD::VST1x4_UPD;
15900 NumVecs = 4;
15901 isLoadOp = false;
15902 hasAlignment = false;
15903 break;
15904 }
15905 } else {
15906 isLaneOp = true;
15907 switch (N->getOpcode()) {
15908 default:
15909 llvm_unreachable("unexpected opcode for Neon base update");
15910 case ARMISD::VLD1DUP:
15911 NewOpc = ARMISD::VLD1DUP_UPD;
15912 NumVecs = 1;
15913 break;
15914 case ARMISD::VLD2DUP:
15915 NewOpc = ARMISD::VLD2DUP_UPD;
15916 NumVecs = 2;
15917 break;
15918 case ARMISD::VLD3DUP:
15919 NewOpc = ARMISD::VLD3DUP_UPD;
15920 NumVecs = 3;
15921 break;
15922 case ARMISD::VLD4DUP:
15923 NewOpc = ARMISD::VLD4DUP_UPD;
15924 NumVecs = 4;
15925 break;
15926 case ISD::LOAD:
15927 NewOpc = ARMISD::VLD1_UPD;
15928 NumVecs = 1;
15929 isLaneOp = false;
15930 break;
15931 case ISD::STORE:
15932 NewOpc = ARMISD::VST1_UPD;
15933 NumVecs = 1;
15934 isLaneOp = false;
15935 isLoadOp = false;
15936 break;
15937 }
15938 }
15939
15940 // Find the size of memory referenced by the load/store.
15941 EVT VecTy;
15942 if (isLoadOp) {
15943 VecTy = N->getValueType(0);
15944 } else if (Target.isIntrinsic) {
15945 VecTy = N->getOperand(Target.AddrOpIdx + 1).getValueType();
15946 } else {
15947 assert(Target.isStore &&
15948 "Node has to be a load, a store, or an intrinsic!");
15949 VecTy = N->getOperand(1).getValueType();
15950 }
15951
15952 bool isVLDDUPOp =
15953 NewOpc == ARMISD::VLD1DUP_UPD || NewOpc == ARMISD::VLD2DUP_UPD ||
15954 NewOpc == ARMISD::VLD3DUP_UPD || NewOpc == ARMISD::VLD4DUP_UPD;
15955
15956 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
15957 if (isLaneOp || isVLDDUPOp)
15958 NumBytes /= VecTy.getVectorNumElements();
15959
15960 if (NumBytes >= 3 * 16 && User.ConstInc != NumBytes) {
15961 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
15962 // separate instructions that make it harder to use a non-constant update.
15963 return false;
15964 }
15965
15966 if (SimpleConstIncOnly && User.ConstInc != NumBytes)
15967 return false;
15968
15969 // OK, we found an ADD we can fold into the base update.
15970 // Now, create a _UPD node, taking care of not breaking alignment.
15971
15972 EVT AlignedVecTy = VecTy;
15973 Align Alignment = MemN->getAlign();
15974
15975 // If this is a less-than-standard-aligned load/store, change the type to
15976 // match the standard alignment.
15977 // The alignment is overlooked when selecting _UPD variants; and it's
15978 // easier to introduce bitcasts here than fix that.
15979 // There are 3 ways to get to this base-update combine:
15980 // - intrinsics: they are assumed to be properly aligned (to the standard
15981 // alignment of the memory type), so we don't need to do anything.
15982 // - ARMISD::VLDx nodes: they are only generated from the aforementioned
15983 // intrinsics, so, likewise, there's nothing to do.
15984 // - generic load/store instructions: the alignment is specified as an
15985 // explicit operand, rather than implicitly as the standard alignment
15986 // of the memory type (like the intrinsics). We need to change the
15987 // memory type to match the explicit alignment. That way, we don't
15988 // generate non-standard-aligned ARMISD::VLDx nodes.
15989 if (isa<LSBaseSDNode>(N)) {
15990 if (Alignment.value() < VecTy.getScalarSizeInBits() / 8) {
15991 MVT EltTy = MVT::getIntegerVT(Alignment.value() * 8);
15992 assert(NumVecs == 1 && "Unexpected multi-element generic load/store.");
15993 assert(!isLaneOp && "Unexpected generic load/store lane.");
15994 unsigned NumElts = NumBytes / (EltTy.getSizeInBits() / 8);
15995 AlignedVecTy = MVT::getVectorVT(EltTy, NumElts);
15996 }
15997 // Don't set an explicit alignment on regular load/stores that we want
15998 // to transform to VLD/VST 1_UPD nodes.
15999 // This matches the behavior of regular load/stores, which only get an
16000 // explicit alignment if the MMO alignment is larger than the standard
16001 // alignment of the memory type.
16002 // Intrinsics, however, always get an explicit alignment, set to the
16003 // alignment of the MMO.
16004 Alignment = Align(1);
16005 }
16006
16007 // Create the new updating load/store node.
16008 // First, create an SDVTList for the new updating node's results.
16009 EVT Tys[6];
16010 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16011 unsigned n;
16012 for (n = 0; n < NumResultVecs; ++n)
16013 Tys[n] = AlignedVecTy;
16014 Tys[n++] = MVT::i32;
16015 Tys[n] = MVT::Other;
16016 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16017
16018 // Then, gather the new node's operands.
16020 Ops.push_back(N->getOperand(0)); // incoming chain
16021 Ops.push_back(N->getOperand(Target.AddrOpIdx));
16022 Ops.push_back(User.Inc);
16023
16024 if (StoreSDNode *StN = dyn_cast<StoreSDNode>(N)) {
16025 // Try to match the intrinsic's signature
16026 Ops.push_back(StN->getValue());
16027 } else {
16028 // Loads (and of course intrinsics) match the intrinsics' signature,
16029 // so just add all but the alignment operand.
16030 unsigned LastOperand =
16031 hasAlignment ? N->getNumOperands() - 1 : N->getNumOperands();
16032 for (unsigned i = Target.AddrOpIdx + 1; i < LastOperand; ++i)
16033 Ops.push_back(N->getOperand(i));
16034 }
16035
16036 // For all node types, the alignment operand is always the last one.
16037 Ops.push_back(DAG.getConstant(Alignment.value(), dl, MVT::i32));
16038
16039 // If this is a non-standard-aligned STORE, the penultimate operand is the
16040 // stored value. Bitcast it to the aligned type.
16041 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::STORE) {
16042 SDValue &StVal = Ops[Ops.size() - 2];
16043 StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
16044 }
16045
16046 EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
16047 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
16048 MemN->getMemOperand());
16049
16050 // Update the uses.
16051 SmallVector<SDValue, 5> NewResults;
16052 for (unsigned i = 0; i < NumResultVecs; ++i)
16053 NewResults.push_back(SDValue(UpdN.getNode(), i));
16054
16055 // If this is a non-standard-aligned LOAD, the first result is the loaded
16056 // value. Bitcast it to the expected result type.
16057 if (AlignedVecTy != VecTy && N->getOpcode() == ISD::LOAD) {
16058 SDValue &LdVal = NewResults[0];
16059 LdVal = DAG.getNode(ISD::BITCAST, dl, VecTy, LdVal);
16060 }
16061
16062 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16063 DCI.CombineTo(N, NewResults);
16064 DCI.CombineTo(User.N, SDValue(UpdN.getNode(), NumResultVecs));
16065
16066 return true;
16067}
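// Illustrative sketch (not part of the original file): the effect of this
// combine on a simple NEON load,
//   t1 = vld1 ptr
//   t2 = add ptr, #16
// becomes a single post-incrementing load,
//   t1, t2 = VLD1_UPD ptr, #16
// provided the increment matches the 16 bytes accessed.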
16068
16069// If (opcode ptr inc) is an ADD-like instruction, return the
16070// increment value. Otherwise return 0.
16071static unsigned getPointerConstIncrement(unsigned Opcode, SDValue Ptr,
16072 SDValue Inc, const SelectionDAG &DAG) {
16073 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16074 if (!CInc)
16075 return 0;
16076
16077 switch (Opcode) {
16078 case ARMISD::VLD1_UPD:
16079 case ISD::ADD:
16080 return CInc->getZExtValue();
16081 case ISD::OR: {
16082 if (DAG.haveNoCommonBitsSet(Ptr, Inc)) {
16083 // (OR ptr inc) is the same as (ADD ptr inc)
16084 return CInc->getZExtValue();
16085 }
16086 return 0;
16087 }
16088 default:
16089 return 0;
16090 }
16091}
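// Illustrative sketch (not part of the original file): why an OR can be
// treated as an ADD above. When the pointer and the increment share no set
// bits (an aligned base plus a small offset), OR and ADD agree:
#include <cstdint>
static_assert((UINT32_C(0x1000) | 0x8) == (UINT32_C(0x1000) + 0x8),
              "disjoint bits: or == add");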
16092
16093static bool findPointerConstIncrement(SDNode *N, SDValue *Ptr, SDValue *CInc) {
16094 switch (N->getOpcode()) {
16095 case ISD::ADD:
16096 case ISD::OR: {
16097 if (isa<ConstantSDNode>(N->getOperand(1))) {
16098 *Ptr = N->getOperand(0);
16099 *CInc = N->getOperand(1);
16100 return true;
16101 }
16102 return false;
16103 }
16104 case ARMISD::VLD1_UPD: {
16105 if (isa<ConstantSDNode>(N->getOperand(2))) {
16106 *Ptr = N->getOperand(1);
16107 *CInc = N->getOperand(2);
16108 return true;
16109 }
16110 return false;
16111 }
16112 default:
16113 return false;
16114 }
16115}
16116
16117static bool isValidBaseUpdate(SDNode *N, SDNode *User) {
16118 // Check that the add is independent of the load/store.
16119 // Otherwise, folding it would create a cycle. Search through Addr
16120 // as well, since the User may not be a direct user of Addr and
16121 // only share a base pointer.
16124 Worklist.push_back(N);
16125 Worklist.push_back(User);
16126 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16127 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16128 return false;
16129 return true;
16130}
16131
16132/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP,
16133/// NEON load/store intrinsics, and generic vector load/stores, to merge
16134/// base address updates.
16135/// For generic load/stores, the memory type is assumed to be a vector.
16136/// The caller is assumed to have checked legality.
16137static SDValue CombineBaseUpdate(SDNode *N,
16138 TargetLowering::DAGCombinerInfo &DCI) {
16139 const bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
16140 N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
16141 const bool isStore = N->getOpcode() == ISD::STORE;
16142 const unsigned AddrOpIdx = ((isIntrinsic || isStore) ? 2 : 1);
16143 BaseUpdateTarget Target = {N, isIntrinsic, isStore, AddrOpIdx};
16144
16145 SDValue Addr = N->getOperand(AddrOpIdx);
16146
16148
16149 // Search for a use of the address operand that is an increment.
16150 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16151 UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
16152 SDNode *User = *UI;
16153 if (UI.getUse().getResNo() != Addr.getResNo() ||
16154 User->getNumOperands() != 2)
16155 continue;
16156
16157 SDValue Inc = User->getOperand(UI.getOperandNo() == 1 ? 0 : 1);
16158 unsigned ConstInc =
16159 getPointerConstIncrement(User->getOpcode(), Addr, Inc, DCI.DAG);
16160
16161 if (ConstInc || User->getOpcode() == ISD::ADD)
16162 BaseUpdates.push_back({User, Inc, ConstInc});
16163 }
16164
16165 // If the address is a constant pointer increment itself, find
16166 // another constant increment that has the same base operand
16167 SDValue Base;
16168 SDValue CInc;
16169 if (findPointerConstIncrement(Addr.getNode(), &Base, &CInc)) {
16170 unsigned Offset =
16171 getPointerConstIncrement(Addr->getOpcode(), Base, CInc, DCI.DAG);
16172 for (SDNode::use_iterator UI = Base->use_begin(), UE = Base->use_end();
16173 UI != UE; ++UI) {
16174
16175 SDNode *User = *UI;
16176 if (UI.getUse().getResNo() != Base.getResNo() || User == Addr.getNode() ||
16177 User->getNumOperands() != 2)
16178 continue;
16179
16180 SDValue UserInc = User->getOperand(UI.getOperandNo() == 0 ? 1 : 0);
16181 unsigned UserOffset =
16182 getPointerConstIncrement(User->getOpcode(), Base, UserInc, DCI.DAG);
16183
16184 if (!UserOffset || UserOffset <= Offset)
16185 continue;
16186
16187 unsigned NewConstInc = UserOffset - Offset;
16188 SDValue NewInc = DCI.DAG.getConstant(NewConstInc, SDLoc(N), MVT::i32);
16189 BaseUpdates.push_back({User, NewInc, NewConstInc});
16190 }
16191 }
16192
16193 // Try to fold the load/store with an update that matches memory
16194 // access size. This should work well for sequential loads.
16195 //
16196 // Filter out invalid updates as well.
16197 unsigned NumValidUpd = BaseUpdates.size();
16198 for (unsigned I = 0; I < NumValidUpd;) {
16199 BaseUpdateUser &User = BaseUpdates[I];
16200 if (!isValidBaseUpdate(N, User.N)) {
16201 --NumValidUpd;
16202 std::swap(BaseUpdates[I], BaseUpdates[NumValidUpd]);
16203 continue;
16204 }
16205
16206 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/true, DCI))
16207 return SDValue();
16208 ++I;
16209 }
16210 BaseUpdates.resize(NumValidUpd);
16211
16212 // Try to fold with other users. Non-constant updates are considered
16213 // first, and constant updates are sorted to not break a sequence of
16214 // strided accesses (if there is any).
16215 std::stable_sort(BaseUpdates.begin(), BaseUpdates.end(),
16216 [](const BaseUpdateUser &LHS, const BaseUpdateUser &RHS) {
16217 return LHS.ConstInc < RHS.ConstInc;
16218 });
16219 for (BaseUpdateUser &User : BaseUpdates) {
16220 if (TryCombineBaseUpdate(Target, User, /*SimpleConstIncOnly=*/false, DCI))
16221 return SDValue();
16222 }
16223 return SDValue();
16224}
16225
16228 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16229 return SDValue();
16230
16231 return CombineBaseUpdate(N, DCI);
16232}
16233
16236 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
16237 return SDValue();
16238
16239 SelectionDAG &DAG = DCI.DAG;
16240 SDValue Addr = N->getOperand(2);
16241 MemSDNode *MemN = cast<MemSDNode>(N);
16242 SDLoc dl(N);
16243
16244 // For the stores, where there are multiple intrinsics we only actually want
16245 // to post-inc the last of them.
16246 unsigned IntNo = N->getConstantOperandVal(1);
16247 if (IntNo == Intrinsic::arm_mve_vst2q && N->getConstantOperandVal(5) != 1)
16248 return SDValue();
16249 if (IntNo == Intrinsic::arm_mve_vst4q && N->getConstantOperandVal(7) != 3)
16250 return SDValue();
16251
16252 // Search for a use of the address operand that is an increment.
16253 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
16254 UE = Addr.getNode()->use_end();
16255 UI != UE; ++UI) {
16256 SDNode *User = *UI;
16257 if (User->getOpcode() != ISD::ADD ||
16258 UI.getUse().getResNo() != Addr.getResNo())
16259 continue;
16260
16261 // Check that the add is independent of the load/store. Otherwise, folding
16262 // it would create a cycle. We can avoid searching through Addr as it's a
16263 // predecessor to both.
16266 Visited.insert(Addr.getNode());
16267 Worklist.push_back(N);
16268 Worklist.push_back(User);
16269 if (SDNode::hasPredecessorHelper(N, Visited, Worklist) ||
16270 SDNode::hasPredecessorHelper(User, Visited, Worklist))
16271 continue;
16272
16273 // Find the new opcode for the updating load/store.
16274 bool isLoadOp = true;
16275 unsigned NewOpc = 0;
16276 unsigned NumVecs = 0;
16277 switch (IntNo) {
16278 default:
16279 llvm_unreachable("unexpected intrinsic for MVE VLDn combine");
16280 case Intrinsic::arm_mve_vld2q:
16281 NewOpc = ARMISD::VLD2_UPD;
16282 NumVecs = 2;
16283 break;
16284 case Intrinsic::arm_mve_vld4q:
16285 NewOpc = ARMISD::VLD4_UPD;
16286 NumVecs = 4;
16287 break;
16288 case Intrinsic::arm_mve_vst2q:
16289 NewOpc = ARMISD::VST2_UPD;
16290 NumVecs = 2;
16291 isLoadOp = false;
16292 break;
16293 case Intrinsic::arm_mve_vst4q:
16294 NewOpc = ARMISD::VST4_UPD;
16295 NumVecs = 4;
16296 isLoadOp = false;
16297 break;
16298 }
16299
16300 // Find the size of memory referenced by the load/store.
16301 EVT VecTy;
16302 if (isLoadOp) {
16303 VecTy = N->getValueType(0);
16304 } else {
16305 VecTy = N->getOperand(3).getValueType();
16306 }
16307
16308 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
16309
16310 // If the increment is a constant, it must match the memory ref size.
16311 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
16312 ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode());
16313 if (!CInc || CInc->getZExtValue() != NumBytes)
16314 continue;
16315
16316 // Create the new updating load/store node.
16317 // First, create an SDVTList for the new updating node's results.
16318 EVT Tys[6];
16319 unsigned NumResultVecs = (isLoadOp ? NumVecs : 0);
16320 unsigned n;
16321 for (n = 0; n < NumResultVecs; ++n)
16322 Tys[n] = VecTy;
16323 Tys[n++] = MVT::i32;
16324 Tys[n] = MVT::Other;
16325 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumResultVecs + 2));
16326
16327 // Then, gather the new node's operands.
16328 SmallVector<SDValue, 8> Ops;
16329 Ops.push_back(N->getOperand(0)); // incoming chain
16330 Ops.push_back(N->getOperand(2)); // ptr
16331 Ops.push_back(Inc);
16332
16333 for (unsigned i = 3; i < N->getNumOperands(); ++i)
16334 Ops.push_back(N->getOperand(i));
16335
16336 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, VecTy,
16337 MemN->getMemOperand());
16338
16339 // Update the uses.
16340 SmallVector<SDValue, 5> NewResults;
16341 for (unsigned i = 0; i < NumResultVecs; ++i)
16342 NewResults.push_back(SDValue(UpdN.getNode(), i));
16343
16344 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
16345 DCI.CombineTo(N, NewResults);
16346 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
16347
16348 break;
16349 }
16350
16351 return SDValue();
16352}
16353
16354/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
16355/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
16356/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and
16357/// return true.
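// Illustrative example (editorial addition, not from the original source): if a
// vld2lane result feeds only VDUPLANE nodes that all duplicate the same lane,
//   %v = arm.neon.vld2lane(..., lane 1)
//   VDUPLANE(%v.0, 1), VDUPLANE(%v.1, 1)
// the whole group can be replaced by a single VLD2DUP, which loads the element
// once and duplicates it across both result vectors.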
16358 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
16359 SelectionDAG &DAG = DCI.DAG;
16360 EVT VT = N->getValueType(0);
16361 // vldN-dup instructions only support 64-bit vectors for N > 1.
16362 if (!VT.is64BitVector())
16363 return false;
16364
16365 // Check if the VDUPLANE operand is a vldN-dup intrinsic.
16366 SDNode *VLD = N->getOperand(0).getNode();
16367 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
16368 return false;
16369 unsigned NumVecs = 0;
16370 unsigned NewOpc = 0;
16371 unsigned IntNo = VLD->getConstantOperandVal(1);
16372 if (IntNo == Intrinsic::arm_neon_vld2lane) {
16373 NumVecs = 2;
16374 NewOpc = ARMISD::VLD2DUP;
16375 } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
16376 NumVecs = 3;
16377 NewOpc = ARMISD::VLD3DUP;
16378 } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
16379 NumVecs = 4;
16380 NewOpc = ARMISD::VLD4DUP;
16381 } else {
16382 return false;
16383 }
16384
16385 // First check that all the vldN-lane uses are VDUPLANEs and that the lane
16386 // numbers match the load.
16387 unsigned VLDLaneNo = VLD->getConstantOperandVal(NumVecs + 3);
16388 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16389 UI != UE; ++UI) {
16390 // Ignore uses of the chain result.
16391 if (UI.getUse().getResNo() == NumVecs)
16392 continue;
16393 SDNode *User = *UI;
16394 if (User->getOpcode() != ARMISD::VDUPLANE ||
16395 VLDLaneNo != User->getConstantOperandVal(1))
16396 return false;
16397 }
16398
16399 // Create the vldN-dup node.
16400 EVT Tys[5];
16401 unsigned n;
16402 for (n = 0; n < NumVecs; ++n)
16403 Tys[n] = VT;
16404 Tys[n] = MVT::Other;
16405 SDVTList SDTys = DAG.getVTList(ArrayRef(Tys, NumVecs + 1));
16406 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
16407 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
16408 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys,
16409 Ops, VLDMemInt->getMemoryVT(),
16410 VLDMemInt->getMemOperand());
16411
16412 // Update the uses.
16413 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
16414 UI != UE; ++UI) {
16415 unsigned ResNo = UI.getUse().getResNo();
16416 // Ignore uses of the chain result.
16417 if (ResNo == NumVecs)
16418 continue;
16419 SDNode *User = *UI;
16420 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
16421 }
16422
16423 // Now the vldN-lane intrinsic is dead except for its chain result.
16424 // Update uses of the chain.
16425 std::vector<SDValue> VLDDupResults;
16426 for (unsigned n = 0; n < NumVecs; ++n)
16427 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
16428 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
16429 DCI.CombineTo(VLD, VLDDupResults);
16430
16431 return true;
16432}
16433
16434/// PerformVDUPLANECombine - Target-specific dag combine xforms for
16435/// ARMISD::VDUPLANE.
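// Illustrative example (editorial addition, not from the original source): with
// MVE integer ops, VDUPLANE(%vec, 2) is rewritten as
// VDUP(EXTRACT_VECTOR_ELT(%vec, 2)), since MVE has no lane-duplicating form;
// the NEON path instead tries the vldN-dup and VMOVIMM/VMVNIMM folds below.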
16436 static SDValue PerformVDUPLANECombine(SDNode *N,
16437 TargetLowering::DAGCombinerInfo &DCI,
16438 const ARMSubtarget *Subtarget) {
16439 SDValue Op = N->getOperand(0);
16440 EVT VT = N->getValueType(0);
16441
16442 // On MVE, we just convert the VDUPLANE to a VDUP with an extract.
16443 if (Subtarget->hasMVEIntegerOps()) {
16444 EVT ExtractVT = VT.getVectorElementType();
16445 // We need to ensure we are creating a legal type.
16446 if (!DCI.DAG.getTargetLoweringInfo().isTypeLegal(ExtractVT))
16447 ExtractVT = MVT::i32;
16448 SDValue Extract = DCI.DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), ExtractVT,
16449 N->getOperand(0), N->getOperand(1));
16450 return DCI.DAG.getNode(ARMISD::VDUP, SDLoc(N), VT, Extract);
16451 }
16452
16453 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
16454 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
16455 if (CombineVLDDUP(N, DCI))
16456 return SDValue(N, 0);
16457
16458 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
16459 // redundant. Ignore bit_converts for now; element sizes are checked below.
16460 while (Op.getOpcode() == ISD::BITCAST)
16461 Op = Op.getOperand(0);
16462 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
16463 return SDValue();
16464
16465 // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
16466 unsigned EltSize = Op.getScalarValueSizeInBits();
16467 // The canonical VMOV for a zero vector uses a 32-bit element size.
16468 unsigned Imm = Op.getConstantOperandVal(0);
16469 unsigned EltBits;
16470 if (ARM_AM::decodeVMOVModImm(Imm, EltBits) == 0)
16471 EltSize = 8;
16472 if (EltSize > VT.getScalarSizeInBits())
16473 return SDValue();
16474
16475 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
16476}
16477
16478/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
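// Illustrative example (editorial addition, not from the original source): on
// NEON, VDUP(load i16 [Rn]) becomes VLD1DUP [Rn], avoiding a separate scalar
// load plus duplicate; on MVE, f32/f16 scalars are first moved to a GPR
// (bitcast / VMOVrh) because the VDUP source has to come from a
// general-purpose register.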
16479 static SDValue PerformVDUPCombine(SDNode *N, SelectionDAG &DAG,
16480 const ARMSubtarget *Subtarget) {
16481 SDValue Op = N->getOperand(0);
16482 SDLoc dl(N);
16483
16484 if (Subtarget->hasMVEIntegerOps()) {
16485 // Convert VDUP f32 -> VDUP BITCAST i32 under MVE, as we know the value will
16486 // need to come from a GPR.
16487 if (Op.getValueType() == MVT::f32)
16488 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16489 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op));
16490 else if (Op.getValueType() == MVT::f16)
16491 return DAG.getNode(ARMISD::VDUP, dl, N->getValueType(0),
16492 DAG.getNode(ARMISD::VMOVrh, dl, MVT::i32, Op));
16493 }
16494
16495 if (!Subtarget->hasNEON())
16496 return SDValue();
16497
16498 // Match VDUP(LOAD) -> VLD1DUP.
16499 // We match this pattern here rather than waiting for isel because the
16500 // transform is only legal for unindexed loads.
16501 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
16502 if (LD && Op.hasOneUse() && LD->isUnindexed() &&
16503 LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
16504 SDValue Ops[] = {LD->getOperand(0), LD->getOperand(1),
16505 DAG.getConstant(LD->getAlign().value(), SDLoc(N), MVT::i32)};
16506 SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
16507 SDValue VLDDup =
16508 DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys, Ops,
16509 LD->getMemoryVT(), LD->getMemOperand());
16510 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
16511 return VLDDup;
16512 }
16513
16514 return SDValue();
16515}
16516
16517 static SDValue PerformLOADCombine(SDNode *N,
16518 TargetLowering::DAGCombinerInfo &DCI,
16519 const ARMSubtarget *Subtarget) {
16520 EVT VT = N->getValueType(0);
16521
16522 // If this is a legal vector load, try to combine it into a VLD1_UPD.
16523 if (Subtarget->hasNEON() && ISD::isNormalLoad(N) && VT.isVector() &&
16524 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16525 return CombineBaseUpdate(N, DCI);
16526
16527 return SDValue();
16528}
16529
16530// Optimize trunc store (of multiple scalars) to shuffle and store. First,
16531// pack all of the elements in one place. Next, store to memory in fewer
16532// chunks.
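// Illustrative example (editorial addition, not from the original source): a
// truncating store of v4i32 to v4i8 is handled by bitcasting to v16i8,
// shuffling the four wanted bytes to the bottom of the register, and storing a
// single i32 chunk, instead of emitting four separate byte stores.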
16533 static SDValue PerformTruncatingStoreCombine(StoreSDNode *St,
16534 SelectionDAG &DAG) {
16535 SDValue StVal = St->getValue();
16536 EVT VT = StVal.getValueType();
16537 if (!St->isTruncatingStore() || !VT.isVector())
16538 return SDValue();
16539 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16540 EVT StVT = St->getMemoryVT();
16541 unsigned NumElems = VT.getVectorNumElements();
16542 assert(StVT != VT && "Cannot truncate to the same type");
16543 unsigned FromEltSz = VT.getScalarSizeInBits();
16544 unsigned ToEltSz = StVT.getScalarSizeInBits();
16545
16546 // From, To sizes and ElemCount must be pow of two
16547 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz))
16548 return SDValue();
16549
16550 // We are going to use the original vector elt for storing.
16551 // Accumulated smaller vector elements must be a multiple of the store size.
16552 if (0 != (NumElems * FromEltSz) % ToEltSz)
16553 return SDValue();
16554
16555 unsigned SizeRatio = FromEltSz / ToEltSz;
16556 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
16557
16558 // Create a type on which we perform the shuffle.
16559 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
16560 NumElems * SizeRatio);
16561 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
16562
16563 SDLoc DL(St);
16564 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
16565 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
16566 for (unsigned i = 0; i < NumElems; ++i)
16567 ShuffleVec[i] = DAG.getDataLayout().isBigEndian() ? (i + 1) * SizeRatio - 1
16568 : i * SizeRatio;
16569
16570 // Can't shuffle using an illegal type.
16571 if (!TLI.isTypeLegal(WideVecVT))
16572 return SDValue();
16573
16574 SDValue Shuff = DAG.getVectorShuffle(
16575 WideVecVT, DL, WideVec, DAG.getUNDEF(WideVec.getValueType()), ShuffleVec);
16576 // At this point all of the data is stored at the bottom of the
16577 // register. We now need to save it to mem.
16578
16579 // Find the largest store unit
16580 MVT StoreType = MVT::i8;
16581 for (MVT Tp : MVT::integer_valuetypes()) {
16582 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
16583 StoreType = Tp;
16584 }
16585 // Didn't find a legal store type.
16586 if (!TLI.isTypeLegal(StoreType))
16587 return SDValue();
16588
16589 // Bitcast the original vector into a vector of store-size units
16590 EVT StoreVecVT =
16591 EVT::getVectorVT(*DAG.getContext(), StoreType,
16592 VT.getSizeInBits() / EVT(StoreType).getSizeInBits());
16593 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
16594 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
16595 SmallVector<SDValue, 8> Chains;
16596 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits() / 8, DL,
16597 TLI.getPointerTy(DAG.getDataLayout()));
16598 SDValue BasePtr = St->getBasePtr();
16599
16600 // Perform one or more big stores into memory.
16601 unsigned E = (ToEltSz * NumElems) / StoreType.getSizeInBits();
16602 for (unsigned I = 0; I < E; I++) {
16603 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreType,
16604 ShuffWide, DAG.getIntPtrConstant(I, DL));
16605 SDValue Ch =
16606 DAG.getStore(St->getChain(), DL, SubVec, BasePtr, St->getPointerInfo(),
16607 St->getAlign(), St->getMemOperand()->getFlags());
16608 BasePtr =
16609 DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, Increment);
16610 Chains.push_back(Ch);
16611 }
16612 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
16613}
16614
16615// Try taking a single vector store from an fpround (which would otherwise turn
16616// into an expensive buildvector) and splitting it into a series of narrowing
16617// stores.
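// Illustrative example (editorial addition, not from the original source):
// storing (fpround v8f32 -> v8f16) is split into two slices; each v4f32 half is
// narrowed with a VCVTN into f16 lanes and written out as a truncating
// v4i32 -> v4i16 store, so the full v8f16 vector is never built up first.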
16618 static SDValue PerformSplittingToNarrowingStores(StoreSDNode *St,
16619 SelectionDAG &DAG) {
16620 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16621 return SDValue();
16622 SDValue Trunc = St->getValue();
16623 if (Trunc->getOpcode() != ISD::FP_ROUND)
16624 return SDValue();
16625 EVT FromVT = Trunc->getOperand(0).getValueType();
16626 EVT ToVT = Trunc.getValueType();
16627 if (!ToVT.isVector())
16628 return SDValue();
16629 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements());
16630 EVT ToEltVT = ToVT.getVectorElementType();
16631 EVT FromEltVT = FromVT.getVectorElementType();
16632
16633 if (FromEltVT != MVT::f32 || ToEltVT != MVT::f16)
16634 return SDValue();
16635
16636 unsigned NumElements = 4;
16637 if (FromVT.getVectorNumElements() % NumElements != 0)
16638 return SDValue();
16639
16640 // Test if the Trunc will be convertible to a VMOVN with a shuffle, and if so
16641 // use the VMOVN over splitting the store. We are looking for patterns of:
16642 // !rev: 0 N 1 N+1 2 N+2 ...
16643 // rev: N 0 N+1 1 N+2 2 ...
16644 // The shuffle may either be a single source (in which case N = NumElts/2) or
16645 // two inputs extended with concat to the same size (in which case N =
16646 // NumElts).
16647 auto isVMOVNShuffle = [&](ShuffleVectorSDNode *SVN, bool Rev) {
16648 ArrayRef<int> M = SVN->getMask();
16649 unsigned NumElts = ToVT.getVectorNumElements();
16650 if (SVN->getOperand(1).isUndef())
16651 NumElts /= 2;
16652
16653 unsigned Off0 = Rev ? NumElts : 0;
16654 unsigned Off1 = Rev ? 0 : NumElts;
16655
16656 for (unsigned I = 0; I < NumElts; I += 2) {
16657 if (M[I] >= 0 && M[I] != (int)(Off0 + I / 2))
16658 return false;
16659 if (M[I + 1] >= 0 && M[I + 1] != (int)(Off1 + I / 2))
16660 return false;
16661 }
16662
16663 return true;
16664 };
16665
16666 if (auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Trunc.getOperand(0)))
16667 if (isVMOVNShuffle(Shuffle, false) || isVMOVNShuffle(Shuffle, true))
16668 return SDValue();
16669
16670 LLVMContext &C = *DAG.getContext();
16671 SDLoc DL(St);
16672 // Details about the old store
16673 SDValue Ch = St->getChain();
16674 SDValue BasePtr = St->getBasePtr();
16675 Align Alignment = St->getOriginalAlign();
16676 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16677 AAMDNodes AAInfo = St->getAAInfo();
16678
16679 // We split the store into slices of NumElements. fp16 trunc stores are vcvt
16680 // and then stored as truncating integer stores.
16681 EVT NewFromVT = EVT::getVectorVT(C, FromEltVT, NumElements);
16682 EVT NewToVT = EVT::getVectorVT(
16683 C, EVT::getIntegerVT(C, ToEltVT.getSizeInBits()), NumElements);
16684
16685 SmallVector<SDValue, 4> Stores;
16686 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
16687 unsigned NewOffset = i * NumElements * ToEltVT.getSizeInBits() / 8;
16688 SDValue NewPtr =
16689 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16690
16691 SDValue Extract =
16692 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewFromVT, Trunc.getOperand(0),
16693 DAG.getConstant(i * NumElements, DL, MVT::i32));
16694
16695 SDValue FPTrunc =
16696 DAG.getNode(ARMISD::VCVTN, DL, MVT::v8f16, DAG.getUNDEF(MVT::v8f16),
16697 Extract, DAG.getConstant(0, DL, MVT::i32));
16698 Extract = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v4i32, FPTrunc);
16699
16700 SDValue Store = DAG.getTruncStore(
16701 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16702 NewToVT, Alignment, MMOFlags, AAInfo);
16703 Stores.push_back(Store);
16704 }
16705 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16706}
16707
16708// Try taking a single vector store from an MVETRUNC (which would otherwise turn
16709// into an expensive buildvector) and splitting it into a series of narrowing
16710// stores.
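// Illustrative example (editorial addition, not from the original source): for
// store(MVETRUNC(a, b)) each source half is written directly as a truncating
// store, the first at offset 0 and the second at half the vector size, so the
// MVETRUNC never has to be materialised as a full-width narrow vector.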
16711 static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St,
16712 SelectionDAG &DAG) {
16713 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16714 return SDValue();
16715 SDValue Trunc = St->getValue();
16716 if (Trunc->getOpcode() != ARMISD::MVETRUNC)
16717 return SDValue();
16718 EVT FromVT = Trunc->getOperand(0).getValueType();
16719 EVT ToVT = Trunc.getValueType();
16720
16721 LLVMContext &C = *DAG.getContext();
16722 SDLoc DL(St);
16723 // Details about the old store
16724 SDValue Ch = St->getChain();
16725 SDValue BasePtr = St->getBasePtr();
16726 Align Alignment = St->getOriginalAlign();
16727 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16728 AAMDNodes AAInfo = St->getAAInfo();
16729
16730 EVT NewToVT = EVT::getVectorVT(C, ToVT.getVectorElementType(),
16731 FromVT.getVectorNumElements());
16732
16733 SmallVector<SDValue, 4> Stores;
16734 for (unsigned i = 0; i < Trunc.getNumOperands(); i++) {
16735 unsigned NewOffset =
16736 i * FromVT.getVectorNumElements() * ToVT.getScalarSizeInBits() / 8;
16737 SDValue NewPtr =
16738 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
16739
16740 SDValue Extract = Trunc.getOperand(i);
16741 SDValue Store = DAG.getTruncStore(
16742 Ch, DL, Extract, NewPtr, St->getPointerInfo().getWithOffset(NewOffset),
16743 NewToVT, Alignment, MMOFlags, AAInfo);
16744 Stores.push_back(Store);
16745 }
16746 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
16747}
16748
16749// Given a floating point store from an extracted vector, with an integer
16750// VGETLANE that already exists, store the existing VGETLANEu directly. This can
16751// help reduce fp register pressure, doesn't require the fp extract and allows
16752// use of more integer post-inc stores not available with vstr.
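// Illustrative example (editorial addition, not from the original source): if
// the DAG already contains VGETLANEu(%vec, lane) for an integer use, then
//   store f16 (extractelement %vec, lane)
// is re-expressed as a truncating i32 -> i16 store of that existing VGETLANEu,
// keeping the value in the integer pipeline instead of going through vstr.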
16753 static SDValue PerformExtractFpToIntStores(StoreSDNode *St, SelectionDAG &DAG) {
16754 if (!St->isSimple() || St->isTruncatingStore() || !St->isUnindexed())
16755 return SDValue();
16756 SDValue Extract = St->getValue();
16757 EVT VT = Extract.getValueType();
16758 // For now only uses f16. This may be useful for f32 too, but that will
16759 // be bitcast(extract), not the VGETLANEu we currently check here.
16760 if (VT != MVT::f16 || Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16761 return SDValue();
16762
16763 SDNode *GetLane =
16764 DAG.getNodeIfExists(ARMISD::VGETLANEu, DAG.getVTList(MVT::i32),
16765 {Extract.getOperand(0), Extract.getOperand(1)});
16766 if (!GetLane)
16767 return SDValue();
16768
16769 LLVMContext &C = *DAG.getContext();
16770 SDLoc DL(St);
16771 // Create a new integer store to replace the existing floating point version.
16772 SDValue Ch = St->getChain();
16773 SDValue BasePtr = St->getBasePtr();
16774 Align Alignment = St->getOriginalAlign();
16775 MachineMemOperand::Flags MMOFlags = St->getMemOperand()->getFlags();
16776 AAMDNodes AAInfo = St->getAAInfo();
16777 EVT NewToVT = EVT::getIntegerVT(C, VT.getSizeInBits());
16778 SDValue Store = DAG.getTruncStore(Ch, DL, SDValue(GetLane, 0), BasePtr,
16779 St->getPointerInfo(), NewToVT, Alignment,
16780 MMOFlags, AAInfo);
16781
16782 return Store;
16783}
16784
16785/// PerformSTORECombine - Target-specific dag combine xforms for
16786/// ISD::STORE.
16787 static SDValue PerformSTORECombine(SDNode *N,
16788 TargetLowering::DAGCombinerInfo &DCI,
16789 const ARMSubtarget *Subtarget) {
16790 StoreSDNode *St = cast<StoreSDNode>(N);
16791 if (St->isVolatile())
16792 return SDValue();
16793 SDValue StVal = St->getValue();
16794 EVT VT = StVal.getValueType();
16795
16796 if (Subtarget->hasNEON())
16797 if (SDValue Store = PerformTruncatingStoreCombine(St, DCI.DAG))
16798 return Store;
16799
16800 if (Subtarget->hasMVEFloatOps())
16801 if (SDValue NewToken = PerformSplittingToNarrowingStores(St, DCI.DAG))
16802 return NewToken;
16803
16804 if (Subtarget->hasMVEIntegerOps()) {
16805 if (SDValue NewChain = PerformExtractFpToIntStores(St, DCI.DAG))
16806 return NewChain;
16807 if (SDValue NewToken =
16808 PerformSplittingMVETruncToNarrowingStores(St, DCI.DAG))
16809 return NewToken;
16810 }
16811
16812 if (!ISD::isNormalStore(St))
16813 return SDValue();
16814
16815 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
16816 // ARM stores of arguments in the same cache line.
16817 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
16818 StVal.getNode()->hasOneUse()) {
16819 SelectionDAG &DAG = DCI.DAG;
16820 bool isBigEndian = DAG.getDataLayout().isBigEndian();
16821 SDLoc DL(St);
16822 SDValue BasePtr = St->getBasePtr();
16823 SDValue NewST1 = DAG.getStore(
16824 St->getChain(), DL, StVal.getNode()->getOperand(isBigEndian ? 1 : 0),
16825 BasePtr, St->getPointerInfo(), St->getOriginalAlign(),
16826 St->getMemOperand()->getFlags());
16827
16828 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
16829 DAG.getConstant(4, DL, MVT::i32));
16830 return DAG.getStore(NewST1.getValue(0), DL,
16831 StVal.getNode()->getOperand(isBigEndian ? 0 : 1),
16832 OffsetPtr, St->getPointerInfo().getWithOffset(4),
16833 St->getOriginalAlign(),
16834 St->getMemOperand()->getFlags());
16835 }
16836
16837 if (StVal.getValueType() == MVT::i64 &&
16838 StVal.getNode()->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16839
16840 // Bitcast an i64 store extracted from a vector to f64.
16841 // Otherwise, the i64 value will be legalized to a pair of i32 values.
16842 SelectionDAG &DAG = DCI.DAG;
16843 SDLoc dl(StVal);
16844 SDValue IntVec = StVal.getOperand(0);
16845 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
16846 IntVec.getValueType().getVectorNumElements()/2);
16847 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
16848 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
16849 Vec, StVal.getOperand(1));
16850 dl = SDLoc(N);
16851 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
16852 // Make the DAGCombiner fold the bitcasts.
16853 DCI.AddToWorklist(Vec.getNode());
16854 DCI.AddToWorklist(ExtElt.getNode());
16855 DCI.AddToWorklist(V.getNode());
16856 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
16857 St->getPointerInfo(), St->getAlign(),
16858 St->getMemOperand()->getFlags(), St->getAAInfo());
16859 }
16860
16861 // If this is a legal vector store, try to combine it into a VST1_UPD.
16862 if (Subtarget->hasNEON() && ISD::isNormalStore(N) && VT.isVector() &&
16863 DCI.DAG.getTargetLoweringInfo().isTypeLegal(VT))
16864 return CombineBaseUpdate(N, DCI);
16865
16866 return SDValue();
16867}
16868
16869/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
16870/// can replace combinations of VMUL and VCVT (floating-point to integer)
16871/// when the VMUL has a constant operand that is a power of 2.
16872///
16873/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
16874/// vmul.f32 d16, d17, d16
16875/// vcvt.s32.f32 d16, d16
16876/// becomes:
16877/// vcvt.s32.f32 d16, d16, #3
16878 static SDValue PerformVCVTCombine(SDNode *N, SelectionDAG &DAG,
16879 const ARMSubtarget *Subtarget) {
16880 if (!Subtarget->hasNEON())
16881 return SDValue();
16882
16883 SDValue Op = N->getOperand(0);
16884 if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
16885 Op.getOpcode() != ISD::FMUL)
16886 return SDValue();
16887
16888 SDValue ConstVec = Op->getOperand(1);
16889 if (!isa<BuildVectorSDNode>(ConstVec))
16890 return SDValue();
16891
16892 MVT FloatTy = Op.getSimpleValueType().getVectorElementType();
16893 uint32_t FloatBits = FloatTy.getSizeInBits();
16894 MVT IntTy = N->getSimpleValueType(0).getVectorElementType();
16895 uint32_t IntBits = IntTy.getSizeInBits();
16896 unsigned NumLanes = Op.getValueType().getVectorNumElements();
16897 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
16898 // These instructions only exist converting from f32 to i32. We can handle
16899 // smaller integers by generating an extra truncate, but larger ones would
16900 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
16901 // these instructions only support v2i32/v4i32 types.
16902 return SDValue();
16903 }
16904
16905 BitVector UndefElements;
16906 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
16907 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
16908 if (C == -1 || C == 0 || C > 32)
16909 return SDValue();
16910
16911 SDLoc dl(N);
16912 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
16913 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
16914 Intrinsic::arm_neon_vcvtfp2fxu;
16915 SDValue FixConv = DAG.getNode(
16916 ISD::INTRINSIC_WO_CHAIN, dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
16917 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32), Op->getOperand(0),
16918 DAG.getConstant(C, dl, MVT::i32));
16919
16920 if (IntBits < FloatBits)
16921 FixConv = DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), FixConv);
16922
16923 return FixConv;
16924}
16925
16926 static SDValue PerformFAddVSelectCombine(SDNode *N, SelectionDAG &DAG,
16927 const ARMSubtarget *Subtarget) {
16928 if (!Subtarget->hasMVEFloatOps())
16929 return SDValue();
16930
16931 // Turn (fadd x, (vselect c, y, -0.0)) into (vselect c, (fadd x, y), x)
16932 // The second form can be more easily turned into a predicated vadd, and
16933 // possibly combined into a fma to become a predicated vfma.
16934 SDValue Op0 = N->getOperand(0);
16935 SDValue Op1 = N->getOperand(1);
16936 EVT VT = N->getValueType(0);
16937 SDLoc DL(N);
16938
16939 // The identity element for a fadd is -0.0 or +0.0 when the nsz flag is set,
16940 // which these VMOV's represent.
16941 auto isIdentitySplat = [&](SDValue Op, bool NSZ) {
16942 if (Op.getOpcode() != ISD::BITCAST ||
16943 Op.getOperand(0).getOpcode() != ARMISD::VMOVIMM)
16944 return false;
16945 uint64_t ImmVal = Op.getOperand(0).getConstantOperandVal(0);
16946 if (VT == MVT::v4f32 && (ImmVal == 1664 || (ImmVal == 0 && NSZ)))
16947 return true;
16948 if (VT == MVT::v8f16 && (ImmVal == 2688 || (ImmVal == 0 && NSZ)))
16949 return true;
16950 return false;
16951 };
16952
16953 if (Op0.getOpcode() == ISD::VSELECT && Op1.getOpcode() != ISD::VSELECT)
16954 std::swap(Op0, Op1);
16955
16956 if (Op1.getOpcode() != ISD::VSELECT)
16957 return SDValue();
16958
16959 SDNodeFlags FaddFlags = N->getFlags();
16960 bool NSZ = FaddFlags.hasNoSignedZeros();
16961 if (!isIdentitySplat(Op1.getOperand(2), NSZ))
16962 return SDValue();
16963
16964 SDValue FAdd =
16965 DAG.getNode(ISD::FADD, DL, VT, Op0, Op1.getOperand(1), FaddFlags);
16966 return DAG.getNode(ISD::VSELECT, DL, VT, Op1.getOperand(0), FAdd, Op0, FaddFlags);
16967}
16968
16969 static SDValue PerformFADDVCMLACombine(SDNode *N, SelectionDAG &DAG) {
16970 SDValue LHS = N->getOperand(0);
16971 SDValue RHS = N->getOperand(1);
16972 EVT VT = N->getValueType(0);
16973 SDLoc DL(N);
16974
16975 if (!N->getFlags().hasAllowReassociation())
16976 return SDValue();
16977
16978 // Combine fadd(a, vcmla(b, c, d)) -> vcmla(fadd(a, b), c, d)
16979 auto ReassocComplex = [&](SDValue A, SDValue B) {
16980 if (A.getOpcode() != ISD::INTRINSIC_WO_CHAIN)
16981 return SDValue();
16982 unsigned Opc = A.getConstantOperandVal(0);
16983 if (Opc != Intrinsic::arm_mve_vcmlaq)
16984 return SDValue();
16985 SDValue VCMLA = DAG.getNode(
16986 ISD::INTRINSIC_WO_CHAIN, DL, VT, A.getOperand(0), A.getOperand(1),
16987 DAG.getNode(ISD::FADD, DL, VT, A.getOperand(2), B, N->getFlags()),
16988 A.getOperand(3), A.getOperand(4));
16989 VCMLA->setFlags(A->getFlags());
16990 return VCMLA;
16991 };
16992 if (SDValue R = ReassocComplex(LHS, RHS))
16993 return R;
16994 if (SDValue R = ReassocComplex(RHS, LHS))
16995 return R;
16996
16997 return SDValue();
16998}
16999
17000 static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG,
17001 const ARMSubtarget *Subtarget) {
17002 if (SDValue S = PerformFAddVSelectCombine(N, DAG, Subtarget))
17003 return S;
17004 if (SDValue S = PerformFADDVCMLACombine(N, DAG))
17005 return S;
17006 return SDValue();
17007}
17008
17009/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
17010/// can replace combinations of VCVT (integer to floating-point) and VDIV
17011/// when the VDIV has a constant operand that is a power of 2.
17012///
17013/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
17014/// vcvt.f32.s32 d16, d16
17015/// vdiv.f32 d16, d17, d16
17016/// becomes:
17017/// vcvt.f32.s32 d16, d16, #3
17018 static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG,
17019 const ARMSubtarget *Subtarget) {
17020 if (!Subtarget->hasNEON())
17021 return SDValue();
17022
17023 SDValue Op = N->getOperand(0);
17024 unsigned OpOpcode = Op.getNode()->getOpcode();
17025 if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple() ||
17026 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
17027 return SDValue();
17028
17029 SDValue ConstVec = N->getOperand(1);
17030 if (!isa<BuildVectorSDNode>(ConstVec))
17031 return SDValue();
17032
17033 MVT FloatTy = N->getSimpleValueType(0).getVectorElementType();
17034 uint32_t FloatBits = FloatTy.getSizeInBits();
17035 MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType();
17036 uint32_t IntBits = IntTy.getSizeInBits();
17037 unsigned NumLanes = Op.getValueType().getVectorNumElements();
17038 if (FloatBits != 32 || IntBits > 32 || (NumLanes != 4 && NumLanes != 2)) {
17039 // These instructions only exist converting from i32 to f32. We can handle
17040 // smaller integers by generating an extra extend, but larger ones would
17041 // be lossy. We also can't handle anything other than 2 or 4 lanes, since
17042 // these instructions only support v2i32/v4i32 types.
17043 return SDValue();
17044 }
17045
17046 BitVector UndefElements;
17047 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(ConstVec);
17048 int32_t C = BV->getConstantFPSplatPow2ToLog2Int(&UndefElements, 33);
17049 if (C == -1 || C == 0 || C > 32)
17050 return SDValue();
17051
17052 SDLoc dl(N);
17053 bool isSigned = OpOpcode == ISD::SINT_TO_FP;
17054 SDValue ConvInput = Op.getOperand(0);
17055 if (IntBits < FloatBits)
17056 ConvInput = DAG.getNode(isSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
17057 dl, NumLanes == 2 ? MVT::v2i32 : MVT::v4i32,
17058 ConvInput);
17059
17060 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
17061 Intrinsic::arm_neon_vcvtfxu2fp;
17062 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
17063 Op.getValueType(),
17064 DAG.getConstant(IntrinsicOpcode, dl, MVT::i32),
17065 ConvInput, DAG.getConstant(C, dl, MVT::i32));
17066}
17067
17068 static SDValue PerformVECREDUCE_ADDCombine(SDNode *N, SelectionDAG &DAG,
17069 const ARMSubtarget *ST) {
17070 if (!ST->hasMVEIntegerOps())
17071 return SDValue();
17072
17073 assert(N->getOpcode() == ISD::VECREDUCE_ADD);
17074 EVT ResVT = N->getValueType(0);
17075 SDValue N0 = N->getOperand(0);
17076 SDLoc dl(N);
17077
17078 // Try to turn vecreduce_add(add(x, y)) into vecreduce(x) + vecreduce(y)
17079 if (ResVT == MVT::i32 && N0.getOpcode() == ISD::ADD &&
17080 (N0.getValueType() == MVT::v4i32 || N0.getValueType() == MVT::v8i16 ||
17081 N0.getValueType() == MVT::v16i8)) {
17082 SDValue Red0 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(0));
17083 SDValue Red1 = DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, N0.getOperand(1));
17084 return DAG.getNode(ISD::ADD, dl, ResVT, Red0, Red1);
17085 }
17086
17087 // We are looking for something that will have illegal types if left alone,
17088 // but that we can convert to a single instruction under MVE. For example
17089 // vecreduce_add(sext(A, v8i32)) => VADDV.s16 A
17090 // or
17091 // vecreduce_add(mul(zext(A, v16i32), zext(B, v16i32))) => VMLADAV.u8 A, B
17092
17093 // The legal cases are:
17094 // VADDV u/s 8/16/32
17095 // VMLAV u/s 8/16/32
17096 // VADDLV u/s 32
17097 // VMLALV u/s 16/32
17098
17099 // If the input vector is smaller than legal (v4i8/v4i16 for example) we can
17100 // extend it and use v4i32 instead.
17101 auto ExtTypeMatches = [](SDValue A, ArrayRef<MVT> ExtTypes) {
17102 EVT AVT = A.getValueType();
17103 return any_of(ExtTypes, [&](MVT Ty) {
17104 return AVT.getVectorNumElements() == Ty.getVectorNumElements() &&
17105 AVT.bitsLE(Ty);
17106 });
17107 };
17108 auto ExtendIfNeeded = [&](SDValue A, unsigned ExtendCode) {
17109 EVT AVT = A.getValueType();
17110 if (!AVT.is128BitVector())
17111 A = DAG.getNode(ExtendCode, dl,
17112 AVT.changeVectorElementType(MVT::getIntegerVT(
17113 128 / AVT.getVectorMinNumElements())),
17114 A);
17115 return A;
17116 };
17117 auto IsVADDV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes) {
17118 if (ResVT != RetTy || N0->getOpcode() != ExtendCode)
17119 return SDValue();
17120 SDValue A = N0->getOperand(0);
17121 if (ExtTypeMatches(A, ExtTypes))
17122 return ExtendIfNeeded(A, ExtendCode);
17123 return SDValue();
17124 };
17125 auto IsPredVADDV = [&](MVT RetTy, unsigned ExtendCode,
17126 ArrayRef<MVT> ExtTypes, SDValue &Mask) {
17127 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17128 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17129 return SDValue();
17130 Mask = N0->getOperand(0);
17131 SDValue Ext = N0->getOperand(1);
17132 if (Ext->getOpcode() != ExtendCode)
17133 return SDValue();
17134 SDValue A = Ext->getOperand(0);
17135 if (ExtTypeMatches(A, ExtTypes))
17136 return ExtendIfNeeded(A, ExtendCode);
17137 return SDValue();
17138 };
17139 auto IsVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17140 SDValue &A, SDValue &B) {
17141 // For a vmla we are trying to match a larger pattern:
17142 // ExtA = sext/zext A
17143 // ExtB = sext/zext B
17144 // Mul = mul ExtA, ExtB
17145 // vecreduce.add Mul
17146 // There might also be an extra extend between the mul and the addreduce, so
17147 // long as the bitwidth is high enough to make them equivalent (for example
17148 // original v8i16 might be mul at v8i32 and the reduce happens at v8i64).
17149 if (ResVT != RetTy)
17150 return false;
17151 SDValue Mul = N0;
17152 if (Mul->getOpcode() == ExtendCode &&
17153 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17154 ResVT.getScalarSizeInBits())
17155 Mul = Mul->getOperand(0);
17156 if (Mul->getOpcode() != ISD::MUL)
17157 return false;
17158 SDValue ExtA = Mul->getOperand(0);
17159 SDValue ExtB = Mul->getOperand(1);
17160 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17161 return false;
17162 A = ExtA->getOperand(0);
17163 B = ExtB->getOperand(0);
17164 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17165 A = ExtendIfNeeded(A, ExtendCode);
17166 B = ExtendIfNeeded(B, ExtendCode);
17167 return true;
17168 }
17169 return false;
17170 };
17171 auto IsPredVMLAV = [&](MVT RetTy, unsigned ExtendCode, ArrayRef<MVT> ExtTypes,
17172 SDValue &A, SDValue &B, SDValue &Mask) {
17173 // Same as the pattern above with a select for the zero predicated lanes
17174 // ExtA = sext/zext A
17175 // ExtB = sext/zext B
17176 // Mul = mul ExtA, ExtB
17177 // N0 = select Mask, Mul, 0
17178 // vecreduce.add N0
17179 if (ResVT != RetTy || N0->getOpcode() != ISD::VSELECT ||
17180 !ISD::isBuildVectorAllZeros(N0->getOperand(2).getNode()))
17181 return false;
17182 Mask = N0->getOperand(0);
17183 SDValue Mul = N0->getOperand(1);
17184 if (Mul->getOpcode() == ExtendCode &&
17185 Mul->getOperand(0).getScalarValueSizeInBits() * 2 >=
17186 ResVT.getScalarSizeInBits())
17187 Mul = Mul->getOperand(0);
17188 if (Mul->getOpcode() != ISD::MUL)
17189 return false;
17190 SDValue ExtA = Mul->getOperand(0);
17191 SDValue ExtB = Mul->getOperand(1);
17192 if (ExtA->getOpcode() != ExtendCode || ExtB->getOpcode() != ExtendCode)
17193 return false;
17194 A = ExtA->getOperand(0);
17195 B = ExtB->getOperand(0);
17196 if (ExtTypeMatches(A, ExtTypes) && ExtTypeMatches(B, ExtTypes)) {
17197 A = ExtendIfNeeded(A, ExtendCode);
17198 B = ExtendIfNeeded(B, ExtendCode);
17199 return true;
17200 }
17201 return false;
17202 };
17203 auto Create64bitNode = [&](unsigned Opcode, ArrayRef<SDValue> Ops) {
17204 // Split illegal MVT::v16i8->i64 vector reductions into two legal v8i16->i64
17205 // reductions. The operands are extended with MVEEXT, but as they are
17206 // reductions the lane orders do not matter. MVEEXT may be combined with
17207 // loads to produce two extending loads, or else they will be expanded to
17208 // VREV/VMOVL.
17209 EVT VT = Ops[0].getValueType();
17210 if (VT == MVT::v16i8) {
17211 assert((Opcode == ARMISD::VMLALVs || Opcode == ARMISD::VMLALVu) &&
17212 "Unexpected illegal long reduction opcode");
17213 bool IsUnsigned = Opcode == ARMISD::VMLALVu;
17214
17215 SDValue Ext0 =
17216 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17217 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[0]);
17218 SDValue Ext1 =
17219 DAG.getNode(IsUnsigned ? ARMISD::MVEZEXT : ARMISD::MVESEXT, dl,
17220 DAG.getVTList(MVT::v8i16, MVT::v8i16), Ops[1]);
17221
17222 SDValue MLA0 = DAG.getNode(Opcode, dl, DAG.getVTList(MVT::i32, MVT::i32),
17223 Ext0, Ext1);
17224 SDValue MLA1 =
17225 DAG.getNode(IsUnsigned ? ARMISD::VMLALVAu : ARMISD::VMLALVAs, dl,
17226 DAG.getVTList(MVT::i32, MVT::i32), MLA0, MLA0.getValue(1),
17227 Ext0.getValue(1), Ext1.getValue(1));
17228 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, MLA1, MLA1.getValue(1));
17229 }
17230 SDValue Node = DAG.getNode(Opcode, dl, {MVT::i32, MVT::i32}, Ops);
17231 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Node,
17232 SDValue(Node.getNode(), 1));
17233 };
17234
17235 SDValue A, B;
17236 SDValue Mask;
17237 if (IsVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17238 return DAG.getNode(ARMISD::VMLAVs, dl, ResVT, A, B);
17239 if (IsVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B))
17240 return DAG.getNode(ARMISD::VMLAVu, dl, ResVT, A, B);
17241 if (IsVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17242 A, B))
17243 return Create64bitNode(ARMISD::VMLALVs, {A, B});
17244 if (IsVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v16i8, MVT::v8i16, MVT::v4i32},
17245 A, B))
17246 return Create64bitNode(ARMISD::VMLALVu, {A, B});
17247 if (IsVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B))
17248 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17249 DAG.getNode(ARMISD::VMLAVs, dl, MVT::i32, A, B));
17250 if (IsVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B))
17251 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17252 DAG.getNode(ARMISD::VMLAVu, dl, MVT::i32, A, B));
17253
17254 if (IsPredVMLAV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17255 Mask))
17256 return DAG.getNode(ARMISD::VMLAVps, dl, ResVT, A, B, Mask);
17257 if (IsPredVMLAV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, A, B,
17258 Mask))
17259 return DAG.getNode(ARMISD::VMLAVpu, dl, ResVT, A, B, Mask);
17260 if (IsPredVMLAV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17261 Mask))
17262 return Create64bitNode(ARMISD::VMLALVps, {A, B, Mask});
17263 if (IsPredVMLAV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v4i32}, A, B,
17264 Mask))
17265 return Create64bitNode(ARMISD::VMLALVpu, {A, B, Mask});
17266 if (IsPredVMLAV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, A, B, Mask))
17267 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17268 DAG.getNode(ARMISD::VMLAVps, dl, MVT::i32, A, B, Mask));
17269 if (IsPredVMLAV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, A, B, Mask))
17270 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17271 DAG.getNode(ARMISD::VMLAVpu, dl, MVT::i32, A, B, Mask));
17272
17273 if (SDValue A = IsVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}))
17274 return DAG.getNode(ARMISD::VADDVs, dl, ResVT, A);
17275 if (SDValue A = IsVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}))
17276 return DAG.getNode(ARMISD::VADDVu, dl, ResVT, A);
17277 if (SDValue A = IsVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}))
17278 return Create64bitNode(ARMISD::VADDLVs, {A});
17279 if (SDValue A = IsVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}))
17280 return Create64bitNode(ARMISD::VADDLVu, {A});
17281 if (SDValue A = IsVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}))
17282 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17283 DAG.getNode(ARMISD::VADDVs, dl, MVT::i32, A));
17284 if (SDValue A = IsVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}))
17285 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17286 DAG.getNode(ARMISD::VADDVu, dl, MVT::i32, A));
17287
17288 if (SDValue A = IsPredVADDV(MVT::i32, ISD::SIGN_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17289 return DAG.getNode(ARMISD::VADDVps, dl, ResVT, A, Mask);
17290 if (SDValue A = IsPredVADDV(MVT::i32, ISD::ZERO_EXTEND, {MVT::v8i16, MVT::v16i8}, Mask))
17291 return DAG.getNode(ARMISD::VADDVpu, dl, ResVT, A, Mask);
17292 if (SDValue A = IsPredVADDV(MVT::i64, ISD::SIGN_EXTEND, {MVT::v4i32}, Mask))
17293 return Create64bitNode(ARMISD::VADDLVps, {A, Mask});
17294 if (SDValue A = IsPredVADDV(MVT::i64, ISD::ZERO_EXTEND, {MVT::v4i32}, Mask))
17295 return Create64bitNode(ARMISD::VADDLVpu, {A, Mask});
17296 if (SDValue A = IsPredVADDV(MVT::i16, ISD::SIGN_EXTEND, {MVT::v16i8}, Mask))
17297 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17298 DAG.getNode(ARMISD::VADDVps, dl, MVT::i32, A, Mask));
17299 if (SDValue A = IsPredVADDV(MVT::i16, ISD::ZERO_EXTEND, {MVT::v16i8}, Mask))
17300 return DAG.getNode(ISD::TRUNCATE, dl, ResVT,
17301 DAG.getNode(ARMISD::VADDVpu, dl, MVT::i32, A, Mask));
17302
17303 // Some complications. We can get a case where the two inputs of the mul are
17304 // the same, then the output sext will have been helpfully converted to a
17305 // zext. Turn it back.
17306 SDValue Op = N0;
17307 if (Op->getOpcode() == ISD::VSELECT)
17308 Op = Op->getOperand(1);
17309 if (Op->getOpcode() == ISD::ZERO_EXTEND &&
17310 Op->getOperand(0)->getOpcode() == ISD::MUL) {
17311 SDValue Mul = Op->getOperand(0);
17312 if (Mul->getOperand(0) == Mul->getOperand(1) &&
17313 Mul->getOperand(0)->getOpcode() == ISD::SIGN_EXTEND) {
17314 SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, N0->getValueType(0), Mul);
17315 if (Op != N0)
17316 Ext = DAG.getNode(ISD::VSELECT, dl, N0->getValueType(0),
17317 N0->getOperand(0), Ext, N0->getOperand(2));
17318 return DAG.getNode(ISD::VECREDUCE_ADD, dl, ResVT, Ext);
17319 }
17320 }
17321
17322 return SDValue();
17323}
17324
17325// Looks for vaddv(shuffle) or vmlav(shuffle, shuffle), with a shuffle where all
17326// the lanes are used. Due to the reduction being commutative the shuffle can be
17327// removed.
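// Illustrative example (editorial addition, not from the original source):
// because the reduction sums every lane, vaddv(shuffle %v, <3,2,1,0>) produces
// the same value as vaddv(%v), so the shuffle is dropped as long as its mask
// uses each input lane exactly once.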
17328 static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG) {
17329 unsigned VecOp = N->getOperand(0).getValueType().isVector() ? 0 : 2;
17330 auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp));
17331 if (!Shuf || !Shuf->getOperand(1).isUndef())
17332 return SDValue();
17333
17334 // Check all elements are used once in the mask.
17335 ArrayRef<int> Mask = Shuf->getMask();
17336 APInt SetElts(Mask.size(), 0);
17337 for (int E : Mask) {
17338 if (E < 0 || E >= (int)Mask.size())
17339 return SDValue();
17340 SetElts.setBit(E);
17341 }
17342 if (!SetElts.isAllOnes())
17343 return SDValue();
17344
17345 if (N->getNumOperands() != VecOp + 1) {
17346 auto *Shuf2 = dyn_cast<ShuffleVectorSDNode>(N->getOperand(VecOp + 1));
17347 if (!Shuf2 || !Shuf2->getOperand(1).isUndef() || Shuf2->getMask() != Mask)
17348 return SDValue();
17349 }
17350
17352 for (SDValue Op : N->ops()) {
17353 if (Op.getValueType().isVector())
17354 Ops.push_back(Op.getOperand(0));
17355 else
17356 Ops.push_back(Op);
17357 }
17358 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getVTList(), Ops);
17359}
17360
17361 static SDValue PerformVMOVNCombine(SDNode *N,
17362 TargetLowering::DAGCombinerInfo &DCI) {
17363 SDValue Op0 = N->getOperand(0);
17364 SDValue Op1 = N->getOperand(1);
17365 unsigned IsTop = N->getConstantOperandVal(2);
17366
17367 // VMOVNT a undef -> a
17368 // VMOVNB a undef -> a
17369 // VMOVNB undef a -> a
17370 if (Op1->isUndef())
17371 return Op0;
17372 if (Op0->isUndef() && !IsTop)
17373 return Op1;
17374
17375 // VMOVNt(c, VQMOVNb(a, b)) => VQMOVNt(c, b)
17376 // VMOVNb(c, VQMOVNb(a, b)) => VQMOVNb(c, b)
17377 if ((Op1->getOpcode() == ARMISD::VQMOVNs ||
17378 Op1->getOpcode() == ARMISD::VQMOVNu) &&
17379 Op1->getConstantOperandVal(2) == 0)
17380 return DCI.DAG.getNode(Op1->getOpcode(), SDLoc(Op1), N->getValueType(0),
17381 Op0, Op1->getOperand(1), N->getOperand(2));
17382
17383 // Only the bottom lanes from Qm (Op1) and either the top or bottom lanes from
17384 // Qd (Op0) are demanded from a VMOVN, depending on whether we are inserting
17385 // into the top or bottom lanes.
17386 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17387 APInt Op1DemandedElts = APInt::getSplat(NumElts, APInt::getLowBitsSet(2, 1));
17388 APInt Op0DemandedElts =
17389 IsTop ? Op1DemandedElts
17390 : APInt::getSplat(NumElts, APInt::getHighBitsSet(2, 1));
17391
17392 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17393 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17394 return SDValue(N, 0);
17395 if (TLI.SimplifyDemandedVectorElts(Op1, Op1DemandedElts, DCI))
17396 return SDValue(N, 0);
17397
17398 return SDValue();
17399}
17400
17401 static SDValue PerformVQMOVNCombine(SDNode *N,
17402 TargetLowering::DAGCombinerInfo &DCI) {
17403 SDValue Op0 = N->getOperand(0);
17404 unsigned IsTop = N->getConstantOperandVal(2);
17405
17406 unsigned NumElts = N->getValueType(0).getVectorNumElements();
17407 APInt Op0DemandedElts =
17408 APInt::getSplat(NumElts, IsTop ? APInt::getLowBitsSet(2, 1)
17409 : APInt::getHighBitsSet(2, 1));
17410
17411 const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
17412 if (TLI.SimplifyDemandedVectorElts(Op0, Op0DemandedElts, DCI))
17413 return SDValue(N, 0);
17414 return SDValue();
17415}
17416
17417 static SDValue PerformVQDMULHCombine(SDNode *N,
17418 TargetLowering::DAGCombinerInfo &DCI) {
17419 EVT VT = N->getValueType(0);
17420 SDValue LHS = N->getOperand(0);
17421 SDValue RHS = N->getOperand(1);
17422
17423 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
17424 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
17425 // Turn VQDMULH(shuffle, shuffle) -> shuffle(VQDMULH)
17426 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
17427 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
17428 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
17429 SDLoc DL(N);
17430 SDValue NewBinOp = DCI.DAG.getNode(N->getOpcode(), DL, VT,
17431 LHS.getOperand(0), RHS.getOperand(0));
17432 SDValue UndefV = LHS.getOperand(1);
17433 return DCI.DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
17434 }
17435 return SDValue();
17436}
17437
17438 static SDValue PerformLongShiftCombine(SDNode *N, SelectionDAG &DAG) {
17439 SDLoc DL(N);
17440 SDValue Op0 = N->getOperand(0);
17441 SDValue Op1 = N->getOperand(1);
17442
17443 // Turn X << -C -> X >> C and vice versa. The negative shifts can come up from
17444 // uses of the intrinsics.
17445 if (auto C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
17446 int ShiftAmt = C->getSExtValue();
17447 if (ShiftAmt == 0) {
17448 SDValue Merge = DAG.getMergeValues({Op0, Op1}, DL);
17449 DAG.ReplaceAllUsesWith(N, Merge.getNode());
17450 return SDValue();
17451 }
17452
17453 if (ShiftAmt >= -32 && ShiftAmt < 0) {
17454 unsigned NewOpcode =
17455 N->getOpcode() == ARMISD::LSLL ? ARMISD::LSRL : ARMISD::LSLL;
17456 SDValue NewShift = DAG.getNode(NewOpcode, DL, N->getVTList(), Op0, Op1,
17457 DAG.getConstant(-ShiftAmt, DL, MVT::i32));
17458 DAG.ReplaceAllUsesWith(N, NewShift.getNode());
17459 return NewShift;
17460 }
17461 }
17462
17463 return SDValue();
17464}
17465
17466/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
17467 SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
17468 DAGCombinerInfo &DCI) const {
17469 SelectionDAG &DAG = DCI.DAG;
17470 unsigned IntNo = N->getConstantOperandVal(0);
17471 switch (IntNo) {
17472 default:
17473 // Don't do anything for most intrinsics.
17474 break;
17475
17476 // Vector shifts: check for immediate versions and lower them.
17477 // Note: This is done during DAG combining instead of DAG legalizing because
17478 // the build_vectors for 64-bit vector element shift counts are generally
17479 // not legal, and it is hard to see their values after they get legalized to
17480 // loads from a constant pool.
17481 case Intrinsic::arm_neon_vshifts:
17482 case Intrinsic::arm_neon_vshiftu:
17483 case Intrinsic::arm_neon_vrshifts:
17484 case Intrinsic::arm_neon_vrshiftu:
17485 case Intrinsic::arm_neon_vrshiftn:
17486 case Intrinsic::arm_neon_vqshifts:
17487 case Intrinsic::arm_neon_vqshiftu:
17488 case Intrinsic::arm_neon_vqshiftsu:
17489 case Intrinsic::arm_neon_vqshiftns:
17490 case Intrinsic::arm_neon_vqshiftnu:
17491 case Intrinsic::arm_neon_vqshiftnsu:
17492 case Intrinsic::arm_neon_vqrshiftns:
17493 case Intrinsic::arm_neon_vqrshiftnu:
17494 case Intrinsic::arm_neon_vqrshiftnsu: {
17495 EVT VT = N->getOperand(1).getValueType();
17496 int64_t Cnt;
17497 unsigned VShiftOpc = 0;
17498
17499 switch (IntNo) {
17500 case Intrinsic::arm_neon_vshifts:
17501 case Intrinsic::arm_neon_vshiftu:
17502 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
17503 VShiftOpc = ARMISD::VSHLIMM;
17504 break;
17505 }
17506 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
17507 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? ARMISD::VSHRsIMM
17508 : ARMISD::VSHRuIMM);
17509 break;
17510 }
17511 return SDValue();
17512
17513 case Intrinsic::arm_neon_vrshifts:
17514 case Intrinsic::arm_neon_vrshiftu:
17515 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
17516 break;
17517 return SDValue();
17518
17519 case Intrinsic::arm_neon_vqshifts:
17520 case Intrinsic::arm_neon_vqshiftu:
17521 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17522 break;
17523 return SDValue();
17524
17525 case Intrinsic::arm_neon_vqshiftsu:
17526 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
17527 break;
17528 llvm_unreachable("invalid shift count for vqshlu intrinsic");
17529
17530 case Intrinsic::arm_neon_vrshiftn:
17531 case Intrinsic::arm_neon_vqshiftns:
17532 case Intrinsic::arm_neon_vqshiftnu:
17533 case Intrinsic::arm_neon_vqshiftnsu:
17534 case Intrinsic::arm_neon_vqrshiftns:
17535 case Intrinsic::arm_neon_vqrshiftnu:
17536 case Intrinsic::arm_neon_vqrshiftnsu:
17537 // Narrowing shifts require an immediate right shift.
17538 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
17539 break;
17540 llvm_unreachable("invalid shift count for narrowing vector shift "
17541 "intrinsic");
17542
17543 default:
17544 llvm_unreachable("unhandled vector shift");
17545 }
17546
17547 switch (IntNo) {
17548 case Intrinsic::arm_neon_vshifts:
17549 case Intrinsic::arm_neon_vshiftu:
17550 // Opcode already set above.
17551 break;
17552 case Intrinsic::arm_neon_vrshifts:
17553 VShiftOpc = ARMISD::VRSHRsIMM;
17554 break;
17555 case Intrinsic::arm_neon_vrshiftu:
17556 VShiftOpc = ARMISD::VRSHRuIMM;
17557 break;
17558 case Intrinsic::arm_neon_vrshiftn:
17559 VShiftOpc = ARMISD::VRSHRNIMM;
17560 break;
17561 case Intrinsic::arm_neon_vqshifts:
17562 VShiftOpc = ARMISD::VQSHLsIMM;
17563 break;
17564 case Intrinsic::arm_neon_vqshiftu:
17565 VShiftOpc = ARMISD::VQSHLuIMM;
17566 break;
17567 case Intrinsic::arm_neon_vqshiftsu:
17568 VShiftOpc = ARMISD::VQSHLsuIMM;
17569 break;
17570 case Intrinsic::arm_neon_vqshiftns:
17571 VShiftOpc = ARMISD::VQSHRNsIMM;
17572 break;
17573 case Intrinsic::arm_neon_vqshiftnu:
17574 VShiftOpc = ARMISD::VQSHRNuIMM;
17575 break;
17576 case Intrinsic::arm_neon_vqshiftnsu:
17577 VShiftOpc = ARMISD::VQSHRNsuIMM;
17578 break;
17579 case Intrinsic::arm_neon_vqrshiftns:
17580 VShiftOpc = ARMISD::VQRSHRNsIMM;
17581 break;
17582 case Intrinsic::arm_neon_vqrshiftnu:
17583 VShiftOpc = ARMISD::VQRSHRNuIMM;
17584 break;
17585 case Intrinsic::arm_neon_vqrshiftnsu:
17586 VShiftOpc = ARMISD::VQRSHRNsuIMM;
17587 break;
17588 }
17589
17590 SDLoc dl(N);
17591 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17592 N->getOperand(1), DAG.getConstant(Cnt, dl, MVT::i32));
17593 }
17594
17595 case Intrinsic::arm_neon_vshiftins: {
17596 EVT VT = N->getOperand(1).getValueType();
17597 int64_t Cnt;
17598 unsigned VShiftOpc = 0;
17599
17600 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
17601 VShiftOpc = ARMISD::VSLIIMM;
17602 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
17603 VShiftOpc = ARMISD::VSRIIMM;
17604 else {
17605 llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
17606 }
17607
17608 SDLoc dl(N);
17609 return DAG.getNode(VShiftOpc, dl, N->getValueType(0),
17610 N->getOperand(1), N->getOperand(2),
17611 DAG.getConstant(Cnt, dl, MVT::i32));
17612 }
17613
17614 case Intrinsic::arm_neon_vqrshifts:
17615 case Intrinsic::arm_neon_vqrshiftu:
17616 // No immediate versions of these to check for.
17617 break;
17618
17619 case Intrinsic::arm_mve_vqdmlah:
17620 case Intrinsic::arm_mve_vqdmlash:
17621 case Intrinsic::arm_mve_vqrdmlah:
17622 case Intrinsic::arm_mve_vqrdmlash:
17623 case Intrinsic::arm_mve_vmla_n_predicated:
17624 case Intrinsic::arm_mve_vmlas_n_predicated:
17625 case Intrinsic::arm_mve_vqdmlah_predicated:
17626 case Intrinsic::arm_mve_vqdmlash_predicated:
17627 case Intrinsic::arm_mve_vqrdmlah_predicated:
17628 case Intrinsic::arm_mve_vqrdmlash_predicated: {
17629 // These intrinsics all take an i32 scalar operand which is narrowed to the
17630 // size of a single lane of the vector type they return. So we don't need
17631 // any bits of that operand above that point, which allows us to eliminate
17632 // uxth/sxth.
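// Illustrative example (editorial addition, not from the original source): for
// a v8i16 vqdmlah only the low 16 bits of the i32 scalar operand are demanded,
// so a preceding sxth/uxth of that scalar is removed by the
// SimplifyDemandedBits call below.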
17633 unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
17634 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17635 if (SimplifyDemandedBits(N->getOperand(3), DemandedMask, DCI))
17636 return SDValue();
17637 break;
17638 }
17639
17640 case Intrinsic::arm_mve_minv:
17641 case Intrinsic::arm_mve_maxv:
17642 case Intrinsic::arm_mve_minav:
17643 case Intrinsic::arm_mve_maxav:
17644 case Intrinsic::arm_mve_minv_predicated:
17645 case Intrinsic::arm_mve_maxv_predicated:
17646 case Intrinsic::arm_mve_minav_predicated:
17647 case Intrinsic::arm_mve_maxav_predicated: {
17648 // These intrinsics all take an i32 scalar operand which is narrowed to the
17649 // size of a single lane of the vector type they take as the other input.
17650 unsigned BitWidth = N->getOperand(2)->getValueType(0).getScalarSizeInBits();
17651 APInt DemandedMask = APInt::getLowBitsSet(32, BitWidth);
17652 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
17653 return SDValue();
17654 break;
17655 }
17656
17657 case Intrinsic::arm_mve_addv: {
17658 // Turn this intrinsic straight into the appropriate ARMISD::VADDV node,
17659 // which allows PerformADDVecReduce to turn it into VADDLV when possible.
17660 bool Unsigned = N->getConstantOperandVal(2);
17661 unsigned Opc = Unsigned ? ARMISD::VADDVu : ARMISD::VADDVs;
17662 return DAG.getNode(Opc, SDLoc(N), N->getVTList(), N->getOperand(1));
17663 }
17664
17665 case Intrinsic::arm_mve_addlv:
17666 case Intrinsic::arm_mve_addlv_predicated: {
17667 // Same for these, but ARMISD::VADDLV has to be followed by a BUILD_PAIR
17668 // which recombines the two outputs into an i64
17669 bool Unsigned = N->getConstantOperandVal(2);
17670 unsigned Opc = IntNo == Intrinsic::arm_mve_addlv ?
17671 (Unsigned ? ARMISD::VADDLVu : ARMISD::VADDLVs) :
17672 (Unsigned ? ARMISD::VADDLVpu : ARMISD::VADDLVps);
17673
17674 SmallVector<SDValue, 4> Ops;
17675 for (unsigned i = 1, e = N->getNumOperands(); i < e; i++)
17676 if (i != 2) // skip the unsigned flag
17677 Ops.push_back(N->getOperand(i));
17678
17679 SDLoc dl(N);
17680 SDValue val = DAG.getNode(Opc, dl, {MVT::i32, MVT::i32}, Ops);
17681 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, val.getValue(0),
17682 val.getValue(1));
17683 }
17684 }
17685
17686 return SDValue();
17687}
17688
17689/// PerformShiftCombine - Checks for immediate versions of vector shifts and
17690/// lowers them. As with the vector shift intrinsics, this is done during DAG
17691/// combining instead of DAG legalizing because the build_vectors for 64-bit
17692/// vector element shift counts are generally not legal, and it is hard to see
17693/// their values after they get legalized to loads from a constant pool.
17694 static SDValue PerformShiftCombine(SDNode *N,
17695 TargetLowering::DAGCombinerInfo &DCI,
17696 const ARMSubtarget *ST) {
17697 SelectionDAG &DAG = DCI.DAG;
17698 EVT VT = N->getValueType(0);
17699
17700 if (ST->isThumb1Only() && N->getOpcode() == ISD::SHL && VT == MVT::i32 &&
17701 N->getOperand(0)->getOpcode() == ISD::AND &&
17702 N->getOperand(0)->hasOneUse()) {
17703 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
17704 return SDValue();
17705 // Look for the pattern (shl (and x, AndMask), ShiftAmt). This doesn't
17706 // usually show up because instcombine prefers to canonicalize it to
17707 // (and (shl x, ShiftAmt) (shl AndMask, ShiftAmt)), but the shift can come
17708 // out of GEP lowering in some cases.
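// Illustrative example (editorial addition, assuming a 32-bit GPR):
//   (shl (and x, 0x3ff), 2)  ->  (srl (shl x, 22), 20)
// i.e. shift the masked field to the top of the register to clear the high
// bits, then shift it back down to its final position, avoiding the AND with a
// constant that Thumb1 cannot encode cheaply.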
17709 SDValue N0 = N->getOperand(0);
17710 ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(N->getOperand(1));
17711 if (!ShiftAmtNode)
17712 return SDValue();
17713 uint32_t ShiftAmt = static_cast<uint32_t>(ShiftAmtNode->getZExtValue());
17714 ConstantSDNode *AndMaskNode = dyn_cast<ConstantSDNode>(N0->getOperand(1));
17715 if (!AndMaskNode)
17716 return SDValue();
17717 uint32_t AndMask = static_cast<uint32_t>(AndMaskNode->getZExtValue());
17718 // Don't transform uxtb/uxth.
17719 if (AndMask == 255 || AndMask == 65535)
17720 return SDValue();
17721 if (isMask_32(AndMask)) {
17722 uint32_t MaskedBits = llvm::countl_zero(AndMask);
17723 if (MaskedBits > ShiftAmt) {
17724 SDLoc DL(N);
17725 SDValue SHL = DAG.getNode(ISD::SHL, DL, MVT::i32, N0->getOperand(0),
17726 DAG.getConstant(MaskedBits, DL, MVT::i32));
17727 return DAG.getNode(
17728 ISD::SRL, DL, MVT::i32, SHL,
17729 DAG.getConstant(MaskedBits - ShiftAmt, DL, MVT::i32));
17730 }
17731 }
17732 }
17733
17734 // Nothing to be done for scalar shifts.
17735 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17736 if (!VT.isVector() || !TLI.isTypeLegal(VT))
17737 return SDValue();
17738 if (ST->hasMVEIntegerOps())
17739 return SDValue();
17740
17741 int64_t Cnt;
17742
17743 switch (N->getOpcode()) {
17744 default: llvm_unreachable("unexpected shift opcode");
17745
17746 case ISD::SHL:
17747 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) {
17748 SDLoc dl(N);
17749 return DAG.getNode(ARMISD::VSHLIMM, dl, VT, N->getOperand(0),
17750 DAG.getConstant(Cnt, dl, MVT::i32));
17751 }
17752 break;
17753
17754 case ISD::SRA:
17755 case ISD::SRL:
17756 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
17757 unsigned VShiftOpc =
17758 (N->getOpcode() == ISD::SRA ? ARMISD::VSHRsIMM : ARMISD::VSHRuIMM);
17759 SDLoc dl(N);
17760 return DAG.getNode(VShiftOpc, dl, VT, N->getOperand(0),
17761 DAG.getConstant(Cnt, dl, MVT::i32));
17762 }
17763 }
17764 return SDValue();
17765}
17766
17767 // Look for a sign/zero/fp extend of a larger than legal load. This can be
17768// split into multiple extending loads, which are simpler to deal with than an
17769// arbitrary extend. For fp extends we use an integer extending load and a VCVTL
17770// to convert the type to an f32.
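// Illustrative example (editorial addition, not from the original source):
// zext(load v8i8) to v8i32 becomes two v4i8 zero-extending loads whose v4i32
// halves are concatenated; an fp extend of a v8f16 load similarly becomes
// integer extending loads followed by a VCVTL per half to reach v4f32.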
17771 static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG) {
17772 SDValue N0 = N->getOperand(0);
17773 if (N0.getOpcode() != ISD::LOAD)
17774 return SDValue();
17775 LoadSDNode *LD = cast<LoadSDNode>(N0.getNode());
17776 if (!LD->isSimple() || !N0.hasOneUse() || LD->isIndexed() ||
17777 LD->getExtensionType() != ISD::NON_EXTLOAD)
17778 return SDValue();
17779 EVT FromVT = LD->getValueType(0);
17780 EVT ToVT = N->getValueType(0);
17781 if (!ToVT.isVector())
17782 return SDValue();
17784 EVT ToEltVT = ToVT.getVectorElementType();
17785 EVT FromEltVT = FromVT.getVectorElementType();
17786
17787 unsigned NumElements = 0;
17788 if (ToEltVT == MVT::i32 && FromEltVT == MVT::i8)
17789 NumElements = 4;
17790 if (ToEltVT == MVT::f32 && FromEltVT == MVT::f16)
17791 NumElements = 4;
17792 if (NumElements == 0 ||
17793 (FromEltVT != MVT::f16 && FromVT.getVectorNumElements() == NumElements) ||
17794 FromVT.getVectorNumElements() % NumElements != 0 ||
17795 !isPowerOf2_32(NumElements))
17796 return SDValue();
17797
17798 LLVMContext &C = *DAG.getContext();
17799 SDLoc DL(LD);
17800 // Details about the old load
17801 SDValue Ch = LD->getChain();
17802 SDValue BasePtr = LD->getBasePtr();
17803 Align Alignment = LD->getOriginalAlign();
17804 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
17805 AAMDNodes AAInfo = LD->getAAInfo();
17806
17807 ISD::LoadExtType NewExtType =
17808 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
17809 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
17810 EVT NewFromVT = EVT::getVectorVT(
17811 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
17812 EVT NewToVT = EVT::getVectorVT(
17813 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
17814
17815 SmallVector<SDValue, 4> Loads;
17816 SmallVector<SDValue, 4> Chains;
17817 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
17818 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
17819 SDValue NewPtr =
17820 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
17821
17822 SDValue NewLoad =
17823 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
17824 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
17825 Alignment, MMOFlags, AAInfo);
17826 Loads.push_back(NewLoad);
17827 Chains.push_back(SDValue(NewLoad.getNode(), 1));
17828 }
17829
17830 // Float truncs need to be extended with VCVTB's into their floating point types.
17831 if (FromEltVT == MVT::f16) {
17832 SmallVector<SDValue, 4> Extends;
17833
17834 for (unsigned i = 0; i < Loads.size(); i++) {
17835 SDValue LoadBC =
17836 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, MVT::v8f16, Loads[i]);
17837 SDValue FPExt = DAG.getNode(ARMISD::VCVTL, DL, MVT::v4f32, LoadBC,
17838 DAG.getConstant(0, DL, MVT::i32));
17839 Extends.push_back(FPExt);
17840 }
17841
17842 Loads = Extends;
17843 }
17844
17845 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
17846 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
17847 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ToVT, Loads);
17848}
17849
17850/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
17851/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
17852static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
17853 const ARMSubtarget *ST) {
17854 SDValue N0 = N->getOperand(0);
17855
17856 // Check for sign- and zero-extensions of vector extract operations of 8- and
17857 // 16-bit vector elements. NEON and MVE support these directly. They are
17858 // handled during DAG combining because type legalization will promote them
17859 // to 32-bit types and it is messy to recognize the operations after that.
17860 if ((ST->hasNEON() || ST->hasMVEIntegerOps()) &&
17861 N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
17862 SDValue Vec = N0.getOperand(0);
17863 SDValue Lane = N0.getOperand(1);
17864 EVT VT = N->getValueType(0);
17865 EVT EltVT = N0.getValueType();
17866 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17867
17868 if (VT == MVT::i32 &&
17869 (EltVT == MVT::i8 || EltVT == MVT::i16) &&
17870 TLI.isTypeLegal(Vec.getValueType()) &&
17871 isa<ConstantSDNode>(Lane)) {
17872
17873 unsigned Opc = 0;
17874 switch (N->getOpcode()) {
17875 default: llvm_unreachable("unexpected opcode");
17876 case ISD::SIGN_EXTEND:
17877 Opc = ARMISD::VGETLANEs;
17878 break;
17879 case ISD::ZERO_EXTEND:
17880 case ISD::ANY_EXTEND:
17881 Opc = ARMISD::VGETLANEu;
17882 break;
17883 }
17884 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane);
17885 }
17886 }
17887
17888 if (ST->hasMVEIntegerOps())
17889 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17890 return NewLoad;
17891
17892 return SDValue();
17893}
17894
17895static SDValue PerformFPExtendCombine(SDNode *N, SelectionDAG &DAG,
17896 const ARMSubtarget *ST) {
17897 if (ST->hasMVEFloatOps())
17898 if (SDValue NewLoad = PerformSplittingToWideningLoad(N, DAG))
17899 return NewLoad;
17900
17901 return SDValue();
17902}
17903
17904// Lower smin(smax(x, C1), C2) to ssat or usat, if they have saturating
17905// constant bounds.
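// Editor's note (illustrative, not from the original source): for example,
// smin(smax(x, -128), 127) has MinC == 127 and MaxC == ~MinC, so it becomes an
// ARMISD::SSAT node with countr_one(127) == 7 as the saturation operand, and
// smin(smax(x, 0), 255) becomes ARMISD::USAT with operand 8.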
17906static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG,
17907 const ARMSubtarget *Subtarget) {
17908 if ((Subtarget->isThumb() || !Subtarget->hasV6Ops()) &&
17909 !Subtarget->isThumb2())
17910 return SDValue();
17911
17912 EVT VT = Op.getValueType();
17913 SDValue Op0 = Op.getOperand(0);
17914
17915 if (VT != MVT::i32 ||
17916 (Op0.getOpcode() != ISD::SMIN && Op0.getOpcode() != ISD::SMAX) ||
17917 !isa<ConstantSDNode>(Op.getOperand(1)) ||
17918 !isa<ConstantSDNode>(Op0.getOperand(1)))
17919 return SDValue();
17920
17921 SDValue Min = Op;
17922 SDValue Max = Op0;
17923 SDValue Input = Op0.getOperand(0);
17924 if (Min.getOpcode() == ISD::SMAX)
17925 std::swap(Min, Max);
17926
17927 APInt MinC = Min.getConstantOperandAPInt(1);
17928 APInt MaxC = Max.getConstantOperandAPInt(1);
17929
17930 if (Min.getOpcode() != ISD::SMIN || Max.getOpcode() != ISD::SMAX ||
17931 !(MinC + 1).isPowerOf2())
17932 return SDValue();
17933
17934 SDLoc DL(Op);
17935 if (MinC == ~MaxC)
17936 return DAG.getNode(ARMISD::SSAT, DL, VT, Input,
17937 DAG.getConstant(MinC.countr_one(), DL, VT));
17938 if (MaxC == 0)
17939 return DAG.getNode(ARMISD::USAT, DL, VT, Input,
17940 DAG.getConstant(MinC.countr_one(), DL, VT));
17941
17942 return SDValue();
17943}
17944
17945/// PerformMinMaxCombine - Target-specific DAG combining for creating truncating
17946/// saturates.
17947static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG,
17948 const ARMSubtarget *ST) {
17949 EVT VT = N->getValueType(0);
17950 SDValue N0 = N->getOperand(0);
17951
17952 if (VT == MVT::i32)
17953 return PerformMinMaxToSatCombine(SDValue(N, 0), DAG, ST);
17954
17955 if (!ST->hasMVEIntegerOps())
17956 return SDValue();
17957
17958 if (SDValue V = PerformVQDMULHCombine(N, DAG))
17959 return V;
17960
17961 if (VT != MVT::v4i32 && VT != MVT::v8i16)
17962 return SDValue();
17963
17964 auto IsSignedSaturate = [&](SDNode *Min, SDNode *Max) {
17965 // Check one is a smin and the other is a smax
17966 if (Min->getOpcode() != ISD::SMIN)
17967 std::swap(Min, Max);
17968 if (Min->getOpcode() != ISD::SMIN || Max->getOpcode() != ISD::SMAX)
17969 return false;
17970
17971 APInt SaturateC;
17972 if (VT == MVT::v4i32)
17973 SaturateC = APInt(32, (1 << 15) - 1, true);
17974 else //if (VT == MVT::v8i16)
17975 SaturateC = APInt(16, (1 << 7) - 1, true);
17976
17977 APInt MinC, MaxC;
17978 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
17979 MinC != SaturateC)
17980 return false;
17981 if (!ISD::isConstantSplatVector(Max->getOperand(1).getNode(), MaxC) ||
17982 MaxC != ~SaturateC)
17983 return false;
17984 return true;
17985 };
17986
17987 if (IsSignedSaturate(N, N0.getNode())) {
17988 SDLoc DL(N);
17989 MVT ExtVT, HalfVT;
17990 if (VT == MVT::v4i32) {
17991 HalfVT = MVT::v8i16;
17992 ExtVT = MVT::v4i16;
17993 } else { // if (VT == MVT::v8i16)
17994 HalfVT = MVT::v16i8;
17995 ExtVT = MVT::v8i8;
17996 }
17997
17998 // Create a VQMOVNB with undef top lanes, then sign extended into the top
17999 // half. That extend will hopefully be removed if only the bottom bits are
18000 // demanded (through a truncating store, for example).
18001 SDValue VQMOVN =
18002 DAG.getNode(ARMISD::VQMOVNs, DL, HalfVT, DAG.getUNDEF(HalfVT),
18003 N0->getOperand(0), DAG.getConstant(0, DL, MVT::i32));
18004 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18005 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Bitcast,
18006 DAG.getValueType(ExtVT));
18007 }
18008
18009 auto IsUnsignedSaturate = [&](SDNode *Min) {
18010 // For unsigned, we just need to check for <= 0xffff
18011 if (Min->getOpcode() != ISD::UMIN)
18012 return false;
18013
18014 APInt SaturateC;
18015 if (VT == MVT::v4i32)
18016 SaturateC = APInt(32, (1 << 16) - 1, true);
18017 else //if (VT == MVT::v8i16)
18018 SaturateC = APInt(16, (1 << 8) - 1, true);
18019
18020 APInt MinC;
18021 if (!ISD::isConstantSplatVector(Min->getOperand(1).getNode(), MinC) ||
18022 MinC != SaturateC)
18023 return false;
18024 return true;
18025 };
18026
18027 if (IsUnsignedSaturate(N)) {
18028 SDLoc DL(N);
18029 MVT HalfVT;
18030 unsigned ExtConst;
18031 if (VT == MVT::v4i32) {
18032 HalfVT = MVT::v8i16;
18033 ExtConst = 0x0000FFFF;
18034 } else { //if (VT == MVT::v8i16)
18035 HalfVT = MVT::v16i8;
18036 ExtConst = 0x00FF;
18037 }
18038
18039 // Create a VQMOVNB with undef top lanes, then ZExt into the top half with
18040 // an AND. That extend will hopefully be removed if only the bottom bits are
18041 // demanded (through a truncating store, for example).
18042 SDValue VQMOVN =
18043 DAG.getNode(ARMISD::VQMOVNu, DL, HalfVT, DAG.getUNDEF(HalfVT), N0,
18044 DAG.getConstant(0, DL, MVT::i32));
18045 SDValue Bitcast = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, VQMOVN);
18046 return DAG.getNode(ISD::AND, DL, VT, Bitcast,
18047 DAG.getConstant(ExtConst, DL, VT));
18048 }
18049
18050 return SDValue();
18051}
18052
18053static const APInt *isPowerOf2Constant(SDValue V) {
18054 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V);
18055 if (!C)
18056 return nullptr;
18057 const APInt *CV = &C->getAPIntValue();
18058 return CV->isPowerOf2() ? CV : nullptr;
18059}
18060
18061SDValue ARMTargetLowering::PerformCMOVToBFICombine(SDNode *CMOV, SelectionDAG &DAG) const {
18062 // If we have a CMOV, OR and AND combination such as:
18063 // if (x & CN)
18064 // y |= CM;
18065 //
18066 // And:
18067 // * CN is a single bit;
18068 // * All bits covered by CM are known zero in y
18069 //
18070 // Then we can convert this into a sequence of BFI instructions. This will
18071 // always be a win if CM is a single bit, will always be no worse than the
18072 // TST&OR sequence if CM is two bits, and for thumb will be no worse if CM is
18073 // three bits (due to the extra IT instruction).
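// Editor's note (illustrative, not from the original source): with the
// hypothetical constants CN == 0x8 and CM == 0x30 (two bits, both known zero
// in y), the source-level pattern
//   if (x & 0x8) y |= 0x30;
// becomes two BFIs, each inserting the tested bit (bit 3 of x, brought down
// to bit 0 by the shift below) into one of bits 4 and 5 of y, which is the
// two-bit break-even case mentioned above.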
18074
18075 SDValue Op0 = CMOV->getOperand(0);
18076 SDValue Op1 = CMOV->getOperand(1);
18077 auto CC = CMOV->getConstantOperandAPInt(2).getLimitedValue();
18078 SDValue CmpZ = CMOV->getOperand(4);
18079
18080 // The compare must be against zero.
18081 if (!isNullConstant(CmpZ->getOperand(1)))
18082 return SDValue();
18083
18084 assert(CmpZ->getOpcode() == ARMISD::CMPZ);
18085 SDValue And = CmpZ->getOperand(0);
18086 if (And->getOpcode() != ISD::AND)
18087 return SDValue();
18088 const APInt *AndC = isPowerOf2Constant(And->getOperand(1));
18089 if (!AndC)
18090 return SDValue();
18091 SDValue X = And->getOperand(0);
18092
18093 if (CC == ARMCC::EQ) {
18094 // We're performing an "equal to zero" compare. Swap the operands so we
18095 // canonicalize on a "not equal to zero" compare.
18096 std::swap(Op0, Op1);
18097 } else {
18098 assert(CC == ARMCC::NE && "How can a CMPZ node not be EQ or NE?");
18099 }
18100
18101 if (Op1->getOpcode() != ISD::OR)
18102 return SDValue();
18103
18104 ConstantSDNode *OrC = dyn_cast<ConstantSDNode>(Op1->getOperand(1));
18105 if (!OrC)
18106 return SDValue();
18107 SDValue Y = Op1->getOperand(0);
18108
18109 if (Op0 != Y)
18110 return SDValue();
18111
18112 // Now, is it profitable to continue?
18113 APInt OrCI = OrC->getAPIntValue();
18114 unsigned Heuristic = Subtarget->isThumb() ? 3 : 2;
18115 if (OrCI.popcount() > Heuristic)
18116 return SDValue();
18117
18118 // Lastly, can we determine that the bits defined by OrCI
18119 // are zero in Y?
18120 KnownBits Known = DAG.computeKnownBits(Y);
18121 if ((OrCI & Known.Zero) != OrCI)
18122 return SDValue();
18123
18124 // OK, we can do the combine.
18125 SDValue V = Y;
18126 SDLoc dl(X);
18127 EVT VT = X.getValueType();
18128 unsigned BitInX = AndC->logBase2();
18129
18130 if (BitInX != 0) {
18131 // We must shift X first.
18132 X = DAG.getNode(ISD::SRL, dl, VT, X,
18133 DAG.getConstant(BitInX, dl, VT));
18134 }
18135
18136 for (unsigned BitInY = 0, NumActiveBits = OrCI.getActiveBits();
18137 BitInY < NumActiveBits; ++BitInY) {
18138 if (OrCI[BitInY] == 0)
18139 continue;
18140 APInt Mask(VT.getSizeInBits(), 0);
18141 Mask.setBit(BitInY);
18142 V = DAG.getNode(ARMISD::BFI, dl, VT, V, X,
18143 // Confusingly, the operand is an *inverted* mask.
18144 DAG.getConstant(~Mask, dl, VT));
18145 }
18146
18147 return V;
18148}
18149
18150// Given N, the value controlling the conditional branch, search for the loop
18151// intrinsic, returning it, along with how the value is used. We need to handle
18152// patterns such as the following:
18153// (brcond (xor (setcc (loop.decrement), 0, ne), 1), exit)
18154// (brcond (setcc (loop.decrement), 0, eq), exit)
18155// (brcond (setcc (loop.decrement), 0, ne), header)
18156static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm,
18157 bool &Negate) {
18158 switch (N->getOpcode()) {
18159 default:
18160 break;
18161 case ISD::XOR: {
18162 if (!isa<ConstantSDNode>(N.getOperand(1)))
18163 return SDValue();
18164 if (!cast<ConstantSDNode>(N.getOperand(1))->isOne())
18165 return SDValue();
18166 Negate = !Negate;
18167 return SearchLoopIntrinsic(N.getOperand(0), CC, Imm, Negate);
18168 }
18169 case ISD::SETCC: {
18170 auto *Const = dyn_cast<ConstantSDNode>(N.getOperand(1));
18171 if (!Const)
18172 return SDValue();
18173 if (Const->isZero())
18174 Imm = 0;
18175 else if (Const->isOne())
18176 Imm = 1;
18177 else
18178 return SDValue();
18179 CC = cast<CondCodeSDNode>(N.getOperand(2))->get();
18180 return SearchLoopIntrinsic(N->getOperand(0), CC, Imm, Negate);
18181 }
18182 case ISD::INTRINSIC_W_CHAIN: {
18183 unsigned IntOp = N.getConstantOperandVal(1);
18184 if (IntOp != Intrinsic::test_start_loop_iterations &&
18185 IntOp != Intrinsic::loop_decrement_reg)
18186 return SDValue();
18187 return N;
18188 }
18189 }
18190 return SDValue();
18191}
18192
18193static SDValue PerformHWLoopCombine(SDNode *N,
18194 TargetLowering::DAGCombinerInfo &DCI,
18195 const ARMSubtarget *ST) {
18196
18197 // The hwloop intrinsics that we're interested in are used for control flow,
18198 // either for entering or exiting the loop:
18199 // - test.start.loop.iterations will test whether its operand is zero. If it
18200 // is zero, the proceeding branch should not enter the loop.
18201 // - loop.decrement.reg also tests whether its operand is zero. If it is
18202 // zero, the proceeding branch should not branch back to the beginning of
18203 // the loop.
18204 // So here, we need to check how the brcond is using the result of each
18205 // of the intrinsics to ensure that we're branching to the right place at the
18206 // right time.
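// Editor's note (illustrative, not from the original source): for instance, a
// guard of the form
//   (brcond (setcc (test.start.loop.iterations n), 0, eq), exit)
// is taken exactly when the iteration count is zero, so below it becomes
//   t = WLSSETUP n; WLS t, exit
// i.e. a while-loop-start that skips the loop body for a zero count.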
18207
18208 ISD::CondCode CC;
18209 SDValue Cond;
18210 int Imm = 1;
18211 bool Negate = false;
18212 SDValue Chain = N->getOperand(0);
18213 SDValue Dest;
18214
18215 if (N->getOpcode() == ISD::BRCOND) {
18216 CC = ISD::SETEQ;
18217 Cond = N->getOperand(1);
18218 Dest = N->getOperand(2);
18219 } else {
18220 assert(N->getOpcode() == ISD::BR_CC && "Expected BRCOND or BR_CC!");
18221 CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
18222 Cond = N->getOperand(2);
18223 Dest = N->getOperand(4);
18224 if (auto *Const = dyn_cast<ConstantSDNode>(N->getOperand(3))) {
18225 if (!Const->isOne() && !Const->isZero())
18226 return SDValue();
18227 Imm = Const->getZExtValue();
18228 } else
18229 return SDValue();
18230 }
18231
18232 SDValue Int = SearchLoopIntrinsic(Cond, CC, Imm, Negate);
18233 if (!Int)
18234 return SDValue();
18235
18236 if (Negate)
18237 CC = ISD::getSetCCInverse(CC, /* Integer inverse */ MVT::i32);
18238
18239 auto IsTrueIfZero = [](ISD::CondCode CC, int Imm) {
18240 return (CC == ISD::SETEQ && Imm == 0) ||
18241 (CC == ISD::SETNE && Imm == 1) ||
18242 (CC == ISD::SETLT && Imm == 1) ||
18243 (CC == ISD::SETULT && Imm == 1);
18244 };
18245
18246 auto IsFalseIfZero = [](ISD::CondCode CC, int Imm) {
18247 return (CC == ISD::SETEQ && Imm == 1) ||
18248 (CC == ISD::SETNE && Imm == 0) ||
18249 (CC == ISD::SETGT && Imm == 0) ||
18250 (CC == ISD::SETUGT && Imm == 0) ||
18251 (CC == ISD::SETGE && Imm == 1) ||
18252 (CC == ISD::SETUGE && Imm == 1);
18253 };
18254
18255 assert((IsTrueIfZero(CC, Imm) || IsFalseIfZero(CC, Imm)) &&
18256 "unsupported condition");
18257
18258 SDLoc dl(Int);
18259 SelectionDAG &DAG = DCI.DAG;
18260 SDValue Elements = Int.getOperand(2);
18261 unsigned IntOp = Int->getConstantOperandVal(1);
18262 assert((N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BR)
18263 && "expected single br user");
18264 SDNode *Br = *N->use_begin();
18265 SDValue OtherTarget = Br->getOperand(1);
18266
18267 // Update the unconditional branch to branch to the given Dest.
18268 auto UpdateUncondBr = [](SDNode *Br, SDValue Dest, SelectionDAG &DAG) {
18269 SDValue NewBrOps[] = { Br->getOperand(0), Dest };
18270 SDValue NewBr = DAG.getNode(ISD::BR, SDLoc(Br), MVT::Other, NewBrOps);
18271 DAG.ReplaceAllUsesOfValueWith(SDValue(Br, 0), NewBr);
18272 };
18273
18274 if (IntOp == Intrinsic::test_start_loop_iterations) {
18275 SDValue Res;
18276 SDValue Setup = DAG.getNode(ARMISD::WLSSETUP, dl, MVT::i32, Elements);
18277 // We expect this 'instruction' to branch when the counter is zero.
18278 if (IsTrueIfZero(CC, Imm)) {
18279 SDValue Ops[] = {Chain, Setup, Dest};
18280 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18281 } else {
18282 // The logic is the reverse of what we need for WLS, so find the other
18283 // basic block target: the target of the proceeding br.
18284 UpdateUncondBr(Br, Dest, DAG);
18285
18286 SDValue Ops[] = {Chain, Setup, OtherTarget};
18287 Res = DAG.getNode(ARMISD::WLS, dl, MVT::Other, Ops);
18288 }
18289 // Update LR count to the new value
18290 DAG.ReplaceAllUsesOfValueWith(Int.getValue(0), Setup);
18291 // Update chain
18292 DAG.ReplaceAllUsesOfValueWith(Int.getValue(2), Int.getOperand(0));
18293 return Res;
18294 } else {
18295 SDValue Size =
18296 DAG.getTargetConstant(Int.getConstantOperandVal(3), dl, MVT::i32);
18297 SDValue Args[] = { Int.getOperand(0), Elements, Size, };
18298 SDValue LoopDec = DAG.getNode(ARMISD::LOOP_DEC, dl,
18299 DAG.getVTList(MVT::i32, MVT::Other), Args);
18300 DAG.ReplaceAllUsesWith(Int.getNode(), LoopDec.getNode());
18301
18302 // We expect this instruction to branch when the count is not zero.
18303 SDValue Target = IsFalseIfZero(CC, Imm) ? Dest : OtherTarget;
18304
18305 // Update the unconditional branch to target the loop preheader if we've
18306 // found the condition has been reversed.
18307 if (Target == OtherTarget)
18308 UpdateUncondBr(Br, Dest, DAG);
18309
18310 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
18311 SDValue(LoopDec.getNode(), 1), Chain);
18312
18313 SDValue EndArgs[] = { Chain, SDValue(LoopDec.getNode(), 0), Target };
18314 return DAG.getNode(ARMISD::LE, dl, MVT::Other, EndArgs);
18315 }
18316 return SDValue();
18317}
18318
18319/// PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
18320SDValue
18321ARMTargetLowering::PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const {
18322 SDValue Cmp = N->getOperand(4);
18323 if (Cmp.getOpcode() != ARMISD::CMPZ)
18324 // Only looking at NE cases.
18325 return SDValue();
18326
18327 EVT VT = N->getValueType(0);
18328 SDLoc dl(N);
18329 SDValue LHS = Cmp.getOperand(0);
18330 SDValue RHS = Cmp.getOperand(1);
18331 SDValue Chain = N->getOperand(0);
18332 SDValue BB = N->getOperand(1);
18333 SDValue ARMcc = N->getOperand(2);
18334 ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18335
18336 // (brcond Chain BB ne CPSR (cmpz (and (cmov 0 1 CC CPSR Cmp) 1) 0))
18337 // -> (brcond Chain BB CC CPSR Cmp)
18338 if (CC == ARMCC::NE && LHS.getOpcode() == ISD::AND && LHS->hasOneUse() &&
18339 LHS->getOperand(0)->getOpcode() == ARMISD::CMOV &&
18340 LHS->getOperand(0)->hasOneUse() &&
18341 isNullConstant(LHS->getOperand(0)->getOperand(0)) &&
18342 isOneConstant(LHS->getOperand(0)->getOperand(1)) &&
18343 isOneConstant(LHS->getOperand(1)) && isNullConstant(RHS)) {
18344 return DAG.getNode(
18345 ARMISD::BRCOND, dl, VT, Chain, BB, LHS->getOperand(0)->getOperand(2),
18346 LHS->getOperand(0)->getOperand(3), LHS->getOperand(0)->getOperand(4));
18347 }
18348
18349 return SDValue();
18350}
18351
18352/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
18353SDValue
18354ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
18355 SDValue Cmp = N->getOperand(4);
18356 if (Cmp.getOpcode() != ARMISD::CMPZ)
18357 // Only looking at EQ and NE cases.
18358 return SDValue();
18359
18360 EVT VT = N->getValueType(0);
18361 SDLoc dl(N);
18362 SDValue LHS = Cmp.getOperand(0);
18363 SDValue RHS = Cmp.getOperand(1);
18364 SDValue FalseVal = N->getOperand(0);
18365 SDValue TrueVal = N->getOperand(1);
18366 SDValue ARMcc = N->getOperand(2);
18367 ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
18368
18369 // BFI is only available on V6T2+.
18370 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops()) {
18371 SDValue R = PerformCMOVToBFICombine(N, DAG);
18372 if (R)
18373 return R;
18374 }
18375
18376 // Simplify
18377 // mov r1, r0
18378 // cmp r1, x
18379 // mov r0, y
18380 // moveq r0, x
18381 // to
18382 // cmp r0, x
18383 // movne r0, y
18384 //
18385 // mov r1, r0
18386 // cmp r1, x
18387 // mov r0, x
18388 // movne r0, y
18389 // to
18390 // cmp r0, x
18391 // movne r0, y
18392 /// FIXME: Turn this into a target neutral optimization?
18393 SDValue Res;
18394 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
18395 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
18396 N->getOperand(3), Cmp);
18397 } else if (CC == ARMCC::EQ && TrueVal == RHS) {
18398 SDValue ARMcc;
18399 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
18400 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
18401 N->getOperand(3), NewCmp);
18402 }
18403
18404 // (cmov F T ne CPSR (cmpz (cmov 0 1 CC CPSR Cmp) 0))
18405 // -> (cmov F T CC CPSR Cmp)
18406 if (CC == ARMCC::NE && LHS.getOpcode() == ARMISD::CMOV && LHS->hasOneUse() &&
18407 isNullConstant(LHS->getOperand(0)) && isOneConstant(LHS->getOperand(1)) &&
18408 isNullConstant(RHS)) {
18409 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
18410 LHS->getOperand(2), LHS->getOperand(3),
18411 LHS->getOperand(4));
18412 }
18413
18414 if (!VT.isInteger())
18415 return SDValue();
18416
18417 // Fold away an unnecessary CMPZ/CMOV
18418 // CMOV A, B, C1, $cpsr, (CMPZ (CMOV 1, 0, C2, D), 0) ->
18419 // if C1==EQ -> CMOV A, B, C2, $cpsr, D
18420 // if C1==NE -> CMOV A, B, NOT(C2), $cpsr, D
18421 if (N->getConstantOperandVal(2) == ARMCC::EQ ||
18422 N->getConstantOperandVal(2) == ARMCC::NE) {
18423 ARMCC::CondCodes Cond;
18424 if (SDValue C = IsCMPZCSINC(N->getOperand(4).getNode(), Cond)) {
18425 if (N->getConstantOperandVal(2) == ARMCC::NE)
18426 Cond = ARMCC::getOppositeCondition(Cond);
18427 return DAG.getNode(N->getOpcode(), SDLoc(N), MVT::i32, N->getOperand(0),
18428 N->getOperand(1),
18429 DAG.getTargetConstant(Cond, SDLoc(N), MVT::i32),
18430 N->getOperand(3), C);
18431 }
18432 }
18433
18434 // Materialize a boolean comparison for integers so we can avoid branching.
18435 if (isNullConstant(FalseVal)) {
18436 if (CC == ARMCC::EQ && isOneConstant(TrueVal)) {
18437 if (!Subtarget->isThumb1Only() && Subtarget->hasV5TOps()) {
18438 // If x == y then x - y == 0 and ARM's CLZ will return 32, shifting it
18439 // right 5 bits will make that 32 be 1, otherwise it will be 0.
18440 // CMOV 0, 1, ==, (CMPZ x, y) -> SRL (CTLZ (SUB x, y)), 5
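// Editor's note (illustrative, not from the original source): e.g. x == y
// gives Sub == 0, CLZ(0) == 32 on ARM, and 32 >> 5 == 1, while any nonzero
// Sub has CLZ(Sub) <= 31, so the shift yields 0.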
18441 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18442 Res = DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::CTLZ, dl, VT, Sub),
18443 DAG.getConstant(5, dl, MVT::i32));
18444 } else {
18445 // CMOV 0, 1, ==, (CMPZ x, y) ->
18446 // (UADDO_CARRY (SUB x, y), t:0, t:1)
18447 // where t = (USUBO_CARRY 0, (SUB x, y), 0)
18448 //
18449 // The USUBO_CARRY computes 0 - (x - y) and this will give a borrow when
18450 // x != y. In other words, a carry C == 1 when x == y, C == 0
18451 // otherwise.
18452 // The final UADDO_CARRY computes
18453 // x - y + (0 - (x - y)) + C == C
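// Editor's note (illustrative, not from the original source): checking the
// wraparound with x == 5, y == 7: Sub == 0xFFFFFFFE, the USUBO borrows so
// C == 0, and 0xFFFFFFFE + 0x00000002 + 0 wraps to 0; with x == y, Sub == 0,
// there is no borrow, C == 1, and the sum is 0 + 0 + 1 == 1.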
18454 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
18455 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18456 SDValue Neg = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, Sub);
18457 // ISD::USUBO_CARRY returns a borrow but we want the carry here
18458 // actually.
18459 SDValue Carry =
18460 DAG.getNode(ISD::SUB, dl, MVT::i32,
18461 DAG.getConstant(1, dl, MVT::i32), Neg.getValue(1));
18462 Res = DAG.getNode(ISD::UADDO_CARRY, dl, VTs, Sub, Neg, Carry);
18463 }
18464 } else if (CC == ARMCC::NE && !isNullConstant(RHS) &&
18465 (!Subtarget->isThumb1Only() || isPowerOf2Constant(TrueVal))) {
18466 // This seems pointless but will allow us to combine it further below.
18467 // CMOV 0, z, !=, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18468 SDValue Sub =
18469 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18470 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18471 Sub.getValue(1), SDValue());
18472 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, TrueVal, ARMcc,
18473 N->getOperand(3), CPSRGlue.getValue(1));
18474 FalseVal = Sub;
18475 }
18476 } else if (isNullConstant(TrueVal)) {
18477 if (CC == ARMCC::EQ && !isNullConstant(RHS) &&
18478 (!Subtarget->isThumb1Only() || isPowerOf2Constant(FalseVal))) {
18479 // This seems pointless but will allow us to combine it further below
18480 // Note that we change == for != as this is the dual for the case above.
18481 // CMOV z, 0, ==, (CMPZ x, y) -> CMOV (SUBS x, y), z, !=, (SUBS x, y):1
18482 SDValue Sub =
18483 DAG.getNode(ARMISD::SUBS, dl, DAG.getVTList(VT, MVT::i32), LHS, RHS);
18484 SDValue CPSRGlue = DAG.getCopyToReg(DAG.getEntryNode(), dl, ARM::CPSR,
18485 Sub.getValue(1), SDValue());
18486 Res = DAG.getNode(ARMISD::CMOV, dl, VT, Sub, FalseVal,
18487 DAG.getConstant(ARMCC::NE, dl, MVT::i32),
18488 N->getOperand(3), CPSRGlue.getValue(1));
18489 FalseVal = Sub;
18490 }
18491 }
18492
18493 // On Thumb1, the DAG above may be further combined if z is a power of 2
18494 // (z == 2 ^ K).
18495 // CMOV (SUBS x, y), z, !=, (SUBS x, y):1 ->
18496 // t1 = (USUBO (SUB x, y), 1)
18497 // t2 = (USUBO_CARRY (SUB x, y), t1:0, t1:1)
18498 // Result = if K != 0 then (SHL t2:0, K) else t2:0
18499 //
18500 // This also handles the special case of comparing against zero; it's
18501 // essentially, the same pattern, except there's no SUBS:
18502 // CMOV x, z, !=, (CMPZ x, 0) ->
18503 // t1 = (USUBO x, 1)
18504 // t2 = (USUBO_CARRY x, t1:0, t1:1)
18505 // Result = if K != 0 then (SHL t2:0, K) else t2:0
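// Editor's note (illustrative, not from the original source): e.g. for
// CMOV x, 4, !=, (CMPZ x, 0) we have K == 2, t1 = (USUBO x, 1) and
// t2 = (USUBO_CARRY x, t1:0, t1:1); x == 0 yields t2:0 == 0 and any nonzero x
// yields t2:0 == 1, so the final SHL by 2 reproduces the select of 4 versus 0.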
18506 const APInt *TrueConst;
18507 if (Subtarget->isThumb1Only() && CC == ARMCC::NE &&
18508 ((FalseVal.getOpcode() == ARMISD::SUBS &&
18509 FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) ||
18510 (FalseVal == LHS && isNullConstant(RHS))) &&
18511 (TrueConst = isPowerOf2Constant(TrueVal))) {
18512 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
18513 unsigned ShiftAmount = TrueConst->logBase2();
18514 if (ShiftAmount)
18515 TrueVal = DAG.getConstant(1, dl, VT);
18516 SDValue Subc = DAG.getNode(ISD::USUBO, dl, VTs, FalseVal, TrueVal);
18517 Res = DAG.getNode(ISD::USUBO_CARRY, dl, VTs, FalseVal, Subc,
18518 Subc.getValue(1));
18519
18520 if (ShiftAmount)
18521 Res = DAG.getNode(ISD::SHL, dl, VT, Res,
18522 DAG.getConstant(ShiftAmount, dl, MVT::i32));
18523 }
18524
18525 if (Res.getNode()) {
18526 KnownBits Known = DAG.computeKnownBits(SDValue(N,0));
18527 // Capture demanded bits information that would be otherwise lost.
18528 if (Known.Zero == 0xfffffffe)
18529 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18530 DAG.getValueType(MVT::i1));
18531 else if (Known.Zero == 0xffffff00)
18532 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18533 DAG.getValueType(MVT::i8));
18534 else if (Known.Zero == 0xffff0000)
18535 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
18536 DAG.getValueType(MVT::i16));
18537 }
18538
18539 return Res;
18540}
18541
18542static SDValue PerformBITCASTCombine(SDNode *N,
18543 TargetLowering::DAGCombinerInfo &DCI,
18544 const ARMSubtarget *ST) {
18545 SelectionDAG &DAG = DCI.DAG;
18546 SDValue Src = N->getOperand(0);
18547 EVT DstVT = N->getValueType(0);
18548
18549 // Convert v4f32 bitcast (v4i32 vdup (i32)) -> v4f32 vdup (i32) under MVE.
18550 if (ST->hasMVEIntegerOps() && Src.getOpcode() == ARMISD::VDUP) {
18551 EVT SrcVT = Src.getValueType();
18552 if (SrcVT.getScalarSizeInBits() == DstVT.getScalarSizeInBits())
18553 return DAG.getNode(ARMISD::VDUP, SDLoc(N), DstVT, Src.getOperand(0));
18554 }
18555
18556 // We may have a bitcast of something that has already had this bitcast
18557 // combine performed on it, so skip past any VECTOR_REG_CASTs.
18558 while (Src.getOpcode() == ARMISD::VECTOR_REG_CAST)
18559 Src = Src.getOperand(0);
18560
18561 // Bitcast from element-wise VMOV or VMVN doesn't need VREV if the VREV that
18562 // would be generated is at least the width of the element type.
18563 EVT SrcVT = Src.getValueType();
18564 if ((Src.getOpcode() == ARMISD::VMOVIMM ||
18565 Src.getOpcode() == ARMISD::VMVNIMM ||
18566 Src.getOpcode() == ARMISD::VMOVFPIMM) &&
18567 SrcVT.getScalarSizeInBits() <= DstVT.getScalarSizeInBits() &&
18568 DAG.getDataLayout().isBigEndian())
18569 return DAG.getNode(ARMISD::VECTOR_REG_CAST, SDLoc(N), DstVT, Src);
18570
18571 // bitcast(extract(x, n)); bitcast(extract(x, n+1)) -> VMOVRRD x
18572 if (SDValue R = PerformExtractEltToVMOVRRD(N, DCI))
18573 return R;
18574
18575 return SDValue();
18576}
18577
18578// Some combines for the MVETrunc truncations legalizer helper. Also lowers the
18579// node into stack operations after legalizeOps.
18580static SDValue PerformMVETruncCombine(SDNode *N,
18581 TargetLowering::DAGCombinerInfo &DCI) {
18582 SelectionDAG &DAG = DCI.DAG;
18583 EVT VT = N->getValueType(0);
18584 SDLoc DL(N);
18585
18586 // MVETrunc(Undef, Undef) -> Undef
18587 if (all_of(N->ops(), [](SDValue Op) { return Op.isUndef(); }))
18588 return DAG.getUNDEF(VT);
18589
18590 // MVETrunc(MVETrunc a b, MVETrunc c, d) -> MVETrunc
18591 if (N->getNumOperands() == 2 &&
18592 N->getOperand(0).getOpcode() == ARMISD::MVETRUNC &&
18593 N->getOperand(1).getOpcode() == ARMISD::MVETRUNC)
18594 return DAG.getNode(ARMISD::MVETRUNC, DL, VT, N->getOperand(0).getOperand(0),
18595 N->getOperand(0).getOperand(1),
18596 N->getOperand(1).getOperand(0),
18597 N->getOperand(1).getOperand(1));
18598
18599 // MVETrunc(shuffle, shuffle) -> VMOVN
18600 if (N->getNumOperands() == 2 &&
18601 N->getOperand(0).getOpcode() == ISD::VECTOR_SHUFFLE &&
18602 N->getOperand(1).getOpcode() == ISD::VECTOR_SHUFFLE) {
18603 auto *S0 = cast<ShuffleVectorSDNode>(N->getOperand(0).getNode());
18604 auto *S1 = cast<ShuffleVectorSDNode>(N->getOperand(1).getNode());
18605
18606 if (S0->getOperand(0) == S1->getOperand(0) &&
18607 S0->getOperand(1) == S1->getOperand(1)) {
18608 // Construct complete shuffle mask
18609 SmallVector<int, 8> Mask(S0->getMask());
18610 Mask.append(S1->getMask().begin(), S1->getMask().end());
18611
18612 if (isVMOVNTruncMask(Mask, VT, false))
18613 return DAG.getNode(
18614 ARMISD::VMOVN, DL, VT,
18615 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18616 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18617 DAG.getConstant(1, DL, MVT::i32));
18618 if (isVMOVNTruncMask(Mask, VT, true))
18619 return DAG.getNode(
18620 ARMISD::VMOVN, DL, VT,
18621 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(1)),
18622 DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, S0->getOperand(0)),
18623 DAG.getConstant(1, DL, MVT::i32));
18624 }
18625 }
18626
18627 // For MVETrunc of a buildvector or shuffle, it can be beneficial to lower the
18628 // truncate to a buildvector to allow the generic optimisations to kick in.
18629 if (all_of(N->ops(), [](SDValue Op) {
18630 return Op.getOpcode() == ISD::BUILD_VECTOR ||
18631 Op.getOpcode() == ISD::VECTOR_SHUFFLE ||
18632 (Op.getOpcode() == ISD::BITCAST &&
18633 Op.getOperand(0).getOpcode() == ISD::BUILD_VECTOR);
18634 })) {
18635 SmallVector<SDValue, 8> Extracts;
18636 for (unsigned Op = 0; Op < N->getNumOperands(); Op++) {
18637 SDValue O = N->getOperand(Op);
18638 for (unsigned i = 0; i < O.getValueType().getVectorNumElements(); i++) {
18639 SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, O,
18640 DAG.getConstant(i, DL, MVT::i32));
18641 Extracts.push_back(Ext);
18642 }
18643 }
18644 return DAG.getBuildVector(VT, DL, Extracts);
18645 }
18646
18647 // If we are late in the legalization process and nothing has optimised
18648 // the trunc to anything better, lower it to a stack store and reload,
18649 // performing the truncation whilst keeping the lanes in the correct order:
18650 // VSTRH.32 a, stack; VSTRH.32 b, stack+8; VLDRW.32 stack;
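// Editor's note (illustrative, not from the original source): for a v8i16
// MVETrunc of two v4i32 operands, each VSTRH.32 narrows four 32-bit lanes to
// 16 bits as it stores, so lanes 0-3 land in bytes 0-7 and lanes 4-7 in bytes
// 8-15 of the slot, and the single VLDRW.32 reload then has all eight lanes
// in order.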
18651 if (!DCI.isAfterLegalizeDAG())
18652 return SDValue();
18653
18654 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18655 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18656 int NumIns = N->getNumOperands();
18657 assert((NumIns == 2 || NumIns == 4) &&
18658 "Expected 2 or 4 inputs to an MVETrunc");
18659 EVT StoreVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
18660 if (N->getNumOperands() == 4)
18661 StoreVT = StoreVT.getHalfNumVectorElementsVT(*DAG.getContext());
18662
18663 SmallVector<SDValue> Chains;
18664 for (int I = 0; I < NumIns; I++) {
18665 SDValue Ptr = DAG.getNode(
18666 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18667 DAG.getConstant(I * 16 / NumIns, DL, StackPtr.getValueType()));
18668 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18669 DAG.getMachineFunction(), SPFI, I * 16 / NumIns);
18670 SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), DL, N->getOperand(I),
18671 Ptr, MPI, StoreVT, Align(4));
18672 Chains.push_back(Ch);
18673 }
18674
18675 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18676 MachinePointerInfo MPI =
18677 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
18678 return DAG.getLoad(VT, DL, Chain, StackPtr, MPI, Align(4));
18679}
18680
18681// Take an MVEEXT(load x) and split that into (extload x, extload x+8)
18682static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N,
18683 SelectionDAG &DAG) {
18684 SDValue N0 = N->getOperand(0);
18685 LoadSDNode *LD = dyn_cast<LoadSDNode>(N0.getNode());
18686 if (!LD || !LD->isSimple() || !N0.hasOneUse() || LD->isIndexed())
18687 return SDValue();
18688
18689 EVT FromVT = LD->getMemoryVT();
18690 EVT ToVT = N->getValueType(0);
18691 if (!ToVT.isVector())
18692 return SDValue();
18693 assert(FromVT.getVectorNumElements() == ToVT.getVectorNumElements() * 2);
18694 EVT ToEltVT = ToVT.getVectorElementType();
18695 EVT FromEltVT = FromVT.getVectorElementType();
18696
18697 unsigned NumElements = 0;
18698 if (ToEltVT == MVT::i32 && (FromEltVT == MVT::i16 || FromEltVT == MVT::i8))
18699 NumElements = 4;
18700 if (ToEltVT == MVT::i16 && FromEltVT == MVT::i8)
18701 NumElements = 8;
18702 assert(NumElements != 0);
18703
18704 ISD::LoadExtType NewExtType =
18705 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
18706 if (LD->getExtensionType() != ISD::NON_EXTLOAD &&
18707 LD->getExtensionType() != ISD::EXTLOAD &&
18708 LD->getExtensionType() != NewExtType)
18709 return SDValue();
18710
18711 LLVMContext &C = *DAG.getContext();
18712 SDLoc DL(LD);
18713 // Details about the old load
18714 SDValue Ch = LD->getChain();
18715 SDValue BasePtr = LD->getBasePtr();
18716 Align Alignment = LD->getOriginalAlign();
18717 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
18718 AAMDNodes AAInfo = LD->getAAInfo();
18719
18720 SDValue Offset = DAG.getUNDEF(BasePtr.getValueType());
18721 EVT NewFromVT = EVT::getVectorVT(
18722 C, EVT::getIntegerVT(C, FromEltVT.getScalarSizeInBits()), NumElements);
18723 EVT NewToVT = EVT::getVectorVT(
18724 C, EVT::getIntegerVT(C, ToEltVT.getScalarSizeInBits()), NumElements);
18725
18726 SmallVector<SDValue, 4> Loads;
18727 SmallVector<SDValue, 4> Chains;
18728 for (unsigned i = 0; i < FromVT.getVectorNumElements() / NumElements; i++) {
18729 unsigned NewOffset = (i * NewFromVT.getSizeInBits()) / 8;
18730 SDValue NewPtr =
18731 DAG.getObjectPtrOffset(DL, BasePtr, TypeSize::getFixed(NewOffset));
18732
18733 SDValue NewLoad =
18734 DAG.getLoad(ISD::UNINDEXED, NewExtType, NewToVT, DL, Ch, NewPtr, Offset,
18735 LD->getPointerInfo().getWithOffset(NewOffset), NewFromVT,
18736 Alignment, MMOFlags, AAInfo);
18737 Loads.push_back(NewLoad);
18738 Chains.push_back(SDValue(NewLoad.getNode(), 1));
18739 }
18740
18741 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
18742 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewChain);
18743 return DAG.getMergeValues(Loads, DL);
18744}
18745
18746// Perform combines for MVEEXT. If it has not been optimized to anything better
18747// before lowering, it gets converted to a stack store and extloads performing the
18748// extend whilst still keeping the same lane ordering.
18749static SDValue PerformMVEExtCombine(SDNode *N,
18750 TargetLowering::DAGCombinerInfo &DCI) {
18751 SelectionDAG &DAG = DCI.DAG;
18752 EVT VT = N->getValueType(0);
18753 SDLoc DL(N);
18754 assert(N->getNumValues() == 2 && "Expected MVEEXT with 2 elements");
18755 assert((VT == MVT::v4i32 || VT == MVT::v8i16) && "Unexpected MVEEXT type");
18756
18757 EVT ExtVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18758 *DAG.getContext());
18759 auto Extend = [&](SDValue V) {
18760 SDValue VVT = DAG.getNode(ARMISD::VECTOR_REG_CAST, DL, VT, V);
18761 return N->getOpcode() == ARMISD::MVESEXT
18762 ? DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, VVT,
18763 DAG.getValueType(ExtVT))
18764 : DAG.getZeroExtendInReg(VVT, DL, ExtVT);
18765 };
18766
18767 // MVEEXT(VDUP) -> SIGN_EXTEND_INREG(VDUP)
18768 if (N->getOperand(0).getOpcode() == ARMISD::VDUP) {
18769 SDValue Ext = Extend(N->getOperand(0));
18770 return DAG.getMergeValues({Ext, Ext}, DL);
18771 }
18772
18773 // MVEEXT(shuffle) -> SIGN_EXTEND_INREG/ZERO_EXTEND_INREG
18774 if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(N->getOperand(0))) {
18775 ArrayRef<int> Mask = SVN->getMask();
18776 assert(Mask.size() == 2 * VT.getVectorNumElements());
18777 assert(Mask.size() == SVN->getValueType(0).getVectorNumElements());
18778 unsigned Rev = VT == MVT::v4i32 ? ARMISD::VREV32 : ARMISD::VREV16;
18779 SDValue Op0 = SVN->getOperand(0);
18780 SDValue Op1 = SVN->getOperand(1);
18781
18782 auto CheckInregMask = [&](int Start, int Offset) {
18783 for (int Idx = 0, E = VT.getVectorNumElements(); Idx < E; ++Idx)
18784 if (Mask[Start + Idx] >= 0 && Mask[Start + Idx] != Idx * 2 + Offset)
18785 return false;
18786 return true;
18787 };
18788 SDValue V0 = SDValue(N, 0);
18789 SDValue V1 = SDValue(N, 1);
18790 if (CheckInregMask(0, 0))
18791 V0 = Extend(Op0);
18792 else if (CheckInregMask(0, 1))
18793 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18794 else if (CheckInregMask(0, Mask.size()))
18795 V0 = Extend(Op1);
18796 else if (CheckInregMask(0, Mask.size() + 1))
18797 V0 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18798
18799 if (CheckInregMask(VT.getVectorNumElements(), Mask.size()))
18800 V1 = Extend(Op1);
18801 else if (CheckInregMask(VT.getVectorNumElements(), Mask.size() + 1))
18802 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op1));
18803 else if (CheckInregMask(VT.getVectorNumElements(), 0))
18804 V1 = Extend(Op0);
18805 else if (CheckInregMask(VT.getVectorNumElements(), 1))
18806 V1 = Extend(DAG.getNode(Rev, DL, SVN->getValueType(0), Op0));
18807
18808 if (V0.getNode() != N || V1.getNode() != N)
18809 return DAG.getMergeValues({V0, V1}, DL);
18810 }
18811
18812 // MVEEXT(load) -> extload, extload
18813 if (N->getOperand(0)->getOpcode() == ISD::LOAD)
18814 if (SDValue L = PerformSplittingMVEEXTToWideningLoad(N, DAG))
18815 return L;
18816
18817 if (!DCI.isAfterLegalizeDAG())
18818 return SDValue();
18819
18820 // Lower to a stack store and reload:
18821 // VSTRW.32 a, stack; VLDRH.32 stack; VLDRH.32 stack+8;
18822 SDValue StackPtr = DAG.CreateStackTemporary(TypeSize::getFixed(16), Align(4));
18823 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
18824 int NumOuts = N->getNumValues();
18825 assert((NumOuts == 2 || NumOuts == 4) &&
18826 "Expected 2 or 4 outputs to an MVEEXT");
18827 EVT LoadVT = N->getOperand(0).getValueType().getHalfNumVectorElementsVT(
18828 *DAG.getContext());
18829 if (N->getNumOperands() == 4)
18830 LoadVT = LoadVT.getHalfNumVectorElementsVT(*DAG.getContext());
18831
18832 MachinePointerInfo MPI =
18833 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
18834 SDValue Chain = DAG.getStore(DAG.getEntryNode(), DL, N->getOperand(0),
18835 StackPtr, MPI, Align(4));
18836
18837 SmallVector<SDValue> Loads;
18838 for (int I = 0; I < NumOuts; I++) {
18839 SDValue Ptr = DAG.getNode(
18840 ISD::ADD, DL, StackPtr.getValueType(), StackPtr,
18841 DAG.getConstant(I * 16 / NumOuts, DL, StackPtr.getValueType()));
18842 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(
18843 DAG.getMachineFunction(), SPFI, I * 16 / NumOuts);
18844 SDValue Load = DAG.getExtLoad(
18845 N->getOpcode() == ARMISD::MVESEXT ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL,
18846 VT, Chain, Ptr, MPI, LoadVT, Align(4));
18847 Loads.push_back(Load);
18848 }
18849
18850 return DAG.getMergeValues(Loads, DL);
18851}
18852
18853SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
18854 DAGCombinerInfo &DCI) const {
18855 switch (N->getOpcode()) {
18856 default: break;
18857 case ISD::SELECT_CC:
18858 case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
18859 case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
18860 case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
18861 case ISD::ABS: return PerformABSCombine(N, DCI, Subtarget);
18862 case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
18863 case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
18864 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
18865 case ISD::SUB: return PerformSUBCombine(N, DCI, Subtarget);
18866 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
18867 case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
18868 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget);
18869 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget);
18870 case ISD::BRCOND:
18871 case ISD::BR_CC: return PerformHWLoopCombine(N, DCI, Subtarget);
18872 case ARMISD::ADDC:
18873 case ARMISD::SUBC: return PerformAddcSubcCombine(N, DCI, Subtarget);
18874 case ARMISD::SUBE: return PerformAddeSubeCombine(N, DCI, Subtarget);
18875 case ARMISD::BFI: return PerformBFICombine(N, DCI.DAG);
18876 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI, Subtarget);
18877 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
18878 case ARMISD::VMOVhr: return PerformVMOVhrCombine(N, DCI);
18879 case ARMISD::VMOVrh: return PerformVMOVrhCombine(N, DCI.DAG);
18880 case ISD::STORE: return PerformSTORECombine(N, DCI, Subtarget);
18881 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI, Subtarget);
18883 case ISD::EXTRACT_VECTOR_ELT:
18884 return PerformExtractEltCombine(N, DCI, Subtarget);
18888 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget);
18889 case ARMISD::VDUP: return PerformVDUPCombine(N, DCI.DAG, Subtarget);
18890 case ISD::FP_TO_SINT:
18891 case ISD::FP_TO_UINT:
18892 return PerformVCVTCombine(N, DCI.DAG, Subtarget);
18893 case ISD::FADD:
18894 return PerformFADDCombine(N, DCI.DAG, Subtarget);
18895 case ISD::FDIV:
18896 return PerformVDIVCombine(N, DCI.DAG, Subtarget);
18897 case ISD::INTRINSIC_WO_CHAIN:
18898 return PerformIntrinsicCombine(N, DCI);
18899 case ISD::SHL:
18900 case ISD::SRA:
18901 case ISD::SRL:
18902 return PerformShiftCombine(N, DCI, Subtarget);
18903 case ISD::SIGN_EXTEND:
18904 case ISD::ZERO_EXTEND:
18905 case ISD::ANY_EXTEND:
18906 return PerformExtendCombine(N, DCI.DAG, Subtarget);
18907 case ISD::FP_EXTEND:
18908 return PerformFPExtendCombine(N, DCI.DAG, Subtarget);
18909 case ISD::SMIN:
18910 case ISD::UMIN:
18911 case ISD::SMAX:
18912 case ISD::UMAX:
18913 return PerformMinMaxCombine(N, DCI.DAG, Subtarget);
18914 case ARMISD::CMOV:
18915 return PerformCMOVCombine(N, DCI.DAG);
18916 case ARMISD::BRCOND:
18917 return PerformBRCONDCombine(N, DCI.DAG);
18918 case ARMISD::CMPZ:
18919 return PerformCMPZCombine(N, DCI.DAG);
18920 case ARMISD::CSINC:
18921 case ARMISD::CSINV:
18922 case ARMISD::CSNEG:
18923 return PerformCSETCombine(N, DCI.DAG);
18924 case ISD::LOAD:
18925 return PerformLOADCombine(N, DCI, Subtarget);
18926 case ARMISD::VLD1DUP:
18927 case ARMISD::VLD2DUP:
18928 case ARMISD::VLD3DUP:
18929 case ARMISD::VLD4DUP:
18930 return PerformVLDCombine(N, DCI);
18931 case ARMISD::BUILD_VECTOR:
18932 return PerformARMBUILD_VECTORCombine(N, DCI);
18933 case ISD::BITCAST:
18934 return PerformBITCASTCombine(N, DCI, Subtarget);
18935 case ARMISD::PREDICATE_CAST:
18936 return PerformPREDICATE_CASTCombine(N, DCI);
18937 case ARMISD::VECTOR_REG_CAST:
18938 return PerformVECTOR_REG_CASTCombine(N, DCI.DAG, Subtarget);
18939 case ARMISD::MVETRUNC:
18940 return PerformMVETruncCombine(N, DCI);
18941 case ARMISD::MVESEXT:
18942 case ARMISD::MVEZEXT:
18943 return PerformMVEExtCombine(N, DCI);
18944 case ARMISD::VCMP:
18945 return PerformVCMPCombine(N, DCI.DAG, Subtarget);
18946 case ISD::VECREDUCE_ADD:
18947 return PerformVECREDUCE_ADDCombine(N, DCI.DAG, Subtarget);
18948 case ARMISD::VADDVs:
18949 case ARMISD::VADDVu:
18950 case ARMISD::VADDLVs:
18951 case ARMISD::VADDLVu:
18952 case ARMISD::VADDLVAs:
18953 case ARMISD::VADDLVAu:
18954 case ARMISD::VMLAVs:
18955 case ARMISD::VMLAVu:
18956 case ARMISD::VMLALVs:
18957 case ARMISD::VMLALVu:
18958 case ARMISD::VMLALVAs:
18959 case ARMISD::VMLALVAu:
18960 return PerformReduceShuffleCombine(N, DCI.DAG);
18961 case ARMISD::VMOVN:
18962 return PerformVMOVNCombine(N, DCI);
18963 case ARMISD::VQMOVNs:
18964 case ARMISD::VQMOVNu:
18965 return PerformVQMOVNCombine(N, DCI);
18966 case ARMISD::VQDMULH:
18967 return PerformVQDMULHCombine(N, DCI);
18968 case ARMISD::ASRL:
18969 case ARMISD::LSRL:
18970 case ARMISD::LSLL:
18971 return PerformLongShiftCombine(N, DCI.DAG);
18972 case ARMISD::SMULWB: {
18973 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18974 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18975 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18976 return SDValue();
18977 break;
18978 }
18979 case ARMISD::SMULWT: {
18980 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18981 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
18982 if (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI))
18983 return SDValue();
18984 break;
18985 }
18986 case ARMISD::SMLALBB:
18987 case ARMISD::QADD16b:
18988 case ARMISD::QSUB16b:
18989 case ARMISD::UQADD16b:
18990 case ARMISD::UQSUB16b: {
18991 unsigned BitWidth = N->getValueType(0).getSizeInBits();
18992 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
18993 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
18994 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
18995 return SDValue();
18996 break;
18997 }
18998 case ARMISD::SMLALBT: {
18999 unsigned LowWidth = N->getOperand(0).getValueType().getSizeInBits();
19000 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19001 unsigned HighWidth = N->getOperand(1).getValueType().getSizeInBits();
19002 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19003 if ((SimplifyDemandedBits(N->getOperand(0), LowMask, DCI)) ||
19004 (SimplifyDemandedBits(N->getOperand(1), HighMask, DCI)))
19005 return SDValue();
19006 break;
19007 }
19008 case ARMISD::SMLALTB: {
19009 unsigned HighWidth = N->getOperand(0).getValueType().getSizeInBits();
19010 APInt HighMask = APInt::getHighBitsSet(HighWidth, 16);
19011 unsigned LowWidth = N->getOperand(1).getValueType().getSizeInBits();
19012 APInt LowMask = APInt::getLowBitsSet(LowWidth, 16);
19013 if ((SimplifyDemandedBits(N->getOperand(0), HighMask, DCI)) ||
19014 (SimplifyDemandedBits(N->getOperand(1), LowMask, DCI)))
19015 return SDValue();
19016 break;
19017 }
19018 case ARMISD::SMLALTT: {
19019 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19020 APInt DemandedMask = APInt::getHighBitsSet(BitWidth, 16);
19021 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19022 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19023 return SDValue();
19024 break;
19025 }
19026 case ARMISD::QADD8b:
19027 case ARMISD::QSUB8b:
19028 case ARMISD::UQADD8b:
19029 case ARMISD::UQSUB8b: {
19030 unsigned BitWidth = N->getValueType(0).getSizeInBits();
19031 APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 8);
19032 if ((SimplifyDemandedBits(N->getOperand(0), DemandedMask, DCI)) ||
19033 (SimplifyDemandedBits(N->getOperand(1), DemandedMask, DCI)))
19034 return SDValue();
19035 break;
19036 }
19037 case ISD::INTRINSIC_VOID:
19038 case ISD::INTRINSIC_W_CHAIN:
19039 switch (N->getConstantOperandVal(1)) {
19040 case Intrinsic::arm_neon_vld1:
19041 case Intrinsic::arm_neon_vld1x2:
19042 case Intrinsic::arm_neon_vld1x3:
19043 case Intrinsic::arm_neon_vld1x4:
19044 case Intrinsic::arm_neon_vld2:
19045 case Intrinsic::arm_neon_vld3:
19046 case Intrinsic::arm_neon_vld4:
19047 case Intrinsic::arm_neon_vld2lane:
19048 case Intrinsic::arm_neon_vld3lane:
19049 case Intrinsic::arm_neon_vld4lane:
19050 case Intrinsic::arm_neon_vld2dup:
19051 case Intrinsic::arm_neon_vld3dup:
19052 case Intrinsic::arm_neon_vld4dup:
19053 case Intrinsic::arm_neon_vst1:
19054 case Intrinsic::arm_neon_vst1x2:
19055 case Intrinsic::arm_neon_vst1x3:
19056 case Intrinsic::arm_neon_vst1x4:
19057 case Intrinsic::arm_neon_vst2:
19058 case Intrinsic::arm_neon_vst3:
19059 case Intrinsic::arm_neon_vst4:
19060 case Intrinsic::arm_neon_vst2lane:
19061 case Intrinsic::arm_neon_vst3lane:
19062 case Intrinsic::arm_neon_vst4lane:
19063 return PerformVLDCombine(N, DCI);
19064 case Intrinsic::arm_mve_vld2q:
19065 case Intrinsic::arm_mve_vld4q:
19066 case Intrinsic::arm_mve_vst2q:
19067 case Intrinsic::arm_mve_vst4q:
19068 return PerformMVEVLDCombine(N, DCI);
19069 default: break;
19070 }
19071 break;
19072 }
19073 return SDValue();
19074}
19075
19076bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
19077 EVT VT) const {
19078 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
19079}
19080
19081bool ARMTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned,
19082 Align Alignment,
19083 MachineMemOperand::Flags,
19084 unsigned *Fast) const {
19085 // Depends what it gets converted into if the type is weird.
19086 if (!VT.isSimple())
19087 return false;
19088
19089 // The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
19090 bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
19091 auto Ty = VT.getSimpleVT().SimpleTy;
19092
19093 if (Ty == MVT::i8 || Ty == MVT::i16 || Ty == MVT::i32) {
19094 // Unaligned access can use (for example) LDRB, LDRH, LDR
19095 if (AllowsUnaligned) {
19096 if (Fast)
19097 *Fast = Subtarget->hasV7Ops();
19098 return true;
19099 }
19100 }
19101
19102 if (Ty == MVT::f64 || Ty == MVT::v2f64) {
19103 // For any little-endian targets with neon, we can support unaligned ld/st
19104 // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
19105 // A big-endian target may also explicitly support unaligned accesses
19106 if (Subtarget->hasNEON() && (AllowsUnaligned || Subtarget->isLittle())) {
19107 if (Fast)
19108 *Fast = 1;
19109 return true;
19110 }
19111 }
19112
19113 if (!Subtarget->hasMVEIntegerOps())
19114 return false;
19115
19116 // These are for predicates
19117 if ((Ty == MVT::v16i1 || Ty == MVT::v8i1 || Ty == MVT::v4i1 ||
19118 Ty == MVT::v2i1)) {
19119 if (Fast)
19120 *Fast = 1;
19121 return true;
19122 }
19123
19124 // These are for truncated stores/narrowing loads. They are fine so long as
19125 // the alignment is at least the size of the item being loaded
19126 if ((Ty == MVT::v4i8 || Ty == MVT::v8i8 || Ty == MVT::v4i16) &&
19127 Alignment >= VT.getScalarSizeInBits() / 8) {
19128 if (Fast)
19129 *Fast = true;
19130 return true;
19131 }
19132
19133 // In little-endian MVE, the store instructions VSTRB.U8, VSTRH.U16 and
19134 // VSTRW.U32 all store the vector register in exactly the same format, and
19135 // differ only in the range of their immediate offset field and the required
19136 // alignment. So there is always a store that can be used, regardless of
19137 // actual type.
19138 //
19139 // For big endian, that is not the case. But we can still emit a (VSTRB.U8;
19140 // VREV64.8) pair and get the same effect. This will likely be better than
19141 // aligning the vector through the stack.
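// Editor's note (illustrative, not from the original source): in little-endian
// mode, for example, a v8i16 register written with VSTRB.U8 at alignment 1
// leaves exactly the same bytes in memory as VSTRH.U16 would at alignment 2,
// so the byte-sized store can stand in whenever the natural alignment is not
// available.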
19142 if (Ty == MVT::v16i8 || Ty == MVT::v8i16 || Ty == MVT::v8f16 ||
19143 Ty == MVT::v4i32 || Ty == MVT::v4f32 || Ty == MVT::v2i64 ||
19144 Ty == MVT::v2f64) {
19145 if (Fast)
19146 *Fast = 1;
19147 return true;
19148 }
19149
19150 return false;
19151}
19152
19153
19154EVT ARMTargetLowering::getOptimalMemOpType(
19155 const MemOp &Op, const AttributeList &FuncAttributes) const {
19156 // See if we can use NEON instructions for this...
19157 if ((Op.isMemcpy() || Op.isZeroMemset()) && Subtarget->hasNEON() &&
19158 !FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
19159 unsigned Fast;
19160 if (Op.size() >= 16 &&
19161 (Op.isAligned(Align(16)) ||
19162 (allowsMisalignedMemoryAccesses(MVT::v2f64, 0, Align(1),
19163 MachineMemOperand::MONone, &Fast) &&
19164 Fast))) {
19165 return MVT::v2f64;
19166 } else if (Op.size() >= 8 &&
19167 (Op.isAligned(Align(8)) ||
19168 (allowsMisalignedMemoryAccesses(
19169 MVT::f64, 0, Align(1), MachineMemOperand::MONone, &Fast) &&
19170 Fast))) {
19171 return MVT::f64;
19172 }
19173 }
19174
19175 // Let the target-independent logic figure it out.
19176 return MVT::Other;
19177}
19178
19179// 64-bit integers are split into their high and low parts and held in two
19180// different registers, so the trunc is free since the low register can just
19181// be used.
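// Editor's note (illustrative, not from the original source): an i64 value
// held in a register pair such as r0/r1 is truncated to i32 simply by using
// the low register r0, so no instruction is needed.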
19182bool ARMTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
19183 if (!SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
19184 return false;
19185 unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
19186 unsigned DestBits = DstTy->getPrimitiveSizeInBits();
19187 return (SrcBits == 64 && DestBits == 32);
19188}
19189
19190bool ARMTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
19191 if (SrcVT.isVector() || DstVT.isVector() || !SrcVT.isInteger() ||
19192 !DstVT.isInteger())
19193 return false;
19194 unsigned SrcBits = SrcVT.getSizeInBits();
19195 unsigned DestBits = DstVT.getSizeInBits();
19196 return (SrcBits == 64 && DestBits == 32);
19197}
19198
19199bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
19200 if (Val.getOpcode() != ISD::LOAD)
19201 return false;
19202
19203 EVT VT1 = Val.getValueType();
19204 if (!VT1.isSimple() || !VT1.isInteger() ||
19205 !VT2.isSimple() || !VT2.isInteger())
19206 return false;
19207
19208 switch (VT1.getSimpleVT().SimpleTy) {
19209 default: break;
19210 case MVT::i1:
19211 case MVT::i8:
19212 case MVT::i16:
19213 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits.
19214 return true;
19215 }
19216
19217 return false;
19218}
19219
19220bool ARMTargetLowering::isFNegFree(EVT VT) const {
19221 if (!VT.isSimple())
19222 return false;
19223
19224 // There are quite a few FP16 instructions (e.g. VNMLA, VNMLS, etc.) that
19225 // negate values directly (fneg is free). So, we don't want to let the DAG
19226 // combiner rewrite fneg into xors and some other instructions. For f16 and
19227 // FullFP16 argument passing, some bitcast nodes may be introduced,
19228 // triggering this DAG combine rewrite, so we are avoiding that with this.
19229 switch (VT.getSimpleVT().SimpleTy) {
19230 default: break;
19231 case MVT::f16:
19232 return Subtarget->hasFullFP16();
19233 }
19234
19235 return false;
19236}
19237
19238/// Check if Ext1 and Ext2 are extends of the same type, doubling the bitwidth
19239/// of the vector elements.
19240static bool areExtractExts(Value *Ext1, Value *Ext2) {
19241 auto areExtDoubled = [](Instruction *Ext) {
19242 return Ext->getType()->getScalarSizeInBits() ==
19243 2 * Ext->getOperand(0)->getType()->getScalarSizeInBits();
19244 };
19245
19246 if (!match(Ext1, m_ZExtOrSExt(m_Value())) ||
19247 !match(Ext2, m_ZExtOrSExt(m_Value())) ||
19248 !areExtDoubled(cast<Instruction>(Ext1)) ||
19249 !areExtDoubled(cast<Instruction>(Ext2)))
19250 return false;
19251
19252 return true;
19253}
19254
19255/// Check if sinking \p I's operands to I's basic block is profitable, because
19256/// the operands can be folded into a target instruction, e.g.
19257/// sext/zext can be folded into vsubl.
19258bool ARMTargetLowering::shouldSinkOperands(Instruction *I,
19259 SmallVectorImpl<Use *> &Ops) const {
19260 if (!I->getType()->isVectorTy())
19261 return false;
19262
19263 if (Subtarget->hasNEON()) {
19264 switch (I->getOpcode()) {
19265 case Instruction::Sub:
19266 case Instruction::Add: {
19267 if (!areExtractExts(I->getOperand(0), I->getOperand(1)))
19268 return false;
19269 Ops.push_back(&I->getOperandUse(0));
19270 Ops.push_back(&I->getOperandUse(1));
19271 return true;
19272 }
19273 default:
19274 return false;
19275 }
19276 }
19277
19278 if (!Subtarget->hasMVEIntegerOps())
19279 return false;
19280
19281 auto IsFMSMul = [&](Instruction *I) {
19282 if (!I->hasOneUse())
19283 return false;
19284 auto *Sub = cast<Instruction>(*I->users().begin());
19285 return Sub->getOpcode() == Instruction::FSub && Sub->getOperand(1) == I;
19286 };
19287 auto IsFMS = [&](Instruction *I) {
19288 if (match(I->getOperand(0), m_FNeg(m_Value())) ||
19289 match(I->getOperand(1), m_FNeg(m_Value())))
19290 return true;
19291 return false;
19292 };
19293
19294 auto IsSinker = [&](Instruction *I, int Operand) {
19295 switch (I->getOpcode()) {
19296 case Instruction::Add:
19297 case Instruction::Mul:
19298 case Instruction::FAdd:
19299 case Instruction::ICmp:
19300 case Instruction::FCmp:
19301 return true;
19302 case Instruction::FMul:
19303 return !IsFMSMul(I);
19304 case Instruction::Sub:
19305 case Instruction::FSub:
19306 case Instruction::Shl:
19307 case Instruction::LShr:
19308 case Instruction::AShr:
19309 return Operand == 1;
19310 case Instruction::Call:
19311 if (auto *II = dyn_cast<IntrinsicInst>(I)) {
19312 switch (II->getIntrinsicID()) {
19313 case Intrinsic::fma:
19314 return !IsFMS(I);
19315 case Intrinsic::sadd_sat:
19316 case Intrinsic::uadd_sat:
19317 case Intrinsic::arm_mve_add_predicated:
19318 case Intrinsic::arm_mve_mul_predicated:
19319 case Intrinsic::arm_mve_qadd_predicated:
19320 case Intrinsic::arm_mve_vhadd:
19321 case Intrinsic::arm_mve_hadd_predicated:
19322 case Intrinsic::arm_mve_vqdmull:
19323 case Intrinsic::arm_mve_vqdmull_predicated:
19324 case Intrinsic::arm_mve_vqdmulh:
19325 case Intrinsic::arm_mve_qdmulh_predicated:
19326 case Intrinsic::arm_mve_vqrdmulh:
19327 case Intrinsic::arm_mve_qrdmulh_predicated:
19328 case Intrinsic::arm_mve_fma_predicated:
19329 return true;
19330 case Intrinsic::ssub_sat:
19331 case Intrinsic::usub_sat:
19332 case Intrinsic::arm_mve_sub_predicated:
19333 case Intrinsic::arm_mve_qsub_predicated:
19334 case Intrinsic::arm_mve_hsub_predicated:
19335 case Intrinsic::arm_mve_vhsub:
19336 return Operand == 1;
19337 default:
19338 return false;
19339 }
19340 }
19341 return false;
19342 default:
19343 return false;
19344 }
19345 };
19346
19347 for (auto OpIdx : enumerate(I->operands())) {
19348 Instruction *Op = dyn_cast<Instruction>(OpIdx.value().get());
19349 // Make sure we are not already sinking this operand
19350 if (!Op || any_of(Ops, [&](Use *U) { return U->get() == Op; }))
19351 continue;
19352
19353 Instruction *Shuffle = Op;
19354 if (Shuffle->getOpcode() == Instruction::BitCast)
19355 Shuffle = dyn_cast<Instruction>(Shuffle->getOperand(0));
19356 // We are looking for a splat that can be sunk.
19357 if (!Shuffle ||
19358 !match(Shuffle, m_Shuffle(
19359 m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
19360 m_Undef(), m_ZeroMask())))
19361 continue;
19362 if (!IsSinker(I, OpIdx.index()))
19363 continue;
19364
19365 // All uses of the shuffle should be sunk to avoid duplicating it across gpr
19366 // and vector registers
19367 for (Use &U : Op->uses()) {
19368 Instruction *Insn = cast<Instruction>(U.getUser());
19369 if (!IsSinker(Insn, U.getOperandNo()))
19370 return false;
19371 }
19372
19373 Ops.push_back(&Shuffle->getOperandUse(0));
19374 if (Shuffle != Op)
19375 Ops.push_back(&Op->getOperandUse(0));
19376 Ops.push_back(&OpIdx.value());
19377 }
19378 return true;
19379}
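// Illustrative case for shouldSinkOperands (value names assumed): a scalar
// splat defined in another block is sunk next to its vector user so the
// backend can fold it into the instruction's scalar-operand form:
//   %ins = insertelement <4 x i32> undef, i32 %s, i32 0
//   %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
//   ...
//   %m = mul <4 x i32> %v, %splat   ; e.g. selectable as MVE vmul.i32 q, q, r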
19380
19381 Type *ARMTargetLowering::shouldConvertSplatType(ShuffleVectorInst *SVI) const {
19382 if (!Subtarget->hasMVEIntegerOps())
19383 return nullptr;
19384 Type *SVIType = SVI->getType();
19385 Type *ScalarType = SVIType->getScalarType();
19386
19387 if (ScalarType->isFloatTy())
19388 return Type::getInt32Ty(SVIType->getContext());
19389 if (ScalarType->isHalfTy())
19390 return Type::getInt16Ty(SVIType->getContext());
19391 return nullptr;
19392}
19393
19394 bool ARMTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
19395 EVT VT = ExtVal.getValueType();
19396
19397 if (!isTypeLegal(VT))
19398 return false;
19399
19400 if (auto *Ld = dyn_cast<MaskedLoadSDNode>(ExtVal.getOperand(0))) {
19401 if (Ld->isExpandingLoad())
19402 return false;
19403 }
19404
19405 if (Subtarget->hasMVEIntegerOps())
19406 return true;
19407
19408 // Don't create a loadext if we can fold the extension into a wide/long
19409 // instruction.
19410 // If there's more than one user instruction, the loadext is desirable no
19411 // matter what. There can be two uses by the same instruction.
19412 if (ExtVal->use_empty() ||
19413 !ExtVal->use_begin()->isOnlyUserOf(ExtVal.getNode()))
19414 return true;
19415
19416 SDNode *U = *ExtVal->use_begin();
19417 if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB ||
19418 U->getOpcode() == ISD::SHL || U->getOpcode() == ARMISD::VSHLIMM))
19419 return false;
19420
19421 return true;
19422}
19423
19424 bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
19425 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
19426 return false;
19427
19428 if (!isTypeLegal(EVT::getEVT(Ty1)))
19429 return false;
19430
19431 assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop");
19432
19433 // Assuming the caller doesn't have a zeroext or signext return parameter,
19434 // truncation all the way down to i1 is valid.
19435 return true;
19436}
19437
19438/// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
19439/// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
19440/// expanded to FMAs when this method returns true, otherwise fmuladd is
19441/// expanded to fmul + fadd.
19442///
19443/// ARM supports both fused and unfused multiply-add operations; we already
19444/// lower a pair of fmul and fadd to the latter so it's not clear that there
19445/// would be a gain or that the gain would be worthwhile enough to risk
19446/// correctness bugs.
19447///
19448/// For MVE, we set this to true as it helps simplify the need for some
19449/// patterns (and we don't have the non-fused floating point instruction).
19450bool ARMTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
19451 EVT VT) const {
19452 if (!VT.isSimple())
19453 return false;
19454
19455 switch (VT.getSimpleVT().SimpleTy) {
19456 case MVT::v4f32:
19457 case MVT::v8f16:
19458 return Subtarget->hasMVEFloatOps();
19459 case MVT::f16:
19460 return Subtarget->useFPVFMx16();
19461 case MVT::f32:
19462 return Subtarget->useFPVFMx();
19463 case MVT::f64:
19464 return Subtarget->useFPVFMx64();
19465 default:
19466 break;
19467 }
19468
19469 return false;
19470}
19471
19472static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
19473 if (V < 0)
19474 return false;
19475
19476 unsigned Scale = 1;
19477 switch (VT.getSimpleVT().SimpleTy) {
19478 case MVT::i1:
19479 case MVT::i8:
19480 // Scale == 1;
19481 break;
19482 case MVT::i16:
19483 // Scale == 2;
19484 Scale = 2;
19485 break;
19486 default:
19487 // On thumb1 we load most things (i32, i64, floats, etc) with a LDR
19488 // Scale == 4;
19489 Scale = 4;
19490 break;
19491 }
19492
19493 if ((V & (Scale - 1)) != 0)
19494 return false;
19495 return isUInt<5>(V / Scale);
19496}
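// Rough Thumb1 offset ranges implied by isLegalT1AddressImmediate (a 5-bit
// immediate scaled by the access size): byte accesses 0..31, halfword
// accesses 0..62 (even), word and larger accesses 0..124 (multiple of 4).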
19497
19498static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
19499 const ARMSubtarget *Subtarget) {
19500 if (!VT.isInteger() && !VT.isFloatingPoint())
19501 return false;
19502 if (VT.isVector() && Subtarget->hasNEON())
19503 return false;
19504 if (VT.isVector() && VT.isFloatingPoint() && Subtarget->hasMVEIntegerOps() &&
19505 !Subtarget->hasMVEFloatOps())
19506 return false;
19507
19508 bool IsNeg = false;
19509 if (V < 0) {
19510 IsNeg = true;
19511 V = -V;
19512 }
19513
19514 unsigned NumBytes = std::max((unsigned)VT.getSizeInBits() / 8, 1U);
19515
19516 // MVE: size * imm7
19517 if (VT.isVector() && Subtarget->hasMVEIntegerOps()) {
19518 switch (VT.getSimpleVT().getVectorElementType().SimpleTy) {
19519 case MVT::i32:
19520 case MVT::f32:
19521 return isShiftedUInt<7,2>(V);
19522 case MVT::i16:
19523 case MVT::f16:
19524 return isShiftedUInt<7,1>(V);
19525 case MVT::i8:
19526 return isUInt<7>(V);
19527 default:
19528 return false;
19529 }
19530 }
19531
19532 // half VLDR: 2 * imm8
19533 if (VT.isFloatingPoint() && NumBytes == 2 && Subtarget->hasFPRegs16())
19534 return isShiftedUInt<8, 1>(V);
19535 // VLDR and LDRD: 4 * imm8
19536 if ((VT.isFloatingPoint() && Subtarget->hasVFP2Base()) || NumBytes == 8)
19537 return isShiftedUInt<8, 2>(V);
19538
19539 if (NumBytes == 1 || NumBytes == 2 || NumBytes == 4) {
19540 // + imm12 or - imm8
19541 if (IsNeg)
19542 return isUInt<8>(V);
19543 return isUInt<12>(V);
19544 }
19545
19546 return false;
19547}
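// Examples of offsets accepted above: an MVE v4i32 access allows multiples of
// 4 up to +/-508 (a 7-bit immediate scaled by 4), while a scalar i32 access
// allows 0..4095 when positive but only down to -255 when negative.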
19548
19549/// isLegalAddressImmediate - Return true if the integer value can be used
19550/// as the offset of the target addressing mode for load / store of the
19551/// given type.
19552static bool isLegalAddressImmediate(int64_t V, EVT VT,
19553 const ARMSubtarget *Subtarget) {
19554 if (V == 0)
19555 return true;
19556
19557 if (!VT.isSimple())
19558 return false;
19559
19560 if (Subtarget->isThumb1Only())
19561 return isLegalT1AddressImmediate(V, VT);
19562 else if (Subtarget->isThumb2())
19563 return isLegalT2AddressImmediate(V, VT, Subtarget);
19564
19565 // ARM mode.
19566 if (V < 0)
19567 V = - V;
19568 switch (VT.getSimpleVT().SimpleTy) {
19569 default: return false;
19570 case MVT::i1:
19571 case MVT::i8:
19572 case MVT::i32:
19573 // +- imm12
19574 return isUInt<12>(V);
19575 case MVT::i16:
19576 // +- imm8
19577 return isUInt<8>(V);
19578 case MVT::f32:
19579 case MVT::f64:
19580 if (!Subtarget->hasVFP2Base()) // FIXME: NEON?
19581 return false;
19582 return isShiftedUInt<8, 2>(V);
19583 }
19584}
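// In ARM mode this means, for example: +/-4095 for i8/i32 (imm12), +/-255 for
// i16 (imm8), and multiples of 4 up to +/-1020 for f32/f64 with VFP.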
19585
19586 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
19587 EVT VT) const {
19588 int Scale = AM.Scale;
19589 if (Scale < 0)
19590 return false;
19591
19592 switch (VT.getSimpleVT().SimpleTy) {
19593 default: return false;
19594 case MVT::i1:
19595 case MVT::i8:
19596 case MVT::i16:
19597 case MVT::i32:
19598 if (Scale == 1)
19599 return true;
19600 // r + r << imm
19601 Scale = Scale & ~1;
19602 return Scale == 2 || Scale == 4 || Scale == 8;
19603 case MVT::i64:
19604 // FIXME: What are we trying to model here? ldrd doesn't have an r + r
19605 // version in Thumb mode.
19606 // r + r
19607 if (Scale == 1)
19608 return true;
19609 // r * 2 (this can be lowered to r + r).
19610 if (!AM.HasBaseReg && Scale == 2)
19611 return true;
19612 return false;
19613 case MVT::isVoid:
19614 // Note, we allow "void" uses (basically, uses that aren't loads or
19615 // stores), because arm allows folding a scale into many arithmetic
19616 // operations. This should be made more precise and revisited later.
19617
19618 // Allow r << imm, but the imm has to be a multiple of two.
19619 if (Scale & 1) return false;
19620 return isPowerOf2_32(Scale);
19621 }
19622}
19623
19624 bool ARMTargetLowering::isLegalT1ScaledAddressingMode(const AddrMode &AM,
19625 EVT VT) const {
19626 const int Scale = AM.Scale;
19627
19628 // Negative scales are not supported in Thumb1.
19629 if (Scale < 0)
19630 return false;
19631
19632 // Thumb1 addressing modes do not support register scaling excepting the
19633 // following cases:
19634 // 1. Scale == 1 means no scaling.
19635 // 2. Scale == 2 this can be lowered to r + r if there is no base register.
19636 return (Scale == 1) || (!AM.HasBaseReg && Scale == 2);
19637}
19638
19639/// isLegalAddressingMode - Return true if the addressing mode represented
19640/// by AM is legal for this target, for a load/store of the specified type.
19641 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
19642 const AddrMode &AM, Type *Ty,
19643 unsigned AS, Instruction *I) const {
19644 EVT VT = getValueType(DL, Ty, true);
19645 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
19646 return false;
19647
19648 // Can never fold addr of global into load/store.
19649 if (AM.BaseGV)
19650 return false;
19651
19652 switch (AM.Scale) {
19653 case 0: // no scale reg, must be "r+i" or "r", or "i".
19654 break;
19655 default:
19656 // ARM doesn't support any R+R*scale+imm addr modes.
19657 if (AM.BaseOffs)
19658 return false;
19659
19660 if (!VT.isSimple())
19661 return false;
19662
19663 if (Subtarget->isThumb1Only())
19664 return isLegalT1ScaledAddressingMode(AM, VT);
19665
19666 if (Subtarget->isThumb2())
19667 return isLegalT2ScaledAddressingMode(AM, VT);
19668
19669 int Scale = AM.Scale;
19670 switch (VT.getSimpleVT().SimpleTy) {
19671 default: return false;
19672 case MVT::i1:
19673 case MVT::i8:
19674 case MVT::i32:
19675 if (Scale < 0) Scale = -Scale;
19676 if (Scale == 1)
19677 return true;
19678 // r + r << imm
19679 return isPowerOf2_32(Scale & ~1);
19680 case MVT::i16:
19681 case MVT::i64:
19682 // r +/- r
19683 if (Scale == 1 || (AM.HasBaseReg && Scale == -1))
19684 return true;
19685 // r * 2 (this can be lowered to r + r).
19686 if (!AM.HasBaseReg && Scale == 2)
19687 return true;
19688 return false;
19689
19690 case MVT::isVoid:
19691 // Note, we allow "void" uses (basically, uses that aren't loads or
19692 // stores), because arm allows folding a scale into many arithmetic
19693 // operations. This should be made more precise and revisited later.
19694
19695 // Allow r << imm, but the imm has to be a multiple of two.
19696 if (Scale & 1) return false;
19697 return isPowerOf2_32(Scale);
19698 }
19699 }
19700 return true;
19701}
19702
19703/// isLegalICmpImmediate - Return true if the specified immediate is legal
19704/// icmp immediate, that is the target has icmp instructions which can compare
19705/// a register against the immediate without having to materialize the
19706/// immediate into a register.
19707 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
19708 // Thumb2 and ARM modes can use cmn for negative immediates.
19709 if (!Subtarget->isThumb())
19710 return ARM_AM::getSOImmVal((uint32_t)Imm) != -1 ||
19711 ARM_AM::getSOImmVal(-(uint32_t)Imm) != -1;
19712 if (Subtarget->isThumb2())
19713 return ARM_AM::getT2SOImmVal((uint32_t)Imm) != -1 ||
19714 ARM_AM::getT2SOImmVal(-(uint32_t)Imm) != -1;
19715 // Thumb1 doesn't have cmn, and only 8-bit immediates.
19716 return Imm >= 0 && Imm <= 255;
19717}
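// For example, on ARM and Thumb2 a compare against -10 is still "legal" here
// because it can be selected as "cmn rN, #10"; Thumb1 has no immediate CMN,
// so only 0..255 is accepted there.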
19718
19719/// isLegalAddImmediate - Return true if the specified immediate is a legal add
19720/// *or sub* immediate, that is the target has add or sub instructions which can
19721/// add a register with the immediate without having to materialize the
19722/// immediate into a register.
19723 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
19724 // Same encoding for add/sub, just flip the sign.
19725 int64_t AbsImm = std::abs(Imm);
19726 if (!Subtarget->isThumb())
19727 return ARM_AM::getSOImmVal(AbsImm) != -1;
19728 if (Subtarget->isThumb2())
19729 return ARM_AM::getT2SOImmVal(AbsImm) != -1;
19730 // Thumb1 only has 8-bit unsigned immediate.
19731 return AbsImm >= 0 && AbsImm <= 255;
19732}
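// For example, in ARM mode 0xFF000000 is accepted (an 8-bit value rotated by
// an even amount), while 0x101 is not and would need to be materialized
// separately (e.g. with movw) before the add or sub.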
19733
19734// Return false to prevent folding
19735// (mul (add r, c0), c1) -> (add (mul r, c1), c0*c1) in DAGCombine,
19736// if the folding leads to worse code.
19737 bool ARMTargetLowering::isMulAddWithConstProfitable(SDValue AddNode,
19738 SDValue ConstNode) const {
19739 // Let the DAGCombiner decide for vector types and large types.
19740 const EVT VT = AddNode.getValueType();
19741 if (VT.isVector() || VT.getScalarSizeInBits() > 32)
19742 return true;
19743
19744 // It is worse if c0 is legal add immediate, while c1*c0 is not
19745 // and has to be composed by at least two instructions.
19746 const ConstantSDNode *C0Node = cast<ConstantSDNode>(AddNode.getOperand(1));
19747 const ConstantSDNode *C1Node = cast<ConstantSDNode>(ConstNode);
19748 const int64_t C0 = C0Node->getSExtValue();
19749 APInt CA = C0Node->getAPIntValue() * C1Node->getAPIntValue();
19750 if (!isLegalAddImmediate(C0) || isLegalAddImmediate(CA.getSExtValue()))
19751 return true;
19752 if (ConstantMaterializationCost((unsigned)CA.getZExtValue(), Subtarget) > 1)
19753 return false;
19754
19755 // Default to true and let the DAGCombiner decide.
19756 return true;
19757}
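// The fold being guarded is, concretely:
//   (mul (add r, 3), 5) --> (add (mul r, 5), 15)
// It is only rejected when the original add immediate was cheap but the
// combined constant c0*c1 would take more than one instruction to build.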
19758
19759 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
19760 bool isSEXTLoad, SDValue &Base,
19761 SDValue &Offset, bool &isInc,
19762 SelectionDAG &DAG) {
19763 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19764 return false;
19765
19766 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
19767 // AddressingMode 3
19768 Base = Ptr->getOperand(0);
19769 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19770 int RHSC = (int)RHS->getZExtValue();
19771 if (RHSC < 0 && RHSC > -256) {
19772 assert(Ptr->getOpcode() == ISD::ADD);
19773 isInc = false;
19774 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19775 return true;
19776 }
19777 }
19778 isInc = (Ptr->getOpcode() == ISD::ADD);
19779 Offset = Ptr->getOperand(1);
19780 return true;
19781 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
19782 // AddressingMode 2
19783 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19784 int RHSC = (int)RHS->getZExtValue();
19785 if (RHSC < 0 && RHSC > -0x1000) {
19786 assert(Ptr->getOpcode() == ISD::ADD);
19787 isInc = false;
19788 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19789 Base = Ptr->getOperand(0);
19790 return true;
19791 }
19792 }
19793
19794 if (Ptr->getOpcode() == ISD::ADD) {
19795 isInc = true;
19796 ARM_AM::ShiftOpc ShOpcVal=
19797 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
19798 if (ShOpcVal != ARM_AM::no_shift) {
19799 Base = Ptr->getOperand(1);
19800 Offset = Ptr->getOperand(0);
19801 } else {
19802 Base = Ptr->getOperand(0);
19803 Offset = Ptr->getOperand(1);
19804 }
19805 return true;
19806 }
19807
19808 isInc = (Ptr->getOpcode() == ISD::ADD);
19809 Base = Ptr->getOperand(0);
19810 Offset = Ptr->getOperand(1);
19811 return true;
19812 }
19813
19814 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
19815 return false;
19816}
19817
19818 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
19819 bool isSEXTLoad, SDValue &Base,
19820 SDValue &Offset, bool &isInc,
19821 SelectionDAG &DAG) {
19822 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19823 return false;
19824
19825 Base = Ptr->getOperand(0);
19826 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
19827 int RHSC = (int)RHS->getZExtValue();
19828 if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
19829 assert(Ptr->getOpcode() == ISD::ADD);
19830 isInc = false;
19831 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19832 return true;
19833 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
19834 isInc = Ptr->getOpcode() == ISD::ADD;
19835 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19836 return true;
19837 }
19838 }
19839
19840 return false;
19841}
19842
19843static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment,
19844 bool isSEXTLoad, bool IsMasked, bool isLE,
19845 SDValue &Base, SDValue &Offset,
19846 bool &isInc, SelectionDAG &DAG) {
19847 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
19848 return false;
19849 if (!isa<ConstantSDNode>(Ptr->getOperand(1)))
19850 return false;
19851
19852 // We allow LE non-masked loads to change the type (for example use a vldrb.8
19853 // as opposed to a vldrw.32). This can allow extra addressing modes or
19854 // alignments for what is otherwise an equivalent instruction.
19855 bool CanChangeType = isLE && !IsMasked;
19856
19857 ConstantSDNode *RHS = cast<ConstantSDNode>(Ptr->getOperand(1));
19858 int RHSC = (int)RHS->getZExtValue();
19859
19860 auto IsInRange = [&](int RHSC, int Limit, int Scale) {
19861 if (RHSC < 0 && RHSC > -Limit * Scale && RHSC % Scale == 0) {
19862 assert(Ptr->getOpcode() == ISD::ADD);
19863 isInc = false;
19864 Offset = DAG.getConstant(-RHSC, SDLoc(Ptr), RHS->getValueType(0));
19865 return true;
19866 } else if (RHSC > 0 && RHSC < Limit * Scale && RHSC % Scale == 0) {
19867 isInc = Ptr->getOpcode() == ISD::ADD;
19868 Offset = DAG.getConstant(RHSC, SDLoc(Ptr), RHS->getValueType(0));
19869 return true;
19870 }
19871 return false;
19872 };
19873
19874 // Try to find a matching instruction based on s/zext, Alignment, Offset and
19875 // (in BE/masked) type.
19876 Base = Ptr->getOperand(0);
19877 if (VT == MVT::v4i16) {
19878 if (Alignment >= 2 && IsInRange(RHSC, 0x80, 2))
19879 return true;
19880 } else if (VT == MVT::v4i8 || VT == MVT::v8i8) {
19881 if (IsInRange(RHSC, 0x80, 1))
19882 return true;
19883 } else if (Alignment >= 4 &&
19884 (CanChangeType || VT == MVT::v4i32 || VT == MVT::v4f32) &&
19885 IsInRange(RHSC, 0x80, 4))
19886 return true;
19887 else if (Alignment >= 2 &&
19888 (CanChangeType || VT == MVT::v8i16 || VT == MVT::v8f16) &&
19889 IsInRange(RHSC, 0x80, 2))
19890 return true;
19891 else if ((CanChangeType || VT == MVT::v16i8) && IsInRange(RHSC, 0x80, 1))
19892 return true;
19893 return false;
19894}
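// For example, a pre/post-indexed vldrw.u32 / vstrw.32 offset must be a
// multiple of 4 in roughly the range +/-508 (a 7-bit immediate scaled by the
// element size); narrower element sizes scale by 2 or 1 accordingly.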
19895
19896/// getPreIndexedAddressParts - returns true by value, base pointer and
19897/// offset pointer and addressing mode by reference if the node's address
19898/// can be legally represented as pre-indexed load / store address.
19899bool
19900 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
19901 SDValue &Offset,
19902 ISD::MemIndexedMode &AM,
19903 SelectionDAG &DAG) const {
19904 if (Subtarget->isThumb1Only())
19905 return false;
19906
19907 EVT VT;
19908 SDValue Ptr;
19909 Align Alignment;
19910 bool isSEXTLoad = false;
19911 bool IsMasked = false;
19912 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19913 Ptr = LD->getBasePtr();
19914 VT = LD->getMemoryVT();
19915 Alignment = LD->getAlign();
19916 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19917 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19918 Ptr = ST->getBasePtr();
19919 VT = ST->getMemoryVT();
19920 Alignment = ST->getAlign();
19921 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19922 Ptr = LD->getBasePtr();
19923 VT = LD->getMemoryVT();
19924 Alignment = LD->getAlign();
19925 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19926 IsMasked = true;
19927 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19928 Ptr = ST->getBasePtr();
19929 VT = ST->getMemoryVT();
19930 Alignment = ST->getAlign();
19931 IsMasked = true;
19932 } else
19933 return false;
19934
19935 bool isInc;
19936 bool isLegal = false;
19937 if (VT.isVector())
19938 isLegal = Subtarget->hasMVEIntegerOps() &&
19939 getMVEIndexedAddressParts(
19940 Ptr.getNode(), VT, Alignment, isSEXTLoad, IsMasked,
19941 Subtarget->isLittle(), Base, Offset, isInc, DAG);
19942 else {
19943 if (Subtarget->isThumb2())
19944 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19945 Offset, isInc, DAG);
19946 else
19947 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
19948 Offset, isInc, DAG);
19949 }
19950 if (!isLegal)
19951 return false;
19952
19953 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
19954 return true;
19955}
19956
19957/// getPostIndexedAddressParts - returns true by value, base pointer and
19958/// offset pointer and addressing mode by reference if this node can be
19959/// combined with a load / store to form a post-indexed load / store.
19960 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
19961 SDValue &Base,
19962 SDValue &Offset,
19963 ISD::MemIndexedMode &AM,
19964 SelectionDAG &DAG) const {
19965 EVT VT;
19966 SDValue Ptr;
19967 Align Alignment;
19968 bool isSEXTLoad = false, isNonExt;
19969 bool IsMasked = false;
19970 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
19971 VT = LD->getMemoryVT();
19972 Ptr = LD->getBasePtr();
19973 Alignment = LD->getAlign();
19974 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19975 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19976 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
19977 VT = ST->getMemoryVT();
19978 Ptr = ST->getBasePtr();
19979 Alignment = ST->getAlign();
19980 isNonExt = !ST->isTruncatingStore();
19981 } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
19982 VT = LD->getMemoryVT();
19983 Ptr = LD->getBasePtr();
19984 Alignment = LD->getAlign();
19985 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
19986 isNonExt = LD->getExtensionType() == ISD::NON_EXTLOAD;
19987 IsMasked = true;
19988 } else if (MaskedStoreSDNode *ST = dyn_cast<MaskedStoreSDNode>(N)) {
19989 VT = ST->getMemoryVT();
19990 Ptr = ST->getBasePtr();
19991 Alignment = ST->getAlign();
19992 isNonExt = !ST->isTruncatingStore();
19993 IsMasked = true;
19994 } else
19995 return false;
19996
19997 if (Subtarget->isThumb1Only()) {
19998 // Thumb-1 can do a limited post-inc load or store as an updating LDM. It
19999 // must be non-extending/truncating, i32, with an offset of 4.
20000 assert(Op->getValueType(0) == MVT::i32 && "Non-i32 post-inc op?!");
20001 if (Op->getOpcode() != ISD::ADD || !isNonExt)
20002 return false;
20003 auto *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1));
20004 if (!RHS || RHS->getZExtValue() != 4)
20005 return false;
20006 if (Alignment < Align(4))
20007 return false;
20008
20009 Offset = Op->getOperand(1);
20010 Base = Op->getOperand(0);
20011 AM = ISD::POST_INC;
20012 return true;
20013 }
20014
20015 bool isInc;
20016 bool isLegal = false;
20017 if (VT.isVector())
20018 isLegal = Subtarget->hasMVEIntegerOps() &&
20019 getMVEIndexedAddressParts(Op, VT, Alignment, isSEXTLoad, IsMasked,
20020 Subtarget->isLittle(), Base, Offset,
20021 isInc, DAG);
20022 else {
20023 if (Subtarget->isThumb2())
20024 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20025 isInc, DAG);
20026 else
20027 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
20028 isInc, DAG);
20029 }
20030 if (!isLegal)
20031 return false;
20032
20033 if (Ptr != Base) {
20034 // Swap base ptr and offset to catch more post-index load / store when
20035 // it's legal. In Thumb2 mode, offset must be an immediate.
20036 if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
20037 !Subtarget->isThumb2())
20038 std::swap(Base, Offset);
20039 
20040 // Post-indexed load / store update the base pointer.
20041 if (Ptr != Base)
20042 return false;
20043 }
20044
20045 AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
20046 return true;
20047}
20048
20049 void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
20050 KnownBits &Known,
20051 const APInt &DemandedElts,
20052 const SelectionDAG &DAG,
20053 unsigned Depth) const {
20054 unsigned BitWidth = Known.getBitWidth();
20055 Known.resetAll();
20056 switch (Op.getOpcode()) {
20057 default: break;
20058 case ARMISD::ADDC:
20059 case ARMISD::ADDE:
20060 case ARMISD::SUBC:
20061 case ARMISD::SUBE:
20062 // Special cases when we convert a carry to a boolean.
20063 if (Op.getResNo() == 0) {
20064 SDValue LHS = Op.getOperand(0);
20065 SDValue RHS = Op.getOperand(1);
20066 // (ADDE 0, 0, C) will give us a single bit.
20067 if (Op->getOpcode() == ARMISD::ADDE && isNullConstant(LHS) &&
20068 isNullConstant(RHS)) {
20069 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
20070 return;
20071 }
20072 }
20073 break;
20074 case ARMISD::CMOV: {
20075 // Bits are known zero/one if known on the LHS and RHS.
20076 Known = DAG.computeKnownBits(Op.getOperand(0), Depth+1);
20077 if (Known.isUnknown())
20078 return;
20079
20080 KnownBits KnownRHS = DAG.computeKnownBits(Op.getOperand(1), Depth+1);
20081 Known = Known.intersectWith(KnownRHS);
20082 return;
20083 }
20084 case ISD::INTRINSIC_W_CHAIN: {
20085 Intrinsic::ID IntID =
20086 static_cast<Intrinsic::ID>(Op->getConstantOperandVal(1));
20087 switch (IntID) {
20088 default: return;
20089 case Intrinsic::arm_ldaex:
20090 case Intrinsic::arm_ldrex: {
20091 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
20092 unsigned MemBits = VT.getScalarSizeInBits();
20093 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
20094 return;
20095 }
20096 }
20097 }
20098 case ARMISD::BFI: {
20099 // Conservatively, we can recurse down the first operand
20100 // and just mask out all affected bits.
20101 Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
20102
20103 // The operand to BFI is already a mask suitable for removing the bits it
20104 // sets.
20105 const APInt &Mask = Op.getConstantOperandAPInt(2);
20106 Known.Zero &= Mask;
20107 Known.One &= Mask;
20108 return;
20109 }
20110 case ARMISD::VGETLANEs:
20111 case ARMISD::VGETLANEu: {
20112 const SDValue &SrcSV = Op.getOperand(0);
20113 EVT VecVT = SrcSV.getValueType();
20114 assert(VecVT.isVector() && "VGETLANE expected a vector type");
20115 const unsigned NumSrcElts = VecVT.getVectorNumElements();
20116 ConstantSDNode *Pos = cast<ConstantSDNode>(Op.getOperand(1).getNode());
20117 assert(Pos->getAPIntValue().ult(NumSrcElts) &&
20118 "VGETLANE index out of bounds");
20119 unsigned Idx = Pos->getZExtValue();
20120 APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
20121 Known = DAG.computeKnownBits(SrcSV, DemandedElt, Depth + 1);
20122
20123 EVT VT = Op.getValueType();
20124 const unsigned DstSz = VT.getScalarSizeInBits();
20125 const unsigned SrcSz = VecVT.getVectorElementType().getSizeInBits();
20126 (void)SrcSz;
20127 assert(SrcSz == Known.getBitWidth());
20128 assert(DstSz > SrcSz);
20129 if (Op.getOpcode() == ARMISD::VGETLANEs)
20130 Known = Known.sext(DstSz);
20131 else {
20132 Known = Known.zext(DstSz);
20133 }
20134 assert(DstSz == Known.getBitWidth());
20135 break;
20136 }
20137 case ARMISD::VMOVrh: {
20138 KnownBits KnownOp = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20139 assert(KnownOp.getBitWidth() == 16);
20140 Known = KnownOp.zext(32);
20141 break;
20142 }
20143 case ARMISD::CSINC:
20144 case ARMISD::CSINV:
20145 case ARMISD::CSNEG: {
20146 KnownBits KnownOp0 = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
20147 KnownBits KnownOp1 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
20148
20149 // The result is either:
20150 // CSINC: KnownOp0 or KnownOp1 + 1
20151 // CSINV: KnownOp0 or ~KnownOp1
20152 // CSNEG: KnownOp0 or KnownOp1 * -1
20153 if (Op.getOpcode() == ARMISD::CSINC)
20154 KnownOp1 = KnownBits::computeForAddSub(
20155 /*Add=*/true, /*NSW=*/false, /*NUW=*/false, KnownOp1,
20156 KnownBits::makeConstant(APInt(32, 1)));
20157 else if (Op.getOpcode() == ARMISD::CSINV)
20158 std::swap(KnownOp1.Zero, KnownOp1.One);
20159 else if (Op.getOpcode() == ARMISD::CSNEG)
20160 KnownOp1 = KnownBits::mul(
20161 KnownOp1, KnownBits::makeConstant(APInt(32, -1)));
20162
20163 Known = KnownOp0.intersectWith(KnownOp1);
20164 break;
20165 }
20166 }
20167}
20168
20169 bool ARMTargetLowering::targetShrinkDemandedConstant(
20170 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
20171 TargetLoweringOpt &TLO) const {
20172 // Delay optimization, so we don't have to deal with illegal types, or block
20173 // optimizations.
20174 if (!TLO.LegalOps)
20175 return false;
20176
20177 // Only optimize AND for now.
20178 if (Op.getOpcode() != ISD::AND)
20179 return false;
20180
20181 EVT VT = Op.getValueType();
20182
20183 // Ignore vectors.
20184 if (VT.isVector())
20185 return false;
20186
20187 assert(VT == MVT::i32 && "Unexpected integer type");
20188
20189 // Make sure the RHS really is a constant.
20190 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
20191 if (!C)
20192 return false;
20193
20194 unsigned Mask = C->getZExtValue();
20195
20196 unsigned Demanded = DemandedBits.getZExtValue();
20197 unsigned ShrunkMask = Mask & Demanded;
20198 unsigned ExpandedMask = Mask | ~Demanded;
20199
20200 // If the mask is all zeros, let the target-independent code replace the
20201 // result with zero.
20202 if (ShrunkMask == 0)
20203 return false;
20204
20205 // If the mask is all ones, erase the AND. (Currently, the target-independent
20206 // code won't do this, so we have to do it explicitly to avoid an infinite
20207 // loop in obscure cases.)
20208 if (ExpandedMask == ~0U)
20209 return TLO.CombineTo(Op, Op.getOperand(0));
20210
20211 auto IsLegalMask = [ShrunkMask, ExpandedMask](unsigned Mask) -> bool {
20212 return (ShrunkMask & Mask) == ShrunkMask && (~ExpandedMask & Mask) == 0;
20213 };
20214 auto UseMask = [Mask, Op, VT, &TLO](unsigned NewMask) -> bool {
20215 if (NewMask == Mask)
20216 return true;
20217 SDLoc DL(Op);
20218 SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
20219 SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
20220 return TLO.CombineTo(Op, NewOp);
20221 };
20222
20223 // Prefer uxtb mask.
20224 if (IsLegalMask(0xFF))
20225 return UseMask(0xFF);
20226
20227 // Prefer uxth mask.
20228 if (IsLegalMask(0xFFFF))
20229 return UseMask(0xFFFF);
20230
20231 // [1, 255] is Thumb1 movs+ands, legal immediate for ARM/Thumb2.
20232 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20233 if (ShrunkMask < 256)
20234 return UseMask(ShrunkMask);
20235
20236 // [-256, -2] is Thumb1 movs+bics, legal immediate for ARM/Thumb2.
20237 // FIXME: Prefer a contiguous sequence of bits for other optimizations.
20238 if ((int)ExpandedMask <= -2 && (int)ExpandedMask >= -256)
20239 return UseMask(ExpandedMask);
20240
20241 // Potential improvements:
20242 //
20243 // We could try to recognize lsls+lsrs or lsrs+lsls pairs here.
20244 // We could try to prefer Thumb1 immediates which can be lowered to a
20245 // two-instruction sequence.
20246 // We could try to recognize more legal ARM/Thumb2 immediates here.
20247
20248 return false;
20249}
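// Worked example: for (and X, 0x000f000f) where only the low 16 bits are
// demanded, ShrunkMask becomes 0xf, which is below 256 and therefore cheaper
// to materialize than the original constant (e.g. movs+ands on Thumb1).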
20250
20251 bool ARMTargetLowering::SimplifyDemandedBitsForTargetNode(
20252 SDValue Op, const APInt &OriginalDemandedBits,
20253 const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
20254 unsigned Depth) const {
20255 unsigned Opc = Op.getOpcode();
20256
20257 switch (Opc) {
20258 case ARMISD::ASRL:
20259 case ARMISD::LSRL: {
20260 // If this is result 0 and the other result is unused, see if the demand
20261 // bits allow us to shrink this long shift into a standard small shift in
20262 // the opposite direction.
20263 if (Op.getResNo() == 0 && !Op->hasAnyUseOfValue(1) &&
20264 isa<ConstantSDNode>(Op->getOperand(2))) {
20265 unsigned ShAmt = Op->getConstantOperandVal(2);
20266 if (ShAmt < 32 && OriginalDemandedBits.isSubsetOf(APInt::getAllOnes(32)
20267 << (32 - ShAmt)))
20268 return TLO.CombineTo(
20269 Op, TLO.DAG.getNode(
20270 ISD::SHL, SDLoc(Op), MVT::i32, Op.getOperand(1),
20271 TLO.DAG.getConstant(32 - ShAmt, SDLoc(Op), MVT::i32)));
20272 }
20273 break;
20274 }
20275 case ARMISD::VBICIMM: {
20276 SDValue Op0 = Op.getOperand(0);
20277 unsigned ModImm = Op.getConstantOperandVal(1);
20278 unsigned EltBits = 0;
20279 uint64_t Mask = ARM_AM::decodeVMOVModImm(ModImm, EltBits);
20280 if ((OriginalDemandedBits & Mask) == 0)
20281 return TLO.CombineTo(Op, Op0);
20282 }
20283 }
20284
20285 return TargetLowering::SimplifyDemandedBitsForTargetNode(
20286 Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
20287}
20288
20289//===----------------------------------------------------------------------===//
20290// ARM Inline Assembly Support
20291//===----------------------------------------------------------------------===//
20292
20293 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
20294 // Looking for "rev" which is V6+.
20295 if (!Subtarget->hasV6Ops())
20296 return false;
20297
20298 InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());
20299 StringRef AsmStr = IA->getAsmString();
20300 SmallVector<StringRef, 4> AsmPieces;
20301 SplitString(AsmStr, AsmPieces, ";\n");
20302
20303 switch (AsmPieces.size()) {
20304 default: return false;
20305 case 1:
20306 AsmStr = AsmPieces[0];
20307 AsmPieces.clear();
20308 SplitString(AsmStr, AsmPieces, " \t,");
20309
20310 // rev $0, $1
20311 if (AsmPieces.size() == 3 &&
20312 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
20313 IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
20314 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
20315 if (Ty && Ty->getBitWidth() == 32)
20316 return IntrinsicLowering::LowerToByteSwap(CI);
20317 }
20318 break;
20319 }
20320
20321 return false;
20322}
20323
20324const char *ARMTargetLowering::LowerXConstraint(EVT ConstraintVT) const {
20325 // At this point, we have to lower this constraint to something else, so we
20326 // lower it to an "r" or "w". However, by doing this we will force the result
20327 // to be in register, while the X constraint is much more permissive.
20328 //
20329 // Although we are correct (we are free to emit anything, without
20330 // constraints), we might break use cases that would expect us to be more
20331 // efficient and emit something else.
20332 if (!Subtarget->hasVFP2Base())
20333 return "r";
20334 if (ConstraintVT.isFloatingPoint())
20335 return "w";
20336 if (ConstraintVT.isVector() && Subtarget->hasNEON() &&
20337 (ConstraintVT.getSizeInBits() == 64 ||
20338 ConstraintVT.getSizeInBits() == 128))
20339 return "w";
20340
20341 return "r";
20342}
20343
20344/// getConstraintType - Given a constraint letter, return the type of
20345/// constraint it is for this target.
20346 ARMTargetLowering::ConstraintType
20347 ARMTargetLowering::getConstraintType(StringRef Constraint) const {
20348 unsigned S = Constraint.size();
20349 if (S == 1) {
20350 switch (Constraint[0]) {
20351 default: break;
20352 case 'l': return C_RegisterClass;
20353 case 'w': return C_RegisterClass;
20354 case 'h': return C_RegisterClass;
20355 case 'x': return C_RegisterClass;
20356 case 't': return C_RegisterClass;
20357 case 'j': return C_Immediate; // Constant for movw.
20358 // An address with a single base register. Due to the way we
20359 // currently handle addresses it is the same as an 'r' memory constraint.
20360 case 'Q': return C_Memory;
20361 }
20362 } else if (S == 2) {
20363 switch (Constraint[0]) {
20364 default: break;
20365 case 'T': return C_RegisterClass;
20366 // All 'U+' constraints are addresses.
20367 case 'U': return C_Memory;
20368 }
20369 }
20370 return TargetLowering::getConstraintType(Constraint);
20371}
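// Typical uses of these constraints in GCC-style inline asm (illustrative,
// variable names assumed):
//   asm ("add %0, %1, %2" : "=l"(r) : "l"(a), "l"(b));      // 'l': low regs r0-r7
//   asm ("vadd.f32 %0, %1, %2" : "=w"(d) : "w"(x), "w"(y)); // 'w': VFP/NEON regs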
20372
20373/// Examine constraint type and operand type and determine a weight value.
20374/// This object must already have been set up with the operand type
20375/// and the current alternative constraint selected.
20376 TargetLowering::ConstraintWeight
20377 ARMTargetLowering::getSingleConstraintMatchWeight(
20378 AsmOperandInfo &info, const char *constraint) const {
20379 ConstraintWeight weight = CW_Invalid;
20380 Value *CallOperandVal = info.CallOperandVal;
20381 // If we don't have a value, we can't do a match,
20382 // but allow it at the lowest weight.
20383 if (!CallOperandVal)
20384 return CW_Default;
20385 Type *type = CallOperandVal->getType();
20386 // Look at the constraint type.
20387 switch (*constraint) {
20388 default:
20389 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
20390 break;
20391 case 'l':
20392 if (type->isIntegerTy()) {
20393 if (Subtarget->isThumb())
20394 weight = CW_SpecificReg;
20395 else
20396 weight = CW_Register;
20397 }
20398 break;
20399 case 'w':
20400 if (type->isFloatingPointTy())
20401 weight = CW_Register;
20402 break;
20403 }
20404 return weight;
20405}
20406
20407using RCPair = std::pair<unsigned, const TargetRegisterClass *>;
20408
20409 RCPair ARMTargetLowering::getRegForInlineAsmConstraint(
20410 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
20411 switch (Constraint.size()) {
20412 case 1:
20413 // GCC ARM Constraint Letters
20414 switch (Constraint[0]) {
20415 case 'l': // Low regs or general regs.
20416 if (Subtarget->isThumb())
20417 return RCPair(0U, &ARM::tGPRRegClass);
20418 return RCPair(0U, &ARM::GPRRegClass);
20419 case 'h': // High regs or no regs.
20420 if (Subtarget->isThumb())
20421 return RCPair(0U, &ARM::hGPRRegClass);
20422 break;
20423 case 'r':
20424 if (Subtarget->isThumb1Only())
20425 return RCPair(0U, &ARM::tGPRRegClass);
20426 return RCPair(0U, &ARM::GPRRegClass);
20427 case 'w':
20428 if (VT == MVT::Other)
20429 break;
20430 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20431 return RCPair(0U, &ARM::SPRRegClass);
20432 if (VT.getSizeInBits() == 64)
20433 return RCPair(0U, &ARM::DPRRegClass);
20434 if (VT.getSizeInBits() == 128)
20435 return RCPair(0U, &ARM::QPRRegClass);
20436 break;
20437 case 'x':
20438 if (VT == MVT::Other)
20439 break;
20440 if (VT == MVT::f32 || VT == MVT::f16 || VT == MVT::bf16)
20441 return RCPair(0U, &ARM::SPR_8RegClass);
20442 if (VT.getSizeInBits() == 64)
20443 return RCPair(0U, &ARM::DPR_8RegClass);
20444 if (VT.getSizeInBits() == 128)
20445 return RCPair(0U, &ARM::QPR_8RegClass);
20446 break;
20447 case 't':
20448 if (VT == MVT::Other)
20449 break;
20450 if (VT == MVT::f32 || VT == MVT::i32 || VT == MVT::f16 || VT == MVT::bf16)
20451 return RCPair(0U, &ARM::SPRRegClass);
20452 if (VT.getSizeInBits() == 64)
20453 return RCPair(0U, &ARM::DPR_VFP2RegClass);
20454 if (VT.getSizeInBits() == 128)
20455 return RCPair(0U, &ARM::QPR_VFP2RegClass);
20456 break;
20457 }
20458 break;
20459
20460 case 2:
20461 if (Constraint[0] == 'T') {
20462 switch (Constraint[1]) {
20463 default:
20464 break;
20465 case 'e':
20466 return RCPair(0U, &ARM::tGPREvenRegClass);
20467 case 'o':
20468 return RCPair(0U, &ARM::tGPROddRegClass);
20469 }
20470 }
20471 break;
20472
20473 default:
20474 break;
20475 }
20476
20477 if (StringRef("{cc}").equals_insensitive(Constraint))
20478 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
20479
20480 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
20481}
20482
20483/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
20484/// vector. If it is invalid, don't add anything to Ops.
20485 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
20486 StringRef Constraint,
20487 std::vector<SDValue> &Ops,
20488 SelectionDAG &DAG) const {
20489 SDValue Result;
20490
20491 // Currently only support length 1 constraints.
20492 if (Constraint.size() != 1)
20493 return;
20494
20495 char ConstraintLetter = Constraint[0];
20496 switch (ConstraintLetter) {
20497 default: break;
20498 case 'j':
20499 case 'I': case 'J': case 'K': case 'L':
20500 case 'M': case 'N': case 'O':
20501 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
20502 if (!C)
20503 return;
20504
20505 int64_t CVal64 = C->getSExtValue();
20506 int CVal = (int) CVal64;
20507 // None of these constraints allow values larger than 32 bits. Check
20508 // that the value fits in an int.
20509 if (CVal != CVal64)
20510 return;
20511
20512 switch (ConstraintLetter) {
20513 case 'j':
20514 // Constant suitable for movw, must be between 0 and
20515 // 65535.
20516 if (Subtarget->hasV6T2Ops() || (Subtarget->hasV8MBaselineOps()))
20517 if (CVal >= 0 && CVal <= 65535)
20518 break;
20519 return;
20520 case 'I':
20521 if (Subtarget->isThumb1Only()) {
20522 // This must be a constant between 0 and 255, for ADD
20523 // immediates.
20524 if (CVal >= 0 && CVal <= 255)
20525 break;
20526 } else if (Subtarget->isThumb2()) {
20527 // A constant that can be used as an immediate value in a
20528 // data-processing instruction.
20529 if (ARM_AM::getT2SOImmVal(CVal) != -1)
20530 break;
20531 } else {
20532 // A constant that can be used as an immediate value in a
20533 // data-processing instruction.
20534 if (ARM_AM::getSOImmVal(CVal) != -1)
20535 break;
20536 }
20537 return;
20538
20539 case 'J':
20540 if (Subtarget->isThumb1Only()) {
20541 // This must be a constant between -255 and -1, for negated ADD
20542 // immediates. This can be used in GCC with an "n" modifier that
20543 // prints the negated value, for use with SUB instructions. It is
20544 // not useful otherwise but is implemented for compatibility.
20545 if (CVal >= -255 && CVal <= -1)
20546 break;
20547 } else {
20548 // This must be a constant between -4095 and 4095. It is not clear
20549 // what this constraint is intended for. Implemented for
20550 // compatibility with GCC.
20551 if (CVal >= -4095 && CVal <= 4095)
20552 break;
20553 }
20554 return;
20555
20556 case 'K':
20557 if (Subtarget->isThumb1Only()) {
20558 // A 32-bit value where only one byte has a nonzero value. Exclude
20559 // zero to match GCC. This constraint is used by GCC internally for
20560 // constants that can be loaded with a move/shift combination.
20561 // It is not useful otherwise but is implemented for compatibility.
20562 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
20563 break;
20564 } else if (Subtarget->isThumb2()) {
20565 // A constant whose bitwise inverse can be used as an immediate
20566 // value in a data-processing instruction. This can be used in GCC
20567 // with a "B" modifier that prints the inverted value, for use with
20568 // BIC and MVN instructions. It is not useful otherwise but is
20569 // implemented for compatibility.
20570 if (ARM_AM::getT2SOImmVal(~CVal) != -1)
20571 break;
20572 } else {
20573 // A constant whose bitwise inverse can be used as an immediate
20574 // value in a data-processing instruction. This can be used in GCC
20575 // with a "B" modifier that prints the inverted value, for use with
20576 // BIC and MVN instructions. It is not useful otherwise but is
20577 // implemented for compatibility.
20578 if (ARM_AM::getSOImmVal(~CVal) != -1)
20579 break;
20580 }
20581 return;
20582
20583 case 'L':
20584 if (Subtarget->isThumb1Only()) {
20585 // This must be a constant between -7 and 7,
20586 // for 3-operand ADD/SUB immediate instructions.
20587 if (CVal >= -7 && CVal < 7)
20588 break;
20589 } else if (Subtarget->isThumb2()) {
20590 // A constant whose negation can be used as an immediate value in a
20591 // data-processing instruction. This can be used in GCC with an "n"
20592 // modifier that prints the negated value, for use with SUB
20593 // instructions. It is not useful otherwise but is implemented for
20594 // compatibility.
20595 if (ARM_AM::getT2SOImmVal(-CVal) != -1)
20596 break;
20597 } else {
20598 // A constant whose negation can be used as an immediate value in a
20599 // data-processing instruction. This can be used in GCC with an "n"
20600 // modifier that prints the negated value, for use with SUB
20601 // instructions. It is not useful otherwise but is implemented for
20602 // compatibility.
20603 if (ARM_AM::getSOImmVal(-CVal) != -1)
20604 break;
20605 }
20606 return;
20607
20608 case 'M':
20609 if (Subtarget->isThumb1Only()) {
20610 // This must be a multiple of 4 between 0 and 1020, for
20611 // ADD sp + immediate.
20612 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
20613 break;
20614 } else {
20615 // A power of two or a constant between 0 and 32. This is used in
20616 // GCC for the shift amount on shifted register operands, but it is
20617 // useful in general for any shift amounts.
20618 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
20619 break;
20620 }
20621 return;
20622
20623 case 'N':
20624 if (Subtarget->isThumb1Only()) {
20625 // This must be a constant between 0 and 31, for shift amounts.
20626 if (CVal >= 0 && CVal <= 31)
20627 break;
20628 }
20629 return;
20630
20631 case 'O':
20632 if (Subtarget->isThumb1Only()) {
20633 // This must be a multiple of 4 between -508 and 508, for
20634 // ADD/SUB sp = sp + immediate.
20635 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
20636 break;
20637 }
20638 return;
20639 }
20640 Result = DAG.getTargetConstant(CVal, SDLoc(Op), Op.getValueType());
20641 break;
20642 }
20643
20644 if (Result.getNode()) {
20645 Ops.push_back(Result);
20646 return;
20647 }
20648 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
20649}
20650
20651 static RTLIB::Libcall getDivRemLibcall(
20652 const SDNode *N, MVT::SimpleValueType SVT) {
20653 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20654 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20655 "Unhandled Opcode in getDivRemLibcall");
20656 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20657 N->getOpcode() == ISD::SREM;
20658 RTLIB::Libcall LC;
20659 switch (SVT) {
20660 default: llvm_unreachable("Unexpected request for libcall!");
20661 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
20662 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
20663 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
20664 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
20665 }
20666 return LC;
20667}
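// On AEABI targets the i32 cases above typically resolve to the RTABI helpers
// __aeabi_idivmod / __aeabi_uidivmod, which return the quotient in r0 and the
// remainder in r1.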
20668
20669 static TargetLowering::ArgListTy getDivRemArgList(
20670 const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
20671 assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
20672 N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
20673 "Unhandled Opcode in getDivRemArgList");
20674 bool isSigned = N->getOpcode() == ISD::SDIVREM ||
20675 N->getOpcode() == ISD::SREM;
20676 TargetLowering::ArgListTy Args;
20677 TargetLowering::ArgListEntry Entry;
20678 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
20679 EVT ArgVT = N->getOperand(i).getValueType();
20680 Type *ArgTy = ArgVT.getTypeForEVT(*Context);
20681 Entry.Node = N->getOperand(i);
20682 Entry.Ty = ArgTy;
20683 Entry.IsSExt = isSigned;
20684 Entry.IsZExt = !isSigned;
20685 Args.push_back(Entry);
20686 }
20687 if (Subtarget->isTargetWindows() && Args.size() >= 2)
20688 std::swap(Args[0], Args[1]);
20689 return Args;
20690}
20691
20692SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
20693 assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
20694 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
20695 Subtarget->isTargetWindows()) &&
20696 "Register-based DivRem lowering only");
20697 unsigned Opcode = Op->getOpcode();
20698 assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
20699 "Invalid opcode for Div/Rem lowering");
20700 bool isSigned = (Opcode == ISD::SDIVREM);
20701 EVT VT = Op->getValueType(0);
20702 SDLoc dl(Op);
20703
20704 if (VT == MVT::i64 && isa<ConstantSDNode>(Op.getOperand(1))) {
20705 SmallVector<SDValue> Result;
20706 if (expandDIVREMByConstant(Op.getNode(), Result, MVT::i32, DAG)) {
20707 SDValue Res0 =
20708 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[0], Result[1]);
20709 SDValue Res1 =
20710 DAG.getNode(ISD::BUILD_PAIR, dl, VT, Result[2], Result[3]);
20711 return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
20712 {Res0, Res1});
20713 }
20714 }
20715
20716 Type *Ty = VT.getTypeForEVT(*DAG.getContext());
20717
20718 // If the target has hardware divide, use divide + multiply + subtract:
20719 // div = a / b
20720 // rem = a - b * div
20721 // return {div, rem}
20722 // This should be lowered into UDIV/SDIV + MLS later on.
20723 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
20724 : Subtarget->hasDivideInARMMode();
20725 if (hasDivide && Op->getValueType(0).isSimple() &&
20726 Op->getSimpleValueType(0) == MVT::i32) {
20727 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
20728 const SDValue Dividend = Op->getOperand(0);
20729 const SDValue Divisor = Op->getOperand(1);
20730 SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
20731 SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
20732 SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
20733
20734 SDValue Values[2] = {Div, Rem};
20735 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
20736 }
20737
20738 RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
20739 VT.getSimpleVT().SimpleTy);
20740 SDValue InChain = DAG.getEntryNode();
20741
20742 TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
20743 DAG.getContext(),
20744 Subtarget);
20745
20746 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20747 getPointerTy(DAG.getDataLayout()));
20748 
20749 Type *RetTy = StructType::get(Ty, Ty);
20750
20751 if (Subtarget->isTargetWindows())
20752 InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
20753
20754 TargetLowering::CallLoweringInfo CLI(DAG);
20755 CLI.setDebugLoc(dl).setChain(InChain)
20756 .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
20757 .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned);
20758 
20759 std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
20760 return CallInfo.first;
20761}
20762
20763// Lowers REM using divmod helpers
20764// see RTABI section 4.2/4.3
20765SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
20766 EVT VT = N->getValueType(0);
20767
20768 if (VT == MVT::i64 && isa<ConstantSDNode>(N->getOperand(1))) {
20769 SmallVector<SDValue> Result;
20770 if (expandDIVREMByConstant(N, Result, MVT::i32, DAG))
20771 return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), N->getValueType(0),
20772 Result[0], Result[1]);
20773 }
20774
20775 // Build return types (div and rem)
20776 std::vector<Type*> RetTyParams;
20777 Type *RetTyElement;
20778
20779 switch (VT.getSimpleVT().SimpleTy) {
20780 default: llvm_unreachable("Unexpected request for libcall!");
20781 case MVT::i8: RetTyElement = Type::getInt8Ty(*DAG.getContext()); break;
20782 case MVT::i16: RetTyElement = Type::getInt16Ty(*DAG.getContext()); break;
20783 case MVT::i32: RetTyElement = Type::getInt32Ty(*DAG.getContext()); break;
20784 case MVT::i64: RetTyElement = Type::getInt64Ty(*DAG.getContext()); break;
20785 }
20786
20787 RetTyParams.push_back(RetTyElement);
20788 RetTyParams.push_back(RetTyElement);
20789 ArrayRef<Type*> ret = ArrayRef<Type*>(RetTyParams);
20790 Type *RetTy = StructType::get(*DAG.getContext(), ret);
20791
20792 RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
20793 SimpleTy);
20794 SDValue InChain = DAG.getEntryNode();
20795 TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
20796 Subtarget);
20797 bool isSigned = N->getOpcode() == ISD::SREM;
20798 SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
20799 getPointerTy(DAG.getDataLayout()));
20800 
20801 if (Subtarget->isTargetWindows())
20802 InChain = WinDBZCheckDenominator(DAG, N, InChain);
20803
20804 // Lower call
20805 CallLoweringInfo CLI(DAG);
20806 CLI.setChain(InChain)
20807 .setCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
20808 .setSExtResult(isSigned).setZExtResult(!isSigned).setDebugLoc(SDLoc(N));
20809 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
20810
20811 // Return second (rem) result operand (first contains div)
20812 SDNode *ResNode = CallResult.first.getNode();
20813 assert(ResNode->getNumOperands() == 2 && "divmod should return two operands");
20814 return ResNode->getOperand(1);
20815}
20816
20817SDValue
20818ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
20819 assert(Subtarget->isTargetWindows() && "unsupported target platform");
20820 SDLoc DL(Op);
20821
20822 // Get the inputs.
20823 SDValue Chain = Op.getOperand(0);
20824 SDValue Size = Op.getOperand(1);
20825
20826 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
20827 "no-stack-arg-probe")) {
20828 MaybeAlign Align =
20829 cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
20830 SDValue SP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20831 Chain = SP.getValue(1);
20832 SP = DAG.getNode(ISD::SUB, DL, MVT::i32, SP, Size);
20833 if (Align)
20834 SP =
20835 DAG.getNode(ISD::AND, DL, MVT::i32, SP.getValue(0),
20836 DAG.getConstant(-(uint64_t)Align->value(), DL, MVT::i32));
20837 Chain = DAG.getCopyToReg(Chain, DL, ARM::SP, SP);
20838 SDValue Ops[2] = { SP, Chain };
20839 return DAG.getMergeValues(Ops, DL);
20840 }
20841
20842 SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size,
20843 DAG.getConstant(2, DL, MVT::i32));
20844
20845 SDValue Glue;
20846 Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Glue);
20847 Glue = Chain.getValue(1);
20848
20849 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
20850 Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Glue);
20851
20852 SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32);
20853 Chain = NewSP.getValue(1);
20854
20855 SDValue Ops[2] = { NewSP, Chain };
20856 return DAG.getMergeValues(Ops, DL);
20857}
20858
20859SDValue ARMTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
20860 bool IsStrict = Op->isStrictFPOpcode();
20861 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20862 const unsigned DstSz = Op.getValueType().getSizeInBits();
20863 const unsigned SrcSz = SrcVal.getValueType().getSizeInBits();
20864 assert(DstSz > SrcSz && DstSz <= 64 && SrcSz >= 16 &&
20865 "Unexpected type for custom-lowering FP_EXTEND");
20866
20867 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20868 "With both FP DP and 16, any FP conversion is legal!");
20869
20870 assert(!(DstSz == 32 && Subtarget->hasFP16()) &&
20871 "With FP16, 16 to 32 conversion is legal!");
20872
20873 // Converting from 32 -> 64 is valid if we have FP64.
20874 if (SrcSz == 32 && DstSz == 64 && Subtarget->hasFP64()) {
20875 // FIXME: Remove this when we have strict fp instruction selection patterns
20876 if (IsStrict) {
20877 SDLoc Loc(Op);
20878 SDValue Result = DAG.getNode(ISD::FP_EXTEND,
20879 Loc, Op.getValueType(), SrcVal);
20880 return DAG.getMergeValues({Result, Op.getOperand(0)}, Loc);
20881 }
20882 return Op;
20883 }
20884
20885 // Either we are converting from 16 -> 64, without FP16 and/or
20886 // FP.double-precision or without Armv8-fp. So we must do it in two
20887 // steps.
20888 // Or we are converting from 32 -> 64 without fp.double-precision or 16 -> 32
20889 // without FP16. So we must do a function call.
20890 SDLoc Loc(Op);
20891 RTLIB::Libcall LC;
20892 MakeLibCallOptions CallOptions;
20893 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20894 for (unsigned Sz = SrcSz; Sz <= 32 && Sz < DstSz; Sz *= 2) {
20895 bool Supported = (Sz == 16 ? Subtarget->hasFP16() : Subtarget->hasFP64());
20896 MVT SrcVT = (Sz == 16 ? MVT::f16 : MVT::f32);
20897 MVT DstVT = (Sz == 16 ? MVT::f32 : MVT::f64);
20898 if (Supported) {
20899 if (IsStrict) {
20900 SrcVal = DAG.getNode(ISD::STRICT_FP_EXTEND, Loc,
20901 {DstVT, MVT::Other}, {Chain, SrcVal});
20902 Chain = SrcVal.getValue(1);
20903 } else {
20904 SrcVal = DAG.getNode(ISD::FP_EXTEND, Loc, DstVT, SrcVal);
20905 }
20906 } else {
20907 LC = RTLIB::getFPEXT(SrcVT, DstVT);
20908 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20909 "Unexpected type for custom-lowering FP_EXTEND");
20910 std::tie(SrcVal, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20911 Loc, Chain);
20912 }
20913 }
20914
20915 return IsStrict ? DAG.getMergeValues({SrcVal, Chain}, Loc) : SrcVal;
20916}
20917
20918SDValue ARMTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
20919 bool IsStrict = Op->isStrictFPOpcode();
20920
20921 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
20922 EVT SrcVT = SrcVal.getValueType();
20923 EVT DstVT = Op.getValueType();
20924 const unsigned DstSz = Op.getValueType().getSizeInBits();
20925 const unsigned SrcSz = SrcVT.getSizeInBits();
20926 (void)DstSz;
20927 assert(DstSz < SrcSz && SrcSz <= 64 && DstSz >= 16 &&
20928 "Unexpected type for custom-lowering FP_ROUND");
20929
20930 assert((!Subtarget->hasFP64() || !Subtarget->hasFPARMv8Base()) &&
20931 "With both FP DP and 16, any FP conversion is legal!");
20932
20933 SDLoc Loc(Op);
20934
20935 // Instruction from 32 -> 16 if hasFP16 is valid
20936 if (SrcSz == 32 && Subtarget->hasFP16())
20937 return Op;
20938
20939 // Lib call from 32 -> 16 / 64 -> [32, 16]
20940 RTLIB::Libcall LC = RTLIB::getFPROUND(SrcVT, DstVT);
20941 assert(LC != RTLIB::UNKNOWN_LIBCALL &&
20942 "Unexpected type for custom-lowering FP_ROUND");
20943 MakeLibCallOptions CallOptions;
20944 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
20945 SDValue Result;
20946 std::tie(Result, Chain) = makeLibCall(DAG, LC, DstVT, SrcVal, CallOptions,
20947 Loc, Chain);
20948 return IsStrict ? DAG.getMergeValues({Result, Chain}, Loc) : Result;
20949}
20950
20951bool
20952 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
20953 // The ARM target isn't yet aware of offsets.
20954 return false;
20955}
20956
20957 bool ARM::isBitFieldInvertedMask(unsigned v) {
20958 if (v == 0xffffffff)
20959 return false;
20960
20961 // there can be 1's on either or both "outsides", all the "inside"
20962 // bits must be 0's
20963 return isShiftedMask_32(~v);
20964}
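// For example, v = 0xffff00ff is accepted: ~v = 0x0000ff00 is a single
// contiguous run of set bits, so the cleared field can be handled with
// BFC/BFI. v = 0xffffffff is rejected explicitly above.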
20965
20966/// isFPImmLegal - Returns true if the target can instruction select the
20967/// specified FP immediate natively. If false, the legalizer will
20968/// materialize the FP immediate as a load from a constant pool.
20969 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
20970 bool ForCodeSize) const {
20971 if (!Subtarget->hasVFP3Base())
20972 return false;
20973 if (VT == MVT::f16 && Subtarget->hasFullFP16())
20974 return ARM_AM::getFP16Imm(Imm) != -1;
20975 if (VT == MVT::f32 && Subtarget->hasFullFP16() &&
20976 ARM_AM::getFP32FP16Imm(Imm) != -1)
20977 return true;
20978 if (VT == MVT::f32)
20979 return ARM_AM::getFP32Imm(Imm) != -1;
20980 if (VT == MVT::f64 && Subtarget->hasFP64())
20981 return ARM_AM::getFP64Imm(Imm) != -1;
20982 return false;
20983}
20984
20985/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
20986/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
20987/// specified in the intrinsic calls.
20988 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
20989 const CallInst &I,
20990 MachineFunction &MF,
20991 unsigned Intrinsic) const {
20992 switch (Intrinsic) {
20993 case Intrinsic::arm_neon_vld1:
20994 case Intrinsic::arm_neon_vld2:
20995 case Intrinsic::arm_neon_vld3:
20996 case Intrinsic::arm_neon_vld4:
20997 case Intrinsic::arm_neon_vld2lane:
20998 case Intrinsic::arm_neon_vld3lane:
20999 case Intrinsic::arm_neon_vld4lane:
21000 case Intrinsic::arm_neon_vld2dup:
21001 case Intrinsic::arm_neon_vld3dup:
21002 case Intrinsic::arm_neon_vld4dup: {
21003 Info.opc = ISD::INTRINSIC_W_CHAIN;
21004 // Conservatively set memVT to the entire set of vectors loaded.
21005 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21006 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21007 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21008 Info.ptrVal = I.getArgOperand(0);
21009 Info.offset = 0;
21010 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21011 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21012 // volatile loads with NEON intrinsics not supported
21014 return true;
21015 }
21016 case Intrinsic::arm_neon_vld1x2:
21017 case Intrinsic::arm_neon_vld1x3:
21018 case Intrinsic::arm_neon_vld1x4: {
21020 // Conservatively set memVT to the entire set of vectors loaded.
21021 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21022 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
21023 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21024 Info.ptrVal = I.getArgOperand(I.arg_size() - 1);
21025 Info.offset = 0;
21026 Info.align.reset();
21027 // volatile loads with NEON intrinsics not supported
21029 return true;
21030 }
21031 case Intrinsic::arm_neon_vst1:
21032 case Intrinsic::arm_neon_vst2:
21033 case Intrinsic::arm_neon_vst3:
21034 case Intrinsic::arm_neon_vst4:
21035 case Intrinsic::arm_neon_vst2lane:
21036 case Intrinsic::arm_neon_vst3lane:
21037 case Intrinsic::arm_neon_vst4lane: {
21039 // Conservatively set memVT to the entire set of vectors stored.
21040 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21041 unsigned NumElts = 0;
21042 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21043 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21044 if (!ArgTy->isVectorTy())
21045 break;
21046 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21047 }
21048 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21049 Info.ptrVal = I.getArgOperand(0);
21050 Info.offset = 0;
21051 Value *AlignArg = I.getArgOperand(I.arg_size() - 1);
21052 Info.align = cast<ConstantInt>(AlignArg)->getMaybeAlignValue();
21053 // volatile stores with NEON intrinsics not supported
21055 return true;
21056 }
21057 case Intrinsic::arm_neon_vst1x2:
21058 case Intrinsic::arm_neon_vst1x3:
21059 case Intrinsic::arm_neon_vst1x4: {
21061 // Conservatively set memVT to the entire set of vectors stored.
21062 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21063 unsigned NumElts = 0;
21064 for (unsigned ArgI = 1, ArgE = I.arg_size(); ArgI < ArgE; ++ArgI) {
21065 Type *ArgTy = I.getArgOperand(ArgI)->getType();
21066 if (!ArgTy->isVectorTy())
21067 break;
21068 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
21069 }
21070 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
21071 Info.ptrVal = I.getArgOperand(0);
21072 Info.offset = 0;
21073 Info.align.reset();
21074 // volatile stores with NEON intrinsics not supported
21076 return true;
21077 }
21078 case Intrinsic::arm_mve_vld2q:
21079 case Intrinsic::arm_mve_vld4q: {
21081 // Conservatively set memVT to the entire set of vectors loaded.
21082 Type *VecTy = cast<StructType>(I.getType())->getElementType(1);
21083 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vld2q ? 2 : 4;
21084 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21085 Info.ptrVal = I.getArgOperand(0);
21086 Info.offset = 0;
21087 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21088 // volatile loads with MVE intrinsics not supported
21090 return true;
21091 }
21092 case Intrinsic::arm_mve_vst2q:
21093 case Intrinsic::arm_mve_vst4q: {
21095 // Conservatively set memVT to the entire set of vectors stored.
21096 Type *VecTy = I.getArgOperand(1)->getType();
21097 unsigned Factor = Intrinsic == Intrinsic::arm_mve_vst2q ? 2 : 4;
21098 Info.memVT = EVT::getVectorVT(VecTy->getContext(), MVT::i64, Factor * 2);
21099 Info.ptrVal = I.getArgOperand(0);
21100 Info.offset = 0;
21101 Info.align = Align(VecTy->getScalarSizeInBits() / 8);
21102 // volatile stores with MVE intrinsics not supported
21104 return true;
21105 }
21106 case Intrinsic::arm_mve_vldr_gather_base:
21107 case Intrinsic::arm_mve_vldr_gather_base_predicated: {
21109 Info.ptrVal = nullptr;
21110 Info.memVT = MVT::getVT(I.getType());
21111 Info.align = Align(1);
21113 return true;
21114 }
21115 case Intrinsic::arm_mve_vldr_gather_base_wb:
21116 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
21118 Info.ptrVal = nullptr;
21119 Info.memVT = MVT::getVT(I.getType()->getContainedType(0));
21120 Info.align = Align(1);
21122 return true;
21123 }
21124 case Intrinsic::arm_mve_vldr_gather_offset:
21125 case Intrinsic::arm_mve_vldr_gather_offset_predicated: {
21127 Info.ptrVal = nullptr;
21128 MVT DataVT = MVT::getVT(I.getType());
21129 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
21130 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21131 DataVT.getVectorNumElements());
21132 Info.align = Align(1);
21134 return true;
21135 }
21136 case Intrinsic::arm_mve_vstr_scatter_base:
21137 case Intrinsic::arm_mve_vstr_scatter_base_predicated: {
21139 Info.ptrVal = nullptr;
21140 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21141 Info.align = Align(1);
21143 return true;
21144 }
21145 case Intrinsic::arm_mve_vstr_scatter_base_wb:
21146 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated: {
21148 Info.ptrVal = nullptr;
21149 Info.memVT = MVT::getVT(I.getArgOperand(2)->getType());
21150 Info.align = Align(1);
21152 return true;
21153 }
21154 case Intrinsic::arm_mve_vstr_scatter_offset:
21155 case Intrinsic::arm_mve_vstr_scatter_offset_predicated: {
21157 Info.ptrVal = nullptr;
21158 MVT DataVT = MVT::getVT(I.getArgOperand(2)->getType());
21159 unsigned MemSize = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
21160 Info.memVT = MVT::getVectorVT(MVT::getIntegerVT(MemSize),
21161 DataVT.getVectorNumElements());
21162 Info.align = Align(1);
21164 return true;
21165 }
21166 case Intrinsic::arm_ldaex:
21167 case Intrinsic::arm_ldrex: {
21168 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21169 Type *ValTy = I.getParamElementType(0);
21171 Info.memVT = MVT::getVT(ValTy);
21172 Info.ptrVal = I.getArgOperand(0);
21173 Info.offset = 0;
21174 Info.align = DL.getABITypeAlign(ValTy);
21176 return true;
21177 }
21178 case Intrinsic::arm_stlex:
21179 case Intrinsic::arm_strex: {
21180 auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
21181 Type *ValTy = I.getParamElementType(1);
21183 Info.memVT = MVT::getVT(ValTy);
21184 Info.ptrVal = I.getArgOperand(1);
21185 Info.offset = 0;
21186 Info.align = DL.getABITypeAlign(ValTy);
21188 return true;
21189 }
21190 case Intrinsic::arm_stlexd:
21191 case Intrinsic::arm_strexd:
21193 Info.memVT = MVT::i64;
21194 Info.ptrVal = I.getArgOperand(2);
21195 Info.offset = 0;
21196 Info.align = Align(8);
21198 return true;
21199
21200 case Intrinsic::arm_ldaexd:
21201 case Intrinsic::arm_ldrexd:
21203 Info.memVT = MVT::i64;
21204 Info.ptrVal = I.getArgOperand(0);
21205 Info.offset = 0;
21206 Info.align = Align(8);
21208 return true;
21209
21210 default:
21211 break;
21212 }
21213
21214 return false;
21215}
21216
21217/// Returns true if it is beneficial to convert a load of a constant
21218/// to just the constant itself.
21219 bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
21220                                                            Type *Ty) const {
21221 assert(Ty->isIntegerTy());
21222
21223 unsigned Bits = Ty->getPrimitiveSizeInBits();
21224 if (Bits == 0 || Bits > 32)
21225 return false;
21226 return true;
21227}
21228
21229 bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
21230                                                 unsigned Index) const {
21231   if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
21232     return false;
21233
21234 return (Index == 0 || Index == ResVT.getVectorNumElements());
21235}
21236
21237 Instruction *ARMTargetLowering::makeDMB(IRBuilderBase &Builder,
21238                                         ARM_MB::MemBOpt Domain) const {
21239 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21240
21241 // First, if the target has no DMB, see what fallback we can use.
21242 if (!Subtarget->hasDataBarrier()) {
21243 // Some ARMv6 cpus can support data barriers with an mcr instruction.
21244 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
21245 // here.
21246 if (Subtarget->hasV6Ops() && !Subtarget->isThumb()) {
21247 Function *MCR = Intrinsic::getDeclaration(M, Intrinsic::arm_mcr);
21248 Value* args[6] = {Builder.getInt32(15), Builder.getInt32(0),
21249 Builder.getInt32(0), Builder.getInt32(7),
21250 Builder.getInt32(10), Builder.getInt32(5)};
21251 return Builder.CreateCall(MCR, args);
21252 } else {
21253 // Instead of using barriers, atomic accesses on these subtargets use
21254 // libcalls.
21255 llvm_unreachable("makeDMB on a target so old that it has no barriers");
21256 }
21257 } else {
21258 Function *DMB = Intrinsic::getDeclaration(M, Intrinsic::arm_dmb);
21259 // Only a full system barrier exists in the M-class architectures.
21260 Domain = Subtarget->isMClass() ? ARM_MB::SY : Domain;
21261 Constant *CDomain = Builder.getInt32(Domain);
21262 return Builder.CreateCall(DMB, CDomain);
21263 }
21264}
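// For reference (assumed mapping, matching the operand list built above): the
// six mcr arguments correspond to "mcr p15, 0, <Rt>, c7, c10, 5", the ARMv6
// CP15 operation that acts as a data memory barrier on cores without DMB,
// while the Intrinsic::arm_dmb path emits a DMB with the requested domain
// (or SY on M-class, which only has the full-system barrier).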
21265
21266// Based on http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
21267 Instruction *ARMTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
21268                                                  Instruction *Inst,
21269 AtomicOrdering Ord) const {
21270   switch (Ord) {
21271   case AtomicOrdering::NotAtomic:
21272   case AtomicOrdering::Unordered:
21273     llvm_unreachable("Invalid fence: unordered/non-atomic");
21274   case AtomicOrdering::Monotonic:
21275   case AtomicOrdering::Acquire:
21276     return nullptr; // Nothing to do
21277   case AtomicOrdering::SequentiallyConsistent:
21278     if (!Inst->hasAtomicStore())
21279       return nullptr; // Nothing to do
21280     [[fallthrough]];
21281   case AtomicOrdering::Release:
21282   case AtomicOrdering::AcquireRelease:
21283     if (Subtarget->preferISHSTBarriers())
21284 return makeDMB(Builder, ARM_MB::ISHST);
21285 // FIXME: add a comment with a link to documentation justifying this.
21286 else
21287 return makeDMB(Builder, ARM_MB::ISH);
21288 }
21289 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
21290}
21291
21292 Instruction *ARMTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
21293                                                   Instruction *Inst,
21294                                                   AtomicOrdering Ord) const {
21295   switch (Ord) {
21296   case AtomicOrdering::NotAtomic:
21297   case AtomicOrdering::Unordered:
21298     llvm_unreachable("Invalid fence: unordered/not-atomic");
21299   case AtomicOrdering::Monotonic:
21300   case AtomicOrdering::Release:
21301     return nullptr; // Nothing to do
21302   case AtomicOrdering::Acquire:
21303   case AtomicOrdering::AcquireRelease:
21304   case AtomicOrdering::SequentiallyConsistent:
21305     return makeDMB(Builder, ARM_MB::ISH);
21306 }
21307 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
21308}
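// Taken together with emitLeadingFence, this yields the usual barrier-based
// mapping for fence-inserting targets; e.g. a seq_cst store is expected to
// lower roughly as (illustrative):
//   dmb ish        ; leading fence
//   str r1, [r0]
//   dmb ish        ; trailing fence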
21309
21310 // Loads and stores of less than 64 bits are already atomic; ones above that
21311// are doomed anyway, so defer to the default libcall and blame the OS when
21312// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21313// anything for those.
21314 TargetLoweringBase::AtomicExpansionKind
21315 ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
21316   bool has64BitAtomicStore;
21317 if (Subtarget->isMClass())
21318 has64BitAtomicStore = false;
21319 else if (Subtarget->isThumb())
21320 has64BitAtomicStore = Subtarget->hasV7Ops();
21321 else
21322 has64BitAtomicStore = Subtarget->hasV6Ops();
21323
21324 unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits();
21325 return Size == 64 && has64BitAtomicStore ? AtomicExpansionKind::Expand
21326                                            : AtomicExpansionKind::None;
21327 }
21328
21329 // Loads and stores of less than 64 bits are already atomic; ones above that
21330// are doomed anyway, so defer to the default libcall and blame the OS when
21331// things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit
21332// anything for those.
21333// FIXME: ldrd and strd are atomic if the CPU has LPAE (e.g. A15 has that
21334// guarantee, see DDI0406C ARM architecture reference manual,
21335// sections A8.8.72-74 LDRD)
21336 TargetLoweringBase::AtomicExpansionKind
21337 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
21338   bool has64BitAtomicLoad;
21339 if (Subtarget->isMClass())
21340 has64BitAtomicLoad = false;
21341 else if (Subtarget->isThumb())
21342 has64BitAtomicLoad = Subtarget->hasV7Ops();
21343 else
21344 has64BitAtomicLoad = Subtarget->hasV6Ops();
21345
21346 unsigned Size = LI->getType()->getPrimitiveSizeInBits();
21347 return (Size == 64 && has64BitAtomicLoad) ? AtomicExpansionKind::LLOnly
21348                                               : AtomicExpansionKind::None;
21349 }
21350
21351// For the real atomic operations, we have ldrex/strex up to 32 bits,
21352// and up to 64 bits on the non-M profiles
21353 TargetLoweringBase::AtomicExpansionKind
21354 ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
21355   if (AI->isFloatingPointOperation())
21356     return AtomicExpansionKind::CmpXChg;
21357
21358 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
21359 bool hasAtomicRMW;
21360 if (Subtarget->isMClass())
21361 hasAtomicRMW = Subtarget->hasV8MBaselineOps();
21362 else if (Subtarget->isThumb())
21363 hasAtomicRMW = Subtarget->hasV7Ops();
21364 else
21365 hasAtomicRMW = Subtarget->hasV6Ops();
21366 if (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW) {
21367 // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21368 // implement atomicrmw without spilling. If the target address is also on
21369 // the stack and close enough to the spill slot, this can lead to a
21370 // situation where the monitor always gets cleared and the atomic operation
21371 // can never succeed. So at -O0 lower this operation to a CAS loop.
21372 if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None)
21373       return AtomicExpansionKind::CmpXChg;
21374     return AtomicExpansionKind::LLSC;
21375   }
21376   return AtomicExpansionKind::None;
21377}
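// Illustrative consequence (assuming ARMv7-A at -O1 or higher): a 32-bit
// "atomicrmw add ptr %p, i32 1 seq_cst" is expanded to an LL/SC loop built
// from ldrex/strex, roughly:
//   1: ldrex r1, [r0]
//      add   r1, r1, #1
//      strex r2, r1, [r0]
//      cmp   r2, #0
//      bne   1b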
21378
21379// Similar to shouldExpandAtomicRMWInIR, ldrex/strex can be used up to 32
21380// bits, and up to 64 bits on the non-M profiles.
21381 TargetLoweringBase::AtomicExpansionKind
21382 ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
21383   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
21384 // implement cmpxchg without spilling. If the address being exchanged is also
21385 // on the stack and close enough to the spill slot, this can lead to a
21386 // situation where the monitor always gets cleared and the atomic operation
21387 // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
21388 unsigned Size = AI->getOperand(1)->getType()->getPrimitiveSizeInBits();
21389 bool HasAtomicCmpXchg;
21390 if (Subtarget->isMClass())
21391 HasAtomicCmpXchg = Subtarget->hasV8MBaselineOps();
21392 else if (Subtarget->isThumb())
21393 HasAtomicCmpXchg = Subtarget->hasV7Ops();
21394 else
21395 HasAtomicCmpXchg = Subtarget->hasV6Ops();
21396 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None &&
21397 HasAtomicCmpXchg && Size <= (Subtarget->isMClass() ? 32U : 64U))
21398     return AtomicExpansionKind::LLSC;
21399   return AtomicExpansionKind::None;
21400 }
21401
21402 bool ARMTargetLowering::shouldInsertFencesForAtomic(
21403     const Instruction *I) const {
21404 return InsertFencesForAtomic;
21405}
21406
21407 bool ARMTargetLowering::useLoadStackGuardNode() const {
21408   // ROPI/RWPI are not supported currently.
21409 return !Subtarget->isROPI() && !Subtarget->isRWPI();
21410}
21411
21412 void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
21413   if (!Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21414     return TargetLowering::insertSSPDeclarations(M);
21415
21416 // MSVC CRT has a global variable holding security cookie.
21417 M.getOrInsertGlobal("__security_cookie",
21418 PointerType::getUnqual(M.getContext()));
21419
21420 // MSVC CRT has a function to validate security cookie.
21421 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
21422 "__security_check_cookie", Type::getVoidTy(M.getContext()),
21423 PointerType::getUnqual(M.getContext()));
21424 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
21425 F->addParamAttr(0, Attribute::AttrKind::InReg);
21426}
21427
21428 Value *ARMTargetLowering::getSDagStackGuard(const Module &M) const {
21429   // MSVC CRT has a global variable holding security cookie.
21430   if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21431     return M.getGlobalVariable("__security_cookie");
21432   return TargetLowering::getSDagStackGuard(M);
21433 }
21434
21435 Function *ARMTargetLowering::getSSPStackGuardCheck(const Module &M) const {
21436   // MSVC CRT has a function to validate security cookie.
21437   if (Subtarget->getTargetTriple().isWindowsMSVCEnvironment())
21438     return M.getFunction("__security_check_cookie");
21439   return TargetLowering::getSSPStackGuardCheck(M);
21440 }
21441
21442 bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
21443                                                   unsigned &Cost) const {
21444 // If we do not have NEON, vector types are not natively supported.
21445 if (!Subtarget->hasNEON())
21446 return false;
21447
21448 // Floating point values and vector values map to the same register file.
21449 // Therefore, although we could do a store extract of a vector type, this is
21450 // better to leave at float as we have more freedom in the addressing mode for
21451 // those.
21452 if (VectorTy->isFPOrFPVectorTy())
21453 return false;
21454
21455 // If the index is unknown at compile time, this is very expensive to lower
21456 // and it is not possible to combine the store with the extract.
21457 if (!isa<ConstantInt>(Idx))
21458 return false;
21459
21460 assert(VectorTy->isVectorTy() && "VectorTy is not a vector type");
21461 unsigned BitWidth = VectorTy->getPrimitiveSizeInBits().getFixedValue();
21462 // We can do a store + vector extract on any vector that fits perfectly in a D
21463 // or Q register.
21464 if (BitWidth == 64 || BitWidth == 128) {
21465 Cost = 0;
21466 return true;
21467 }
21468 return false;
21469}
21470
21471 bool ARMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
21472   return Subtarget->hasV6T2Ops();
21473}
21474
21475 bool ARMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
21476   return Subtarget->hasV6T2Ops();
21477}
21478
21479 bool ARMTargetLowering::isMaskAndCmp0FoldingBeneficial(
21480     const Instruction &AndI) const {
21481 if (!Subtarget->hasV7Ops())
21482 return false;
21483
21484 // Sink the `and` instruction only if the mask would fit into a modified
21485 // immediate operand.
21486 ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
21487 if (!Mask || Mask->getValue().getBitWidth() > 32u)
21488 return false;
21489 auto MaskVal = unsigned(Mask->getValue().getZExtValue());
21490 return (Subtarget->isThumb2() ? ARM_AM::getT2SOImmVal(MaskVal)
21491 : ARM_AM::getSOImmVal(MaskVal)) != -1;
21492}
21493
21494 TargetLowering::ShiftLegalizationStrategy
21495 ARMTargetLowering::preferredShiftLegalizationStrategy(
21496     SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const {
21497   if (Subtarget->hasMinSize() && !Subtarget->isTargetWindows())
21498     return ShiftLegalizationStrategy::LowerToLibcall;
21499   return TargetLowering::preferredShiftLegalizationStrategy(DAG, N,
21500                                                             ExpansionFactor);
21501}
21502
21503 Value *ARMTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
21504                                          Value *Addr,
21505 AtomicOrdering Ord) const {
21506 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21507 bool IsAcquire = isAcquireOrStronger(Ord);
21508
21509 // Since i64 isn't legal and intrinsics don't get type-lowered, the ldrexd
21510 // intrinsic must return {i32, i32} and we have to recombine them into a
21511 // single i64 here.
21512 if (ValueTy->getPrimitiveSizeInBits() == 64) {
21513     Intrinsic::ID Int =
21514         IsAcquire ? Intrinsic::arm_ldaexd : Intrinsic::arm_ldrexd;
21515     Function *Ldrex = Intrinsic::getDeclaration(M, Int);
21516
21517 Value *LoHi = Builder.CreateCall(Ldrex, Addr, "lohi");
21518
21519 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
21520 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
21521 if (!Subtarget->isLittle())
21522 std::swap (Lo, Hi);
21523 Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
21524 Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
21525 return Builder.CreateOr(
21526 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 32)), "val64");
21527 }
21528
21529 Type *Tys[] = { Addr->getType() };
21530 Intrinsic::ID Int = IsAcquire ? Intrinsic::arm_ldaex : Intrinsic::arm_ldrex;
21531 Function *Ldrex = Intrinsic::getDeclaration(M, Int, Tys);
21532 CallInst *CI = Builder.CreateCall(Ldrex, Addr);
21533
21534 CI->addParamAttr(
21535 0, Attribute::get(M->getContext(), Attribute::ElementType, ValueTy));
21536 return Builder.CreateTruncOrBitCast(CI, ValueTy);
21537}
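// Illustrative IR for the 64-bit path above (assuming a little-endian target
// and an acquire ordering):
//   %lohi = call { i32, i32 } @llvm.arm.ldaexd(ptr %addr)
//   %lo   = extractvalue { i32, i32 } %lohi, 0
//   %hi   = extractvalue { i32, i32 } %lohi, 1
// followed by the zext/shl/or sequence that recombines the halves into i64.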
21538
21539 void ARMTargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
21540     IRBuilderBase &Builder) const {
21541 if (!Subtarget->hasV7Ops())
21542 return;
21543 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21544 Builder.CreateCall(Intrinsic::getDeclaration(M, Intrinsic::arm_clrex));
21545}
21546
21547 Value *ARMTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
21548                                                Value *Val, Value *Addr,
21549 AtomicOrdering Ord) const {
21550 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
21551 bool IsRelease = isReleaseOrStronger(Ord);
21552
21553 // Since the intrinsics must have legal type, the i64 intrinsics take two
21554 // parameters: "i32, i32". We must marshal Val into the appropriate form
21555 // before the call.
21556 if (Val->getType()->getPrimitiveSizeInBits() == 64) {
21557     Intrinsic::ID Int =
21558         IsRelease ? Intrinsic::arm_stlexd : Intrinsic::arm_strexd;
21559     Function *Strex = Intrinsic::getDeclaration(M, Int);
21560 Type *Int32Ty = Type::getInt32Ty(M->getContext());
21561
21562 Value *Lo = Builder.CreateTrunc(Val, Int32Ty, "lo");
21563 Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 32), Int32Ty, "hi");
21564 if (!Subtarget->isLittle())
21565 std::swap(Lo, Hi);
21566 return Builder.CreateCall(Strex, {Lo, Hi, Addr});
21567 }
21568
21569 Intrinsic::ID Int = IsRelease ? Intrinsic::arm_stlex : Intrinsic::arm_strex;
21570 Type *Tys[] = { Addr->getType() };
21571 Function *Strex = Intrinsic::getDeclaration(M, Int, Tys);
21572
21573 CallInst *CI = Builder.CreateCall(
21574 Strex, {Builder.CreateZExtOrBitCast(
21575 Val, Strex->getFunctionType()->getParamType(0)),
21576 Addr});
21577 CI->addParamAttr(1, Attribute::get(M->getContext(), Attribute::ElementType,
21578 Val->getType()));
21579 return CI;
21580}
21581
21582
21583 bool ARMTargetLowering::alignLoopsWithOptSize() const {
21584   return Subtarget->isMClass();
21585}
21586
21587/// A helper function for determining the number of interleaved accesses we
21588/// will generate when lowering accesses of the given type.
21589unsigned
21590 ARMTargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
21591                                              const DataLayout &DL) const {
21592 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
21593}
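// Worked example: a <16 x i32> vector is 512 bits wide, so
// (512 + 127) / 128 = 4 interleaved accesses, while a 64-bit <8 x i8>
// rounds up to a single access.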
21594
21595 bool ARMTargetLowering::isLegalInterleavedAccessType(
21596     unsigned Factor, FixedVectorType *VecTy, Align Alignment,
21597 const DataLayout &DL) const {
21598
21599 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
21600 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
21601
21602 if (!Subtarget->hasNEON() && !Subtarget->hasMVEIntegerOps())
21603 return false;
21604
21605 // Ensure the vector doesn't have f16 elements. Even though we could do an
21606 // i16 vldN, we can't hold the f16 vectors and will end up converting via
21607 // f32.
21608 if (Subtarget->hasNEON() && VecTy->getElementType()->isHalfTy())
21609 return false;
21610 if (Subtarget->hasMVEIntegerOps() && Factor == 3)
21611 return false;
21612
21613 // Ensure the number of vector elements is greater than 1.
21614 if (VecTy->getNumElements() < 2)
21615 return false;
21616
21617 // Ensure the element type is legal.
21618 if (ElSize != 8 && ElSize != 16 && ElSize != 32)
21619 return false;
21620   // And that the alignment is high enough under MVE.
21621 if (Subtarget->hasMVEIntegerOps() && Alignment < ElSize / 8)
21622 return false;
21623
21624 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
21625 // 128 will be split into multiple interleaved accesses.
21626 if (Subtarget->hasNEON() && VecSize == 64)
21627 return true;
21628 return VecSize % 128 == 0;
21629}
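// Illustrative cases: a factor-2 access on <8 x i16> (128 bits) is legal for
// both NEON and suitably aligned MVE; a factor-3 access is rejected under
// MVE; and a <4 x half> NEON candidate is rejected by the f16 element
// restriction above.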
21630
21631 unsigned ARMTargetLowering::getMaxSupportedInterleaveFactor() const {
21632   if (Subtarget->hasNEON())
21633     return 4;
21634   if (Subtarget->hasMVEIntegerOps())
21635     return MVEMaxSupportedInterleaveFactor;
21636   return TargetLoweringBase::getMaxSupportedInterleaveFactor();
21637 }
21638
21639/// Lower an interleaved load into a vldN intrinsic.
21640///
21641/// E.g. Lower an interleaved load (Factor = 2):
21642/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr, align 4
21643/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
21644/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
21645///
21646/// Into:
21647/// %vld2 = { <4 x i32>, <4 x i32> } call llvm.arm.neon.vld2(%ptr, 4)
21648/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 0
21649/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %vld2, i32 1
21650 bool ARMTargetLowering::lowerInterleavedLoad(
21651     LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
21652     ArrayRef<unsigned> Indices, unsigned Factor) const {
21653 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21654 "Invalid interleave factor");
21655 assert(!Shuffles.empty() && "Empty shufflevector input");
21656 assert(Shuffles.size() == Indices.size() &&
21657 "Unmatched number of shufflevectors and indices");
21658
21659 auto *VecTy = cast<FixedVectorType>(Shuffles[0]->getType());
21660 Type *EltTy = VecTy->getElementType();
21661
21662 const DataLayout &DL = LI->getModule()->getDataLayout();
21663 Align Alignment = LI->getAlign();
21664
21665 // Skip if we do not have NEON and skip illegal vector types. We can
21666 // "legalize" wide vector types into multiple interleaved accesses as long as
21667 // the vector types are divisible by 128.
21668 if (!isLegalInterleavedAccessType(Factor, VecTy, Alignment, DL))
21669 return false;
21670
21671 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
21672
21673 // A pointer vector can not be the return type of the ldN intrinsics. Need to
21674 // load integer vectors first and then convert to pointer vectors.
21675 if (EltTy->isPointerTy())
21676 VecTy = FixedVectorType::get(DL.getIntPtrType(EltTy), VecTy);
21677
21678 IRBuilder<> Builder(LI);
21679
21680 // The base address of the load.
21681 Value *BaseAddr = LI->getPointerOperand();
21682
21683 if (NumLoads > 1) {
21684 // If we're going to generate more than one load, reset the sub-vector type
21685 // to something legal.
21686 VecTy = FixedVectorType::get(VecTy->getElementType(),
21687 VecTy->getNumElements() / NumLoads);
21688 }
21689
21690 assert(isTypeLegal(EVT::getEVT(VecTy)) && "Illegal vldN vector type!");
21691
21692 auto createLoadIntrinsic = [&](Value *BaseAddr) {
21693 if (Subtarget->hasNEON()) {
21694 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21695 Type *Tys[] = {VecTy, PtrTy};
21696 static const Intrinsic::ID LoadInts[3] = {Intrinsic::arm_neon_vld2,
21697 Intrinsic::arm_neon_vld3,
21698 Intrinsic::arm_neon_vld4};
21699 Function *VldnFunc =
21700 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
21701
21703 Ops.push_back(BaseAddr);
21704 Ops.push_back(Builder.getInt32(LI->getAlign().value()));
21705
21706 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21707 } else {
21708 assert((Factor == 2 || Factor == 4) &&
21709 "expected interleave factor of 2 or 4 for MVE");
21710 Intrinsic::ID LoadInts =
21711 Factor == 2 ? Intrinsic::arm_mve_vld2q : Intrinsic::arm_mve_vld4q;
21712 Type *PtrTy = Builder.getPtrTy(LI->getPointerAddressSpace());
21713 Type *Tys[] = {VecTy, PtrTy};
21714 Function *VldnFunc =
21715 Intrinsic::getDeclaration(LI->getModule(), LoadInts, Tys);
21716
21718 Ops.push_back(BaseAddr);
21719 return Builder.CreateCall(VldnFunc, Ops, "vldN");
21720 }
21721 };
21722
21723 // Holds sub-vectors extracted from the load intrinsic return values. The
21724 // sub-vectors are associated with the shufflevector instructions they will
21725 // replace.
21726   DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
21727 
21728 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
21729 // If we're generating more than one load, compute the base address of
21730 // subsequent loads as an offset from the previous.
21731 if (LoadCount > 0)
21732 BaseAddr = Builder.CreateConstGEP1_32(VecTy->getElementType(), BaseAddr,
21733 VecTy->getNumElements() * Factor);
21734
21735 CallInst *VldN = createLoadIntrinsic(BaseAddr);
21736
21737 // Replace uses of each shufflevector with the corresponding vector loaded
21738 // by ldN.
21739 for (unsigned i = 0; i < Shuffles.size(); i++) {
21740 ShuffleVectorInst *SV = Shuffles[i];
21741 unsigned Index = Indices[i];
21742
21743 Value *SubVec = Builder.CreateExtractValue(VldN, Index);
21744
21745 // Convert the integer vector to pointer vector if the element is pointer.
21746 if (EltTy->isPointerTy())
21747 SubVec = Builder.CreateIntToPtr(
21748 SubVec,
21750
21751 SubVecs[SV].push_back(SubVec);
21752 }
21753 }
21754
21755 // Replace uses of the shufflevector instructions with the sub-vectors
21756 // returned by the load intrinsic. If a shufflevector instruction is
21757 // associated with more than one sub-vector, those sub-vectors will be
21758 // concatenated into a single wide vector.
21759 for (ShuffleVectorInst *SVI : Shuffles) {
21760 auto &SubVec = SubVecs[SVI];
21761 auto *WideVec =
21762 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
21763 SVI->replaceAllUsesWith(WideVec);
21764 }
21765
21766 return true;
21767}
21768
21769/// Lower an interleaved store into a vstN intrinsic.
21770///
21771/// E.g. Lower an interleaved store (Factor = 3):
21772/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
21773/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
21774/// store <12 x i32> %i.vec, <12 x i32>* %ptr, align 4
21775///
21776/// Into:
21777/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
21778/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
21779/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
21780/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21781///
21782/// Note that the new shufflevectors will be removed and we'll only generate one
21783/// vst3 instruction in CodeGen.
21784///
21785/// Example for a more general valid mask (Factor 3). Lower:
21786/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
21787/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
21788/// store <12 x i32> %i.vec, <12 x i32>* %ptr
21789///
21790/// Into:
21791/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
21792/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
21793/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
21794/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
21795 bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
21796                                               ShuffleVectorInst *SVI,
21797 unsigned Factor) const {
21798 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
21799 "Invalid interleave factor");
21800
21801 auto *VecTy = cast<FixedVectorType>(SVI->getType());
21802 assert(VecTy->getNumElements() % Factor == 0 && "Invalid interleaved store");
21803
21804 unsigned LaneLen = VecTy->getNumElements() / Factor;
21805 Type *EltTy = VecTy->getElementType();
21806 auto *SubVecTy = FixedVectorType::get(EltTy, LaneLen);
21807
21808 const DataLayout &DL = SI->getModule()->getDataLayout();
21809 Align Alignment = SI->getAlign();
21810
21811 // Skip if we do not have NEON and skip illegal vector types. We can
21812 // "legalize" wide vector types into multiple interleaved accesses as long as
21813 // the vector types are divisible by 128.
21814 if (!isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL))
21815 return false;
21816
21817 unsigned NumStores = getNumInterleavedAccesses(SubVecTy, DL);
21818
21819 Value *Op0 = SVI->getOperand(0);
21820 Value *Op1 = SVI->getOperand(1);
21821 IRBuilder<> Builder(SI);
21822
21823 // StN intrinsics don't support pointer vectors as arguments. Convert pointer
21824 // vectors to integer vectors.
21825 if (EltTy->isPointerTy()) {
21826 Type *IntTy = DL.getIntPtrType(EltTy);
21827
21828 // Convert to the corresponding integer vector.
21829 auto *IntVecTy =
21830 FixedVectorType::get(IntTy, cast<FixedVectorType>(Op0->getType()));
21831 Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
21832 Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
21833
21834 SubVecTy = FixedVectorType::get(IntTy, LaneLen);
21835 }
21836
21837 // The base address of the store.
21838 Value *BaseAddr = SI->getPointerOperand();
21839
21840 if (NumStores > 1) {
21841 // If we're going to generate more than one store, reset the lane length
21842 // and sub-vector type to something legal.
21843 LaneLen /= NumStores;
21844 SubVecTy = FixedVectorType::get(SubVecTy->getElementType(), LaneLen);
21845 }
21846
21847 assert(isTypeLegal(EVT::getEVT(SubVecTy)) && "Illegal vstN vector type!");
21848
21849 auto Mask = SVI->getShuffleMask();
21850
21851 auto createStoreIntrinsic = [&](Value *BaseAddr,
21852 SmallVectorImpl<Value *> &Shuffles) {
21853 if (Subtarget->hasNEON()) {
21854 static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
21855 Intrinsic::arm_neon_vst3,
21856 Intrinsic::arm_neon_vst4};
21857 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21858 Type *Tys[] = {PtrTy, SubVecTy};
21859
21860       Function *VstNFunc = Intrinsic::getDeclaration(
21861           SI->getModule(), StoreInts[Factor - 2], Tys);
21862
21864 Ops.push_back(BaseAddr);
21865 append_range(Ops, Shuffles);
21866 Ops.push_back(Builder.getInt32(SI->getAlign().value()));
21867 Builder.CreateCall(VstNFunc, Ops);
21868 } else {
21869 assert((Factor == 2 || Factor == 4) &&
21870 "expected interleave factor of 2 or 4 for MVE");
21871 Intrinsic::ID StoreInts =
21872 Factor == 2 ? Intrinsic::arm_mve_vst2q : Intrinsic::arm_mve_vst4q;
21873 Type *PtrTy = Builder.getPtrTy(SI->getPointerAddressSpace());
21874 Type *Tys[] = {PtrTy, SubVecTy};
21875 Function *VstNFunc =
21876 Intrinsic::getDeclaration(SI->getModule(), StoreInts, Tys);
21877
21879 Ops.push_back(BaseAddr);
21880 append_range(Ops, Shuffles);
21881 for (unsigned F = 0; F < Factor; F++) {
21882 Ops.push_back(Builder.getInt32(F));
21883 Builder.CreateCall(VstNFunc, Ops);
21884 Ops.pop_back();
21885 }
21886 }
21887 };
21888
21889 for (unsigned StoreCount = 0; StoreCount < NumStores; ++StoreCount) {
21890     // If we're generating more than one store, compute the base address of
21891 // subsequent stores as an offset from the previous.
21892 if (StoreCount > 0)
21893 BaseAddr = Builder.CreateConstGEP1_32(SubVecTy->getElementType(),
21894 BaseAddr, LaneLen * Factor);
21895
21896 SmallVector<Value *, 4> Shuffles;
21897
21898 // Split the shufflevector operands into sub vectors for the new vstN call.
21899 for (unsigned i = 0; i < Factor; i++) {
21900 unsigned IdxI = StoreCount * LaneLen * Factor + i;
21901 if (Mask[IdxI] >= 0) {
21902 Shuffles.push_back(Builder.CreateShuffleVector(
21903 Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0)));
21904 } else {
21905 unsigned StartMask = 0;
21906 for (unsigned j = 1; j < LaneLen; j++) {
21907 unsigned IdxJ = StoreCount * LaneLen * Factor + j;
21908 if (Mask[IdxJ * Factor + IdxI] >= 0) {
21909 StartMask = Mask[IdxJ * Factor + IdxI] - IdxJ;
21910 break;
21911 }
21912 }
21913 // Note: If all elements in a chunk are undefs, StartMask=0!
21914 // Note: Filling undef gaps with random elements is ok, since
21915 // those elements were being written anyway (with undefs).
21916 // In the case of all undefs we're defaulting to using elems from 0
21917 // Note: StartMask cannot be negative, it's checked in
21918 // isReInterleaveMask
21919 Shuffles.push_back(Builder.CreateShuffleVector(
21920 Op0, Op1, createSequentialMask(StartMask, LaneLen, 0)));
21921 }
21922 }
21923
21924 createStoreIntrinsic(BaseAddr, Shuffles);
21925 }
21926 return true;
21927}
21928
21929 enum HABaseType {
21930   HA_UNKNOWN = 0,
21931   HA_FLOAT,
21932   HA_DOUBLE,
21933   HA_VECT64,
21934   HA_VECT128
21935 };
21936 
21937 static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base,
21938                                    uint64_t &Members) {
21939 if (auto *ST = dyn_cast<StructType>(Ty)) {
21940 for (unsigned i = 0; i < ST->getNumElements(); ++i) {
21941 uint64_t SubMembers = 0;
21942 if (!isHomogeneousAggregate(ST->getElementType(i), Base, SubMembers))
21943 return false;
21944 Members += SubMembers;
21945 }
21946 } else if (auto *AT = dyn_cast<ArrayType>(Ty)) {
21947 uint64_t SubMembers = 0;
21948 if (!isHomogeneousAggregate(AT->getElementType(), Base, SubMembers))
21949 return false;
21950 Members += SubMembers * AT->getNumElements();
21951 } else if (Ty->isFloatTy()) {
21952 if (Base != HA_UNKNOWN && Base != HA_FLOAT)
21953 return false;
21954 Members = 1;
21955 Base = HA_FLOAT;
21956 } else if (Ty->isDoubleTy()) {
21957 if (Base != HA_UNKNOWN && Base != HA_DOUBLE)
21958 return false;
21959 Members = 1;
21960 Base = HA_DOUBLE;
21961 } else if (auto *VT = dyn_cast<VectorType>(Ty)) {
21962 Members = 1;
21963 switch (Base) {
21964 case HA_FLOAT:
21965 case HA_DOUBLE:
21966 return false;
21967 case HA_VECT64:
21968 return VT->getPrimitiveSizeInBits().getFixedValue() == 64;
21969 case HA_VECT128:
21970 return VT->getPrimitiveSizeInBits().getFixedValue() == 128;
21971 case HA_UNKNOWN:
21972 switch (VT->getPrimitiveSizeInBits().getFixedValue()) {
21973 case 64:
21974 Base = HA_VECT64;
21975 return true;
21976 case 128:
21977 Base = HA_VECT128;
21978 return true;
21979 default:
21980 return false;
21981 }
21982 }
21983 }
21984
21985 return (Members > 0 && Members <= 4);
21986}
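// Illustrative cases: struct { float x, y, z; } is a homogeneous aggregate
// with Base == HA_FLOAT and Members == 3, whereas struct { float; double; }
// mixes base types and any aggregate with more than four members is
// rejected by the final check.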
21987
21988/// Return the correct alignment for the current calling convention.
21989 Align ARMTargetLowering::getABIAlignmentForCallingConv(
21990     Type *ArgTy, const DataLayout &DL) const {
21991 const Align ABITypeAlign = DL.getABITypeAlign(ArgTy);
21992 if (!ArgTy->isVectorTy())
21993 return ABITypeAlign;
21994
21995 // Avoid over-aligning vector parameters. It would require realigning the
21996 // stack and waste space for no real benefit.
21997 return std::min(ABITypeAlign, DL.getStackAlignment());
21998}
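// Worked example (assuming the standard AAPCS data layout with an 8-byte
// stack alignment): a <4 x i32> argument has a 16-byte ABI type alignment,
// so the returned alignment is clamped to 8 bytes instead of forcing a
// realigned stack slot.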
21999
22000/// Return true if a type is an AAPCS-VFP homogeneous aggregate or one of
22001/// [N x i32] or [N x i64]. This allows front-ends to skip emitting padding when
22002/// passing according to AAPCS rules.
22003 bool ARMTargetLowering::functionArgumentNeedsConsecutiveRegisters(
22004     Type *Ty, CallingConv::ID CallConv, bool isVarArg,
22005 const DataLayout &DL) const {
22006 if (getEffectiveCallingConv(CallConv, isVarArg) !=
22007       CallingConv::ARM_AAPCS_VFP)
22008     return false;
22009
22010   HABaseType Base = HA_UNKNOWN;
22011   uint64_t Members = 0;
22012 bool IsHA = isHomogeneousAggregate(Ty, Base, Members);
22013 LLVM_DEBUG(dbgs() << "isHA: " << IsHA << " "; Ty->dump());
22014
22015 bool IsIntArray = Ty->isArrayTy() && Ty->getArrayElementType()->isIntegerTy();
22016 return IsHA || IsIntArray;
22017}
22018
22019 Register ARMTargetLowering::getExceptionPointerRegister(
22020     const Constant *PersonalityFn) const {
22021 // Platforms which do not use SjLj EH may return values in these registers
22022 // via the personality function.
22023 return Subtarget->useSjLjEH() ? Register() : ARM::R0;
22024}
22025
22026 Register ARMTargetLowering::getExceptionSelectorRegister(
22027     const Constant *PersonalityFn) const {
22028 // Platforms which do not use SjLj EH may return values in these registers
22029 // via the personality function.
22030 return Subtarget->useSjLjEH() ? Register() : ARM::R1;
22031}
22032
22033void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
22034 // Update IsSplitCSR in ARMFunctionInfo.
22035 ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
22036 AFI->setIsSplitCSR(true);
22037}
22038
22039void ARMTargetLowering::insertCopiesSplitCSR(
22040 MachineBasicBlock *Entry,
22041 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
22042 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
22043 const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
22044 if (!IStart)
22045 return;
22046
22047 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
22048 MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
22049 MachineBasicBlock::iterator MBBI = Entry->begin();
22050 for (const MCPhysReg *I = IStart; *I; ++I) {
22051 const TargetRegisterClass *RC = nullptr;
22052 if (ARM::GPRRegClass.contains(*I))
22053 RC = &ARM::GPRRegClass;
22054 else if (ARM::DPRRegClass.contains(*I))
22055 RC = &ARM::DPRRegClass;
22056 else
22057 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
22058
22059 Register NewVR = MRI->createVirtualRegister(RC);
22060 // Create copy from CSR to a virtual register.
22061 // FIXME: this currently does not emit CFI pseudo-instructions, it works
22062 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
22063 // nounwind. If we want to generalize this later, we may need to emit
22064 // CFI pseudo-instructions.
22065 assert(Entry->getParent()->getFunction().hasFnAttribute(
22066 Attribute::NoUnwind) &&
22067 "Function should be nounwind in insertCopiesSplitCSR!");
22068 Entry->addLiveIn(*I);
22069 BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
22070 .addReg(*I);
22071
22072 // Insert the copy-back instructions right before the terminator.
22073 for (auto *Exit : Exits)
22074 BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
22075 TII->get(TargetOpcode::COPY), *I)
22076 .addReg(NewVR);
22077 }
22078}
22079
22080 void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
22081   MF.getFrameInfo().computeMaxCallFrameSize(MF);
22082   TargetLoweringBase::finalizeLowering(MF);
22083 }
22084
22085 bool ARMTargetLowering::isComplexDeinterleavingSupported() const {
22086   return Subtarget->hasMVEIntegerOps();
22087}
22088
22089 bool ARMTargetLowering::isComplexDeinterleavingOperationSupported(
22090     ComplexDeinterleavingOperation Operation, Type *Ty) const {
22091   auto *VTy = dyn_cast<FixedVectorType>(Ty);
22092 if (!VTy)
22093 return false;
22094
22095 auto *ScalarTy = VTy->getScalarType();
22096 unsigned NumElements = VTy->getNumElements();
22097
22098 unsigned VTyWidth = VTy->getScalarSizeInBits() * NumElements;
22099 if (VTyWidth < 128 || !llvm::isPowerOf2_32(VTyWidth))
22100 return false;
22101
22102 // Both VCADD and VCMUL/VCMLA support the same types, F16 and F32
22103 if (ScalarTy->isHalfTy() || ScalarTy->isFloatTy())
22104 return Subtarget->hasMVEFloatOps();
22105
22106   if (Operation != ComplexDeinterleavingOperation::CAdd)
22107     return false;
22108
22109 return Subtarget->hasMVEIntegerOps() &&
22110 (ScalarTy->isIntegerTy(8) || ScalarTy->isIntegerTy(16) ||
22111 ScalarTy->isIntegerTy(32));
22112}
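// Illustrative cases: <8 x half> and <4 x float> (128 bits) are supported
// when MVE float ops are present; <16 x i8> qualifies for CAdd with MVE
// integer ops; and a 64-bit vector such as <2 x float> is rejected by the
// 128-bit minimum width check above.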
22113
22114 Value *ARMTargetLowering::createComplexDeinterleavingIR(
22115     IRBuilderBase &B, ComplexDeinterleavingOperation OperationType,
22116     ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB,
22117 Value *Accumulator) const {
22118
22119 FixedVectorType *Ty = cast<FixedVectorType>(InputA->getType());
22120
22121 unsigned TyWidth = Ty->getScalarSizeInBits() * Ty->getNumElements();
22122
22123 assert(TyWidth >= 128 && "Width of vector type must be at least 128 bits");
22124
22125 if (TyWidth > 128) {
22126 int Stride = Ty->getNumElements() / 2;
22127 auto SplitSeq = llvm::seq<int>(0, Ty->getNumElements());
22128 auto SplitSeqVec = llvm::to_vector(SplitSeq);
22129 ArrayRef<int> LowerSplitMask(&SplitSeqVec[0], Stride);
22130 ArrayRef<int> UpperSplitMask(&SplitSeqVec[Stride], Stride);
22131
22132 auto *LowerSplitA = B.CreateShuffleVector(InputA, LowerSplitMask);
22133 auto *LowerSplitB = B.CreateShuffleVector(InputB, LowerSplitMask);
22134 auto *UpperSplitA = B.CreateShuffleVector(InputA, UpperSplitMask);
22135 auto *UpperSplitB = B.CreateShuffleVector(InputB, UpperSplitMask);
22136 Value *LowerSplitAcc = nullptr;
22137 Value *UpperSplitAcc = nullptr;
22138
22139 if (Accumulator) {
22140 LowerSplitAcc = B.CreateShuffleVector(Accumulator, LowerSplitMask);
22141 UpperSplitAcc = B.CreateShuffleVector(Accumulator, UpperSplitMask);
22142 }
22143
22144 auto *LowerSplitInt = createComplexDeinterleavingIR(
22145 B, OperationType, Rotation, LowerSplitA, LowerSplitB, LowerSplitAcc);
22146 auto *UpperSplitInt = createComplexDeinterleavingIR(
22147 B, OperationType, Rotation, UpperSplitA, UpperSplitB, UpperSplitAcc);
22148
22149 ArrayRef<int> JoinMask(&SplitSeqVec[0], Ty->getNumElements());
22150 return B.CreateShuffleVector(LowerSplitInt, UpperSplitInt, JoinMask);
22151 }
22152
22153 auto *IntTy = Type::getInt32Ty(B.getContext());
22154
22155 ConstantInt *ConstRotation = nullptr;
22156 if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
22157 ConstRotation = ConstantInt::get(IntTy, (int)Rotation);
22158
22159 if (Accumulator)
22160 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmlaq, Ty,
22161 {ConstRotation, Accumulator, InputB, InputA});
22162 return B.CreateIntrinsic(Intrinsic::arm_mve_vcmulq, Ty,
22163 {ConstRotation, InputB, InputA});
22164 }
22165
22166 if (OperationType == ComplexDeinterleavingOperation::CAdd) {
22167 // 1 means the value is not halved.
22168 auto *ConstHalving = ConstantInt::get(IntTy, 1);
22169
22170     if (Rotation == ComplexDeinterleavingRotation::Rotation_90)
22171       ConstRotation = ConstantInt::get(IntTy, 0);
22172     else if (Rotation == ComplexDeinterleavingRotation::Rotation_270)
22173       ConstRotation = ConstantInt::get(IntTy, 1);
22174
22175 if (!ConstRotation)
22176 return nullptr; // Invalid rotation for arm_mve_vcaddq
22177
22178 return B.CreateIntrinsic(Intrinsic::arm_mve_vcaddq, Ty,
22179 {ConstHalving, ConstRotation, InputA, InputB});
22180 }
22181
22182 return nullptr;
22183}
static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG)
static SDValue PerformTruncatingStoreCombine(StoreSDNode *St, SelectionDAG &DAG)
static unsigned SelectPairHalf(unsigned Elements, ArrayRef< int > Mask, unsigned Index)
static void emitPostLd(MachineBasicBlock *BB, MachineBasicBlock::iterator Pos, const TargetInstrInfo *TII, const DebugLoc &dl, unsigned LdSize, unsigned Data, unsigned AddrIn, unsigned AddrOut, bool IsThumb1, bool IsThumb2)
Emit a post-increment load operation with given size.
static SDValue TryDistrubutionADDVecReduce(SDNode *N, SelectionDAG &DAG)
static bool isValidBaseUpdate(SDNode *N, SDNode *User)
static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, const ARMSubtarget *ST, const SDLoc &dl)
static bool IsQRMVEInstruction(const SDNode *N, const SDNode *Op)
static SDValue PerformMinMaxToSatCombine(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformXORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static bool getMVEIndexedAddressParts(SDNode *Ptr, EVT VT, Align Alignment, bool isSEXTLoad, bool IsMasked, bool isLE, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
std::pair< unsigned, const TargetRegisterClass * > RCPair
static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, TargetLowering::DAGCombinerInfo &DCI, bool AllOnes=false)
static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, ISD::ZERO_EXTEND,...
static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
cl::opt< unsigned > MVEMaxSupportedInterleaveFactor("mve-max-interleave-factor", cl::Hidden, cl::desc("Maximum interleave factor for MVE VLDn to generate."), cl::init(2))
static SDValue isVMOVModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, unsigned SplatBitSize, SelectionDAG &DAG, const SDLoc &dl, EVT &VT, EVT VectorVT, VMOVModImmType type)
isVMOVModifiedImm - Check if the specified splat value corresponds to a valid vector constant for a N...
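The NEON/MVE modified-immediate encodings accept only a small family of bit patterns. A rough, standalone illustration of one case only (a 32-bit splat with a single non-zero byte, encodable by VMOV.i32); the helper name is invented and this is not the in-tree isVMOVModifiedImm logic, which also handles i8/i16/i64 splats, the 0x..FF / 0x..FFFF forms and VMVN variants:

  #include <cstdint>

  // Sketch: a 32-bit splat is encodable in the simplest VMOV.i32 case when
  // only one of its four bytes is non-zero.
  static bool isSingleByteI32ImmSketch(uint32_t Splat) {
    for (int Shift = 0; Shift < 32; Shift += 8)
      if ((Splat & ~(UINT32_C(0xFF) << Shift)) == 0)
        return true;
    return false;
  }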
static SDValue LowerBuildVectorOfFPExt(SDValue BV, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, SelectionDAG &DAG)
BC is a bitcast that is about to be turned into a VMOVDRR.
static SDValue promoteToConstantPool(const ARMTargetLowering *TLI, const GlobalValue *GV, SelectionDAG &DAG, EVT PtrVT, const SDLoc &dl)
static unsigned isNEONTwoResultShuffleMask(ArrayRef< int > ShuffleMask, EVT VT, unsigned &WhichResult, bool &isV_UNDEF)
Check if ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN), and return the corresponding AR...
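For reference, the per-result mask patterns these checks look for are fixed. Below is a standalone sketch of the VUZP case only; the helper name is made up, VZIP and VTRN follow analogous patterns, and the in-tree matcher also handles the single-source "v, undef" forms:

  #include <cstddef>
  #include <vector>

  // First-result VUZP on two concatenated N-element vectors selects the even
  // lanes <0, 2, 4, ...>; the second result selects the odd lanes. Negative
  // mask entries stand for undef lanes and are accepted as wildcards.
  static bool isVUZPLikeMask(const std::vector<int> &Mask, unsigned &WhichResult) {
    for (unsigned Res = 0; Res < 2; ++Res) {
      bool Match = true;
      for (size_t I = 0, E = Mask.size(); I != E; ++I)
        if (Mask[I] >= 0 && Mask[I] != static_cast<int>(2 * I + Res)) {
          Match = false;
          break;
        }
      if (Match) {
        WhichResult = Res;
        return true;
      }
    }
    return false;
  }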
static bool BitsProperlyConcatenate(const APInt &A, const APInt &B)
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, bool isSEXTLoad, SDValue &Base, SDValue &Offset, bool &isInc, SelectionDAG &DAG)
static SDValue LowerVecReduce(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG)
static bool TryCombineBaseUpdate(struct BaseUpdateTarget &Target, struct BaseUpdateUser &User, bool SimpleConstIncOnly, TargetLowering::DAGCombinerInfo &DCI)
static bool allUsersAreInFunction(const Value *V, const Function *F)
Return true if all users of V are within function F, looking through ConstantExprs.
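A minimal sketch of that user walk using only generic LLVM IR APIs; the function name below is invented and the in-tree helper may differ in detail:

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Instruction.h"

  // Walk V's users, looking through ConstantExprs, and require that every
  // instruction user belongs to F.
  static bool allUsersInFunctionSketch(const llvm::Value *V, const llvm::Function *F) {
    llvm::SmallVector<const llvm::User *, 8> Worklist(V->user_begin(), V->user_end());
    while (!Worklist.empty()) {
      const llvm::User *U = Worklist.pop_back_val();
      if (llvm::isa<llvm::ConstantExpr>(U)) {
        Worklist.append(U->user_begin(), U->user_end());
        continue;
      }
      const auto *I = llvm::dyn_cast<llvm::Instruction>(U);
      if (!I || I->getFunction() != F)
        return false;
    }
    return true;
  }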
static bool isSingletonVEXTMask(ArrayRef< int > M, EVT VT, unsigned &Imm)
static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG)
PerformVMOVDRRCombine - Target-specific dag combine xforms for ARMISD::VMOVDRR.
static bool isLowerSaturatingConditional(const SDValue &Op, SDValue &V, SDValue &SatK)
static bool isLegalAddressImmediate(int64_t V, EVT VT, const ARMSubtarget *Subtarget)
isLegalAddressImmediate - Return true if the integer value can be used as the offset of the target ad...
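As a simplified, ARM-mode-only illustration of the offset ranges involved (the helper name and parameters are invented for this sketch; Thumb-1 and Thumb-2 use different encodings and are handled by the isLegalT1/T2AddressImmediate entries above):

  #include <cstdint>

  // A32 immediate-offset ranges, roughly:
  //  * LDR/STR of words and unsigned bytes: 12-bit offset, +/-4095.
  //  * Halfword and signed-byte loads (LDRH/LDRSB family): 8-bit offset, +/-255.
  //  * VLDR/VSTR: 8-bit offset scaled by 4, +/-1020, must be a multiple of 4.
  static bool isLegalA32OffsetSketch(int64_t V, bool HalfwordOrSignedByte, bool IsVFP) {
    int64_t A = V < 0 ? -V : V;
    if (IsVFP)
      return (V % 4) == 0 && A <= 1020;
    if (HalfwordOrSignedByte)
      return A <= 255;
    return A <= 4095;
  }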
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static bool isLegalT1AddressImmediate(int64_t V, EVT VT)
static SDValue CombineANDShift(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG)
static SDValue PerformSHLSimplify(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformADDECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformADDECombine - Target-specific dag combine transform from ARMISD::ADDC, ARMISD::ADDE,...
static SDValue PerformReduceShuffleCombine(SDNode *N, SelectionDAG &DAG)
static SDValue PerformUMLALCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerTruncate(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformHWLoopCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *ST)
static SDValue PerformSplittingMVETruncToNarrowingStores(StoreSDNode *St, SelectionDAG &DAG)
static bool isVUZP_v_undef_Mask(ArrayRef< int > M, EVT VT, unsigned &WhichResult)
isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of "vector_shuffle v,...
static bool isHomogeneousAggregate(Type *Ty, HABaseType &Base, uint64_t &Members)
static SDValue PerformMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformFADDCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerReverse_VECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
static SDValue PerformVDIVCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) can replace combinations of ...
static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static SDValue PerformADDVecReduce(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue LowerPredicateStore(SDValue Op, SelectionDAG &DAG)
static SDValue SearchLoopIntrinsic(SDValue N, ISD::CondCode &CC, int &Imm, bool &Negate)
static bool canChangeToInt(SDValue Op, bool &SeenZero, const ARMSubtarget *Subtarget)
canChangeToInt - Given the fp compare operand, return true if it is suitable to morph to an integer c...
static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2)
Return the store opcode for a given store size.
static bool IsVUZPShuffleNode(SDNode *N)
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue AddCombineTo64BitSMLAL16(SDNode *AddcNode, SDNode *AddeNode, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
static void attachMEMCPYScratchRegs(const ARMSubtarget *Subtarget, MachineInstr &MI, const SDNode *Node)
Attaches vregs to MEMCPY that it will use as scratch registers when it is expanded into LDM/STM.
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
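The core of that test is a positive-zero check on a ConstantFPSDNode. A minimal sketch of just that check (the in-tree helper may also recognize other forms, e.g. +0.0 materialized through a constant-pool load):

  #include "llvm/CodeGen/SelectionDAGNodes.h"

  // Is Op a floating-point constant equal to +0.0 (not -0.0)?
  static bool isPosFPZeroSketch(llvm::SDValue Op) {
    if (const auto *CFP = llvm::dyn_cast<llvm::ConstantFPSDNode>(Op))
      return CFP->getValueAPF().isPosZero();
    return false;
  }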
static SDValue findMUL_LOHI(SDValue V)
static SDValue LowerVECTOR_SHUFFLE_i1(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformORCombine_i1(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *Subtarget)
static SDValue PerformSplittingMVEEXTToWideningLoad(SDNode *N, SelectionDAG &DAG)
static SDValue PerformSplittingToWideningLoad(SDNode *N, SelectionDAG &DAG)
static void genTPLoopBody(MachineBasicBlock *TpLoopBody, MachineBasicBlock *TpEntry, MachineBasicBlock *TpExit, const TargetInstrInfo *TII, DebugLoc Dl, MachineRegisterInfo &MRI, Register OpSrcReg, Register OpDestReg, Register ElementCountReg, Register TotalIterationsReg, bool IsMemcpy)
Adds logic in the loopBody MBB to generate MVE_VCTP, t2DoLoopDec and t2DoLoopEnd.
static SDValue PerformBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget)
PerformBUILD_VECTORCombine - Target-specific dag combine xforms for ISD::BUILD_VECTOR.
static SDValue LowerVecReduceF(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST)
static SDValue PerformMinMaxCombine(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST)
PerformMinMaxCombine - Target-specific DAG combining for creating truncating saturates.
This file defines a TargetTransformInfo::Concept conforming object specific to the ARM target machine.
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Module.h This file contains the declarations for the Module class.
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
unsigned popcount() const
Count the number of bits set.
Definition: APInt.h:1620
APInt zextOrTrunc(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:1002
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition: APInt.h:1179
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition: APInt.h:349
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1089
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned countl_zero() const
The APInt version of std::countl_zero.
Definition: APInt.h:1548
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
unsigned logBase2() const
Definition: APInt.h:1703
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition: APInt.h:453
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
unsigned countr_one() const
Count the number of trailing one bits.
Definition: APInt.h:1606
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1199
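A small, self-contained usage example for the APInt helpers listed above (values are arbitrary and only for illustration):

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  static void apintExamples() {
    using llvm::APInt;
    APInt AllOnes = APInt::getAllOnes(32);               // 0xFFFFFFFF
    APInt Low8    = APInt::getLowBitsSet(32, 8);         // 0x000000FF
    APInt High4   = APInt::getHighBitsSet(32, 4);        // 0xF0000000
    APInt Bit5    = APInt::getOneBitSet(32, 5);          // 0x00000020
    APInt Splat   = APInt::getSplat(32, APInt(8, 0xAA)); // 0xAAAAAAAA

    assert(AllOnes.isAllOnes() && AllOnes.popcount() == 32);
    assert(Low8.isSubsetOf(AllOnes) && Low8.countr_one() == 8);
    assert(High4.countl_zero() == 0 && High4.countr_zero() == 28);
    assert(Bit5.isPowerOf2() && Bit5.logBase2() == 5);
    assert(Splat.getZExtValue() == 0xAAAAAAAAu);
  }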
virtual const ARMBaseRegisterInfo & getRegisterInfo() const =0
const uint32_t * getSjLjDispatchPreservedMask(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
Register getFrameRegister(const MachineFunction &MF) const override
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
const uint32_t * getTLSCallPreservedMask(const MachineFunction &MF) const
const uint32_t * getThisReturnPreservedMask(const MachineFunction &MF, CallingConv::ID) const
getThisReturnPreservedMask - Returns a call preserved mask specific to the case that 'returned' is on...
static ARMConstantPoolConstant * Create(const Constant *C, unsigned ID)
static ARMConstantPoolMBB * Create(LLVMContext &C, const MachineBasicBlock *mbb, unsigned ID, unsigned char PCAdj)
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
SmallPtrSet< const GlobalVariable *, 2 > & getGlobalsPromotedToConstantPool()
void setArgumentStackToRestore(unsigned v)
void setPromotedConstpoolIncrease(int Sz)
void setArgRegsSaveSize(unsigned s)
void setReturnRegsCount(unsigned s)
void setVarArgsFrameIndex(int Index)
unsigned getArgRegsSaveSize() const
void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV)
Indicate to the backend that GV has had its storage changed to inside a constant pool.
void setArgumentStackSize(unsigned size)
unsigned getArgumentStackSize() const
bool isTargetMachO() const
Definition: ARMSubtarget.h:382
bool useMovt() const
bool isTargetAEABI() const
Definition: ARMSubtarget.h:391
bool hasARMOps() const
Definition: ARMSubtarget.h:335
bool supportsTailCall() const
Definition: ARMSubtarget.h:469
const Triple & getTargetTriple() const
Definition: ARMSubtarget.h:368
bool hasVFP4Base() const
Definition: ARMSubtarget.h:343
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:266
bool isThumb1Only() const
Definition: ARMSubtarget.h:434
bool useFPVFMx() const
Definition: ARMSubtarget.h:352
bool hasFPARMv8Base() const
Definition: ARMSubtarget.h:344
bool isThumb2() const
Definition: ARMSubtarget.h:435
bool isTargetWindows() const
Definition: ARMSubtarget.h:378
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
bool hasBaseDSP() const
Definition: ARMSubtarget.h:358
const ARMTargetLowering * getTargetLowering() const override
Definition: ARMSubtarget.h:270
bool useSjLjEH() const
Definition: ARMSubtarget.h:357
bool isTargetDarwin() const
Definition: ARMSubtarget.h:370
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:278
bool hasVFP2Base() const
Definition: ARMSubtarget.h:341
bool isTargetAndroid() const
Definition: ARMSubtarget.h:420
bool isROPI() const
bool isTargetCOFF() const
Definition: ARMSubtarget.h:380
bool isTargetGNUAEABI() const
Definition: ARMSubtarget.h:396
bool hasVFP3Base() const
Definition: ARMSubtarget.h:342
bool isAPCS_ABI() const
bool useFPVFMx64() const
Definition: ARMSubtarget.h:356
bool isTargetWatchOS() const
Definition: ARMSubtarget.h:372
bool hasMinSize() const
Definition: ARMSubtarget.h:433
bool isTargetIOS() const
Definition: ARMSubtarget.h:371
bool useNEONForSinglePrecisionFP() const
Definition: ARMSubtarget.h:337
const InstrItineraryData * getInstrItineraryData() const override
getInstrItins - Return the instruction itineraries based on subtarget selection.
Definition: ARMSubtarget.h:503
bool isTargetWatchABI() const
Definition: ARMSubtarget.h:373
bool hasAnyDataBarrier() const
Definition: ARMSubtarget.h:346
bool isTargetDriverKit() const
Definition: ARMSubtarget.h:374
bool isAAPCS_ABI() const
bool isRWPI() const
bool isLittle() const
Definition: ARMSubtarget.h:477
bool allowsUnalignedMem() const
Definition: ARMSubtarget.h:471
bool isTargetMuslAEABI() const
Definition: ARMSubtarget.h:401
bool isTargetLinux() const
Definition: ARMSubtarget.h:375
bool useFPVFMx16() const
Definition: ARMSubtarget.h:355
bool isMClass() const
Definition: ARMSubtarget.h:436
unsigned getPrefLoopLogAlignment() const
Definition: ARMSubtarget.h:556
bool isTargetHardFloat() const
bool useMulOps() const
Definition: ARMSubtarget.h:350
bool isTargetELF() const
Definition: ARMSubtarget.h:381
Align getDualLoadStoreAlignment() const
Definition: ARMSubtarget.h:513
bool isReadOnly(const GlobalValue *GV) const
unsigned getMaxSupportedInterleaveFactor() const override
Get the maximum supported factor for interleaved memory accesses.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
unsigned getNumInterleavedAccesses(VectorType *VecTy, const DataLayout &DL) const
Returns the number of interleaved accesses that will be generated when lowering accesses of the given...
bool shouldInsertFencesForAtomic(const Instruction *I) const override
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
Align getABIAlignmentForCallingConv(Type *ArgTy, const DataLayout &DL) const override
Return the correct alignment for the current calling convention.
bool isDesirableToCommuteWithShift(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to move this shift by a constant amount through its operand,...
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
const ARMSubtarget * getSubtarget() const
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const
bool isLegalT1ScaledAddressingMode(const AddrMode &AM, EVT VT) const
Returns true if the addressing mode representing by AM is legal for the Thumb1 target,...
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, Align &PrefAlign) const override
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const override
Check if sinking I's operands to I's basic block is profitable, because the operands can be folded in...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const override
bool isMulAddWithConstProfitable(SDValue AddNode, SDValue ConstNode) const override
Return true if it may be profitable to transform (mul (add x, c1), c2) -> (add (mul x,...
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool isFNegFree(EVT VT) const override
Return true if an fneg operation is free to the point where it is never worthwhile to replace it with...
void finalizeLowering(MachineFunction &MF) const override
Execute target specific actions to finalize target lowering.
SDValue PerformMVETruncCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize=false) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint letter, return the type of constraint it is for this target.
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) and add (add x, 1), y. The variant with two add's is IR...
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
SDValue PerformIntrinsicCombine(SDNode *N, DAGCombinerInfo &DCI) const
PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
bool isDesirableToCommuteXorWithShift(const SDNode *N) const override
Return true if it is profitable to combine an XOR of a logical shift to create a logical shift of NOT...
bool ExpandInlineAsm(CallInst *CI) const override
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const
PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
Value * createComplexDeinterleavingIR(IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, Value *Accumulator=nullptr) const override
Create the IR node for the given complex deinterleaving operation.
bool isComplexDeinterleavingSupported() const override
Does this target support complex deinterleaving.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
SDValue PerformMVEExtCombine(SDNode *N, DAGCombinerInfo &DCI) const
bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override
Return true if it is profitable to fold a pair of shifts into a mask.
bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT) const override
Return true if pulling a binary operation into a select with an identity constant is profitable.
bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &OriginalDemandedBits, const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const override
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the value type to use for ISD::SETCC.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
CCAssignFn * CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) const override
createFastISel - This method returns a target specific FastISel object, or null if the target does no...
void AdjustInstrPostInstrSelection(MachineInstr &MI, SDNode *Node) const override
This method should be implemented by targets that mark instructions with the 'hasPostISelHook' flag.
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, unsigned Index) const override
Return true if EXTRACT_SUBVECTOR is cheap for this result type with this index.
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
getTgtMemIntrinsic - Represent NEON load and store intrinsics as MemIntrinsicNodes.
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
bool isShuffleMaskLegal(ArrayRef< int > M, EVT VT) const override
isShuffleMaskLegal - Targets can use this to indicate that they only support some VECTOR_SHUFFLE oper...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
bool useLoadStackGuardNode() const override
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const override
getRegClassFor - Return the register class that should be used for the specified value type.
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
bool isCheapToSpeculateCttz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic cttz.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
bool isCheapToSpeculateCtlz(Type *Ty) const override
Return true if it is cheap to speculate a call to intrinsic ctlz.
bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, TargetLoweringOpt &TLO) const override
bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, unsigned Factor) const override
Lower an interleaved store into a vstN intrinsic.
ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI)
bool isComplexDeinterleavingOperationSupported(ComplexDeinterleavingOperation Operation, Type *Ty) const override
Does this target support complex deinterleaving with the given operation and type.
SDValue PerformBRCONDCombine(SDNode *N, SelectionDAG &DAG) const
PerformBRCONDCombine - Target-specific DAG combining for ARMISD::BRCOND.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const override
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
Instruction * makeDMB(IRBuilderBase &Builder, ARM_MB::MemBOpt Domain) const
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
const char * LowerXConstraint(EVT ConstraintVT) const override
Try to replace an X constraint, which matches anything, with another that has more specific requireme...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const override
Return true if it is profitable for dag combiner to transform a floating point op of specified opcode...
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
allowsMisalignedMemoryAccesses - Returns true if the target allows unaligned memory accesses of the s...
bool isLegalInterleavedAccessType(unsigned Factor, FixedVectorType *VecTy, Align Alignment, const DataLayout &DL) const
Returns true if VecTy is a legal interleaved access type.
bool isVectorLoadExtDesirable(SDValue ExtVal) const override
Return true if folding a vector load into ExtVal (a sign, zero, or any extend node) is profitable.
bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const override
Return true if the target can combine store(extractelement VectorTy, Idx).
bool lowerInterleavedLoad(LoadInst *LI, ArrayRef< ShuffleVectorInst * > Shuffles, ArrayRef< unsigned > Indices, unsigned Factor) const override
Lower an interleaved load into a vldN intrinsic.
bool useSoftFloat() const override
bool alignLoopsWithOptSize() const override
Should loops be aligned even when the function is marked OptSize (but not MinSize).
SDValue PerformCMOVToBFICombine(SDNode *N, SelectionDAG &DAG) const
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override
Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT from min(max(fptoi)) satur...
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
Returns true if an argument of type Ty needs to be passed in a contiguous block of registers in calli...
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override
Return if the target supports combining a chain like:
ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const override
bool getPostIndexedAddressParts(SDNode *N, SDNode *Op, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPostIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mo...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
This class represents an incoming formal argument to a Function.
Definition: Argument.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
bool isFloatingPointOperation() const
Definition: Instructions.h:922
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Definition: Attributes.cpp:93
static BaseIndexOffset match(const SDNode *N, const SelectionDAG &DAG)
Parses tree in N for base, index, offset addresses.
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:205
The address of a basic block.
Definition: Constants.h:888
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, uint32_t BitWidth) const
If this is a constant FP splat and the splatted constant FP is an exact power of 2,...
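The usual calling pattern for isConstantSplat looks like the sketch below (a generic illustration of the API, not code copied from this file; the wrapper name is invented):

  #include "llvm/ADT/APInt.h"
  #include "llvm/CodeGen/SelectionDAGNodes.h"

  // Query the splat value/undef bits of a BUILD_VECTOR and only then decide
  // whether the splatted bits are usable (here: at most 64 bits wide).
  static bool getSplatBitsSketch(const llvm::BuildVectorSDNode *BVN, bool IsBigEndian,
                                 llvm::APInt &SplatBits, unsigned &SplatBitSize) {
    llvm::APInt SplatUndef;
    bool HasAnyUndefs;
    if (!BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
                              /*MinSplatBits=*/0, IsBigEndian))
      return false;
    return SplatBitSize <= 64;
  }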
CCState - This class holds information needed while lowering arguments and return values.
void getInRegsParamInfo(unsigned InRegsParamRecordIndex, unsigned &BeginReg, unsigned &EndReg) const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
void rewindByValRegsInfo()
unsigned getInRegsParamsProcessed() const
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
void addInRegsParamInfo(unsigned RegBegin, unsigned RegEnd)
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
unsigned getInRegsParamsCount() const
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
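The CCState/CCValAssign entries above are consumed in a standard loop when lowering arguments and calls. A generic sketch of that pattern follows (function and parameter names are invented for illustration):

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/CodeGen/CallingConvLower.h"
  #include "llvm/CodeGen/TargetCallingConv.h"

  // Run the calling-convention assignment function, then split the results
  // into register locations and stack locations.
  static void walkFormalArgLocations(llvm::CallingConv::ID CC, bool IsVarArg,
                                     llvm::MachineFunction &MF,
                                     llvm::LLVMContext &Ctx,
                                     const llvm::SmallVectorImpl<llvm::ISD::InputArg> &Ins,
                                     llvm::CCAssignFn *AssignFn) {
    llvm::SmallVector<llvm::CCValAssign, 16> ArgLocs;
    llvm::CCState CCInfo(CC, IsVarArg, MF, ArgLocs, Ctx);
    CCInfo.AnalyzeFormalArguments(Ins, AssignFn);
    for (const llvm::CCValAssign &VA : ArgLocs) {
      if (VA.isRegLoc()) {
        (void)VA.getLocReg();        // value arrives in a physical register
      } else if (VA.isMemLoc()) {
        (void)VA.getLocMemOffset();  // value lives at this stack offset
      }
    }
  }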
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1696
AttributeList getAttributes() const
Return the parameter attributes for this call.
Definition: InstrTypes.h:1780
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
Adds the attribute to the indicated argument.
Definition: InstrTypes.h:1832
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
Definition: Constants.h:704
const APFloat & getValueAPF() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:267
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
bool isBigEndian() const
Definition: DataLayout.h:239
Align getStackAlignment() const
Definition: DataLayout.h:271
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
Align getPreferredAlign(const GlobalVariable *GV) const
Returns the preferred alignment of the specified global.
StringRef getPrivateGlobalPrefix() const
Definition: DataLayout.h:332
Align getPrefTypeAlign(Type *Ty) const
Returns the preferred stack/global alignment for the specified type.
Definition: DataLayout.cpp:874
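A short usage sketch for the DataLayout queries listed above (generic illustration only; nothing here is taken from this file):

  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Type.h"

  static void dataLayoutQueries(const llvm::DataLayout &DL, llvm::Type *Ty) {
    bool IsLE = DL.isLittleEndian();                  // layout endianness
    llvm::TypeSize Bytes = DL.getTypeAllocSize(Ty);   // size including padding
    llvm::Align PrefAlign = DL.getPrefTypeAlign(Ty);  // preferred alignment
    llvm::Align StackAlign = DL.getStackAlignment();  // natural stack alignment
    (void)IsLE; (void)Bytes; (void)PrefAlign; (void)StackAlign;
  }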
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
iterator begin()
Definition: DenseMap.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
Diagnostic information for unsupported feature in backend.
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:539
unsigned getNumElements() const
Definition: DerivedTypes.h:582
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
Type * getParamType(unsigned i) const
Parameter type accessors.
Definition: DerivedTypes.h:135
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition: Function.h:200
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
arg_iterator arg_begin()
Definition: Function.h:813
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:342
bool hasStructRetAttr() const
Determine if the function returns a structure through first or second pointer argument.
Definition: Function.h:661
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:213
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
const GlobalValue * getGlobal() const
bool isDSOLocal() const
Definition: GlobalValue.h:305
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:528
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:278
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:655
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:630
@ InternalLinkage
Rename collisions when linking (static functions).
Definition: GlobalValue.h:59
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2120
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2006
Value * CreateConstGEP1_32(Type *Ty, Value *Ptr, unsigned Idx0, const Twine &Name="")
Definition: IRBuilder.h:1880
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2499
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2105
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1431
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:480
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1410
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2010
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:2477
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2100
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1491
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:563
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2395
Value * CreateTruncOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2136
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2649
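A self-contained example exercising a few of the IRBuilder calls listed above; the function it builds swaps the 16-bit halves of its i32 argument, and everything here (names, module contents) is illustrative and unrelated to the ARM lowering code itself:

  #include "llvm/IR/BasicBlock.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/IRBuilder.h"
  #include "llvm/IR/Module.h"

  static void buildHalfwordSwap(llvm::Module &M) {
    llvm::LLVMContext &Ctx = M.getContext();
    auto *I32 = llvm::Type::getInt32Ty(Ctx);
    auto *FTy = llvm::FunctionType::get(I32, {I32}, /*isVarArg=*/false);
    auto *F = llvm::Function::Create(FTy, llvm::Function::ExternalLinkage,
                                     "halfword_swap", M);
    auto *BB = llvm::BasicBlock::Create(Ctx, "entry", F);
    llvm::IRBuilder<> B(BB);
    llvm::Value *Arg = F->getArg(0);
    // getInt32 / CreateShl / CreateLShr / CreateOr as they appear in the list.
    llvm::Value *Hi = B.CreateShl(Arg, B.getInt32(16));
    llvm::Value *Lo = B.CreateLShr(Arg, B.getInt32(16));
    B.CreateRet(B.CreateOr(Hi, Lo));
  }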
std::optional< unsigned > getOperandCycle(unsigned ItinClassIndx, unsigned OperandIdx) const
Return the cycle for the given class and operand.
bool isEmpty() const
Returns true if there are no itineraries.
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
Definition: Instruction.cpp:80
const BasicBlock * getParent() const
Definition: Instruction.h:151
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:251
Class to represent integer types.
Definition: DerivedTypes.h:40
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Definition: DerivedTypes.h:72
static bool LowerToByteSwap(CallInst *CI)
Try to replace a call instruction with a call to a bswap intrinsic.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Value * getPointerOperand()
Definition: Instructions.h:280
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:236
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getSchedClass() const
Return the scheduling class for this instruction.
Definition: MCInstrDesc.h:600
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
ArrayRef< MCOperandInfo > operands() const
Definition: MCInstrDesc.h:239
unsigned getNumDefs() const
Return the number of MachineOperands that are register definitions.
Definition: MCInstrDesc.h:248
int getOperandConstraint(unsigned OpNum, MCOI::OperandConstraint Constraint) const
Returns the value of the specified operand constraint if it is present.
Definition: MCInstrDesc.h:219
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:40
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isInteger() const
Return true if this is an integer or a vector integer type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:585
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MachineBasicBlock * getFallThrough(bool JumpToFallThrough=true)
Return the fallthrough block if the block can implicitly transfer control to the block after it by fa...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool canFallThrough()
Return true if the block can implicitly transfer control to the block after it by falling off the end...
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
void moveAfter(MachineBasicBlock *NewBefore)
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getFunctionContextIndex() const
Return the index for the function context object.
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & reset(Property P)
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
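The MachineInstrBuilder helpers above are normally driven through BuildMI (listed further down in this index). A hedged sketch of the pattern, with the opcode, registers and the surrounding helper name chosen purely for illustration.

#include "ARMBaseInstrInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

// Illustrative only: DstReg = SrcReg + 4, predicated "always" via the ARM
// predOps()/condCodeOp() helpers declared in ARMBaseInstrInfo.h.  The
// opcode ARM::t2ADDri is just an example choice.
static void emitIllustrativeAdd(llvm::MachineBasicBlock &MBB,
                                llvm::MachineBasicBlock::iterator MBBI,
                                const llvm::TargetInstrInfo *TII,
                                llvm::Register DstReg, llvm::Register SrcReg,
                                const llvm::DebugLoc &DL) {
  using namespace llvm;
  BuildMI(MBB, MBBI, DL, TII->get(ARM::t2ADDri), DstReg)
      .addReg(SrcReg)
      .addImm(4)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
}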
MachineBasicBlock iterator that automatically skips over MIs that are inside bundles (i....
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Inline
EK_Inline - Jump table entries are emitted inline at their point of use.
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
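A MachineMemOperand combines one or more of the flags above with a pointer description. A hedged sketch follows; the frame index, memory type and the llvm/CodeGenTypes/LowLevelType.h include path reflect recent LLVM layouts and are assumptions here, not code from this file.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGenTypes/LowLevelType.h"

// Illustrative only: describe a 32-bit, invariant load from fixed stack
// slot FI so later passes can reason about the access.
static llvm::MachineMemOperand *
makeIllustrativeStackLoadMMO(llvm::MachineFunction &MF, int FI) {
  return MF.getMachineMemOperand(
      llvm::MachinePointerInfo::getFixedStack(MF, FI),
      llvm::MachineMemOperand::MOLoad | llvm::MachineMemOperand::MOInvariant,
      llvm::LLT::scalar(32), llvm::Align(4));
}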
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
void setReg(Register Reg)
Change the register this operand corresponds to.
static MachineOperand CreateImm(int64_t Val)
Register getReg() const
getReg - Returns the register number.
void setIsDef(bool Val=true)
Change a def to a use, or a use to a def.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
This class is used to represent an MLOAD node.
This class is used to represent an MSTORE node.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
bool isVolatile() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Align getOriginalAlign() const
Returns alignment and volatility of the memory access.
bool isSimple() const
Returns true if the memory operation is neither atomic nor volatile.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:287
virtual void print(raw_ostream &OS, const Module *M) const
print - Print out the internal state of the pass.
Definition: Pass.cpp:130
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
iterator_range< use_iterator > uses()
size_t use_size() const
Return the number of uses of this node.
static bool hasPredecessorHelper(const SDNode *N, SmallPtrSetImpl< const SDNode * > &Visited, SmallVectorImpl< const SDNode * > &Worklist, unsigned int MaxSteps=0, bool TopologicalPrune=false)
Returns true if N is a predecessor of any node in Worklist.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
bool use_empty() const
Return true if there are no uses of this node.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
bool isPredecessorOf(const SDNode *N) const
Return true if this node is a predecessor of N.
bool hasAnyUseOfValue(unsigned Value) const
Return true if there are any use of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool isUndef() const
Return true if this node is an UNDEF node.
void setFlags(SDNodeFlags NewFlags)
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
unsigned getNumOperands() const
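The SDNode/SDValue accessors above are the bread and butter of DAG combines. A hedged sketch of the inspection pattern; the combine itself is invented and is not one performed by this file.

#include "llvm/CodeGen/SelectionDAG.h"

// Illustrative only: fold (add x, 0) -> x after checking opcode, operand
// count, use count and types with the accessors listed above.
static llvm::SDValue performIllustrativeCombine(llvm::SDNode *N) {
  using namespace llvm;
  if (N->getOpcode() != ISD::ADD || N->getNumOperands() != 2)
    return SDValue();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  // hasOneUse() is not required for this fold; it is checked here only to
  // show the accessor.
  if (!isNullConstant(RHS) || !N->hasOneUse())
    return SDValue();
  return LHS.getValueType() == N->getValueType(0) ? LHS : SDValue();
}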
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:474
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getSplatValue(SDValue V, bool LegalTypes=false)
If V is a splat vector, return its scalar source operand by extracting that element from the source v...
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
bool haveNoCommonBitsSet(SDValue A, SDValue B) const
Return true if A and B have no common bits set.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
std::pair< SDValue, SDValue > SplitVectorOperand(const SDNode *N, unsigned OpNo)
Split the node's operand with EXTRACT_SUBVECTOR and return the low/high part.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
void addCallSiteInfo(const SDNode *Node, CallSiteInfoImpl &&CallInfo)
Set CallSiteInfo to be associated with Node.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
Definition: SelectionDAG.h:659
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
MaybeAlign InferPtrAlign(SDValue Ptr) const
Infer alignment of a load / store address.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * getNodeIfExists(unsigned Opcode, SDVTList VTList, ArrayRef< SDValue > Ops, const SDNodeFlags Flags)
Get the specified node if it's already available, or else return NULL.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
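The SelectionDAG factory methods above create or re-use DAG nodes. A hedged sketch of node construction; the ((x << 1) | 1) shape is purely illustrative.

#include "llvm/CodeGen/SelectionDAG.h"

// Illustrative only: build ((X << 1) | 1) in X's own value type using
// getConstant and getNode; getNode CSEs identical nodes automatically.
static llvm::SDValue buildIllustrativeNodes(llvm::SelectionDAG &DAG,
                                            llvm::SDValue X,
                                            const llvm::SDLoc &DL) {
  using namespace llvm;
  EVT VT = X.getValueType();
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, X, One);
  return DAG.getNode(ISD::OR, DL, VT, Shl, One);
}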
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
static void getShuffleMask(const Constant *Mask, SmallVectorImpl< int > &Result)
Convert the input shuffle mask operand to a vector of integers.
static bool isIdentityMask(ArrayRef< int > Mask, int NumSrcElts)
Return true if this shuffle mask chooses elements from exactly one source vector without lane crossin...
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
static bool isSplatMask(const int *Mask, EVT VT)
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const unsigned char * bytes_end() const
Definition: StringRef.h:118
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const unsigned char * bytes_begin() const
Definition: StringRef.h:115
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
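StringSwitch is how short literal strings, such as inline-asm constraint letters, are usually dispatched. A small hedged sketch; the constraint-to-number mapping is invented.

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

// Illustrative only: map a constraint letter to an arbitrary category id.
static unsigned illustrativeConstraintKind(llvm::StringRef C) {
  return llvm::StringSwitch<unsigned>(C)
      .Case("r", 0)  // general-purpose register
      .Case("w", 1)  // FP/SIMD register
      .Case("t", 2)  // single-precision VFP register
      .Default(~0u); // unrecognised constraint
}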
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual void finalizeLowering(MachineFunction &MF) const
Execute target specific actions to finalize target lowering.
void setMaxDivRemBitWidthSupported(unsigned SizeInBits)
Set the size in bits of the maximum div/rem the backend supports.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC)
Override the default CondCode to be used to test the result of the comparison libcall against zero.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const
Get the CallingConv that should be used for the specified libcall.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
ShiftLegalizationStrategy
Return the preferred strategy to legalize this SHIFT instruction, with ExpansionFactor being the recu...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
const TargetMachine & getTargetMachine() const
void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC)
Set the CallingConv that should be used for the specified libcall.
void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked load does or does not work with the specified type and ind...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
virtual unsigned getMaxSupportedInterleaveFactor() const
Get the maximum supported factor for interleaved memory accesses.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed masked store does or does not work with the specified type and in...
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual ShiftLegalizationStrategy preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N, unsigned ExpansionFactor) const
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const char * getLibcallName(RTLIB::Libcall Call) const
Get the libcall routine name for the specified libcall.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
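Most of the TargetLoweringBase setters above are issued once, from a target's TargetLowering constructor. A hedged fragment showing the shape of that configuration; the class and method names are hypothetical, and the chosen types, actions and alignments are illustrative rather than the real ARM settings.

#include "llvm/CodeGen/TargetLowering.h"

// Illustrative only: the kind of setup a TargetLowering subclass performs
// with the hooks listed above.  ARM::GPRRegClass comes from the target's
// generated register info headers.
void HypotheticalARMLikeTargetLowering::configureIllustratively(
    const llvm::TargetRegisterInfo *TRI) {
  using namespace llvm;
  addRegisterClass(MVT::i32, &ARM::GPRRegClass);      // i32 lives in GPRs
  setOperationAction(ISD::SDIV, MVT::i32, Expand);    // assume no native sdiv
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
  setTargetDAGCombine(ISD::ADD);                      // ask for ADD combines
  setMinFunctionAlignment(Align(2));
  setSchedulingPreference(Sched::Hybrid);
  computeRegisterProperties(TRI); // derive properties once classes are added
}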
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef, APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Vector Op.
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool expandDIVREMByConstant(SDNode *N, SmallVectorImpl< SDValue > &Result, EVT HiLoVT, SelectionDAG &DAG, SDValue LL=SDValue(), SDValue LH=SDValue()) const
Attempt to expand an n-bit div/rem/divrem by constant using a n/2-bit urem by constant and other arit...
bool isPositionIndependent() const
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, SDValue N1, MutableArrayRef< int > Mask, SelectionDAG &DAG) const
Tries to build a legal vector shuffle using the provided parameters or equivalent variations.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0, bool AssumeSingleUse=false) const
Look at Op.
virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth=0) const
Attempt to simplify any target nodes based on the demanded bits/elts, returning true on success.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isConstTrueVal(SDValue N) const
Return if the N is a constant or constant vector equal to the true value from getBooleanContents().
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const Triple & getTargetTriple() const
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
Target - Wrapper for Target specific information.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
ObjectFormatType getObjectFormat() const
Get the object format for this triple.
Definition: Triple.h:387
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:651
bool isOSVersionLT(unsigned Major, unsigned Minor=0, unsigned Micro=0) const
Helper function for doing comparisons against version numbers included in the target triple.
Definition: Triple.h:484
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:618
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isArrayTy() const
True if this is an instance of ArrayType.
Definition: Type.h:252
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:255
Type * getArrayElementType() const
Definition: Type.h:404
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
void dump() const
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt16Ty(LLVMContext &C)
bool isHalfTy() const
Return true if this is 'half', a 16-bit IEEE fp type.
Definition: Type.h:143
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:129
static IntegerType * getInt8Ty(LLVMContext &C)
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:216
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Base class of all SIMD vector types.
Definition: DerivedTypes.h:403
Type * getElementType() const
Definition: DerivedTypes.h:436
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
static CondCodes getOppositeCondition(CondCodes CC)
Definition: ARMBaseInfo.h:48
@ SECREL
Section Relative (Windows TLS).
@ SBREL
Static Base Relative (RWPI).
@ GOTTPOFF
Global Offset Table, Thread Pointer Offset.
@ TPOFF
Thread Pointer Offset.
TOF
Target Operand Flag enum.
Definition: ARMBaseInfo.h:242
@ MO_NONLAZY
MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it represents a symbol which,...
Definition: ARMBaseInfo.h:288
@ MO_SBREL
MO_SBREL - On a symbol operand, this represents a static base relative relocation.
Definition: ARMBaseInfo.h:270
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
Definition: ARMBaseInfo.h:275
@ MO_GOT
MO_GOT - On a symbol operand, this represents a GOT relative relocation.
Definition: ARMBaseInfo.h:266
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
Definition: ARMBaseInfo.h:263
static ShiftOpc getShiftOpcForNode(unsigned Opcode)
int getSOImmVal(unsigned Arg)
getSOImmVal - Given a 32-bit immediate, if it is something that can fit into a shifter_operand immed...
int getFP32Imm(const APInt &Imm)
getFP32Imm - Return an 8-bit floating-point version of the 32-bit floating-point value.
uint64_t decodeVMOVModImm(unsigned ModImm, unsigned &EltBits)
decodeVMOVModImm - Decode a NEON/MVE modified immediate value into the element value and the element ...
unsigned getAM2Offset(unsigned AM2Opc)
bool isThumbImmShiftedVal(unsigned V)
isThumbImmShiftedVal - Return true if the specified value can be obtained by left shifting an 8-bit im...
int getT2SOImmVal(unsigned Arg)
getT2SOImmVal - Given a 32-bit immediate, if it is something that can fit into a Thumb-2 shifter_oper...
unsigned createVMOVModImm(unsigned OpCmode, unsigned Val)
int getFP64Imm(const APInt &Imm)
getFP64Imm - Return an 8-bit floating-point version of the 64-bit floating-point value.
int getFP16Imm(const APInt &Imm)
getFP16Imm - Return an 8-bit floating-point version of the 16-bit floating-point value.
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
int getFP32FP16Imm(const APInt &Imm)
If this is a FP16Imm encoded as a fp32 value, return the 8-bit encoding for it.
AddrOpc getAM2Op(unsigned AM2Opc)
bool isBitFieldInvertedMask(unsigned v)
const unsigned FPStatusBits
const unsigned FPReservedBits
const unsigned RoundingBitsPos
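A hedged sketch of how the ARM_AM immediate-encoding helpers above are typically consulted; the wrapper function is invented.

#include "MCTargetDesc/ARMAddressingModes.h"

// Illustrative only: getSOImmVal/getT2SOImmVal return -1 when Imm cannot be
// encoded as a shifter-operand immediate for ARM resp. Thumb-2.
static bool isIllustrativelyEncodableImm(unsigned Imm, bool IsThumb2) {
  return IsThumb2 ? llvm::ARM_AM::getT2SOImmVal(Imm) != -1
                  : llvm::ARM_AM::getSOImmVal(Imm) != -1;
}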
FastISel * createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ ARM_APCS
ARM Procedure Calling Standard (obsolete, but still used on some targets).
Definition: CallingConv.h:107
@ CFGuard_Check
Special calling convention on Windows for calling the Control Guard Check ICall function.
Definition: CallingConv.h:82
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition: CallingConv.h:63
@ ARM_AAPCS
ARM Architecture Procedure Calling Standard calling convention (aka EABI).
Definition: CallingConv.h:111
@ CXX_FAST_TLS
Used for access functions.
Definition: CallingConv.h:72
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ ARM_AAPCS_VFP
Same as ARM_AAPCS, but uses hard floating point ABI.
Definition: CallingConv.h:114
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1126
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1122
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ SET_FPENV
Sets the current floating-point environment.
Definition: ISDOpcodes.h:998
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1370
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ FGETSIGN
INT = FGETSIGN(FP) - Return the sign bit of the specified floating point value as an integer 0/1 valu...
Definition: ISDOpcodes.h:497
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1269
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition: ISDOpcodes.h:559
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1155
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1271
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1241
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1272
@ RESET_FPENV
Set floating-point environment to default state.
Definition: ISDOpcodes.h:1002
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ SET_FPMODE
Sets the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1021
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:151
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
Definition: ISDOpcodes.h:1355
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1233
@ RESET_FPMODE
Sets default dynamic floating-point control modes.
Definition: ISDOpcodes.h:1025
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1369
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1267
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1268
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1400
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SET_ROUNDING
Set rounding mode.
Definition: ISDOpcodes.h:885
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition: ISDOpcodes.h:662
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1047
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
Definition: ISDOpcodes.h:1352
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ WRITE_REGISTER
Definition: ISDOpcodes.h:119
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1221
@ VECREDUCE_FMIN
Definition: ISDOpcodes.h:1356
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:988
@ SETCCCARRY
Like SetCC, ops #0 and #1 are the LHS and RHS operands to compare, but op #2 is a boolean indicating ...
Definition: ISDOpcodes.h:758
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1270
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1056
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1237
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1371
@ RegisterMask
Definition: ISDOpcodes.h:75
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1151
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1364
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ GET_FPMODE
Reads the current dynamic floating-point control modes.
Definition: ISDOpcodes.h:1016
@ GET_FPENV
Gets the current floating-point environment.
Definition: ISDOpcodes.h:993
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1265
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ READ_REGISTER
READ_REGISTER, WRITE_REGISTER - This node represents llvm.register on the DAG, which implements the n...
Definition: ISDOpcodes.h:118
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:203
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ DEBUGTRAP
DEBUGTRAP - Trap intended to get the attention of a debugger.
Definition: ISDOpcodes.h:1211
@ FP_TO_UINT_SAT
Definition: ISDOpcodes.h:856
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1248
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1273
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ SMULO
Same for multiplication.
Definition: ISDOpcodes.h:331
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1041
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ VECREDUCE_UMIN
Definition: ISDOpcodes.h:1372
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1263
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:984
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1264
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1182
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1208
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition: ISDOpcodes.h:657
@ VECREDUCE_FMUL
Definition: ISDOpcodes.h:1353
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1262
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ SPONENTRY
SPONENTRY - Represents the llvm.sponentry intrinsic.
Definition: ISDOpcodes.h:106
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition: ISDOpcodes.h:855
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1146
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1556
bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1472
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
static const int LAST_INDEXED_MODE
Definition: ISDOpcodes.h:1474
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
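The ISD load predicates and CondCode utilities above compose as in this hedged sketch; the helper names are invented.

#include "llvm/CodeGen/SelectionDAGNodes.h"

// Illustrative only: accept plain (unindexed, non-extending) loads.
static bool isPlainLoad(const llvm::SDNode *N) {
  return llvm::ISD::isNormalLoad(N);
}

// Illustrative only: !(X cc Y) is (X inv(cc) Y), and (X cc Y) is
// (Y swap(cc) X); the EVT lets getSetCCInverse handle FP condition codes.
static llvm::ISD::CondCode negateThenSwap(llvm::ISD::CondCode CC,
                                          llvm::EVT VT) {
  return llvm::ISD::getSetCCSwappedOperands(
      llvm::ISD::getSetCCInverse(CC, VT));
}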
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1451
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:548
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
match_combine_or< CastInst_match< OpTy, ZExtInst >, CastInst_match< OpTy, SExtInst > > m_ZExtOrSExt(const OpTy &Op)
FNeg_match< OpTy > m_FNeg(const OpTy &X)
Match 'fneg X' as 'fsub -0.0, X'.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
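A hedged sketch of the IR-level PatternMatch combinators above; the predicate is invented.

#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

// Illustrative only: recognise `insertelement vec, (zext|sext scalar), idx`
// and bind the three pieces.
static bool isInsertOfExtendedScalar(llvm::Value *V) {
  using namespace llvm;
  using namespace llvm::PatternMatch;
  Value *Vec, *Scalar, *Idx;
  return match(V, m_InsertElt(m_Value(Vec), m_ZExtOrSExt(m_Value(Scalar)),
                              m_Value(Idx)));
}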
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
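A small sketch of the usual lookup-then-check idiom for these RTLIB helpers; the function name and the f64 -> i32 example types are assumptions, and the RuntimeLibcalls.h include path reflects the current in-tree layout and may differ between releases.
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
#include <cassert>
using namespace llvm;

// Pick the runtime-library call for a signed f64 -> i32 conversion, e.g.
// when no hardware instruction is available for it.
static RTLIB::Libcall fpToSIntLibcall() {
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f64, MVT::i32);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "conversion not supported");
  return LC;
}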
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Define
Register definition.
@ Kill
The last use of a register.
@ SingleThread
Synchronized with respect to signal handlers executing in the same thread.
Definition: LLVMContext.h:54
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1751
bool CC_ARM_APCS_GHC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
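A short sketch of these range wrappers applied to a shuffle mask, their common use in this file; the function name and the specific check are illustrative assumptions.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

// True if every mask lane reads from the first input vector; undef lanes
// (encoded as -1) trivially satisfy the check.
static bool maskReadsOnlyFirstOperand(ArrayRef<int> Mask, int NumElts) {
  return all_of(Mask, [=](int Elt) { return Elt < NumElts; });
}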
bool HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns true if Val1 has a lower Constant Materialization Cost than Val2.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:228
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Definition: STLExtras.h:2415
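A minimal sketch of enumerate() over a shuffle mask; the helper name is an assumption.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;

// Return the index of the first defined (non-negative) mask lane, or -1.
static int firstDefinedLane(ArrayRef<int> Mask) {
  for (const auto &Elt : enumerate(Mask))
    if (Elt.value() >= 0)
      return static_cast<int>(Elt.index());
  return -1;
}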
bool isStrongerThanMonotonic(AtomicOrdering AO)
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition: bit.h:307
constexpr bool isMask_32(uint32_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:240
bool FastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2082
bool RetCC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
Value * concatenateVectors(IRBuilderBase &Builder, ArrayRef< Value * > Vecs)
Concatenate a list of vectors.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:269
void shuffle(Iterator first, Iterator last, RNG &&g)
Definition: STLExtras.h:1550
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
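A sketch of how predOps (together with condCodeOp, listed further below) is typically fed to BuildMI in this backend; MBB, InsertPt, dl, TII, DestReg and SrcReg are assumed to exist in the surrounding lowering code.
// Build a predicable register move: MOVr DestReg, SrcReg, pred:AL, opt-CPSR.
BuildMI(MBB, InsertPt, dl, TII->get(ARM::MOVr), DestReg)
    .addReg(SrcReg)
    .add(predOps(ARMCC::AL))  // unconditional predicate + predicate register
    .add(condCodeOp());       // optional CPSR definition, left unset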
bool CC_ARM_AAPCS_VFP(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr bool isShiftedMask_32(uint32_t Value)
Return true if the argument contains a non-empty sequence of ones with the remainder zero (32 bit ver...
Definition: MathExtras.h:252
bool CC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1738
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
bool RetCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_Win32_CFGuard_Check(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
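A small self-check of several of the bit predicates listed here (isMask_32, isShiftedMask_32, isPowerOf2_32, countr_zero, countl_zero, Log2_32, isUIntN, isIntN); the values are ordinary integer facts, not anything ARM-specific, and the function name is an assumption.
#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void bitPredicateExamples() {
  assert(isMask_32(0x000000FFu));        // contiguous ones starting at bit 0
  assert(isShiftedMask_32(0x00000FF0u)); // contiguous ones, shifted left
  assert(isPowerOf2_32(0x00010000u));
  assert(countr_zero(0x00010000u) == 16);
  assert(countl_zero(0x00010000u) == 15);
  assert(Log2_32(1024) == 10);
  assert(isUIntN(8, 255) && !isUIntN(8, 256));
  assert(isIntN(8, -128) && !isIntN(8, 128));
}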
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
Definition: SmallVector.h:1312
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
CombineLevel
Definition: DAGCombine.h:15
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
unsigned ConstantMaterializationCost(unsigned Val, const ARMSubtarget *Subtarget, bool ForCodesize=false)
Returns the number of instructions required to materialize the given constant in a register,...
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
@ FAdd
Sum of floats.
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:233
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool RetFastCC_ARM_APCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool CC_ARM_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
DWARFExpression::Operation Op
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
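A one-liner sketch of the atomic-ordering predicates above, in the spirit of deciding where memory barriers are required; the function names and the fence-placement convention are assumptions.
#include "llvm/Support/AtomicOrdering.h"
using namespace llvm;

// Release, acq_rel and seq_cst orderings conventionally need a barrier
// before the access; acquire, acq_rel and seq_cst need one after it.
static bool needsLeadingFence(AtomicOrdering Ord) { return isReleaseOrStronger(Ord); }
static bool needsTrailingFence(AtomicOrdering Ord) { return isAcquireOrStronger(Ord); }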
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition: Casting.h:565
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1758
bool isOneConstant(SDValue V)
Returns true if V is a constant integer one.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
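A two-line sketch of the Align arithmetic above; the concrete values are illustrative.
#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

static void alignExamples() {
  assert(alignTo(13, Align(8)) == 16);               // round 13 up to the next multiple of 8
  assert(commonAlignment(Align(16), 8) == Align(8)); // 16-byte-aligned base plus offset 8
}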
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
bool isVREVMask(ArrayRef< int > M, EVT VT, unsigned BlockSize)
isVREVMask - Check if a vector shuffle corresponds to a VREV instruction with the specified blocksize...
unsigned gettBLXrOpcode(const MachineFunction &MF)
llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
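A sketch of createSequentialMask as used when widening vectors before a shuffle; the mask values in the comment follow directly from the (Start, NumInts, NumUndefs) arguments. The VectorUtils.h include path is the current in-tree location and may differ between releases.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;

// {0, 1, 2, 3, -1, -1, -1, -1}: keep 4 sequential lanes, pad with 4 undefs,
// e.g. to widen a 4-lane vector to 8 lanes.
SmallVector<int, 16> Mask =
    createSequentialMask(/*Start=*/0, /*NumInts=*/4, /*NumUndefs=*/4);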
unsigned convertAddSubFlagsOpcode(unsigned OldOpc)
Map pseudo instructions that imply an 'S' bit onto real opcodes.
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
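A DAG-combine flavored sketch using isAllOnesConstant and isConstOrConstSplat from this list; N is assumed to be an ISD::AND node inside a combine returning SDValue, and the folds shown are the textbook identities, not necessarily the ones this file performs.
// and x, -1 --> x ; and x, 0 --> 0
SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
if (isAllOnesConstant(RHS))
  return LHS;
if (ConstantSDNode *C = isConstOrConstSplat(RHS, /*AllowUndefs=*/true))
  if (C->isZero())
    return RHS;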
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Load/store instruction that can be merged with a base address update.
SDNode * N
Instruction that updates a pointer.
unsigned ConstInc
Pointer increment value if it is a constant, or 0 otherwise.
SDValue Inc
Pointer increment operand.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:760
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:448
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition: ValueTypes.h:349
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition: ValueTypes.h:455
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isFixedLengthVector() const
Definition: ValueTypes.h:177
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition: ValueTypes.h:58
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:298
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
bool is64BitVector() const
Return true if this is a 64-bit vector type.
Definition: ValueTypes.h:198
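A short sketch exercising a handful of the EVT queries above on a 128-bit v4i32 type; the function itself is illustrative only.
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>
using namespace llvm;

static void evtExample(LLVMContext &Ctx) {
  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4);     // v4i32
  assert(VT.isVector() && VT.is128BitVector());
  assert(VT.getVectorNumElements() == 4);
  assert(VT.getScalarSizeInBits() == 32);
  assert(VT.getVectorElementType() == MVT::i32);
  EVT HalfVT = VT.getHalfNumVectorElementsVT(Ctx); // v2i32
  assert(HalfVT.is64BitVector());
}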
static KnownBits makeConstant(const APInt &C)
Create known bits from a known constant.
Definition: KnownBits.h:297
bool isUnknown() const
Returns true if we don't know any bits.
Definition: KnownBits.h:63
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
static KnownBits computeForAddSub(bool Add, bool NSW, bool NUW, const KnownBits &LHS, const KnownBits &RHS)
Compute known bits resulting from adding LHS and RHS.
Definition: KnownBits.cpp:57
static KnownBits mul(const KnownBits &LHS, const KnownBits &RHS, bool NoUndefSelfMultiply=false)
Compute known bits resulting from multiplying LHS and RHS.
Definition: KnownBits.cpp:765
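A sketch of combining KnownBits in the way a computeKnownBitsForTargetNode override does; the concrete constants and the function name are illustrative assumptions.
#include "llvm/ADT/APInt.h"
#include "llvm/Support/KnownBits.h"
using namespace llvm;

static KnownBits knownBitsExample() {
  KnownBits LHS = KnownBits::makeConstant(APInt(32, 0x0000FF00));
  KnownBits RHS(32);
  RHS.Zero.setHighBits(16); // only the upper 16 bits of RHS are known (to be zero)
  // Known bits of LHS + RHS when no nsw/nuw information is available.
  KnownBits Sum = KnownBits::computeForAddSub(/*Add=*/true, /*NSW=*/false,
                                              /*NUW=*/false, LHS, RHS);
  // Keep only the bit facts common to both values, as one would when
  // merging two possible results.
  return Sum.intersectWith(LHS);
}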
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getJumpTable(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a jump table entry.
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
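A sketch of the getConstantPool pointer-info idiom used when loading a lowered constant; DAG, dl and CPAddr (the address of the constant-pool entry) are assumed to exist in the surrounding code.
// Annotate the load so alias analysis knows it reads the constant pool.
SDValue Result =
    DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), CPAddr,
                MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));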
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setInRegister(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
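A sketch of the setter chain above in the shape used when lowering an operation to a library call; the wrapper name, calling convention and parameters are assumptions.
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/CallingConv.h"
using namespace llvm;

static std::pair<SDValue, SDValue>
emitLibCall(const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &dl,
            SDValue Callee, Type *RetTy, TargetLowering::ArgListTy &&Args) {
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::ARM_AAPCS, RetTy, Callee, std::move(Args))
      .setDiscardResult(false);
  // Returns {call result, output chain}.
  return TLI.LowerCallTo(CLI);
}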
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
A convenience struct that encapsulates a DAG, and two SDValues for returning information from TargetL...
bool CombineTo(SDValue O, SDValue N)