LLVM 22.0.0git
HexagonGenWideningVecInstr.cpp
Go to the documentation of this file.
1//===--------------------- HexagonGenWideningVecInstr.cpp -----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Replace widening vector operations with hexagon intrinsics.
10//
11//===----------------------------------------------------------------------===//
12
14#include "llvm/ADT/APInt.h"
15#include "llvm/IR/BasicBlock.h"
16#include "llvm/IR/Constants.h"
17#include "llvm/IR/Function.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/Instruction.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23#include "llvm/IR/Type.h"
24#include "llvm/IR/Value.h"
26#include "llvm/Pass.h"
28#include <algorithm>
29#include <utility>
30
31using namespace llvm;
32
33// A command line argument to enable the generation of widening instructions
34// for short-vectors.
36 "hexagon-widen-short-vector",
37 cl::desc("Generate widening instructions for short vectors."), cl::Hidden);
38
39namespace llvm {
42} // end namespace llvm
43
44namespace {
45
// FunctionPass that rewrites widening vector arithmetic (add/sub/mul/shl of
// implicitly extended operands, mul+add pairs for vmpa, saturating ashr
// patterns) into Hexagon HVX intrinsics.
46class HexagonGenWideningVecInstr : public FunctionPass {
47public:
48 static char ID;
49
50 HexagonGenWideningVecInstr() : FunctionPass(ID) {
// NOTE(review): the pass-registry initialization call that normally sits in
// this constructor body was lost in this capture (source line 51) -- verify
// against the original file.
52 }
53
54 HexagonGenWideningVecInstr(const HexagonTargetMachine *TM)
55 : FunctionPass(ID), TM(TM) {
// NOTE(review): constructor body lost in this capture (source line 56).
57 }
58
59 StringRef getPassName() const override {
60 return "Hexagon generate widening vector instructions";
61 }
62
63 bool runOnFunction(Function &F) override;
64
65 void getAnalysisUsage(AnalysisUsage &AU) const override {
66 FunctionPass::getAnalysisUsage(AU);
67 }
68
69private:
// Module of the function being transformed; used to declare intrinsics.
70 Module *M = nullptr;
71 const HexagonTargetMachine *TM = nullptr;
72 const HexagonSubtarget *HST = nullptr;
// HVX vector register width in bits (compared against
// Type::getPrimitiveSizeInBits() throughout).
73 unsigned HwVLen;
// Kind of widening operation recognized on the instruction being rewritten.
74 enum OPKind { OP_None = 0, OP_Add, OP_Sub, OP_Mul, OP_Shl };
75
// Description of one operand of a candidate instruction.
76 struct OPInfo {
// The operand as it appears on the instruction.
77 Value *OP = nullptr;
// The narrower value feeding the extension (or the constant itself).
78 Value *ExtInOP = nullptr;
// True when the operand is zero-extended, false for sign-extended.
79 bool IsZExt = false;
// Bit width of the pre-extension input.
80 unsigned ExtInSize = 0;
// True when the operand is a scalar splat (vmpa path).
81 bool IsScalar = false;
82 };
83
84 bool visitBlock(BasicBlock *B);
85 bool processInstruction(Instruction *Inst);
86 bool replaceWithIntrinsic(Instruction *Inst, OPKind OPK, OPInfo &OP1Info,
87 OPInfo &OP2Info);
88 bool getOperandInfo(Value *V, OPInfo &OPI);
89 bool isExtendedConstant(Constant *C, bool IsSigned);
90 unsigned getElementSizeInBits(Value *V, bool IsZExt);
91 Type *getElementTy(unsigned size, IRBuilder<> &IRB);
92
93 Value *adjustExtensionForOp(OPInfo &OPI, IRBuilder<> &IRB,
94 unsigned NewEltsize, unsigned NumElts);
95
96 Intrinsic::ID getIntrinsic(OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt,
97 unsigned NewOpEltSize, unsigned NewResEltSize,
98 bool IsConstScalar, int ConstOpNum);
99
100 std::pair<Value *, Value *> opSplit(Value *OP, Instruction *Inst,
101 Type *NewOpType);
102
103 Value *createIntrinsic(Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1,
104 Value *NewOP2, Type *ResType, unsigned NumElts,
105 bool Interleave);
106 bool processInstructionForVMPA(Instruction *Inst);
107 bool getVmpaOperandInfo(Value *V, OPInfo &OPI);
108 void reorderVmpaOperands(OPInfo *OPI);
109 bool replaceWithVmpaIntrinsic(Instruction *Inst, OPInfo *OPI);
110 bool genSaturatingInst(Instruction *Inst);
111 bool getMinMax(Constant *MinC, Constant *MaxC, std::pair<int, int> &MinMax);
112 bool isSaturatingVAsr(Instruction *Inst, Value *S, int MinV, int MaxV,
113 bool &IsResSigned);
114 Value *extendShiftByVal(Value *ShiftByVal, IRBuilder<> &IRB);
115 Intrinsic::ID getVAsrIntrinsic(bool IsInSigned, bool IsResSigned);
116 Value *createVAsrIntrinsic(Instruction *Inst, Value *VecOP, Value *ShiftByVal,
117 bool IsResSigned);
118 bool genVAvg(Instruction *Inst);
119 bool checkConstantVector(Value *OP, int64_t &SplatVal, bool IsOPZExt);
120 void updateMPYConst(Intrinsic::ID IntId, int64_t SplatVal, bool IsOPZExt,
121 Value *&OP, IRBuilder<> &IRB);
122 void packConstant(Intrinsic::ID IntId, int64_t SplatVal, Value *&OP,
123 IRBuilder<> &IRB);
124};
125
126} // end anonymous namespace
127
128char HexagonGenWideningVecInstr::ID = 0;
129
130INITIALIZE_PASS_BEGIN(HexagonGenWideningVecInstr, "widening-vec",
131 "Hexagon generate "
132 "widening vector instructions",
133 false, false)
135INITIALIZE_PASS_END(HexagonGenWideningVecInstr, "widening-vec",
136 "Hexagon generate "
137 "widening vector instructions",
139
141 if (Value *SplatV = C->getSplatValue()) {
142 auto *CI = dyn_cast<ConstantInt>(SplatV);
143 assert(CI);
144 return CI->getValue().isNegative();
145 }
146 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
147 for (unsigned i = 0, e = NumElts; i != e; ++i) {
148 auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i));
149 assert(CI);
150 if (CI->getValue().isNegative())
151 return true;
152 continue;
153 }
154 return false;
155}
156
// Analyze operand V and fill OPI. Recognized forms: a direct zext/sext, an
// extended value splatted through insertelement+shufflevector, a splatted
// value masked with 255/65535 (implicit zext to 8/16 bits), or a constant
// vector whose lanes fit in half the element width.
// Returns true when V is usable by the widening rewrite.
// NOTE(review): several m_Shuffle(m_InsertElt(...)) matcher lines were
// dropped in this capture (source lines 165, 174, 181-182) -- the match
// expressions below are incomplete; verify against the original file.
157bool HexagonGenWideningVecInstr::getOperandInfo(Value *V, OPInfo &OPI) {
158 using namespace PatternMatch;
159 OPI.OP = V;
160 Value *ExtV = nullptr;
161 Constant *C = nullptr;
162
// Case 1: zero-extended operand (direct or splatted).
163 bool Match = false;
164 if ((Match = (match(V, (m_ZExt(m_Value(ExtV)))) ||
166 m_ZExt(m_Value(ExtV)), m_Zero()),
167 m_Poison(), m_ZeroMask()))))) {
168 OPI.ExtInOP = ExtV;
169 OPI.IsZExt = true;
170 }
171
// Case 2: sign-extended operand (direct or splatted).
172 if (!Match &&
173 (Match = (match(V, (m_SExt(m_Value(ExtV)))) ||
175 m_SExt(m_Value(ExtV)), m_Zero()),
176 m_Poison(), m_ZeroMask()))))) {
177 OPI.ExtInOP = ExtV;
178 OPI.IsZExt = false;
179 }
// Case 3: splatted scalar masked with 0xFF / 0xFFFF -- an implicit zext.
180 if (!Match &&
181 (Match =
183 m_Poison(), m_ZeroMask()))))) {
184 if (match(ExtV, m_And(m_Value(), m_SpecificInt(255)))) {
185 OPI.ExtInOP = ExtV;
186 OPI.IsZExt = true;
187 OPI.ExtInSize = 8;
188 return true;
189 }
190 if (match(ExtV, m_And(m_Value(), m_SpecificInt(65535)))) {
191 OPI.ExtInOP = ExtV;
192 OPI.IsZExt = true;
193 OPI.ExtInSize = 16;
194 return true;
195 }
196 return false;
197 }
198
// Case 4: constant vector whose lanes fit in half the element width; treat
// an all-non-negative constant as zero-extended.
199 if (!Match && (Match = match(V, m_Constant(C)))) {
200 if (!isExtendedConstant(C, false) && !isExtendedConstant(C, true))
201 return false;
202 OPI.ExtInOP = C;
203 OPI.IsZExt = !hasNegativeValues(C);
204 }
205
206 if (!Match)
207 return false;
208
209 // If the operand is extended, find the element size of its input.
210 if (OPI.ExtInOP)
211 OPI.ExtInSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
212 return true;
213}
214
215bool HexagonGenWideningVecInstr::isExtendedConstant(Constant *C,
216 bool IsSigned) {
217 Type *CTy = cast<FixedVectorType>(C->getType())->getElementType();
218 unsigned EltSize = CTy->getPrimitiveSizeInBits();
219 unsigned HalfSize = EltSize / 2;
220 if (Value *SplatV = C->getSplatValue()) {
221 if (auto *CI = dyn_cast<ConstantInt>(SplatV))
222 return IsSigned ? isIntN(HalfSize, CI->getSExtValue())
223 : isUIntN(HalfSize, CI->getZExtValue());
224 return false;
225 }
226 unsigned NumElts = cast<FixedVectorType>(C->getType())->getNumElements();
227 for (unsigned i = 0, e = NumElts; i != e; ++i) {
228 if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) {
229 if ((IsSigned && !isIntN(HalfSize, CI->getSExtValue())) ||
230 (!IsSigned && !isUIntN(HalfSize, CI->getZExtValue())))
231 return false;
232 continue;
233 }
234 return false;
235 }
236 return true;
237}
238
239unsigned HexagonGenWideningVecInstr::getElementSizeInBits(Value *V,
240 bool IsZExt = false) {
241 using namespace PatternMatch;
242 Type *ValTy = V->getType();
243 Type *EltTy = ValTy;
244 if (auto *C = dyn_cast<Constant>(V)) {
245 unsigned NumElts = cast<FixedVectorType>(EltTy)->getNumElements();
246 unsigned EltSize = cast<FixedVectorType>(EltTy)
247 ->getElementType()
248 ->getPrimitiveSizeInBits()
249 .getKnownMinValue();
250 unsigned ReducedSize = EltSize / 2;
251
252 while (ReducedSize >= 8) {
253 for (unsigned i = 0, e = NumElts; i != e; ++i) {
254 if (auto *CI = dyn_cast<ConstantInt>(C->getAggregateElement(i))) {
255 if (IsZExt) {
256 if (!isUIntN(ReducedSize, CI->getZExtValue()))
257 return EltSize;
258 } else if (!isIntN(ReducedSize, CI->getSExtValue()))
259 return EltSize;
260 }
261 }
262 EltSize = ReducedSize;
263 ReducedSize = ReducedSize / 2;
264 }
265 return EltSize;
266 }
267
268 if (ValTy->isVectorTy())
269 EltTy = cast<FixedVectorType>(ValTy)->getElementType();
270 return EltTy->getPrimitiveSizeInBits();
271}
272
273Value *HexagonGenWideningVecInstr::adjustExtensionForOp(OPInfo &OPI,
274 IRBuilder<> &IRB,
275 unsigned NewExtSize,
276 unsigned NumElts) {
277 Value *V = OPI.ExtInOP;
278 bool IsZExt = OPI.IsZExt;
279 unsigned EltSize = getElementSizeInBits(OPI.ExtInOP, OPI.IsZExt);
280 Type *EltType = getElementTy(NewExtSize, IRB);
281 auto *NewOpTy = FixedVectorType::get(EltType, NumElts);
282
283 if (dyn_cast<Constant>(V))
284 return IRB.CreateTrunc(V, NewOpTy);
285
286 if (V->getType()->isVectorTy()) {
287 if (NewExtSize == EltSize)
288 return V;
289 assert(NewExtSize == 16);
290 auto *NewOpTy = FixedVectorType::get(IRB.getInt16Ty(), NumElts);
291 return (IsZExt) ? IRB.CreateZExt(V, NewOpTy) : IRB.CreateSExt(V, NewOpTy);
292 }
293
294 // The operand must correspond to a shuffle vector which is used to construct
295 // a vector out of a scalar. Since the scalar value (V) is extended,
296 // replace it with a new shuffle vector with the smaller element size.
297 [[maybe_unused]] auto *I = dyn_cast<Instruction>(OPI.OP);
298 assert(I && I->getOpcode() == Instruction::ShuffleVector);
299
300 if (NewExtSize > EltSize)
301 V = (IsZExt) ? IRB.CreateZExt(V, EltType) : IRB.CreateSExt(V, EltType);
302 else if (NewExtSize < EltSize)
303 V = IRB.CreateTrunc(V, EltType);
304
305 Value *IE =
306 IRB.CreateInsertElement(PoisonValue::get(NewOpTy), V, IRB.getInt32(0));
307
308 SmallVector<Constant *, 8> ShuffleMask;
309 for (unsigned i = 0; i < NumElts; ++i)
310 ShuffleMask.push_back(IRB.getInt32(0));
311
312 return IRB.CreateShuffleVector(IE, PoisonValue::get(NewOpTy),
313 ConstantVector::get(ShuffleMask));
314}
315
316Intrinsic::ID HexagonGenWideningVecInstr::getIntrinsic(
317 OPKind OPK, bool IsOP1ZExt, bool IsOP2ZExt, unsigned InEltSize,
318 unsigned ResEltSize, bool IsConstScalar, int ConstOpNum) {
319 // Since the operands have been extended, the ResEltSize must be 16 or more.
320 switch (OPK) {
321 case OP_Add:
322 // Both operands should be either zero extended or sign extended.
323 assert(IsOP1ZExt == IsOP2ZExt);
324 if (InEltSize == 8 && ResEltSize == 16) {
325 // Operands must be zero extended as we don't have a widening vector
326 // 'add' that can take signed exteded values.
327 assert(IsOP1ZExt && "Operands must be zero-extended");
328 return Intrinsic::hexagon_vadd_uu;
329 }
330 if (InEltSize == 16 && ResEltSize == 32)
331 return (IsOP1ZExt) ? Intrinsic::hexagon_vadd_uu
332 : Intrinsic::hexagon_vadd_ss;
333
334 llvm_unreachable("Incorrect input and output operand sizes");
335
336 case OP_Sub:
337 // Both operands should be either zero extended or sign extended.
338 assert(IsOP1ZExt == IsOP2ZExt);
339 if (InEltSize == 8 && ResEltSize == 16) {
340 // Operands must be zero extended as we don't have a widening vector
341 // 'sub' that can take signed exteded values.
342 assert(IsOP1ZExt && "Operands must be zero-extended");
343 return Intrinsic::hexagon_vsub_uu;
344 }
345 if (InEltSize == 16 && ResEltSize == 32)
346 return (IsOP1ZExt) ? Intrinsic::hexagon_vsub_uu
347 : Intrinsic::hexagon_vsub_ss;
348
349 llvm_unreachable("Incorrect input and output operand sizes");
350
351 case OP_Mul:
352 assert(ResEltSize == 2 * InEltSize);
353 // Enter inside 'if' block when one of the operand is constant vector
354 if (IsConstScalar) {
355 // When inputs are of 8bit type and output is 16bit type, enter 'if' block
356 if (InEltSize == 8 && ResEltSize == 16) {
357 // Enter the 'if' block, when 2nd operand of the mul instruction is
358 // constant vector, otherwise enter 'else' block
359 if (ConstOpNum == 2 && IsOP1ZExt) {
360 // If the value inside the constant vector is zero-extended, then
361 // return hexagon_vmpy_ub_ub, else return hexagon_vmpy_ub_b
362 return (IsOP2ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
363 : Intrinsic::hexagon_vmpy_ub_b;
364 } else if (ConstOpNum == 1 && IsOP2ZExt) {
365 return (IsOP1ZExt) ? Intrinsic::hexagon_vmpy_ub_ub
366 : Intrinsic::hexagon_vmpy_ub_b;
367 }
368 }
369 // When inputs are of 16bit type and output is 32bit type,
370 // enter 'if' block
371 if (InEltSize == 16 && ResEltSize == 32) {
372 if (IsOP1ZExt && IsOP2ZExt) {
373 // If the value inside the constant vector and other operand is
374 // zero-extended, then return hexagon_vmpy_uh_uh
375 return Intrinsic::hexagon_vmpy_uh_uh;
376 } else if (!IsOP1ZExt && !IsOP2ZExt) {
377 // If the value inside the constant vector and other operand is
378 // sign-extended, then return hexagon_vmpy_h_h
379 return Intrinsic::hexagon_vmpy_h_h;
380 }
381 }
382 }
383 if (IsOP1ZExt)
384 return IsOP2ZExt ? Intrinsic::hexagon_vmpy_uu
385 : Intrinsic::hexagon_vmpy_us;
386 else
387 return IsOP2ZExt ? Intrinsic::hexagon_vmpy_su
388 : Intrinsic::hexagon_vmpy_ss;
389 default:
390 llvm_unreachable("Instruction not handled!");
391 }
392}
393
394Type *HexagonGenWideningVecInstr::getElementTy(unsigned size,
395 IRBuilder<> &IRB) {
396 switch (size) {
397 case 8:
398 return IRB.getInt8Ty();
399 case 16:
400 return IRB.getInt16Ty();
401 case 32:
402 return IRB.getInt32Ty();
403 default:
404 llvm_unreachable("Unhandled Element size");
405 }
406}
407
408Value *HexagonGenWideningVecInstr::createIntrinsic(
409 Intrinsic::ID IntId, Instruction *Inst, Value *NewOP1, Value *NewOP2,
410 Type *ResType, unsigned NumElts, bool Interleave = true) {
411 IRBuilder<> IRB(Inst);
412 Function *ExtF = Intrinsic::getOrInsertDeclaration(M, IntId, ResType);
413 Value *NewIn = IRB.CreateCall(ExtF, {NewOP1, NewOP2});
414 if (Interleave) {
415 // Interleave elements in the output vector.
416 SmallVector<Constant *, 16> ShuffleMask;
417 unsigned HalfElts = NumElts / 2;
418 for (unsigned i = 0; i < HalfElts; ++i) {
419 ShuffleMask.push_back(IRB.getInt32(i));
420 ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
421 }
422 NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType),
423 ConstantVector::get(ShuffleMask));
424 }
425 return NewIn;
426}
427
// Split OP into its (Hi, Lo) halves of type NewOpType: short vectors are
// split with shufflevector masks, full vector pairs with the V6 hi/lo
// intrinsics.
// NOTE(review): this capture dropped the SmallVector LoM/HiM declarations
// (source lines 442-443), the mask arguments to the two shuffles (450, 452)
// and the ExtFHi/ExtFLo declarations (458-459) -- verify against the
// original file.
428std::pair<Value *, Value *>
429HexagonGenWideningVecInstr::opSplit(Value *OP, Instruction *Inst,
430 Type *NewOpType) {
431 Type *InstTy = Inst->getType();
432 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
433 IRBuilder<> IRB(Inst);
434 if (InstTy->getPrimitiveSizeInBits() < 2 * HwVLen) {
// The only time we need to split an OP even though it is not a
// vector-pair is while generating vasr instruction for the short vector.
// Since hi/lo intrinsics can't be used here as they expect the operands to
// be of 64xi32 type, the shuffle_vector pair with the appropriate masks is
// used instead.
440 assert(NumElts % 2 == 0 && "Unexpected Vector Type!!");
441 unsigned HalfElts = NumElts / 2;
444 for (unsigned i = 0; i < HalfElts; ++i)
445 LoM.push_back(IRB.getInt32(i));
446 for (unsigned i = 0; i < HalfElts; ++i)
447 HiM.push_back(IRB.getInt32(HalfElts + i));
448
449 Value *Hi = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
451 Value *Lo = IRB.CreateShuffleVector(OP, PoisonValue::get(OP->getType()),
453 return std::pair<Value *, Value *>(Hi, Lo);
454 }
455
// Vector-pair case: bitcast to 64xi32 and use the V6 hi/lo intrinsics.
456 Intrinsic::ID IntHi = Intrinsic::hexagon_V6_hi_128B;
457 Intrinsic::ID IntLo = Intrinsic::hexagon_V6_lo_128B;
460 auto *InType = FixedVectorType::get(IRB.getInt32Ty(), 64);
461 OP = IRB.CreateBitCast(OP, InType);
462 Value *Hi = IRB.CreateCall(ExtFHi, {OP}); // 32xi32
463 Value *Lo = IRB.CreateCall(ExtFLo, {OP});
464 Hi = IRB.CreateBitCast(Hi, NewOpType);
465 Lo = IRB.CreateBitCast(Lo, NewOpType);
466 return std::pair<Value *, Value *>(Hi, Lo);
467}
468
469bool HexagonGenWideningVecInstr::checkConstantVector(Value *OP,
470 int64_t &SplatVal,
471 bool IsOPZExt) {
472 if (auto *C1 = dyn_cast<Constant>(OP)) {
473 if (Value *SplatV = C1->getSplatValue()) {
474 auto *CI = dyn_cast<ConstantInt>(SplatV);
475 if (IsOPZExt) {
476 SplatVal = CI->getZExtValue();
477 } else {
478 SplatVal = CI->getSExtValue();
479 }
480 return true;
481 }
482 }
483 return false;
484}
485
// Rewrite the splat-constant operand OP into the form the chosen multiply
// intrinsic expects: a splat vector for the generic vmpy_{uu,us,su,ss}
// forms, or a packed 32-bit scalar immediate (via packConstant) for the
// scalar-constant forms.
486void HexagonGenWideningVecInstr::updateMPYConst(Intrinsic::ID IntId,
487 int64_t SplatVal, bool IsOPZExt,
488 Value *&OP, IRBuilder<> &IRB) {
489 if ((IntId == Intrinsic::hexagon_vmpy_uu ||
490 IntId == Intrinsic::hexagon_vmpy_us ||
491 IntId == Intrinsic::hexagon_vmpy_su ||
492 IntId == Intrinsic::hexagon_vmpy_ss) &&
493 OP->getType()->isVectorTy()) {
494 // Create a vector with all elements equal to SplatVal
495 Type *VecTy = OP->getType();
496 Value *splatVector =
497 ConstantInt::get(VecTy, static_cast<uint32_t>(SplatVal));
// NOTE(review): CreateZExt/CreateSExt here extend splatVector to its own
// type (VecTy -> VecTy); that is only well-formed if the builder folds the
// no-op cast on constants -- verify against the original file/IRBuilder
// semantics.
498 OP = IsOPZExt ? IRB.CreateZExt(splatVector, VecTy)
499 : IRB.CreateSExt(splatVector, VecTy);
500 } else {
501 packConstant(IntId, SplatVal, OP, IRB);
502 }
503}
504
505void HexagonGenWideningVecInstr::packConstant(Intrinsic::ID IntId,
506 int64_t SplatVal, Value *&OP,
507 IRBuilder<> &IRB) {
508 uint32_t Val32 = static_cast<uint32_t>(SplatVal);
509 if (IntId == Intrinsic::hexagon_vmpy_ub_ub) {
510 assert(SplatVal >= 0 && SplatVal <= UINT8_MAX);
511 uint32_t packed = (Val32 << 24) | (Val32 << 16) | (Val32 << 8) | Val32;
512 OP = IRB.getInt32(packed);
513 } else if (IntId == Intrinsic::hexagon_vmpy_ub_b) {
514 assert(SplatVal >= INT8_MIN && SplatVal <= INT8_MAX);
515 uint32_t packed = (Val32 << 24) | ((Val32 << 16) & ((1 << 24) - 1)) |
516 ((Val32 << 8) & ((1 << 16) - 1)) |
517 (Val32 & ((1 << 8) - 1));
518 OP = IRB.getInt32(packed);
519 } else if (IntId == Intrinsic::hexagon_vmpy_uh_uh) {
520 assert(SplatVal >= 0 && SplatVal <= UINT16_MAX);
521 uint32_t packed = (Val32 << 16) | Val32;
522 OP = IRB.getInt32(packed);
523 } else if (IntId == Intrinsic::hexagon_vmpy_h_h) {
524 assert(SplatVal >= INT16_MIN && SplatVal <= INT16_MAX);
525 uint32_t packed = (Val32 << 16) | (Val32 & ((1 << 16) - 1));
526 OP = IRB.getInt32(packed);
527 }
528}
529
// Core rewrite: replace the widening add/sub/mul/shl Inst with the matching
// Hexagon intrinsic (or a plain vector op when no widening is needed),
// splitting into hi/lo halves when the result needs two vector pairs.
// Returns true on success; Inst's uses are redirected to the new value.
530bool HexagonGenWideningVecInstr::replaceWithIntrinsic(Instruction *Inst,
531 OPKind OPK,
532 OPInfo &OP1Info,
533 OPInfo &OP2Info) {
534 Type *InstTy = Inst->getType();
535 Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
536 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
537 unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
538
539 bool IsOP1ZExt = OP1Info.IsZExt;
540 bool IsOP2ZExt = OP2Info.IsZExt;
541
542 // The resulting values of 'add' and 'sub' are always sign-extended.
543 bool IsResZExt = (OPK == OP_Mul || OPK == OP_Shl)
544 ? (OP1Info.IsZExt && OP2Info.IsZExt)
545 : false;
546
547 unsigned MaxEltSize = std::max(OP1Info.ExtInSize, OP2Info.ExtInSize);
548 unsigned NewOpEltSize = MaxEltSize;
549 unsigned NewResEltSize = 2 * MaxEltSize;
550
551 // For Add and Sub, both the operands should be either zero extended
552 // or sign extended. In case of a mismatch, they are extended to the
553 // next size (ex: 8 bits -> 16 bits) so that the sign-extended vadd/vsub
554 // instructions can be used. Also, we don't support 8-bits signed vadd/vsub
555 // instructions. They are extended to 16-bits and then signed 16-bits
556 // non-widening vadd/vsub is used to perform the operation.
557 if (OPK != OP_Mul && OPK != OP_Shl &&
558 (IsOP1ZExt != IsOP2ZExt || (!IsOP1ZExt && NewOpEltSize == 8)))
559 NewOpEltSize = 2 * NewOpEltSize;
560
// Bail out on sizes the hardware cannot cover.
561 unsigned ResVLen = NewResEltSize * NumElts;
562 if (ResVLen < HwVLen && !WidenShortVector)
563 return false;
564 if (NewOpEltSize > 16 || ((ResVLen > HwVLen) && (ResVLen % HwVLen) != 0))
565 return false;
566
567 IRBuilder<> IRB(Inst);
568 Value *NewOP1 = adjustExtensionForOp(OP1Info, IRB, NewOpEltSize, NumElts);
569 Value *NewOP2 = adjustExtensionForOp(OP2Info, IRB, NewOpEltSize, NumElts);
570
// If operand and result widths now agree, no widening op is needed at all.
571 if (NewOpEltSize == NewResEltSize) {
572 assert(OPK != OP_Mul && OPK != OP_Shl);
573 // Instead of intrinsics, use vector add/sub.
574 Value *NewIn = IRB.CreateBinOp(cast<BinaryOperator>(Inst)->getOpcode(),
575 NewOP1, NewOP2);
576 if (InstEltSize > NewResEltSize)
577 NewIn = IRB.CreateSExt(NewIn, InstTy);
578 Inst->replaceAllUsesWith(NewIn);
579 return true;
580 }
581
// Detect a splat-constant operand for mul/shl; remember which one it was.
582 bool IsConstScalar = false;
583 int64_t SplatVal = 0;
584 int ConstOpNum = 1;
585 if (OPK == OP_Mul || OPK == OP_Shl) {
586 IsConstScalar = checkConstantVector(NewOP1, SplatVal, IsOP1ZExt);
587 if (!IsConstScalar) {
588 IsConstScalar = checkConstantVector(NewOP2, SplatVal, IsOP2ZExt);
589 ConstOpNum = 2;
590 }
591 }
592
// Shl by an in-range splat constant becomes a multiply by 2^shift;
// any other shl cannot be widened here.
593 if (IsConstScalar && OPK == OP_Shl) {
594 if (((NewOpEltSize == 8) && (SplatVal > 0) && (SplatVal < 8)) ||
595 ((NewOpEltSize == 16) && (SplatVal > 0) && (SplatVal < 16))) {
596 SplatVal = 1LL << SplatVal;
597 OPK = OP_Mul;
598 } else {
599 return false;
600 }
601 } else if (!IsConstScalar && OPK == OP_Shl) {
602 return false;
603 }
604
605 Intrinsic::ID IntId = getIntrinsic(OPK, IsOP1ZExt, IsOP2ZExt, NewOpEltSize,
606 NewResEltSize, IsConstScalar, ConstOpNum);
607
608 if (IsConstScalar) {
// NOTE(review): this always rewrites NewOP2 using IsOP2ZExt even when the
// constant operand was operand 1 (ConstOpNum == 1) -- confirm this is the
// intended canonicalization.
609 updateMPYConst(IntId, SplatVal, IsOP2ZExt, NewOP2, IRB);
610 }
611
612 // Split the node if it needs more than a vector pair for the result.
613 if (ResVLen > 2 * HwVLen) {
614 assert(ResVLen == 4 * HwVLen);
615 // Split the operands
616 unsigned HalfElts = NumElts / 2;
617 auto *NewOpType =
618 FixedVectorType::get(getElementTy(NewOpEltSize, IRB), HalfElts);
619 auto *ResType =
620 FixedVectorType::get(getElementTy(NewResEltSize, IRB), HalfElts);
621 std::pair<Value *, Value *> SplitOP1 = opSplit(NewOP1, Inst, NewOpType);
622 std::pair<Value *, Value *> SplitOP2;
// A packed scalar immediate is shared by both halves; vectors are split.
623 if (IsConstScalar && (IntId == Intrinsic::hexagon_vmpy_h_h ||
624 IntId == Intrinsic::hexagon_vmpy_uh_uh)) {
625 SplitOP2 = std::pair<Value *, Value *>(NewOP2, NewOP2);
626 } else {
627 SplitOP2 = opSplit(NewOP2, Inst, NewOpType);
628 }
629 Value *NewInHi = createIntrinsic(IntId, Inst, SplitOP1.first,
630 SplitOP2.first, ResType, HalfElts, true);
631 Value *NewInLo = createIntrinsic(IntId, Inst, SplitOP1.second,
632 SplitOP2.second, ResType, HalfElts, true);
633 assert(InstEltSize == NewResEltSize);
634 SmallVector<Constant *, 8> ShuffleMask;
635 for (unsigned i = 0; i < NumElts; ++i)
636 ShuffleMask.push_back(IRB.getInt32(i));
637 // Concat Hi and Lo.
638 Value *NewIn = IRB.CreateShuffleVector(NewInLo, NewInHi,
639 ConstantVector::get(ShuffleMask));
640
641 Inst->replaceAllUsesWith(NewIn);
642 return true;
643 }
644
// Single intrinsic covers the whole result; extend back to the original
// element width if the instruction's type is wider.
645 auto *ResType =
646 FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
647 Value *NewIn =
648 createIntrinsic(IntId, Inst, NewOP1, NewOP2, ResType, NumElts, true);
649 if (InstEltSize > NewResEltSize)
650 NewIn = (IsResZExt) ? IRB.CreateZExt(NewIn, InstTy)
651 : IRB.CreateSExt(NewIn, InstTy);
652
653 Inst->replaceAllUsesWith(NewIn);
654
655 return true;
656}
657
658// Process instruction and replace them with widening vector
659// intrinsics if possible.
660bool HexagonGenWideningVecInstr::processInstruction(Instruction *Inst) {
661 Type *InstTy = Inst->getType();
662 if (!InstTy->isVectorTy() ||
663 cast<FixedVectorType>(InstTy)->getNumElements() > 128)
664 return false;
665 unsigned InstLen = InstTy->getPrimitiveSizeInBits();
666 if (!HST->isTypeForHVX(cast<VectorType>(InstTy)) && InstLen != 4 * HwVLen)
667 return false;
668 if (InstLen < HwVLen && !WidenShortVector)
669 return false;
670
671 using namespace PatternMatch;
672
673 OPKind OPK;
674 Value *OP1 = nullptr, *OP2 = nullptr;
675 if (match(Inst, (m_Sub(m_Value(OP1), m_Value(OP2)))))
676 OPK = OP_Sub;
677 else if (match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
678 OPK = OP_Add;
679 else if (match(Inst, (m_Mul(m_Value(OP1), m_Value(OP2)))))
680 OPK = OP_Mul;
681 else if (match(Inst, (m_Shl(m_Value(OP1), m_Value(OP2)))))
682 OPK = OP_Shl;
683 else
684 return false;
685
686 OPInfo OP1Info, OP2Info;
687
688 if (!getOperandInfo(OP1, OP1Info) || !getOperandInfo(OP2, OP2Info))
689 return false;
690
691 // Proceed only if both input operands are extended.
692 if (!OP1Info.ExtInOP || !OP2Info.ExtInOP)
693 return false;
694
695 return replaceWithIntrinsic(Inst, OPK, OP1Info, OP2Info);
696}
697
// Fill OPI for one multiply operand of a vmpa candidate: recognizes splatted
// extended scalars, splatted byte-masked scalars (and <= 255), and otherwise
// defers to getOperandInfo for vector operands.
// NOTE(review): the m_Shuffle(m_InsertElt(...)) matcher lines of the first
// if were dropped in this capture (source lines 704, 707) -- verify against
// the original file.
698bool HexagonGenWideningVecInstr::getVmpaOperandInfo(Value *V, OPInfo &OPI) {
699 using namespace PatternMatch;
700 OPI.OP = V;
701 Value *ExtV, *OP1 = nullptr;
702
// Case 1: splat of a (zero-)extended scalar.
703 if (match(V,
705 m_Poison(), m_ZeroMask()))) ||
706 match(V,
708 m_Poison(), m_ZeroMask()))) {
709 OPI.ExtInOP = ExtV;
710 OPI.IsZExt = true;
711 OPI.IsScalar = true;
712 OPI.ExtInSize = ExtV->getType()->getPrimitiveSizeInBits();
713 return true;
714 }
715
// Case 2: splat of a scalar masked to at most a byte -- an implicit zext
// to 8 bits.
716 ConstantInt *I = nullptr;
717 if ((match(V, m_Shuffle(m_InsertElt(m_Poison(), m_Value(ExtV), m_Zero()),
718 m_Poison(), m_ZeroMask())))) {
719 if (match(ExtV, m_And(m_Value(OP1), m_ConstantInt(I)))) {
720 uint32_t IValue = I->getZExtValue();
721 if (IValue <= 255) {
722 OPI.ExtInOP = ExtV;
723 OPI.IsZExt = true;
724 OPI.ExtInSize = 8;
725 OPI.IsScalar = true;
726 return true;
727 }
728 }
729 }
730
731 // Match for non-scalar operands
732 return getOperandInfo(V, OPI);
733}
734
735// Process instruction and replace with the vmpa intrinsic if possible.
736bool HexagonGenWideningVecInstr::processInstructionForVMPA(Instruction *Inst) {
737 using namespace PatternMatch;
738 Type *InstTy = Inst->getType();
739 // TODO: Extend it to handle short vector instructions (< HwVLen).
740 // vmpa instructions produce a vector register pair.
741 if (!InstTy->isVectorTy() || InstTy->getPrimitiveSizeInBits() != 2 * HwVLen)
742 return false;
743
744 Value *OP1 = nullptr, *OP2 = nullptr;
745 if (!match(Inst, (m_Add(m_Value(OP1), m_Value(OP2)))))
746 return false;
747
748 Value *OP[4] = {nullptr, nullptr, nullptr, nullptr};
749 if (!match(OP1, m_Mul(m_Value(OP[0]), m_Value(OP[1]))) ||
750 !match(OP2, m_Mul(m_Value(OP[2]), m_Value(OP[3]))))
751 return false;
752
753 OPInfo OP_Info[4];
754 for (unsigned i = 0; i < 4; i++)
755 if (!getVmpaOperandInfo(OP[i], OP_Info[i]) || !OP_Info[i].ExtInOP)
756 return false;
757
758 return replaceWithVmpaIntrinsic(Inst, OP_Info);
759}
760
761// Reorder operand info in OPI so that the vector operands come before their
762// scalar counterparts.
763void HexagonGenWideningVecInstr::reorderVmpaOperands(OPInfo *OPI) {
764 for (unsigned i = 0; i < 2; i++)
765 if (!OPI[2 * i].ExtInOP->getType()->isVectorTy()) {
766 OPInfo Temp;
767 Temp = OPI[2 * i];
768 OPI[2 * i] = OPI[2 * i + 1];
769 OPI[2 * i + 1] = Temp;
770 }
771}
772
773// Only handles the case where one input to vmpa has to be a scalar
774// and another is a vector. It can be easily extended to cover
775// other types of vmpa instructions.
// Replace Inst (add of two muls) with a vmpa intrinsic. Only the
// vector-times-scalar form is handled: after reordering, operands 1 and 3
// must be 8-bit scalars and operands 0 and 2 vectors. The two scalars are
// packed into one 32-bit immediate, the two vectors combined into a vector
// pair, and the interleaved result replaces Inst's uses.
776bool HexagonGenWideningVecInstr::replaceWithVmpaIntrinsic(Instruction *Inst,
777 OPInfo *OPI) {
778 reorderVmpaOperands(OPI);
779
780 // After reordering of the operands in OPI, the odd elements must have
781 // IsScalar flag set to true. Also, check the even elements for non-scalars.
782 if (!OPI[1].IsScalar || !OPI[3].IsScalar || OPI[0].IsScalar ||
783 OPI[2].IsScalar)
784 return false;
785
786 OPInfo SOPI1 = OPI[1];
787 OPInfo SOPI2 = OPI[3];
788
789 // The scalar operand in the vmpa instructions needs to be an int8.
790 if (SOPI1.ExtInSize != SOPI2.ExtInSize || SOPI1.ExtInSize != 8)
791 return false;
792
793 Type *InstTy = Inst->getType();
794 Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
795 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
796 unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
797
798 unsigned MaxVEltSize = std::max(OPI[0].ExtInSize, OPI[2].ExtInSize);
799 unsigned NewVOpEltSize = MaxVEltSize;
800 unsigned NewResEltSize = 2 * MaxVEltSize;
801
802 if (NumElts * NewVOpEltSize < HwVLen) {
803 // Extend the operand so that we don't end up with an invalid vector size.
804 NewVOpEltSize = 2 * NewVOpEltSize;
805 NewResEltSize = 2 * NewResEltSize;
806 }
807
808 IRBuilder<> IRB(Inst);
809
810 // Construct scalar operand
811 Value *NewSOP1 = SOPI1.ExtInOP;
812 Value *NewSOP2 = SOPI2.ExtInOP;
813
814 Type *S1Ty = NewSOP1->getType();
815 Type *S2Ty = NewSOP2->getType();
816 if (S1Ty->getPrimitiveSizeInBits() < 32)
817 NewSOP1 = IRB.CreateZExt(NewSOP1, IRB.getInt32Ty());
818 if (S2Ty->getPrimitiveSizeInBits() < 32)
819 NewSOP2 = IRB.CreateZExt(NewSOP2, IRB.getInt32Ty());
820
// Pack the two bytes as (s1 << 8) | s2 and replicate into both halves of
// the 32-bit scalar via A2_combine_ll.
821 Value *SHL = IRB.CreateShl(NewSOP1, IRB.getInt32(8));
822 Value *OR = IRB.CreateOr(SHL, NewSOP2);
823 Intrinsic::ID CombineIntID = Intrinsic::hexagon_A2_combine_ll;
824 Function *ExtF = Intrinsic::getOrInsertDeclaration(M, CombineIntID);
825 Value *ScalarOP = IRB.CreateCall(ExtF, {OR, OR});
826
827 // Construct vector operand
828 Value *NewVOP1 = adjustExtensionForOp(OPI[0], IRB, NewVOpEltSize, NumElts);
829 Value *NewVOP2 = adjustExtensionForOp(OPI[2], IRB, NewVOpEltSize, NumElts);
830
831 // Combine both vector operands to form the vector-pair for vmpa
832 Intrinsic::ID VCombineIntID = Intrinsic::hexagon_V6_vcombine_128B;
833 ExtF = Intrinsic::getOrInsertDeclaration(M, VCombineIntID);
834 Type *InType = FixedVectorType::get(IRB.getInt32Ty(), 32);
835 NewVOP1 = IRB.CreateBitCast(NewVOP1, InType);
836 NewVOP2 = IRB.CreateBitCast(NewVOP2, InType);
837 Value *VecOP = IRB.CreateCall(ExtF, {NewVOP1, NewVOP2});
838
// Pick byte (vmpabus) vs. halfword (vmpauhb) variant by result width.
839 Intrinsic::ID VmpaIntID = (NewResEltSize == 16)
840 ? Intrinsic::hexagon_V6_vmpabus_128B
841 : Intrinsic::hexagon_V6_vmpauhb_128B;
842 ExtF = Intrinsic::getOrInsertDeclaration(M, VmpaIntID);
843 auto *ResType =
844 FixedVectorType::get(getElementTy(NewResEltSize, IRB), NumElts);
845 Value *NewIn = IRB.CreateCall(ExtF, {VecOP, ScalarOP});
846 NewIn = IRB.CreateBitCast(NewIn, ResType);
847
848 if (InstEltSize > NewResEltSize)
849 // Extend the output to match the original instruction type.
850 NewIn = IRB.CreateSExt(NewIn, InstTy);
851
852 // Interleave elements in the output vector.
853 SmallVector<Constant *, 16> ShuffleMask;
854 unsigned HalfElts = NumElts / 2;
855 for (unsigned i = 0; i < HalfElts; ++i) {
856 ShuffleMask.push_back(IRB.getInt32(i));
857 ShuffleMask.push_back(IRB.getInt32(HalfElts + i));
858 }
859 NewIn = IRB.CreateShuffleVector(NewIn, PoisonValue::get(ResType),
860 ConstantVector::get(ShuffleMask));
861
862 Inst->replaceAllUsesWith(NewIn);
863 return true;
864}
865
// Recognize saturating arithmetic-shift-right patterns and replace them with
// a vasr intrinsic. Two shapes are handled: trunc(smax(smin(ashr(...))))
// via the intrinsic form, and the equivalent icmp+select clamp chain.
// NOTE(review): the match() calls for the trunc/smax and smin steps of the
// first pattern were dropped in this capture (source lines 881, 884) -- the
// surrounding braces remain; verify against the original file.
866bool HexagonGenWideningVecInstr::genSaturatingInst(Instruction *Inst) {
867 Type *InstTy = Inst->getType();
868 assert(InstTy->isVectorTy());
869 if (InstTy->getPrimitiveSizeInBits() > HwVLen)
870 return false;
871
872 using namespace PatternMatch;
873 CmpPredicate P1, P2;
874 Value *L1 = nullptr, *T1 = nullptr, *L2 = nullptr, *T2 = nullptr,
875 *L3 = nullptr;
876 Constant *RC1 = nullptr, *FC1 = nullptr, *RC2 = nullptr, *FC2 = nullptr,
877 *RC3 = nullptr;
878
879 // Pattern of interest: ashr -> llvm.smin -> llvm.smax -> trunc
880 // Match trunc instruction
882 m_Constant(RC1))))) {
883 // Match llvm.smin instruction
885 // Match ashr instruction
886 if (match(L2, m_AShr(m_Value(L3), m_Constant(RC3)))) {
887 std::pair<int, int> MinMax;
888 // get min, max values from operands of smin and smax
889 if (getMinMax(RC1, RC2, MinMax)) {
890 bool IsResSigned;
891 // Validate the saturating vasr pattern
892 if (isSaturatingVAsr(Inst, L2, MinMax.first, MinMax.second,
893 IsResSigned)) {
894 // Get the shift value from the ashr operand
895 ConstantInt *shift_val =
896 dyn_cast<ConstantInt>(RC3->getSplatValue());
897 if (shift_val) {
898 Value *NewIn =
899 createVAsrIntrinsic(Inst, L3, shift_val, IsResSigned);
900 Inst->replaceAllUsesWith(NewIn);
901 return true;
902 }
903 }
904 }
905 }
906 }
907 }
908
// Second shape: trunc(select(icmp, x, C1)) over select(icmp, y, C2) --
// a clamp written with compares and selects rather than smin/smax.
909 if (!match(Inst, (m_Trunc(m_Select(m_ICmp(P1, m_Value(L1), m_Constant(RC1)),
910 m_Value(T1), m_Constant(FC1))))) ||
911 (T1 != L1 || FC1 != RC1))
912 return false;
913
914 if (!match(L1, m_Select(m_ICmp(P2, m_Value(L2), m_Constant(RC2)), m_Value(T2),
915 m_Constant(FC2))) ||
916 (T2 != L2 || FC2 != RC2))
917 return false;
918
// The two compares must clamp from opposite directions.
919 if (!((P1 == CmpInst::ICMP_SGT && P2 == CmpInst::ICMP_SLT) ||
920 (P1 == CmpInst::ICMP_SLT && P2 == CmpInst::ICMP_SGT)))
921 return false;
922
923 std::pair<int, int> MinMax;
924 if ((P1 == CmpInst::ICMP_SGT) && (P2 == CmpInst::ICMP_SLT)) {
925 if (!getMinMax(RC1, RC2, MinMax))
926 return false;
927 } else if (!getMinMax(RC2, RC1, MinMax))
928 return false;
929
930 Value *S = L2; // Value being saturated
931
932 // Only AShr instructions are handled.
933 // Also, second operand to AShr must be a scalar.
934 Value *OP1 = nullptr, *ShiftByVal = nullptr;
935 if (!match(S, m_AShr(m_Value(OP1),
936 m_Shuffle(m_InsertElt(m_Poison(), m_Value(ShiftByVal),
937 m_Zero()),
938 m_Poison(), m_ZeroMask()))))
939 return false;
940
941 bool IsResSigned;
942 if (!isSaturatingVAsr(Inst, S, MinMax.first, MinMax.second, IsResSigned))
943 return false;
944
945 Value *NewIn = createVAsrIntrinsic(Inst, OP1, ShiftByVal, IsResSigned);
946 Inst->replaceAllUsesWith(NewIn);
947 return true;
948}
949
950Value *HexagonGenWideningVecInstr::extendShiftByVal(Value *ShiftByVal,
951 IRBuilder<> &IRB) {
952 using namespace PatternMatch;
953 Value *A = nullptr;
954 if (match(ShiftByVal, m_Trunc(m_Value(A))))
955 return A;
956 return IRB.CreateZExt(ShiftByVal, IRB.getInt32Ty());
957}
958
959bool HexagonGenWideningVecInstr::getMinMax(Constant *MinC, Constant *MaxC,
960 std::pair<int, int> &MinMax) {
961 Value *SplatV;
962 if (!(SplatV = MinC->getSplatValue()) || !(dyn_cast<ConstantInt>(SplatV)))
963 return false;
964 if (!(SplatV = MaxC->getSplatValue()) || !(dyn_cast<ConstantInt>(SplatV)))
965 return false;
966
967 ConstantInt *MinI = dyn_cast<ConstantInt>(MinC->getSplatValue());
968 ConstantInt *MaxI = dyn_cast<ConstantInt>(MaxC->getSplatValue());
969 MinMax = std::pair<int, int>(MinI->getSExtValue(), MaxI->getSExtValue());
970 return true;
971}
972
973bool HexagonGenWideningVecInstr::isSaturatingVAsr(Instruction *Inst, Value *S,
974 int MinV, int MaxV,
975 bool &IsResSigned) {
976 if (MinV >= MaxV)
977 return false;
978
979 IsResSigned = true;
980 Type *InstTy = Inst->getType();
981 Type *EltTy = cast<VectorType>(InstTy)->getElementType();
982 unsigned TruncSize = EltTy->getPrimitiveSizeInBits();
983
984 int MaxRange, MinRange;
985 if (MinV < 0) { // Saturate to a signed value
986 MaxRange = (1 << (TruncSize - 1)) - 1;
987 MinRange = -(1 << (TruncSize - 1));
988 } else if (MinV == 0) { // Saturate to an unsigned value
989 MaxRange = (1 << (TruncSize)) - 1;
990 MinRange = 0;
991 IsResSigned = false;
992 } else
993 return false;
994
995 if (MinV != MinRange || MaxV != MaxRange)
996 return false;
997
998 auto *SInst = dyn_cast<Instruction>(S);
999 if (SInst->getOpcode() == Instruction::AShr) {
1000 Type *SInstTy = SInst->getType();
1001 Type *SEltTy = cast<VectorType>(SInstTy)->getElementType();
1002 unsigned SInstEltSize = SEltTy->getPrimitiveSizeInBits();
1003 if (SInstEltSize != 2 * TruncSize || TruncSize > 16)
1004 return false;
1005 }
1006 return true;
1007}
1008
1009Intrinsic::ID HexagonGenWideningVecInstr::getVAsrIntrinsic(bool IsInSigned,
1010 bool IsResSigned) {
1011 if (!IsResSigned)
1012 return (IsInSigned) ? Intrinsic::hexagon_vasrsat_su
1013 : Intrinsic::hexagon_vasrsat_uu;
1014 return Intrinsic::hexagon_vasrsat_ss;
1015}
1016
1017Value *HexagonGenWideningVecInstr::createVAsrIntrinsic(Instruction *Inst,
1018 Value *VecOP,
1019 Value *ShiftByVal,
1020 bool IsResSigned) {
1021 IRBuilder<> IRB(Inst);
1022 Type *ShiftByTy = ShiftByVal->getType();
1023 if (ShiftByTy->getPrimitiveSizeInBits() < 32)
1024 ShiftByVal = extendShiftByVal(ShiftByVal, IRB);
1025
1026 Type *InstTy = Inst->getType();
1027 Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
1028 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
1029 unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
1030
1031 // Replace the instruction with saturating vasr intrinsic.
1032 // Since vasr with saturation interleaves elements from both input vectors,
1033 // they must be deinterleaved for output to end up in the right order.
1034 SmallVector<Constant *, 16> ShuffleMask;
1035 unsigned HalfElts = NumElts / 2;
1036 // Even elements
1037 for (unsigned i = 0; i < HalfElts; ++i)
1038 ShuffleMask.push_back(IRB.getInt32(i * 2));
1039 // Odd elements
1040 for (unsigned i = 0; i < HalfElts; ++i)
1041 ShuffleMask.push_back(IRB.getInt32(i * 2 + 1));
1042
1043 VecOP = IRB.CreateShuffleVector(VecOP, PoisonValue::get(VecOP->getType()),
1044 ConstantVector::get(ShuffleMask));
1045
1046 auto *InVecOPTy =
1047 FixedVectorType::get(getElementTy(InstEltSize * 2, IRB), HalfElts);
1048 std::pair<Value *, Value *> HiLo = opSplit(VecOP, Inst, InVecOPTy);
1049 Intrinsic::ID IntID = getVAsrIntrinsic(true, IsResSigned);
1050 Function *F = Intrinsic::getOrInsertDeclaration(M, IntID, InVecOPTy);
1051 Value *NewIn = IRB.CreateCall(F, {HiLo.first, HiLo.second, ShiftByVal});
1052 return IRB.CreateBitCast(NewIn, InstTy);
1053}
1054
1055// Generate vavg instruction.
1056bool HexagonGenWideningVecInstr::genVAvg(Instruction *Inst) {
1057 using namespace PatternMatch;
1058 Type *InstTy = Inst->getType();
1059 assert(InstTy->isVectorTy());
1060
1061 bool Match = false;
1062 Value *OP1 = nullptr, *OP2 = nullptr;
1063 bool IsSigned;
1064 if ((Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_ZExt(m_Value(OP1)),
1065 m_ZExt(m_Value(OP2))),
1066 m_SpecificInt(1)))))))
1067 IsSigned = false;
1068 if (!Match &&
1069 (Match = (match(Inst, m_Trunc(m_LShr(m_Add(m_SExt(m_Value(OP1)),
1070 m_SExt(m_Value(OP2))),
1071 m_SpecificInt(1))))) ||
1072 match(Inst, m_LShr(m_Add(m_Value(OP1), m_Value(OP2)),
1073 m_SpecificInt(1)))))
1074 IsSigned = true;
1075
1076 if (!Match)
1077 return false;
1078
1079 unsigned OP1EltSize = getElementSizeInBits(OP1);
1080 unsigned OP2EltSize = getElementSizeInBits(OP2);
1081 unsigned NewEltSize = std::max(OP1EltSize, OP2EltSize);
1082
1083 Type *EltTy = cast<FixedVectorType>(InstTy)->getElementType();
1084 unsigned InstEltSize = EltTy->getPrimitiveSizeInBits();
1085 unsigned InstLen = InstTy->getPrimitiveSizeInBits();
1086
1087 // Only vectors that are either smaller, same or twice of the hardware
1088 // vector length are allowed.
1089 if (InstEltSize < NewEltSize || (InstLen > 2 * HwVLen))
1090 return false;
1091
1092 if ((InstLen > HwVLen) && (InstLen % HwVLen != 0))
1093 return false;
1094
1095 IRBuilder<> IRB(Inst);
1096 unsigned NumElts = cast<FixedVectorType>(InstTy)->getNumElements();
1097 auto *AvgInstTy =
1098 FixedVectorType::get(getElementTy(NewEltSize, IRB), NumElts);
1099 if (OP1EltSize < NewEltSize)
1100 OP1 = (IsSigned) ? IRB.CreateSExt(OP1, AvgInstTy)
1101 : IRB.CreateZExt(OP1, AvgInstTy);
1102 if (OP2EltSize < NewEltSize)
1103 OP2 = (IsSigned) ? IRB.CreateSExt(OP2, AvgInstTy)
1104 : IRB.CreateZExt(OP2, AvgInstTy);
1105
1106 Intrinsic::ID AvgIntID =
1107 (IsSigned) ? Intrinsic::hexagon_vavgs : Intrinsic::hexagon_vavgu;
1108 Value *NewIn = nullptr;
1109
1110 // Split operands if they need more than a vector length.
1111 if (NewEltSize * NumElts > HwVLen) {
1112 unsigned HalfElts = NumElts / 2;
1113 auto *ResType =
1114 FixedVectorType::get(getElementTy(NewEltSize, IRB), HalfElts);
1115 std::pair<Value *, Value *> SplitOP1 = opSplit(OP1, Inst, ResType);
1116 std::pair<Value *, Value *> SplitOP2 = opSplit(OP2, Inst, ResType);
1117 Value *NewHi = createIntrinsic(AvgIntID, Inst, SplitOP1.first,
1118 SplitOP2.first, ResType, NumElts, false);
1119 Value *NewLo = createIntrinsic(AvgIntID, Inst, SplitOP1.second,
1120 SplitOP2.second, ResType, NumElts, false);
1121 SmallVector<Constant *, 8> ShuffleMask;
1122 for (unsigned i = 0; i < NumElts; ++i)
1123 ShuffleMask.push_back(IRB.getInt32(i));
1124 // Concat Hi and Lo.
1125 NewIn =
1126 IRB.CreateShuffleVector(NewLo, NewHi, ConstantVector::get(ShuffleMask));
1127 } else
1128 NewIn =
1129 createIntrinsic(AvgIntID, Inst, OP1, OP2, AvgInstTy, NumElts, false);
1130
1131 if (InstEltSize > NewEltSize)
1132 // Extend the output to match the original instruction type.
1133 NewIn = (IsSigned) ? IRB.CreateSExt(NewIn, InstTy)
1134 : IRB.CreateZExt(NewIn, InstTy);
1135 Inst->replaceAllUsesWith(NewIn);
1136 return true;
1137}
1138
1139bool HexagonGenWideningVecInstr::visitBlock(BasicBlock *B) {
1140 bool Changed = false;
1141 for (auto &I : *B) {
1142 Type *InstTy = I.getType();
1143 if (!InstTy->isVectorTy() || !HST->isTypeForHVX(cast<VectorType>(InstTy)))
1144 continue;
1145
1146 unsigned InstLen = InstTy->getPrimitiveSizeInBits();
1147 if (InstLen < HwVLen && !WidenShortVector)
1148 continue;
1149
1150 Changed |= processInstructionForVMPA(&I);
1151 Changed |= genSaturatingInst(&I);
1152 Changed |= genVAvg(&I);
1153 }
1154 // Generate widening instructions.
1155 for (auto &I : *B)
1156 Changed |= processInstruction(&I);
1157 return Changed;
1158}
1159
1160bool HexagonGenWideningVecInstr::runOnFunction(Function &F) {
1161 M = F.getParent();
1162 HST = TM->getSubtargetImpl(F);
1163
1164 // Return if useHVX128BOps is not set. It can be enabled for 64B mode
1165 // but wil require some changes. For example, bitcast for intrinsics
1166 // assumes 128B mode.
1167 if (skipFunction(F) || !HST->useHVX128BOps())
1168 return false;
1169
1170 HwVLen = HST->getVectorLength() * 8; // Vector Length in bits
1171 bool Changed = false;
1172 for (auto &B : F)
1173 Changed |= visitBlock(&B);
1174
1175 return Changed;
1176}
1177
1178FunctionPass *
1180 return new HexagonGenWideningVecInstr(&TM);
1181}
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
widening Hexagon generate widening vector static false bool hasNegativeValues(Constant *C)
static cl::opt< bool > WidenShortVector("hexagon-widen-short-vector", cl::desc("Generate widening instructions for short vectors."), cl::Hidden)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
#define T1
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static std::optional< OperandInfo > getOperandInfo(const MachineOperand &MO)
#define OP(OPC)
Definition Instruction.h:46
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
LLVM_ABI Constant * getSplatValue(bool AllowPoison=false) const
If all elements of the vector constant have the same value, return that value.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
unsigned getVectorLength() const
bool isTypeForHVX(Type *VecTy, bool IncludeBool=false) const
const HexagonSubtarget * getSubtargetImpl(const Function &F) const override
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Value * CreateInsertElement(Type *VecTy, Value *NewElt, Value *Idx, const Twine &Name="")
Definition IRBuilder.h:2579
Value * CreateSExt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2097
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562
IntegerType * getInt16Ty()
Fetch the type representing a 16-bit integer.
Definition IRBuilder.h:557
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition IRBuilder.h:522
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2207
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1492
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2085
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition IRBuilder.h:2601
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2071
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:1708
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:552
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
void push_back(const T &Elt)
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
class_match< PoisonValue > m_Poison()
Match an arbitrary poison constant.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::AShr > m_AShr(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
CastInst_match< OpTy, SExtInst > m_SExt(const OpTy &Op)
Matches SExt.
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
constexpr double e
support::detail::packed_endian_specific_integral< T, E, support::unaligned > packed
Definition SFrame.h:84
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
void initializeHexagonGenWideningVecInstrPass(PassRegistry &)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr bool isIntN(unsigned N, int64_t x)
Checks if an signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
FunctionPass * createHexagonGenWideningVecInstr(const HexagonTargetMachine &)