diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 3a49a8ff10860a..328f567e24e567 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -861,6 +861,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
   SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
   SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
+  SDValue ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_VSELECT(SDNode *N);
   SDValue ScalarizeVecOp_VSETCC(SDNode *N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5a21ad7ac7e2cd..2ea595c31a8ab5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -765,6 +765,9 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::CONCAT_VECTORS:
     Res = ScalarizeVecOp_CONCAT_VECTORS(N);
     break;
+  case ISD::INSERT_SUBVECTOR:
+    Res = ScalarizeVecOp_INSERT_SUBVECTOR(N, OpNo);
+    break;
   case ISD::EXTRACT_VECTOR_ELT:
     Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
     break;
@@ -882,6 +885,19 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
   return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
 }
 
+/// The inserted subvector is <1 x ty> and is being scalarized, so insert its
+/// single element with INSERT_VECTOR_ELT at the same index instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N,
+                                                          unsigned OpNo) {
+  // Only the inserted subvector (operand 1) is expected to need scalarizing.
+  assert(OpNo == 1 && "Can only scalarize the inserted subvector");
+  SDValue Elt = GetScalarizedVector(N->getOperand(1));
+  SDValue ContainingVec = N->getOperand(0);
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
+                     ContainingVec.getValueType(), ContainingVec, Elt,
+                     N->getOperand(2));
+}
+
 /// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
 /// so just return the element, ignoring the index.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
diff --git a/llvm/test/CodeGen/AMDGPU/scalarize-insert-subvector.ll b/llvm/test/CodeGen/AMDGPU/scalarize-insert-subvector.ll
new file mode 100644
index 00000000000000..f6f367cf85f729
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/scalarize-insert-subvector.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck -check-prefix=GCN %s
+
+define void @scalarize_insert_subvector(ptr addrspace(3) %inptr, ptr addrspace(3) %inptr1, ptr addrspace(3) %outptr) {
+; GCN-LABEL: scalarize_insert_subvector:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    ds_read_b64 v[4:5], v0
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    ds_read_b32 v5, v1 offset:4
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    ds_write_b64 v2, v[4:5]
+; GCN-NEXT:    s_waitcnt lgkmcnt(0)
+; GCN-NEXT:    s_setpc_b64 s[30:31]
+  %load0 = load <2 x i32>, ptr addrspace(3) %inptr, align 8
+  %load1 = load <2 x i32>, ptr addrspace(3) %inptr1, align 8
+  %shuffle0 = shufflevector <2 x i32> %load1, <2 x i32> poison, <1 x i32> <i32 1>
+  %bitcast0 = bitcast <1 x i32> %shuffle0 to <2 x half>
+  %bitcast1 = bitcast <2 x i32> %load0 to <4 x half>
+  %shuffle1 = shufflevector <2 x half> %bitcast0, <2 x half> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+  %shuffle2 = shufflevector <4 x half> %bitcast1, <4 x half> %shuffle1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+  store <4 x half> %shuffle2, ptr addrspace(3) %outptr
+  ret void
+}