apply comments
alvoron committed Feb 14, 2025
1 parent 9735e01 commit db8959d
Showing 6 changed files with 46 additions and 73 deletions.
[changed file 1 of 6]
@@ -230,35 +230,6 @@ OV_CPU_MAYBE_UNUSED_FUNCTION static inline bool noPostOps(const FCConfig& config
template <>
const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
static const std::vector<ExecutorImplementation<FCAttrs>> fullyconnectedImplementations {
-#if defined(OV_CPU_WITH_KLEIDIAI)
-OV_CPU_INSTANCE_KLEIDIAI(
-"fullyconnected_kleidiai",
-ExecutorType::Kleidiai,
-OperationType::MatMul,
-ShapeTolerance::Agnostic,
-// supports
-[](const FCConfig& config) -> bool {
-VERIFY(noPostOps(config), UNSUPPORTED_POST_OPS);
-VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS);
-VERIFY(noWeightsDecompression(config), UNSUPPORTED_WEIGHTS_DECOMPRESSION);
-VERIFY(everyone_is(f32, srcType(config), weiType(config), dstType(config)), UNSUPPORTED_SRC_PRECISIONS);
-VERIFY(srcRank(config) == 2U, UNSUPPORTED_SRC_RANK);
-VERIFY(weiRank(config) == 2U, UNSUPPORTED_WEI_RANK);
-return MatMulKleidiAIExecutor::supports(config);
-},
-// requiresFallback
-[](const FCConfig& config) -> ov::optional<executor::Config<FCAttrs>> {
-return {};
-},
-// acceptsShapes
-[](const MemoryArgs& memory) -> bool {
-return true;
-},
-// create
-[](const FCAttrs& attrs, const PostOps& postOps, const MemoryArgs& memory, ExecutorContext::CPtr context) {
-return std::make_shared<MatMulKleidiAIExecutor>(attrs, postOps, memory, context);
-})
-#endif
OV_CPU_INSTANCE_MLAS_X64(
"fullyconnected_mlas",
ExecutorType::Mlas,
@@ -453,6 +424,33 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
const ExecutorContext::CPtr& context) {
return std::make_shared<ACLLowpFullyConnectedExecutor>(attrs, postOps, memory, context);
})
+OV_CPU_INSTANCE_KLEIDIAI(
+"fullyconnected_kleidiai",
+ExecutorType::Kleidiai,
+OperationType::MatMul,
+ShapeTolerance::Agnostic,
+// supports
+[](const FCConfig& config) -> bool {
+VERIFY(noPostOps(config), UNSUPPORTED_POST_OPS);
+VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS);
+VERIFY(noWeightsDecompression(config), UNSUPPORTED_WEIGHTS_DECOMPRESSION);
+VERIFY(everyone_is(f32, srcType(config), weiType(config), biaType(config), dstType(config)), UNSUPPORTED_SRC_PRECISIONS);
+VERIFY(srcRank(config) == 2U, UNSUPPORTED_SRC_RANK);
+VERIFY(weiRank(config) == 2U, UNSUPPORTED_WEI_RANK);
+return MatMulKleidiAIExecutor::supports(config);
+},
+// requiresFallback
+[](const FCConfig& config) -> ov::optional<executor::Config<FCAttrs>> {
+return {};
+},
+// acceptsShapes
+[](const MemoryArgs& memory) -> bool {
+return true;
+},
+// create
+[](const FCAttrs& attrs, const PostOps& postOps, const MemoryArgs& memory, ExecutorContext::CPtr context) {
+return std::make_shared<MatMulKleidiAIExecutor>(attrs, postOps, memory, context);
+})
OV_CPU_INSTANCE_SHL(
"fullyconnected_shl",
ExecutorType::Shl,
[changed file 2 of 6]
@@ -3,13 +3,12 @@
//
#pragma once

#include "../acl/acl_fullyconnected_utils.hpp"

#include <memory>
#include <oneapi/dnnl/dnnl.hpp>
#include "arm_neon.h"

#include "cpu_memory.h"
#include "nodes/executors/acl/acl_fullyconnected_utils.hpp"
#include "nodes/executors/fullyconnected_config.hpp"
#include "kai/ukernels/matmul/pack/kai_rhs_pack_kxn_f32p8x1biasf32_f32_f32_neon.h"
#include "kai/ukernels/matmul/matmul_clamp_f32_f32_f32p/kai_matmul_clamp_f32_f32_f32p_interface.h"
[changed file 3 of 6]
@@ -15,29 +15,14 @@ namespace MatMul {
/* ============= MatMul ============= */
namespace matmul {

-static const std::vector<CPUSpecificParams>& filterAclSpecificParamsFC() {
+static const std::vector<CPUSpecificParams>& filterSpecificParamsFC() {
static const std::vector<CPUSpecificParams> specificParams = {CPUSpecificParams{{}, {}, {"acl"}, "acl"}};
return specificParams;
}

-static const std::vector<CPUSpecificParams>& filterKleidiaiSpecificParamsFC() {
-static const std::vector<CPUSpecificParams> specificParams = {CPUSpecificParams{{}, {}, {"kleidiai"}, "kleidiai"}};
-return specificParams;
-}
-
-const auto testParams2D_kleidiai_smoke = ::testing::Combine(::testing::Combine(::testing::ValuesIn(IS2D_smoke()),
-::testing::Values(ElementType::f32),
-::testing::Values(ElementType::undefined),
-::testing::Values(ElementType::undefined),
-::testing::Values(utils::InputLayerType::CONSTANT),
-::testing::Values(ov::test::utils::DEVICE_CPU),
-::testing::Values(emptyAdditionalConfig())),
-::testing::Values(MatMulNodeType::FullyConnected),
-::testing::ValuesIn({emptyFusingSpec, fusingBias}),
-::testing::ValuesIn(filterCPUInfo(filterKleidiaiSpecificParamsFC())));
-INSTANTIATE_TEST_SUITE_P(smoke_FC_KleidiAI_2D, MatMulLayerCPUTest, testParams2D_kleidiai_smoke, MatMulLayerCPUTest::getTestCaseName);

std::vector<fusingSpecificParams> fusingParamsSet2D_smoke {
emptyFusingSpec,
fusingBias,
fusingRelu,
fusingTanh
};
@@ -51,8 +36,9 @@ const auto testParams2D_smoke = ::testing::Combine(::testing::Combine(::testing:
::testing::Values(emptyAdditionalConfig())),
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet2D_smoke),
-::testing::ValuesIn(filterCPUInfo(filterAclSpecificParamsFC())));
-INSTANTIATE_TEST_SUITE_P(smoke_FC_Acl_2D, MatMulLayerCPUTest, testParams2D_smoke, MatMulLayerCPUTest::getTestCaseName);
+::testing::ValuesIn(filterCPUInfo(filterSpecificParamsFC())));
+INSTANTIATE_TEST_SUITE_P(smoke_FC_2D, MatMulLayerCPUTest, testParams2D_smoke, MatMulLayerCPUTest::getTestCaseName);


std::vector<fusingSpecificParams> fusingParamsSet2D_smoke_f16 {
emptyFusingSpec,
@@ -69,8 +55,8 @@ const auto testParams2D_smoke_f16 = ::testing::Combine(::testing::Combine(::test
ov::AnyMap({ov::hint::inference_precision(ov::element::f16)}))),
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet2D_smoke_f16),
-::testing::ValuesIn(filterCPUInfo(filterAclSpecificParamsFC())));
-INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_ARM_f16, MatMulLayerCPUTest, testParams2D_smoke_f16, MatMulLayerCPUTest::getTestCaseName);
+::testing::ValuesIn(filterCPUInfo(filterSpecificParamsFC())));
+INSTANTIATE_TEST_SUITE_P(smoke_FC_2D_f16, MatMulLayerCPUTest, testParams2D_smoke_f16, MatMulLayerCPUTest::getTestCaseName);

std::vector<fusingSpecificParams> fusingParamsSet3D_smoke {
emptyFusingSpec,
@@ -101,11 +87,11 @@ const auto fullyConnectedParams3D_smoke_f16 = ::testing::Combine(::testing::Valu
const auto testParams3D_smoke = ::testing::Combine(fullyConnectedParams3D_smoke,
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet3D_smoke),
-::testing::ValuesIn(filterCPUInfo(filterAclSpecificParamsFC())));
+::testing::ValuesIn(filterCPUInfo(filterSpecificParamsFC())));
const auto testParams3D_smoke_f16 = ::testing::Combine(fullyConnectedParams3D_smoke_f16,
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet3D_smoke_f16),
-::testing::ValuesIn(filterCPUInfo(filterAclSpecificParamsFC())));
+::testing::ValuesIn(filterCPUInfo(filterSpecificParamsFC())));
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D, MatMulLayerCPUTest, testParams3D_smoke, MatMulLayerCPUTest::getTestCaseName);
INSTANTIATE_TEST_SUITE_P(smoke_FC_3D_f16, MatMulLayerCPUTest, testParams3D_smoke_f16, MatMulLayerCPUTest::getTestCaseName);

@@ -131,7 +117,7 @@ const auto testParams4D_smoke = ::testing::Combine(::testing::Combine(::testing:
::testing::Values(emptyAdditionalConfig())),
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet4D_smoke),
-::testing::ValuesIn(filterCPUInfo(filterAclSpecificParamsFC())));
+::testing::ValuesIn(filterCPUInfo(filterSpecificParamsFC())));
INSTANTIATE_TEST_SUITE_P(smoke_FC_4D, MatMulLayerCPUTest, testParams4D_smoke, MatMulLayerCPUTest::getTestCaseName);

std::vector<fusingSpecificParams> fusingParamsSet4D_smoke_f16 {
@@ -149,10 +135,10 @@ const auto testParams4D_smoke_f16 = ::testing::Combine(::testing::Combine(::test
ov::AnyMap({ov::hint::inference_precision(ov::element::f16)}))),
::testing::Values(MatMulNodeType::FullyConnected),
::testing::ValuesIn(fusingParamsSet4D_smoke_f16),
-::testing::ValuesIn(filterCPUInfo(filterAclSpecificParamsFC())));
+::testing::ValuesIn(filterCPUInfo(filterSpecificParamsFC())));
INSTANTIATE_TEST_SUITE_P(smoke_FC_4D_f16, MatMulLayerCPUTest, testParams4D_smoke_f16, MatMulLayerCPUTest::getTestCaseName);

} // namespace matmul
} // namespace MatMul
} // namespace test
-} // namespace ov
+} // namespace ov
[changed file 4 of 6]
@@ -119,8 +119,11 @@ class ConvsAndSums : virtual public SubgraphBaseStaticTest {

auto result = std::make_shared<ov::op::v0::Result>(relu3);
function = std::make_shared<ov::Model>(result, params, "SimpleNet");

+#if defined(OPENVINO_ARCH_ARM64) || defined(OPENVINO_ARCH_ARM)
abs_threshold = 2e-3;
+#else
+abs_threshold = 9e-4;
+#endif
}
};

[changed file 5 of 6]
@@ -207,13 +207,6 @@ class MatMulDecompressConvertTest : public testing::WithParamInterface<MatMulDec
}

std::string cpuNodeType = "FullyConnected";
-// replace kleidiai with acl type if input shapes are not 2D
-#if defined(OPENVINO_ARCH_ARM64)
-if (selectedType == "kleidiai" &&
-(inShapeA.rank().get_length() != 2 || inShapeB.rank().get_length() != 2)) {
-selectedType = "acl";
-}
-#endif
selectedType = makeSelectedTypeStr(selectedType, outType);

ov::ParameterVector params{std::make_shared<ov::op::v0::Parameter>(inType, inShapeA)};
@@ -285,10 +278,8 @@ std::vector<ov::AnyMap> filter_additional_config_bf16() {

std::vector<CPUSpecificParams> filter_specific_params(bool trySetMlas) {
std::vector<CPUSpecificParams> specificParams;
-#if defined(OPENVINO_ARCH_ARM)
+#if defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)
specificParams.push_back(CPUSpecificParams{{}, {}, {"acl"}, "acl"});
-#elif defined(OPENVINO_ARCH_ARM64)
-specificParams.push_back(CPUSpecificParams{{}, {}, {"kleidiai"}, "kleidiai"});
#else
if (trySetMlas) {
#ifdef OV_CPU_WITH_MLAS
[changed file 6 of 6]
@@ -327,10 +327,6 @@ std::vector<std::string> disabledTestPatterns() {
// Issue: 141705
retVector.emplace_back(R"(.*smoke_arm_Deconv_2D_Planar_FP16/DeconvolutionLayerCPUTest.*INFERENCE_PRECISION_HINT=f16.*)");
retVector.emplace_back(R"(.*ConcatMultiQuerySDPTest.*u8.*)");
-#if defined(OV_CPU_WITH_KLEIDIAI)
-// Issue: FC KleidiAI executor does not support f16 yet
-retVector.emplace_back(R"(smoke_FC.*f16.*)");
-#endif
#endif

#if defined(OPENVINO_ARCH_ARM)
