Initial commit

2025-06-09 18:06:36 +02:00 · 2025-06-09 18:06:36 +02:00 · ce3dd83b9f
commit ce3dd83b9f
1470 changed files with 1054449 additions and 0 deletions
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/CMakeLists.txt
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/CMakeLists.txt
@ -0,0 +1,56 @@
+cmake_minimum_required (VERSION 3.14)
+
+project(CMSISDSPDistance)
+
+# Modules expected to provide the configLib() and configDsp() commands that
+# are invoked further down.
+include(configLib)
+include(configDsp)
+
+# NOTE(review): SRC is not referenced again in this file; the library sources
+# are listed explicitly below. Confirm that nothing in the included modules
+# reads it before removing this glob.
+file(GLOB SRC "./*_*.c")
+
+add_library(CMSISDSPDistance STATIC)
+
+# Sources that are always part of the library (boolean, f32 and f64 kernels).
+target_sources(CMSISDSPDistance
+  PRIVATE
+    arm_boolean_distance.c
+    arm_braycurtis_distance_f32.c
+    arm_canberra_distance_f32.c
+    arm_chebyshev_distance_f32.c
+    arm_chebyshev_distance_f64.c
+    arm_cityblock_distance_f32.c
+    arm_cityblock_distance_f64.c
+    arm_correlation_distance_f32.c
+    arm_cosine_distance_f32.c
+    arm_cosine_distance_f64.c
+    arm_dice_distance.c
+    arm_euclidean_distance_f32.c
+    arm_euclidean_distance_f64.c
+    arm_hamming_distance.c
+    arm_jaccard_distance.c
+    arm_jensenshannon_distance_f32.c
+    arm_kulsinski_distance.c
+    arm_minkowski_distance_f32.c
+    arm_rogerstanimoto_distance.c
+    arm_russellrao_distance.c
+    arm_sokalmichener_distance.c
+    arm_sokalsneath_distance.c
+    arm_yule_distance.c
+)
+
+# Apply the shared library / DSP configuration to the target.
+configLib(CMSISDSPDistance ${ROOT})
+configDsp(CMSISDSPDistance ${ROOT})
+
+### Includes
+target_include_directories(CMSISDSPDistance
+  PUBLIC "${DSP}/Include"
+  PRIVATE "."
+)
+
+# Half-precision (f16) kernels are added only when neither ARMAC5 nor
+# DISABLEFLOAT16 is set.
+if (NOT (ARMAC5 OR DISABLEFLOAT16))
+  target_sources(CMSISDSPDistance
+    PRIVATE
+      arm_braycurtis_distance_f16.c
+      arm_canberra_distance_f16.c
+      arm_chebyshev_distance_f16.c
+      arm_cityblock_distance_f16.c
+      arm_correlation_distance_f16.c
+      arm_cosine_distance_f16.c
+      arm_euclidean_distance_f16.c
+      arm_jensenshannon_distance_f16.c
+      arm_minkowski_distance_f16.c
+  )
+endif()
+
+ 
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctions.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctions.c
@ -0,0 +1,51 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        DistanceFunctions.c
+ * Description:  Combination of all distance function source files.
+ *
+ * $Date:        16. March 2020
+ * $Revision:    V1.0.0
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_boolean_distance.c"
+#include "arm_braycurtis_distance_f32.c"
+#include "arm_canberra_distance_f32.c"
+#include "arm_chebyshev_distance_f32.c"
+#include "arm_chebyshev_distance_f64.c"
+#include "arm_cityblock_distance_f32.c"
+#include "arm_cityblock_distance_f64.c"
+#include "arm_correlation_distance_f32.c"
+#include "arm_cosine_distance_f32.c"
+#include "arm_cosine_distance_f64.c"
+#include "arm_dice_distance.c"
+#include "arm_euclidean_distance_f32.c"
+#include "arm_euclidean_distance_f64.c"
+#include "arm_hamming_distance.c"
+#include "arm_jaccard_distance.c"
+#include "arm_jensenshannon_distance_f32.c"
+#include "arm_kulsinski_distance.c"
+#include "arm_minkowski_distance_f32.c"
+#include "arm_rogerstanimoto_distance.c"
+#include "arm_russellrao_distance.c"
+#include "arm_sokalmichener_distance.c"
+#include "arm_sokalsneath_distance.c"
+#include "arm_yule_distance.c"
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c
@ -0,0 +1,36 @@
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        DistanceFunctionsF16.c
+ * Description:  Combination of all distance function f16 source files.
+ *
+ *
+ * Target Processor: Cortex-M cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2020 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "arm_braycurtis_distance_f16.c"
+#include "arm_canberra_distance_f16.c"
+#include "arm_chebyshev_distance_f16.c"
+#include "arm_cityblock_distance_f16.c"
+#include "arm_correlation_distance_f16.c"
+#include "arm_cosine_distance_f16.c"
+#include "arm_euclidean_distance_f16.c"
+#include "arm_jensenshannon_distance_f16.c"
+#include "arm_minkowski_distance_f16.c"
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance.c
@ -0,0 +1,80 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_boolean_distance.c
+ * Description:  Instantiations of the boolean distance template
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+#endif
+
+
+/*
+ * Each inclusion of arm_boolean_distance_template.h below generates one
+ * function named arm_boolean_distance<EXT>. The TT / FF / TF / FT macros
+ * select which counters the generated function computes and which output
+ * pointer parameters (cTT / cFF / cTF / cFT) it takes.
+ */
+
+/* arm_boolean_distance_TT_TF_FT: counts TT, TF and FT. */
+#define TT
+#define TF 
+#define FT
+#define EXT _TT_TF_FT
+#include "arm_boolean_distance_template.h"
+
+/* Reset every selector before the next instantiation. */
+#undef TT
+#undef FF
+#undef TF
+#undef FT 
+#undef EXT
+/* arm_boolean_distance_TF_FT: counts TF and FT. */
+#define TF 
+#define FT
+#define EXT _TF_FT
+#include "arm_boolean_distance_template.h"
+
+/* Reset every selector before the next instantiation. */
+#undef TT
+#undef FF
+#undef TF
+#undef FT 
+#undef EXT
+/* arm_boolean_distance_TT_FF_TF_FT: counts all four outcomes. */
+#define TT
+#define FF
+#define TF 
+#define FT
+#define EXT _TT_FF_TF_FT
+#include "arm_boolean_distance_template.h"
+
+/* Reset every selector before the next instantiation. */
+#undef TT
+#undef FF
+#undef TF
+#undef FT 
+#undef EXT
+/* arm_boolean_distance_TT: counts TT only. */
+#define TT
+#define EXT _TT
+#include "arm_boolean_distance_template.h"
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance_template.h
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_boolean_distance_template.h
@ -0,0 +1,551 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_boolean_distance_template.h
+ * Description:  Templates for boolean distances
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+
+
+/**
+ * @defgroup DISTANCEF Distance Functions
+ *
+ * Computes Distances between vectors. 
+ *
+ * Distance functions are useful in a lot of algorithms.
+ *
+ */
+
+
+/**
+ * @addtogroup DISTANCEF
+ * @{
+ */
+
+
+
+
+/* Token-pasting helper: concatenates A and B into a single identifier. */
+#define _FUNC(A,B) A##B 
+
+/*
+ * Builds the name of the generated function: arm_boolean_distance followed by
+ * the suffix passed in. The extra level of indirection lets a macro argument
+ * (the including file's EXT) be expanded before it is pasted.
+ */
+#define FUNC(EXT) _FUNC(arm_boolean_distance, EXT)
+
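+/**
+ * @brief        Elements of boolean distances
+ *
+ * Different values which are used to compute boolean distances.
+ * The output parameters below are only present when the corresponding
+ * macro (TT, FF, TF, FT) is defined by the file including this template.
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @param[out]   cTT             Number of positions set in both vectors
+ * @param[out]   cFF             Number of positions clear in both vectors
+ * @param[out]   cTF             Number of positions set in pA and clear in pB
+ * @param[out]   cFT             Number of positions clear in pA and set in pB
+ * @return None
+ *
+ */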
+
+#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_common_tables.h"
+
+/*
+ * Helium (MVE) variant.
+ *
+ * Counts, over the first numberOfBools packed booleans of pA and pB, how many
+ * positions fall in each selected (A,B) outcome and stores every count through
+ * its output pointer (cTT / cFF / cTF / cFT, present only when the matching
+ * macro is defined).
+ *
+ * Processing is done in three stages:
+ *  - blocks of 128 booleans with vector instructions,
+ *  - remaining complete 32-bit words, bit by bit,
+ *  - remaining booleans (fewer than 32), taken from the most significant
+ *    bits of the next word. This last word is only read when at least one
+ *    boolean remains.
+ */
+void FUNC(EXT)(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+#ifdef TT
+       , uint32_t *cTT
+#endif
+#ifdef FF
+       , uint32_t *cFF
+#endif
+#ifdef TF
+       , uint32_t *cTF
+#endif
+#ifdef FT
+       , uint32_t *cFT
+#endif
+       )
+{
+
+#ifdef TT
+    uint32_t _ctt=0;
+#endif
+#ifdef FF
+    uint32_t _cff=0;
+#endif
+#ifdef TF
+    uint32_t _ctf=0;
+#endif
+#ifdef FT
+    uint32_t _cft=0;
+#endif
+    uint32_t        a, b, ba, bb;
+    int shift;
+    const uint8_t  *pA8 = (const uint8_t *) pA;
+    const uint8_t  *pB8 = (const uint8_t *) pB;
+
+    /* handle vector blocks: 16 bytes (128 booleans) per iteration */
+    uint32_t         blkCnt = numberOfBools / 128;
+
+
+
+    while (blkCnt > 0U) {
+        uint8x16_t      vecA = vld1q((const uint8_t *) pA8);
+        uint8x16_t      vecB = vld1q((const uint8_t *) pB8);
+
+        /*
+         * Each combined byte is used as an offset into hwLUT (declared in
+         * arm_common_tables.h, presumably a per-byte bit-count table) and
+         * the 16 gathered values are summed into the counter.
+         */
+#ifdef TT
+        uint8x16_t      vecTT = vecA & vecB;
+        vecTT = vldrbq_gather_offset_u8(hwLUT, vecTT);
+        _ctt += vaddvq(vecTT);
+#endif
+#ifdef FF
+        uint8x16_t      vecFF = vmvnq(vecA) & vmvnq(vecB);
+        vecFF = vldrbq_gather_offset_u8(hwLUT, vecFF);
+        _cff += vaddvq(vecFF);
+#endif
+#ifdef TF
+        uint8x16_t      vecTF = vecA & vmvnq(vecB);
+        vecTF = vldrbq_gather_offset_u8(hwLUT, vecTF);
+        _ctf += vaddvq(vecTF);
+#endif
+#ifdef FT
+        uint8x16_t      vecFT = vmvnq(vecA) & vecB;
+        vecFT = vldrbq_gather_offset_u8(hwLUT, vecFT);
+        _cft += vaddvq(vecFT);
+#endif
+
+        pA8 += 16;
+        pB8 += 16;
+        blkCnt--;
+
+    }
+
+    pA = (const uint32_t *)pA8;
+    pB = (const uint32_t *)pB8;
+
+    /* Booleans left after the vector blocks (0..127). */
+    blkCnt = numberOfBools & 0x7F;
+
+    /* Complete 32-bit words: every bit of the word is a valid boolean. */
+    while(blkCnt >= 32)
+    {
+       a = *pA++;
+       b = *pB++;
+       shift = 0;
+       while(shift < 32)
+       {
+          ba = a & 1;
+          bb = b & 1;
+          a = a >> 1;
+          b = b >> 1;
+
+#ifdef TT
+          _ctt += (ba && bb);
+#endif
+#ifdef FF
+          _cff += ((1 ^ ba) && (1 ^ bb));
+#endif
+#ifdef TF
+          _ctf += (ba && (1 ^ bb));
+#endif
+#ifdef FT
+          _cft += ((1 ^ ba) && bb);
+#endif
+          shift ++;
+       }
+
+       blkCnt -= 32;
+    }
+
+    /*
+     * Partial last word. It is skipped when nothing remains: reading it would
+     * access memory past the packed booleans and the shift count below would
+     * be 32, which is undefined for a 32-bit operand.
+     */
+    if (blkCnt > 0)
+    {
+       a = *pA;
+       b = *pB;
+
+       /* Bring the blkCnt most significant bits down to the low positions. */
+       a = a >> (32 - blkCnt);
+       b = b >> (32 - blkCnt);
+
+       while(blkCnt > 0)
+       {
+          ba = a & 1;
+          bb = b & 1;
+          a = a >> 1;
+
+          b = b >> 1;
+#ifdef TT
+          _ctt += (ba && bb);
+#endif
+#ifdef FF
+          _cff += ((1 ^ ba) && (1 ^ bb));
+#endif
+#ifdef TF
+          _ctf += (ba && (1 ^ bb));
+#endif
+#ifdef FT
+          _cft += ((1 ^ ba) && bb);
+#endif
+          blkCnt --;
+       }
+    }
+
+#ifdef TT
+    *cTT = _ctt;
+#endif
+#ifdef FF
+    *cFF = _cff;
+#endif
+#ifdef TF
+    *cTF = _ctf;
+#endif
+#ifdef FT
+    *cFT = _cft;
+#endif
+}
+
+#else
+#if defined(ARM_MATH_NEON)
+
+
+/*
+ * Neon variant.
+ *
+ * Counts, over the first numberOfBools packed booleans of pA and pB, how many
+ * positions fall in each selected (A,B) outcome and stores every count through
+ * its output pointer (cTT / cFF / cTF / cFT, present only when the matching
+ * macro is defined).
+ *
+ * Processing is done in three stages:
+ *  - blocks of 128 booleans with vector instructions,
+ *  - remaining complete 32-bit words, bit by bit,
+ *  - remaining booleans (fewer than 32), taken from the most significant
+ *    bits of the next word. This last word is only read when at least one
+ *    boolean remains.
+ */
+void FUNC(EXT)(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+#ifdef TT
+       , uint32_t *cTT
+#endif
+#ifdef FF
+       , uint32_t *cFF
+#endif
+#ifdef TF
+       , uint32_t *cTF
+#endif
+#ifdef FT
+       , uint32_t *cFT
+#endif
+       )
+{
+#ifdef TT
+    uint32_t _ctt=0;
+#endif
+#ifdef FF
+    uint32_t _cff=0;
+#endif
+#ifdef TF
+    uint32_t _ctf=0;
+#endif
+#ifdef FT
+    uint32_t _cft=0;
+#endif
+    uint32_t nbBoolBlock;
+    uint32_t a,b,ba,bb;
+    int shift;
+    uint32x4_t aV, bV;
+#ifdef TT
+    uint32x4_t cttV;
+#endif
+#ifdef FF
+    uint32x4_t cffV;
+#endif
+#ifdef TF
+    uint32x4_t ctfV;
+#endif
+#ifdef FT
+    uint32x4_t cftV;
+#endif
+    uint8x16_t tmp;
+    uint16x8_t tmp2;
+    uint32x4_t tmp3;
+    uint64x2_t tmp4;
+#ifdef TT
+    uint64x2_t tmp4tt;
+#endif
+#ifdef FF
+    uint64x2_t tmp4ff;
+#endif
+#ifdef TF
+    uint64x2_t tmp4tf;
+#endif
+#ifdef FT
+    uint64x2_t tmp4ft;
+#endif
+
+    /* Vector accumulators, one per selected outcome. */
+#ifdef TT
+    tmp4tt = vdupq_n_u64(0);
+#endif
+#ifdef FF
+    tmp4ff = vdupq_n_u64(0);
+#endif
+#ifdef TF
+    tmp4tf = vdupq_n_u64(0);
+#endif
+#ifdef FT
+    tmp4ft = vdupq_n_u64(0);
+#endif
+
+    /* Vector part: four 32-bit words (128 booleans) per iteration. */
+    nbBoolBlock = numberOfBools >> 7;
+    while(nbBoolBlock > 0)
+    {
+       aV = vld1q_u32(pA);
+       bV = vld1q_u32(pB);
+       pA += 4;
+       pB += 4;
+
+       /* Bit masks of the positions matching each outcome. */
+#ifdef TT
+       cttV = vandq_u32(aV,bV);
+#endif
+#ifdef FF
+       cffV = vandq_u32(vmvnq_u32(aV),vmvnq_u32(bV));
+#endif
+#ifdef TF
+       ctfV = vandq_u32(aV,vmvnq_u32(bV));
+#endif
+#ifdef FT
+       cftV = vandq_u32(vmvnq_u32(aV),bV);
+#endif
+
+       /*
+        * Per-byte bit counts are widened by successive pairwise additions
+        * (8 -> 16 -> 32 -> 64 bits) and added to the matching accumulator.
+        */
+#ifdef TT
+       tmp = vcntq_u8(vreinterpretq_u8_u32(cttV));
+       tmp2 = vpaddlq_u8(tmp);
+       tmp3 = vpaddlq_u16(tmp2);
+       tmp4 = vpaddlq_u32(tmp3);
+       tmp4tt = vaddq_u64(tmp4tt, tmp4);
+#endif
+
+#ifdef FF
+       tmp = vcntq_u8(vreinterpretq_u8_u32(cffV));
+       tmp2 = vpaddlq_u8(tmp);
+       tmp3 = vpaddlq_u16(tmp2);
+       tmp4 = vpaddlq_u32(tmp3);
+       tmp4ff = vaddq_u64(tmp4ff, tmp4);
+#endif
+
+#ifdef TF
+       tmp = vcntq_u8(vreinterpretq_u8_u32(ctfV));
+       tmp2 = vpaddlq_u8(tmp);
+       tmp3 = vpaddlq_u16(tmp2);
+       tmp4 = vpaddlq_u32(tmp3);
+       tmp4tf = vaddq_u64(tmp4tf, tmp4);
+#endif 
+
+#ifdef FT
+       tmp = vcntq_u8(vreinterpretq_u8_u32(cftV));
+       tmp2 = vpaddlq_u8(tmp);
+       tmp3 = vpaddlq_u16(tmp2);
+       tmp4 = vpaddlq_u32(tmp3);
+       tmp4ft = vaddq_u64(tmp4ft, tmp4);
+#endif
+
+
+       nbBoolBlock --;
+    }
+
+    /* Fold the two 64-bit lanes of every accumulator into its scalar counter. */
+#ifdef TT
+    _ctt += vgetq_lane_u64(tmp4tt, 0) + vgetq_lane_u64(tmp4tt, 1);
+#endif
+#ifdef FF
+    _cff +=vgetq_lane_u64(tmp4ff, 0) + vgetq_lane_u64(tmp4ff, 1);
+#endif
+#ifdef TF
+    _ctf += vgetq_lane_u64(tmp4tf, 0) + vgetq_lane_u64(tmp4tf, 1);
+#endif
+#ifdef FT
+    _cft += vgetq_lane_u64(tmp4ft, 0) + vgetq_lane_u64(tmp4ft, 1);
+#endif
+
+    /* Booleans left after the vector blocks (0..127). */
+    nbBoolBlock = numberOfBools & 0x7F;
+
+    /* Complete 32-bit words: every bit of the word is a valid boolean. */
+    while(nbBoolBlock >= 32)
+    {
+       a = *pA++;
+       b = *pB++;
+       shift = 0;
+       while(shift < 32)
+       {
+          ba = a & 1;
+          bb = b & 1;
+          a = a >> 1;
+          b = b >> 1;
+
+#ifdef TT
+          _ctt += (ba && bb);
+#endif
+#ifdef FF
+          _cff += ((1 ^ ba) && (1 ^ bb));
+#endif
+#ifdef TF
+          _ctf += (ba && (1 ^ bb));
+#endif
+#ifdef FT
+          _cft += ((1 ^ ba) && bb);
+#endif
+          shift ++;
+       }
+
+       nbBoolBlock -= 32;
+    }
+
+    /*
+     * Partial last word. It is skipped when nothing remains: reading it would
+     * access memory past the packed booleans and the shift count below would
+     * be 32, which is undefined for a 32-bit operand.
+     */
+    if (nbBoolBlock > 0)
+    {
+       a = *pA;
+       b = *pB;
+
+       /* Bring the nbBoolBlock most significant bits down to the low positions. */
+       a = a >> (32 - nbBoolBlock);
+       b = b >> (32 - nbBoolBlock);
+
+       while(nbBoolBlock > 0)
+       {
+          ba = a & 1;
+          bb = b & 1;
+          a = a >> 1;
+
+          b = b >> 1;
+#ifdef TT
+          _ctt += (ba && bb);
+#endif
+#ifdef FF
+          _cff += ((1 ^ ba) && (1 ^ bb));
+#endif
+#ifdef TF
+          _ctf += (ba && (1 ^ bb));
+#endif
+#ifdef FT
+          _cft += ((1 ^ ba) && bb);
+#endif
+          nbBoolBlock --;
+       }
+    }
+
+#ifdef TT
+    *cTT = _ctt;
+#endif
+#ifdef FF
+    *cFF = _cff;
+#endif
+#ifdef TF
+    *cTF = _ctf;
+#endif
+#ifdef FT
+    *cFT = _cft;
+#endif
+}
+
+#else
+
+/*
+ * Portable C variant.
+ *
+ * Counts, over the first numberOfBools packed booleans of pA and pB, how many
+ * positions fall in each selected (A,B) outcome and stores every count through
+ * its output pointer (cTT / cFF / cTF / cFT, present only when the matching
+ * macro is defined).
+ *
+ * Complete 32-bit words are scanned bit by bit. When numberOfBools is not a
+ * multiple of 32, the remaining booleans are taken from the most significant
+ * bits of the next word; that word is only read when at least one boolean
+ * remains.
+ */
+void FUNC(EXT)(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+#ifdef TT
+       , uint32_t *cTT
+#endif
+#ifdef FF
+       , uint32_t *cFF
+#endif
+#ifdef TF
+       , uint32_t *cTF
+#endif
+#ifdef FT
+       , uint32_t *cFT
+#endif
+       )
+{
+  
+#ifdef TT
+    uint32_t _ctt=0;
+#endif
+#ifdef FF
+    uint32_t _cff=0;
+#endif
+#ifdef TF
+    uint32_t _ctf=0;
+#endif
+#ifdef FT
+    uint32_t _cft=0;
+#endif
+    uint32_t a,b,ba,bb;
+    int shift;
+
+    /* Complete 32-bit words: every bit of the word is a valid boolean. */
+    while(numberOfBools >= 32)
+    {
+       a = *pA++;
+       b = *pB++;
+       shift = 0;
+       while(shift < 32)
+       {
+          ba = a & 1;
+          bb = b & 1;
+          a = a >> 1;
+          b = b >> 1;
+#ifdef TT
+          _ctt += (ba && bb);
+#endif
+#ifdef FF
+          _cff += ((1 ^ ba) && (1 ^ bb));
+#endif
+#ifdef TF
+          _ctf += (ba && (1 ^ bb));
+#endif
+#ifdef FT
+          _cft += ((1 ^ ba) && bb);
+#endif
+          shift ++;
+       }
+
+       numberOfBools -= 32;
+    }
+
+    /*
+     * Partial last word. It is skipped when nothing remains: reading it would
+     * access memory past the packed booleans and the shift count below would
+     * be 32, which is undefined for a 32-bit operand.
+     */
+    if (numberOfBools > 0)
+    {
+       a = *pA;
+       b = *pB;
+
+       /* Bring the numberOfBools most significant bits down to the low positions. */
+       a = a >> (32 - numberOfBools);
+       b = b >> (32 - numberOfBools);
+
+       while(numberOfBools > 0)
+       {
+          ba = a & 1;
+          bb = b & 1;
+          a = a >> 1;
+          b = b >> 1;
+
+#ifdef TT
+          _ctt += (ba && bb);
+#endif
+#ifdef FF
+          _cff += ((1 ^ ba) && (1 ^ bb));
+#endif
+#ifdef TF
+          _ctf += (ba && (1 ^ bb));
+#endif
+#ifdef FT
+          _cft += ((1 ^ ba) && bb);
+#endif
+          numberOfBools --;
+       }
+    }
+
+#ifdef TT
+    *cTT = _ctt;
+#endif
+#ifdef FF
+    *cFF = _cff;
+#endif
+#ifdef TF 
+    *cTF = _ctf;
+#endif
+#ifdef FT
+    *cFT = _cft;
+#endif
+}
+#endif
+#endif /* defined(ARM_MATH_MVEI) */
+
+
+/**
+ * @} end of DISTANCEF group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f16.c
@ -0,0 +1,158 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_braycurtis_distance_f16.c
+ * Description:  Bray-Curtis distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+ * @ingroup groupDistance
+ */
+
+/**
+ * @defgroup FloatDist Float Distances
+ *
+ * Distances between two vectors of float values.
+ */
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup braycurtis Bray-Curtis distance
+
+  Bray-Curtis distance between two vectors
+ */
+
+/**
+  @addtogroup braycurtis
+  @{
+ */
+
+
+/**
+ * @brief        Bray-Curtis distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+/*
+ * Helium (MVE) half-precision variant: returns sum(|a - b|) / sum(|a + b|)
+ * computed over blockSize elements, 8 lanes at a time.
+ */
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /* Per-lane running sums of |a - b| and |a + b|. */
+    f16x8_t         diffAcc = vdupq_n_f16(0.0f);
+    f16x8_t         sumAcc = vdupq_n_f16(0.0f);
+    f16x8_t         va, vb, vt;
+    _Float16        totalDiff, totalSum;
+    uint32_t        nbVec;
+    uint32_t        nbTail = blockSize & 7;
+
+    /* Complete vectors of 8 values. */
+    for (nbVec = blockSize >> 3; nbVec > 0; nbVec--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+
+        vt = vabdq(va, vb);
+        diffAcc = vaddq(diffAcc, vt);
+
+        vt = vaddq_f16(va, vb);
+        vt = vabsq_f16(vt);
+        sumAcc = vaddq(sumAcc, vt);
+
+        pA += 8;
+        pB += 8;
+    }
+
+    /* Remaining 1..7 values, handled with a predicate on the active lanes. */
+    if (nbTail > 0U) {
+        mve_pred16_t    tailPred = vctp16q(nbTail);
+
+        va = vldrhq_z_f16(pA, tailPred);
+        vb = vldrhq_z_f16(pB, tailPred);
+
+        vt = vabdq(va, vb);
+        diffAcc = vaddq_m(diffAcc, diffAcc, vt, tailPred);
+
+        vt = vaddq_f16(va, vb);
+        vt = vabsq_f16(vt);
+        sumAcc = vaddq_m(sumAcc, sumAcc, vt, tailPred);
+    }
+
+    /* Reduce the lanes of both accumulators to scalars. */
+    totalDiff = vecAddAcrossF16Mve(diffAcc);
+    totalSum = vecAddAcrossF16Mve(sumAcc);
+
+    /*
+       The denominator is assumed to be non-zero. Being a sum of absolute
+       values, it is zero only when every term is zero, which is very
+       unlikely for long vectors.
+     */
+    return (totalDiff / totalSum);
+}
+#else
+
+/*
+ * Scalar half-precision variant: returns sum(|a - b|) / sum(|a + b|)
+ * computed over blockSize elements.
+ */
+float16_t arm_braycurtis_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   _Float16 sumAbsDiff = 0.0f16;
+   _Float16 sumAbsSum = 0.0f16;
+   _Float16 x, y;
+   uint32_t i;
+
+   for (i = 0; i < blockSize; i++)
+   {
+      x = pA[i];
+      y = pB[i];
+
+      /* Absolute values are taken in single precision and narrowed back. */
+      sumAbsDiff += (_Float16)fabsf((float32_t)(x - y));
+      sumAbsSum += (_Float16)fabsf((float32_t)(x + y));
+   }
+
+   /*
+      The denominator is assumed to be non-zero. Being a sum of absolute
+      values, it is zero only when every term is zero, which is very
+      unlikely for long vectors.
+   */
+   return(sumAbsDiff / sumAbsSum);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of braycurtis group
+ */
+
+
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_braycurtis_distance_f32.c
@ -0,0 +1,187 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_braycurtis_distance_f32.c
+ * Description:  Bray-Curtis distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup braycurtis
+  @{
+ */
+
+
+/**
+ * @brief        Bray-Curtis distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+
+/*
+ * Helium (MVE) single-precision variant: returns sum(|a - b|) / sum(|a + b|)
+ * computed over blockSize elements, 4 lanes at a time.
+ */
+float32_t arm_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    /* Per-lane running sums of |a - b| and |a + b|. */
+    f32x4_t         diffAcc = vdupq_n_f32(0.0f);
+    f32x4_t         sumAcc = vdupq_n_f32(0.0f);
+    f32x4_t         va, vb, vt;
+    float32_t       totalDiff, totalSum;
+    uint32_t        nbVec;
+    uint32_t        nbTail = blockSize & 3;
+
+    /* Complete vectors of 4 values. */
+    for (nbVec = blockSize >> 2; nbVec > 0; nbVec--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+
+        vt = vabdq(va, vb);
+        diffAcc = vaddq(diffAcc, vt);
+
+        vt = vaddq_f32(va, vb);
+        vt = vabsq_f32(vt);
+        sumAcc = vaddq(sumAcc, vt);
+
+        pA += 4;
+        pB += 4;
+    }
+
+    /* Remaining 1..3 values, handled with a predicate on the active lanes. */
+    if (nbTail > 0U) {
+        mve_pred16_t    tailPred = vctp32q(nbTail);
+
+        va = vldrwq_z_f32(pA, tailPred);
+        vb = vldrwq_z_f32(pB, tailPred);
+
+        vt = vabdq(va, vb);
+        diffAcc = vaddq_m(diffAcc, diffAcc, vt, tailPred);
+
+        vt = vaddq_f32(va, vb);
+        vt = vabsq_f32(vt);
+        sumAcc = vaddq_m(sumAcc, sumAcc, vt, tailPred);
+    }
+
+    /* Reduce the lanes of both accumulators to scalars. */
+    totalDiff = vecAddAcrossF32Mve(diffAcc);
+    totalSum = vecAddAcrossF32Mve(sumAcc);
+
+    /*
+       The denominator is assumed to be non-zero. Being a sum of absolute
+       values, it is zero only when every term is zero, which is very
+       unlikely for long vectors.
+     */
+    return (totalDiff / totalSum);
+}
+#else
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+/*
+ * Neon single-precision variant: returns sum(|a - b|) / sum(|a + b|)
+ * computed over blockSize elements. Groups of 4 values are processed with
+ * vector instructions, the remaining 0..3 values with scalar code.
+ */
+float32_t arm_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   /* Per-lane running sums of |a - b| and |a + b|. */
+   float32x4_t diffAcc = vdupq_n_f32(0.0f);
+   float32x4_t sumAcc = vdupq_n_f32(0.0f);
+   float32x4_t va, vb, vt;
+   float32x2_t pairSum;
+   float32_t totalDiff, totalSum;
+   float32_t x, y;
+   uint32_t remaining;
+
+   for (remaining = blockSize >> 2; remaining > 0; remaining--)
+   {
+        va = vld1q_f32(pA);
+        vb = vld1q_f32(pB);
+
+        vt = vabdq_f32(va,vb);
+        diffAcc = vaddq_f32(diffAcc,vt);
+
+        vt = vaddq_f32(va,vb);
+        vt = vabsq_f32(vt);
+        sumAcc = vaddq_f32(sumAcc,vt);
+
+        pA += 4;
+        pB += 4;
+   }
+
+   /* Horizontal reduction: pairwise add the halves, then add the two lanes. */
+   pairSum = vpadd_f32(vget_low_f32(diffAcc),vget_high_f32(diffAcc));
+   totalDiff = vget_lane_f32(pairSum, 0) + vget_lane_f32(pairSum, 1);
+
+   pairSum = vpadd_f32(vget_low_f32(sumAcc),vget_high_f32(sumAcc));
+   totalSum = vget_lane_f32(pairSum, 0) + vget_lane_f32(pairSum, 1);
+
+   /* Scalar tail. */
+   for (remaining = blockSize & 3; remaining > 0; remaining--)
+   {
+      x = *pA++;
+      y = *pB++;
+      totalDiff += fabsf(x - y);
+      totalSum += fabsf(x + y);
+   }
+
+   /*
+      The denominator is assumed to be non-zero. Being a sum of absolute
+      values, it is zero only when every term is zero, which is very
+      unlikely for long vectors.
+   */
+   return(totalDiff / totalSum);
+}
+
+#else
+/*
+ * Scalar single-precision variant: returns sum(|a - b|) / sum(|a + b|)
+ * computed over blockSize elements.
+ */
+float32_t arm_braycurtis_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32_t sumAbsDiff = 0.0f;
+   float32_t sumAbsSum = 0.0f;
+   float32_t x, y;
+   uint32_t i;
+
+   for (i = 0; i < blockSize; i++)
+   {
+      x = pA[i];
+      y = pB[i];
+      sumAbsDiff += fabsf(x - y);
+      sumAbsSum += fabsf(x + y);
+   }
+
+   /*
+      The denominator is assumed to be non-zero. Being a sum of absolute
+      values, it is zero only when every term is zero, which is very
+      unlikely for long vectors.
+   */
+   return(sumAbsDiff / sumAbsSum);
+}
+#endif
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of braycurtis group
+ */
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f16.c
@ -0,0 +1,171 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_canberra_distance_f16.c
+ * Description:  Canberra distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup Canberra Canberra distance
+
+  Canberra distance
+ */
+
+
+/**
+  @addtogroup Canberra
+  @{
+ */
+
+
+/**
+ * @brief        Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will be correct. So the division per zero may be
+ * ignored.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+/*
+ * Helium (MVE) half-precision variant: returns the sum over blockSize
+ * elements of |a - b| / (|a| + |b|), 8 lanes at a time.
+ *
+ * Lanes whose denominator |a| + |b| is zero contribute 0 to the sum, as in
+ * the scalar implementation. The zero-denominator lanes are identified
+ * before the reciprocal is taken, so the result does not depend on what the
+ * reciprocal helper returns for a zero input.
+ */
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    _Float16       accum = 0.0f16;
+    uint32_t         blkCnt;
+    f16x8_t         a, b, c, accumV;
+    mve_pred16_t    zeroDen;
+
+    accumV = vdupq_n_f16(0.0f);
+
+    /* Complete vectors of 8 values. */
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        /* Numerator: |a - b| */
+        c = vabdq(a, b);
+
+        /* Denominator: |a| + |b| */
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /*
+         * The denominator is zero when a and b have both the same lane at
+         * zero. Remember those lanes before computing the reciprocal.
+         */
+        zeroDen = vcmpeqq(a, 0.0f);
+        a = vrecip_hiprec_f16(a);
+
+        /*
+         * Force result of a division by 0 to 0. It is the behavior of the
+         * sklearn canberra function.
+         */
+        a = vdupq_m_n_f16(a, 0.0f, zeroDen);
+        c = vmulq(c, a);
+        accumV = vaddq(accumV, c);
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    /* Remaining 1..7 values, handled with a predicate on the active lanes. */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U) {
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+
+        a = vldrhq_z_f16(pA, p0);
+        b = vldrhq_z_f16(pB, p0);
+
+        /* Numerator: |a - b| */
+        c = vabdq(a, b);
+
+        /* Denominator: |a| + |b| */
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /*
+         * The denominator is zero when a and b have both the same lane at
+         * zero. Remember those lanes before computing the reciprocal.
+         */
+        zeroDen = vcmpeqq(a, 0.0f);
+        a = vrecip_hiprec_f16(a);
+
+        /*
+         * Force result of a division by 0 to 0. It is the behavior of the
+         * sklearn canberra function.
+         */
+        a = vdupq_m_n_f16(a, 0.0f, zeroDen);
+        c = vmulq(c, a);
+
+        /* Only the lanes selected by p0 are accumulated. */
+        accumV = vaddq_m(accumV, accumV, c, p0);
+    }
+
+    /* Reduce the lanes of the accumulator to a scalar. */
+    accum = vecAddAcrossF16Mve(accumV);
+
+    return (accum);
+}
+
+
+#else
+/*
+ * Scalar half-precision variant: returns the sum over blockSize elements of
+ * |a - b| / (|a| + |b|). Pairs where both values are zero are skipped, so
+ * they contribute nothing to the result.
+ */
+float16_t arm_canberra_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   _Float16 total = 0.0f;
+   _Float16 x, y, absDiff, absSum;
+   uint32_t i;
+
+   for (i = 0; i < blockSize; i++)
+   {
+      x = pA[i];
+      y = pB[i];
+
+      /* Absolute values are taken in single precision and narrowed back. */
+      absDiff = (_Float16)fabsf((float32_t)(x - y));
+      absSum = (_Float16)fabsf((float32_t)x) + (_Float16)fabsf((float32_t)y);
+
+      /* Both inputs zero: the term would be 0/0, leave it out. */
+      if ((x == 0.0f16) && (y == 0.0f16))
+      {
+         continue;
+      }
+
+      total += (absDiff / absSum);
+   }
+   return(total);
+}
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Canberra group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_canberra_distance_f32.c
@ -0,0 +1,222 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_canberra_distance_f32.c
+ * Description:  Canberra distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup Canberra
+  @{
+ */
+
+
+/**
+ * @brief        Canberra distance between two vectors
+ *
+ * This function may divide by zero when samples pA[i] and pB[i] are both zero.
+ * The result of the computation will still be correct, so the division by zero
+ * may be ignored.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+/*
+ * Helium (MVE) Canberra distance.
+ *
+ * Four float32 lanes are processed per iteration; a final predicated step
+ * handles the remaining (blockSize & 3) samples. For each lane the term
+ * |a-b| * 1/(|a|+|b|) is added to a vector accumulator which is reduced to a
+ * scalar at the end.
+ *
+ * Lanes whose denominator |a|+|b| is zero must contribute 0 (sklearn
+ * behavior). The zero test is therefore made on the denominator itself,
+ * before it is inverted, exactly as the NEON implementation does. Testing the
+ * reciprocal instead cannot detect those lanes unless the reciprocal helper
+ * happens to return 0 for a zero input.
+ * NOTE(review): vrecip_medprec_f32 is defined outside this file; its result
+ * for a zero input is not relied upon here.
+ */
+float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    float32_t       accum = 0.0f;
+    uint32_t        blkCnt;
+    f32x4_t         a, b, c, accumV;
+    mve_pred16_t    isZero;     /* lanes whose denominator is exactly zero */
+
+    accumV = vdupq_n_f32(0.0f);
+
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        /* Numerator: |a - b| */
+        c = vabdq(a, b);
+
+        /* Denominator: |a| + |b| */
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /*
+         * Remember which lanes have a zero denominator before inverting:
+         * this happens when a and b have both the same lane at zero.
+         */
+        isZero = vcmpeqq(a, 0.0f);
+        a = vrecip_medprec_f32(a);
+
+        /*
+         * Force result of a division by 0 to 0. This is the behavior of the
+         * sklearn canberra function.
+         */
+        a = vdupq_m_n_f32(a, 0.0f, isZero);
+        c = vmulq(c, a);
+        accumV = vaddq(accumV, c);
+
+        pA += 4;
+        pB += 4;
+        blkCnt--;
+    }
+
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0U) {
+        /* Predicate enabling only the first blkCnt lanes. */
+        mve_pred16_t    p0 = vctp32q(blkCnt);
+
+        a = vldrwq_z_f32(pA, p0);
+        b = vldrwq_z_f32(pB, p0);
+
+        c = vabdq(a, b);
+
+        a = vabsq(a);
+        b = vabsq(b);
+        a = vaddq(a, b);
+
+        /*
+         * Same zero-denominator handling as in the main loop. Lanes that were
+         * zero-filled by the predicated loads are also flagged here.
+         */
+        isZero = vcmpeqq(a, 0.0f);
+        a = vrecip_medprec_f32(a);
+        a = vdupq_m_n_f32(a, 0.0f, isZero);
+        c = vmulq(c, a);
+
+        /* Only the active lanes are accumulated. */
+        accumV = vaddq_m(accumV, accumV, c, p0);
+    }
+
+    accum = vecAddAcrossF32Mve(accumV);
+
+    return (accum);
+}
+
+#else
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+/*
+ * NEON Canberra distance.
+ *
+ * Groups of four samples are handled with vector instructions and the
+ * remaining (blockSize & 3) samples with scalar code. Lanes where
+ * |a|+|b| == 0 are detected before the reciprocal is taken and their
+ * reciprocal is cleared to 0, so they contribute nothing to the sum.
+ */
+float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   const float32x4_t zeros = vdupq_n_f32(0.0f);
+   float32x4_t sumAcc = vdupq_n_f32(0.0f);
+   float32x4_t va, vb, num, den;
+   float32x2_t halves;
+   uint32x4_t  denIsZero;
+   float32_t   result, x, y;
+   uint32_t    n;
+
+   /* Vector part: four samples per iteration. */
+   for(n = blockSize >> 2; n > 0; n--)
+   {
+        va = vld1q_f32(pA);
+        vb = vld1q_f32(pB);
+        pA += 4;
+        pB += 4;
+
+        num = vabdq_f32(va,vb);
+        den = vaddq_f32(vabsq_f32(va),vabsq_f32(vb));
+
+        /* All-ones mask in the lanes where the denominator is zero. */
+        denIsZero = vceqq_f32(den,zeros);
+
+        /*
+         * May divide by zero when va and vb have both the same lane at zero.
+         */
+        den = vinvq_f32(den);
+
+        /*
+         * Force result of a division by 0 to 0 by clearing every bit of the
+         * flagged lanes. This is the behavior of the sklearn canberra function.
+         */
+        den = vreinterpretq_f32_s32(vbicq_s32(vreinterpretq_s32_f32(den),vreinterpretq_s32_u32(denIsZero)));
+
+        sumAcc = vaddq_f32(sumAcc,vmulq_f32(num,den));
+   }
+
+   /* Horizontal reduction of the four partial sums. */
+   halves = vpadd_f32(vget_low_f32(sumAcc),vget_high_f32(sumAcc));
+   result = vget_lane_f32(halves, 0) + vget_lane_f32(halves, 1);
+
+   /* Scalar tail: up to three remaining samples. */
+   for(n = blockSize & 3; n > 0; n--)
+   {
+      x = *pA++;
+      y = *pB++;
+
+      if ((x != 0.0f) || (y != 0.0f))
+      {
+         result += (fabsf(x - y) / (fabsf(x) + fabsf(y)));
+      }
+   }
+   return(result);
+}
+
+#else
+/*
+ * Scalar Canberra distance: sum over i of |pA[i]-pB[i]| / (|pA[i]|+|pB[i]|).
+ * A pair where both samples are zero is skipped, so it adds nothing to the
+ * result and the 0/0 division is never evaluated.
+ */
+float32_t arm_canberra_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32_t total=0.0f;
+   uint32_t i;
+
+   for(i = 0U; i < blockSize; i++)
+   {
+      const float32_t x = pA[i];
+      const float32_t y = pB[i];
+
+      /* Both samples zero: nothing to accumulate for this index. */
+      if ((x == 0.0f) && (y == 0.0f))
+      {
+         continue;
+      }
+
+      total += (fabsf(x - y) / (fabsf(x) + fabsf(y)));
+   }
+   return(total);
+}
+#endif
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Canberra group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f16.c
@ -0,0 +1,146 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_chebyshev_distance_f16.c
+ * Description:  Chebyshev distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup Chebyshev Chebyshev distance
+
+  Chebyshev distance
+ */
+
+/**
+  @addtogroup Chebyshev
+  @{
+ */
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+/*
+ * Helium (MVE) Chebyshev distance: largest |pA[i] - pB[i]|.
+ *
+ * A per-lane running maximum of the absolute differences is kept over groups
+ * of eight samples, the last (blockSize & 7) samples are merged in under a
+ * tail predicate, and the lanes are finally reduced to one value.
+ * The running maximum starts at 0, so an empty input yields 0.
+ */
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    const uint32_t  tailLen = blockSize & 7;    /* samples left after full vectors */
+    uint32_t        vecCnt;                     /* full-vector loop counter */
+    f16x8_t         va, vb;
+    f16x8_t         laneMax = vdupq_n_f16(0.0);
+    float16_t       seed = 0.0f16;
+
+    for (vecCnt = blockSize >> 3; vecCnt > 0U; vecCnt--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        /* Per-lane maximum of absolute values: |va - vb| against the running max. */
+        laneMax = vmaxnmaq(vsubq(va, vb), laneMax);
+    }
+
+    if (tailLen > 0U) {
+        /* Predicate enabling only the first tailLen lanes. */
+        mve_pred16_t    tailPred = vctp16q(tailLen);
+
+        va = vldrhq_z_f16(pA, tailPred);
+        vb = vldrhq_z_f16(pB, tailPred);
+
+        /* Update the running max in the active lanes only. */
+        laneMax = vmaxnmaq_m(laneMax, vsubq(va, vb), tailPred);
+    }
+
+    /* Maximum absolute value across the lanes, seeded with 0. */
+    return vmaxnmavq(seed, laneMax);
+}
+
+#else
+/*
+ * Scalar Chebyshev distance: largest |pA[i] - pB[i]| over the block.
+ *
+ * The maximum is seeded with the first difference and then compared against
+ * every following one. An empty block returns 0, like the MVE
+ * implementation in this file; without that guard the first sample would be
+ * read out of bounds and the unsigned counter would wrap around.
+ */
+float16_t arm_chebyshev_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   _Float16 diff=0.0f,  maxVal,tmpA, tmpB;
+
+   /* Nothing to compare: avoid reading pA[0]/pB[0] and wrapping blockSize. */
+   if (blockSize == 0U)
+   {
+      return(0.0f16);
+   }
+
+   /* Seed the maximum with the first pair. */
+   tmpA = *pA++;
+   tmpB = *pB++;
+   diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+   maxVal = diff;
+   blockSize--;
+
+   while(blockSize > 0)
+   {
+      tmpA = *pA++;
+      tmpB = *pB++;
+      diff = (_Float16)fabsf((float32_t)((_Float16)tmpA - (_Float16)tmpB));
+      if ((_Float16)diff > (_Float16)maxVal)
+      {
+        maxVal = diff;
+      }
+      blockSize --;
+   }
+  
+   return(maxVal);
+}
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Chebyshev group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f32.c
@ -0,0 +1,213 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_chebyshev_distance_f32.c
+ * Description:  Chebyshev distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup Chebyshev
+  @{
+ */
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+/*
+ * Helium (MVE) Chebyshev distance: largest |pA[i] - pB[i]|.
+ *
+ * A per-lane running maximum of the absolute differences is kept over groups
+ * of four samples, the last (blockSize & 3) samples are merged in under a
+ * tail predicate, and the lanes are finally reduced to one value.
+ * The running maximum starts at 0, so an empty input yields 0.
+ */
+float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    const uint32_t  tailLen = blockSize & 3;    /* samples left after full vectors */
+    uint32_t        vecCnt;                     /* full-vector loop counter */
+    f32x4_t         va, vb;
+    f32x4_t         laneMax = vdupq_n_f32(0.0);
+    float32_t       seed = 0.0;
+
+    for (vecCnt = blockSize >> 2; vecCnt > 0U; vecCnt--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+        pA += 4;
+        pB += 4;
+
+        /* Per-lane maximum of absolute values: |va - vb| against the running max. */
+        laneMax = vmaxnmaq(vsubq(va, vb), laneMax);
+    }
+
+    if (tailLen > 0U) {
+        /* Predicate enabling only the first tailLen lanes. */
+        mve_pred16_t    tailPred = vctp32q(tailLen);
+
+        va = vldrwq_z_f32(pA, tailPred);
+        vb = vldrwq_z_f32(pB, tailPred);
+
+        /* Update the running max in the active lanes only. */
+        laneMax = vmaxnmaq_m(laneMax, vsubq(va, vb), tailPred);
+    }
+
+    /* Maximum absolute value across the lanes, seeded with 0. */
+    return vmaxnmavq(seed, laneMax);
+}
+
+#else
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+/*
+ * NEON Chebyshev distance: largest |pA[i] - pB[i]| over the block.
+ *
+ * Blocks of fewer than four samples are handled entirely in scalar code.
+ * Longer blocks seed a per-lane maximum with the first four differences,
+ * fold in the following groups of four, reduce the lanes to a scalar and
+ * finish the remaining (up to three) samples in scalar code.
+ *
+ * An empty block returns 0; without that guard the short-block path would
+ * read pA[0]/pB[0] out of bounds and the unsigned counter would wrap around.
+ */
+float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32_t diff=0.0f, maxVal=0.0f, tmpA, tmpB;
+   uint32_t blkCnt;
+   float32x4_t a,b,diffV, maxValV;
+   float32x2_t maxValV2;
+
+   /* Nothing to compare. */
+   if (blockSize == 0U)
+   {
+      return(maxVal);
+   }
+
+   if (blockSize <= 3)
+   {
+      /* Short block: seed the maximum with the first pair. */
+      tmpA = *pA++;
+      tmpB = *pB++;
+      diff = fabsf(tmpA - tmpB);
+      maxVal = diff;
+      blockSize--;
+   
+      while(blockSize > 0)
+      {
+         tmpA = *pA++;
+         tmpB = *pB++;
+         diff = fabsf(tmpA - tmpB);
+         if (diff > maxVal)
+         {
+           maxVal = diff;
+         }
+         blockSize --;
+      }
+   }
+   else
+   {
+
+      /* Seed the per-lane maximum with the first four differences. */
+      a = vld1q_f32(pA);
+      b = vld1q_f32(pB);
+      pA += 4;
+      pB += 4;
+
+      diffV = vabdq_f32(a,b);
+
+      blockSize -= 4;
+
+      maxValV = diffV;
+
+  
+      /* Remaining full groups of four samples. */
+      blkCnt = blockSize >> 2;
+      while(blkCnt > 0)
+      {
+           a = vld1q_f32(pA);
+           b = vld1q_f32(pB);
+   
+           diffV = vabdq_f32(a,b);
+           maxValV = vmaxq_f32(maxValV, diffV);
+   
+           pA += 4;
+           pB += 4;
+           blkCnt --;
+      }
+      /* Reduce the four lanes to a single maximum with two pairwise steps. */
+      maxValV2 = vpmax_f32(vget_low_f32(maxValV),vget_high_f32(maxValV));
+      maxValV2 = vpmax_f32(maxValV2,maxValV2);
+      maxVal = vget_lane_f32(maxValV2,0);
+
+  
+      /* Scalar tail: up to three remaining samples. */
+      blkCnt = blockSize & 3;
+      while(blkCnt > 0)
+      {
+         tmpA = *pA++;
+         tmpB = *pB++;
+         diff = fabsf(tmpA - tmpB);
+         if (diff > maxVal)
+         {
+            maxVal = diff;
+         }
+         blkCnt --;
+      }
+   }
+   return(maxVal);
+}
+
+#else
+/*
+ * Scalar Chebyshev distance: largest |pA[i] - pB[i]| over the block.
+ *
+ * The maximum is seeded with the first difference and then compared against
+ * every following one. An empty block returns 0, like the MVE
+ * implementation in this file; without that guard the first sample would be
+ * read out of bounds and the unsigned counter would wrap around.
+ */
+float32_t arm_chebyshev_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32_t diff=0.0f,  maxVal,tmpA, tmpB;
+
+   /* Nothing to compare: avoid reading pA[0]/pB[0] and wrapping blockSize. */
+   if (blockSize == 0U)
+   {
+      return(0.0f);
+   }
+
+   /* Seed the maximum with the first pair. */
+   tmpA = *pA++;
+   tmpB = *pB++;
+   diff = fabsf(tmpA - tmpB);
+   maxVal = diff;
+   blockSize--;
+
+   while(blockSize > 0)
+   {
+      tmpA = *pA++;
+      tmpB = *pB++;
+      diff = fabsf(tmpA - tmpB);
+      if (diff > maxVal)
+      {
+        maxVal = diff;
+      }
+      blockSize --;
+   }
+  
+   return(maxVal);
+}
+#endif
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Chebyshev group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_chebyshev_distance_f64.c
@ -0,0 +1,76 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_chebyshev_distance_f64.c
+ * Description:  Chebyshev distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup Chebyshev
+  @{
+ */
+
+
+/**
+ * @brief        Chebyshev distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+/*
+ * Double-precision Chebyshev distance: largest |pA[i] - pB[i]| over the block.
+ *
+ * The maximum is seeded with the first difference and then compared against
+ * every following one. An empty block returns 0; without that guard the
+ * first sample would be read out of bounds and the unsigned counter would
+ * wrap around.
+ */
+float64_t arm_chebyshev_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t diff=0.,  maxVal,tmpA, tmpB;
+
+   /* Nothing to compare: avoid reading pA[0]/pB[0] and wrapping blockSize. */
+   if (blockSize == 0U)
+   {
+      return(0.);
+   }
+
+   /* Seed the maximum with the first pair. */
+   tmpA = *pA++;
+   tmpB = *pB++;
+   diff = fabs(tmpA - tmpB);
+   maxVal = diff;
+   blockSize--;
+
+   while(blockSize > 0)
+   {
+      tmpA = *pA++;
+      tmpB = *pB++;
+      diff = fabs(tmpA - tmpB);
+      if (diff > maxVal)
+      {
+        maxVal = diff;
+      }
+      blockSize --;
+   }
+  
+   return(maxVal);
+}
+
+/**
+ * @} end of Chebyshev group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f16.c
@ -0,0 +1,128 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cityblock_distance_f16.c
+ * Description:  Cityblock (Manhattan) distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup Manhattan Cityblock (Manhattan) distance
+
+  Cityblock (Manhattan) distance
+ */
+
+/**
+  @addtogroup Manhattan
+  @{
+ */
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+/*
+ * Helium (MVE) cityblock distance: sum of |pA[i] - pB[i]|.
+ *
+ * Absolute differences are accumulated per lane over groups of eight
+ * samples; the last (blockSize & 7) samples are added under a tail
+ * predicate, and the lanes are summed at the end.
+ */
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    const uint32_t  tailLen = blockSize & 7;    /* samples left after full vectors */
+    uint32_t        vecCnt;                     /* full-vector loop counter */
+    f16x8_t         va, vb;
+    f16x8_t         sumV = vdupq_n_f16(0.0f);
+
+    for (vecCnt = blockSize >> 3; vecCnt > 0U; vecCnt--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+        pA += 8;
+        pB += 8;
+
+        sumV = vaddq(sumV, vabdq(va, vb));
+    }
+
+    if (tailLen > 0U) {
+        /* Predicate enabling only the first tailLen lanes. */
+        mve_pred16_t    tailPred = vctp16q(tailLen);
+
+        va = vldrhq_z_f16(pA, tailPred);
+        vb = vldrhq_z_f16(pB, tailPred);
+
+        /* Accumulate the active lanes only; the others keep their sum. */
+        sumV = vaddq_m(sumV, sumV, vabdq(va, vb), tailPred);
+    }
+
+    return vecAddAcrossF16Mve(sumV);
+}
+
+#else
+/*
+ * Scalar cityblock distance: sum of |pA[i] - pB[i]| over the block.
+ * Returns 0 for an empty block.
+ */
+float16_t arm_cityblock_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+   _Float16 total = 0.0f16;
+   uint32_t i;
+
+   for(i = 0U; i < blockSize; i++)
+   {
+      const _Float16 x = pA[i];
+      const _Float16 y = pB[i];
+
+      /* Absolute value is taken through the float32 fabsf and narrowed back. */
+      total += (_Float16)fabsf((float32_t)((_Float16)x - (_Float16)y));
+   }
+
+   return(total);
+}
+#endif
+
+/**
+ * @} end of Manhattan group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f32.c
@ -0,0 +1,157 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cityblock_distance_f32.c
+ * Description:  Cityblock (Manhattan) distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+/**
+  @addtogroup Manhattan
+  @{
+ */
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+/*
+ * Helium (MVE) cityblock distance: sum of |pA[i] - pB[i]|.
+ *
+ * Absolute differences are accumulated per lane over groups of four
+ * samples; the last (blockSize & 3) samples are added under a tail
+ * predicate, and the lanes are summed at the end.
+ */
+float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    const uint32_t  tailLen = blockSize & 3;    /* samples left after full vectors */
+    uint32_t        vecCnt;                     /* full-vector loop counter */
+    f32x4_t         va, vb;
+    f32x4_t         sumV = vdupq_n_f32(0.0f);
+
+    for (vecCnt = blockSize >> 2; vecCnt > 0U; vecCnt--) {
+        va = vld1q(pA);
+        vb = vld1q(pB);
+        pA += 4;
+        pB += 4;
+
+        sumV = vaddq(sumV, vabdq(va, vb));
+    }
+
+    if (tailLen > 0U) {
+        /* Predicate enabling only the first tailLen lanes. */
+        mve_pred16_t    tailPred = vctp32q(tailLen);
+
+        va = vldrwq_z_f32(pA, tailPred);
+        vb = vldrwq_z_f32(pB, tailPred);
+
+        /* Accumulate the active lanes only; the others keep their sum. */
+        sumV = vaddq_m(sumV, sumV, vabdq(va, vb), tailPred);
+    }
+
+    return vecAddAcrossF32Mve(sumV);
+}
+
+#else
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+/*
+ * NEON cityblock distance: sum of |pA[i] - pB[i]| over the block.
+ *
+ * Groups of four samples are accumulated per lane, the lanes are reduced to
+ * a scalar with two pairwise additions, and the remaining (blockSize & 3)
+ * samples are added in scalar code.
+ */
+float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32x4_t sumV = vdupq_n_f32(0.0f);
+   float32x4_t va, vb;
+   float32x2_t halves;
+   float32_t   total, x, y;
+   uint32_t    n;
+
+   /* Vector part: four samples per iteration. */
+   for(n = blockSize >> 2; n > 0; n--)
+   {
+        va = vld1q_f32(pA);
+        vb = vld1q_f32(pB);
+        pA += 4;
+        pB += 4;
+
+        sumV = vaddq_f32(sumV, vabdq_f32(va,vb));
+   }
+
+   /* Horizontal reduction of the four partial sums. */
+   halves = vpadd_f32(vget_low_f32(sumV),vget_high_f32(sumV));
+   halves = vpadd_f32(halves,halves);
+   total = vget_lane_f32(halves,0);
+
+   /* Scalar tail: up to three remaining samples. */
+   for(n = blockSize & 3; n > 0; n--)
+   {
+      x = *pA++;
+      y = *pB++;
+      total += fabsf(x - y);
+   }
+   return(total);
+}
+
+#else
+/*
+ * Scalar cityblock distance: sum of |pA[i] - pB[i]| over the block.
+ * Returns 0 for an empty block.
+ */
+float32_t arm_cityblock_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32_t total = 0.0f;
+   uint32_t i;
+
+   for(i = 0U; i < blockSize; i++)
+   {
+      const float32_t x = pA[i];
+      const float32_t y = pB[i];
+
+      total += fabsf(x - y);
+   }
+
+   return(total);
+}
+#endif
+#endif
+
+/**
+ * @} end of Manhattan group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f64.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cityblock_distance_f64.c
@ -0,0 +1,67 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cityblock_distance_f64.c
+ * Description:  Cityblock (Manhattan) distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+/**
+  @addtogroup Manhattan
+  @{
+ */
+
+
+/**
+ * @brief        Cityblock (Manhattan) distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+/*
+ * Double-precision cityblock distance: sum of |pA[i] - pB[i]| over the block.
+ * Returns 0 for an empty block.
+ */
+float64_t arm_cityblock_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t total = 0.;
+   uint32_t i;
+
+   for(i = 0U; i < blockSize; i++)
+   {
+      const float64_t x = pA[i];
+      const float64_t y = pB[i];
+
+      total += fabs(x - y);
+   }
+
+   return(total);
+}
+
+/**
+ * @} end of Manhattan group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f16.c
@ -0,0 +1,99 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_correlation_distance_f16.c
+ * Description:  Correlation distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup Correlation Correlation distance
+
+  Correlation distance
+ */
+
+/**
+  @addtogroup Correlation
+  @{
+ */
+
+
+/**
+ * @brief        Correlation distance between two vectors
+ *
+ * The input vectors are modified in place !
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+/*
+ * Correlation distance: 1 - dot(A',B')/N / sqrt(power(A')/N * power(B')/N),
+ * where A' and B' are the inputs with their mean removed.
+ *
+ * Both input buffers are overwritten with their mean-removed values.
+ * NOTE(review): blockSize is converted to _Float16 for the normalization;
+ * confirm callers keep it within the half-precision range. A zero blockSize
+ * or a constant input vector leads to a division by zero.
+ */
+float16_t arm_correlation_distance_f16(float16_t *pA,float16_t *pB, uint32_t blockSize)
+{
+    float16_t meanA, meanB;
+    float16_t powerA, powerB;
+    float16_t dotAB, norm;
+
+    /* Means of both vectors (computed before either buffer is modified). */
+    arm_mean_f16(pA, blockSize, &meanA);
+    arm_mean_f16(pB, blockSize, &meanB);
+
+    /* Remove the mean in place. */
+    arm_offset_f16(pA, -(_Float16)meanA, pA, blockSize);
+    arm_offset_f16(pB, -(_Float16)meanB, pB, blockSize);
+
+    /* Energy of each centered vector and their dot product. */
+    arm_power_f16(pA, blockSize, &powerA);
+    arm_power_f16(pB, blockSize, &powerB);
+    arm_dot_prod_f16(pA,pB,blockSize,&dotAB);
+
+    /* Normalize all three quantities by the vector length. */
+    dotAB = (_Float16)dotAB / (_Float16)blockSize;
+    powerA = (_Float16)powerA / (_Float16)blockSize;
+    powerB = (_Float16)powerB / (_Float16)blockSize;
+
+    arm_sqrt_f16((_Float16)powerA * (_Float16)powerB,&norm);
+
+    return(1.0f16 - (_Float16)dotAB / (_Float16)norm);
+}
+
+
+
+/**
+ * @} end of Correlation group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_correlation_distance_f32.c
@ -0,0 +1,84 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_correlation_distance_f32.c
+ * Description:  Correlation distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup Correlation
+  @{
+ */
+
+
+/**
+ * @brief        Correlation distance between two vectors
+ *
+ * The input vectors are modified in place !
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+/*
+ * Correlation distance: 1 - dot(A',B')/N / sqrt(power(A')/N * power(B')/N),
+ * where A' and B' are the inputs with their mean removed.
+ *
+ * Both input buffers are overwritten with their mean-removed values.
+ * NOTE(review): a zero blockSize or a constant input vector leads to a
+ * division by zero.
+ */
+float32_t arm_correlation_distance_f32(float32_t *pA,float32_t *pB, uint32_t blockSize)
+{
+    float32_t meanA, meanB;
+    float32_t powerA, powerB;
+    float32_t dotAB, norm;
+
+    /* Means of both vectors (computed before either buffer is modified). */
+    arm_mean_f32(pA, blockSize, &meanA);
+    arm_mean_f32(pB, blockSize, &meanB);
+
+    /* Remove the mean in place. */
+    arm_offset_f32(pA, -meanA, pA, blockSize);
+    arm_offset_f32(pB, -meanB, pB, blockSize);
+
+    /* Energy of each centered vector and their dot product. */
+    arm_power_f32(pA, blockSize, &powerA);
+    arm_power_f32(pB, blockSize, &powerB);
+    arm_dot_prod_f32(pA,pB,blockSize,&dotAB);
+
+    /* Normalize all three quantities by the vector length. */
+    dotAB = dotAB / blockSize;
+    powerA = powerA / blockSize;
+    powerB = powerB / blockSize;
+
+    arm_sqrt_f32(powerA * powerB,&norm);
+
+    return(1.0f - dotAB / norm);
+}
+
+
+
+/**
+ * @} end of Correlation group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f16.c
@ -0,0 +1,88 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cosine_distance_f16.c
+ * Description:  Cosine distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup CosineDist Cosine distance
+
+  Cosine distance
+ */
+
+
+/**
+  @addtogroup CosineDist
+  @{
+ */
+
+
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ * @par           Description
+ *                  cosine_distance(u,v) is 1 - u . v / (Norm(u) Norm(v))
+ */
+
+float16_t arm_cosine_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{   /* Returns 1 - (pA . pB) / sqrt(power(pA) * power(pB)); the inputs are only read. */
+    float16_t pwra,pwrb,dot,tmp;
+    /* pwra / pwrb receive the arm_power_f16 result for each input vector. */
+    arm_power_f16(pA, blockSize, &pwra);
+    arm_power_f16(pB, blockSize, &pwrb);
+    /* dot receives the arm_dot_prod_f16 result for the pair of vectors. */
+    arm_dot_prod_f16(pA,pB,blockSize,&dot);
+    /* tmp = sqrt(pwra * pwrb). NOTE(review): tmp is not checked for zero before the division below. */
+    arm_sqrt_f16((_Float16)pwra * (_Float16)pwrb, &tmp);
+    return(1.0f16 - (_Float16)dot / (_Float16)tmp);
+
+}
+
+
+
+/**
+ * @} end of CosineDist group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f32.c
@ -0,0 +1,72 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cosine_distance_f32.c
+ * Description:  Cosine distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup CosineDist
+  @{
+ */
+
+
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ * @par           Description
+ *                  cosine_distance(u,v) is 1 - u . v / (Norm(u) Norm(v))
+ */
+
+float32_t arm_cosine_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    float32_t powerA;        /* result of arm_power_f32 on pA    */
+    float32_t powerB;        /* result of arm_power_f32 on pB    */
+    float32_t inner;         /* dot product of pA and pB         */
+    float32_t normProduct;   /* sqrt(powerA * powerB)            */
+
+    arm_power_f32(pA, blockSize, &powerA);
+    arm_power_f32(pB, blockSize, &powerB);
+    arm_dot_prod_f32(pA, pB, blockSize, &inner);
+
+    /* Product of the two norms, obtained from a single square root. */
+    arm_sqrt_f32(powerA * powerB, &normProduct);
+
+    return 1.0f - (inner / normProduct);
+}
+
+
+
+/**
+ * @} end of CosineDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f64.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_cosine_distance_f64.c
@ -0,0 +1,70 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_cosine_distance_f64.c
+ * Description:  Cosine distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup CosineDist
+  @{
+ */
+
+
+
+/**
+ * @brief        Cosine distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+float64_t arm_cosine_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+    float64_t powerA;        /* result of arm_power_f64 on pA    */
+    float64_t powerB;        /* result of arm_power_f64 on pB    */
+    float64_t inner;         /* dot product of pA and pB         */
+    float64_t normProduct;   /* sqrt(powerA * powerB)            */
+
+    arm_power_f64(pA, blockSize, &powerA);
+    arm_power_f64(pB, blockSize, &powerB);
+    arm_dot_prod_f64(pA, pB, blockSize, &inner);
+
+    /* Double precision uses the C library square root directly. */
+    normProduct = sqrt(powerA * powerB);
+
+    return 1. - (inner / normProduct);
+}
+
+
+
+/**
+ * @} end of CosineDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_dice_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_dice_distance.c
@ -0,0 +1,92 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_dice_distance.c
+ * Description:  Dice distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+extern void arm_boolean_distance_TT_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+
+/**
+ * @ingroup groupDistance
+ * @{
+ */
+
+/**
+ * @defgroup BoolDist Boolean Distances
+ *
+ * Distances between two vectors of boolean values.
+ *
+ * Booleans are packed in 32 bit words.
+ * numberOfBooleans argument is the number of booleans and not the
+ * number of words.
+ *
+ * Bits are packed in big-endian mode (because of the behavior of numpy packbits
+ * in versions < 1.17)
+ */
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+/**
+ * @brief        Dice distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_dice_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t bothTrue = 0;    /* positions set in pA and in pB   */
+    uint32_t onlyInA = 0;     /* positions set in pA only        */
+    uint32_t onlyInB = 0;     /* positions set in pB only        */
+    uint32_t mismatches;
+    double numerator;
+    double denominator;
+
+    arm_boolean_distance_TT_TF_FT(pA, pB, numberOfBools, &bothTrue, &onlyInA, &onlyInB);
+
+    /* Dice dissimilarity: (TF + FT) / (2 * TT + FT + TF), evaluated in double. */
+    mismatches = onlyInA + onlyInB;
+    numerator = 1.0 * mismatches;
+    denominator = 2.0 * bothTrue + onlyInB + onlyInA;
+
+    return numerator / denominator;
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
+
+/**
+ * @} end of groupDistance group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f16.c
@ -0,0 +1,131 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_euclidean_distance_f16.c
+ * Description:  Euclidean distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup Euclidean Euclidean distance
+
+  Euclidean distance
+ */
+
+
+/**
+  @addtogroup Euclidean
+  @{
+ */
+
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{   /* Helium (MVE) variant: processes 8 half-precision lanes per step. */
+    uint32_t        blkCnt;
+    float16_t       tmp;
+    f16x8_t         a, b, accumV, tempV;
+    /* accumV holds one running sum of squared differences per lane. */
+    accumV = vdupq_n_f16(0.0f);
+    /* Main loop: one iteration per complete group of 8 elements. */
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        tempV = vsubq(a, b);
+        accumV = vfmaq(accumV, tempV, tempV);   /* accumV += (a - b)^2 */
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    /*
+     * tail
+     * (will be merged thru tail predication)
+     */
+    blkCnt = blockSize & 7;                     /* 0..7 leftover elements */
+    if (blkCnt > 0U) {
+        mve_pred16_t    p0 = vctp16q(blkCnt);   /* predicate covering the first blkCnt lanes */
+
+        a = vldrhq_z_f16(pA, p0);               /* inactive lanes load as zero */
+        b = vldrhq_z_f16(pB, p0);
+
+        tempV = vsubq(a, b);
+        accumV = vfmaq_m(accumV, tempV, tempV, p0);   /* only active lanes are updated */
+    }
+    /* vecAddAcrossF16Mve: presumably the lane-wise sum of accumV (project helper) - confirm. */
+    arm_sqrt_f16(vecAddAcrossF16Mve(accumV), &tmp);
+    return (tmp);
+}
+
+#else
+float16_t arm_euclidean_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{  /* Scalar fallback: accumulates squared differences one element at a time. */
+   _Float16 accum=0.0f,tmp;
+   float16_t result;
+   /* blockSize doubles as the loop counter; pA and pB are advanced as they are read. */
+   while(blockSize > 0)
+   {
+      tmp = (_Float16)*pA++ - (_Float16)*pB++;
+      accum += SQ(tmp);   /* SQ is a project macro; presumably tmp * tmp - confirm */
+      blockSize --;
+   }
+   arm_sqrt_f16(accum,&result);   /* the distance is the square root of the accumulated sum */
+   return(result);
+}
+
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Euclidean group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f32.c
@ -0,0 +1,152 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_euclidean_distance_f32.c
+ * Description:  Euclidean distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup Euclidean
+  @{
+ */
+
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{   /* Helium (MVE) variant: processes 4 single-precision lanes per step. */
+    uint32_t        blkCnt;
+    float32_t       tmp;
+    f32x4_t         a, b, accumV, tempV;
+    /* accumV holds one running sum of squared differences per lane. */
+    accumV = vdupq_n_f32(0.0f);
+    /* Main loop: one iteration per complete group of 4 elements. */
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0U) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        tempV = vsubq(a, b);
+        accumV = vfmaq(accumV, tempV, tempV);   /* accumV += (a - b)^2 */
+
+        pA += 4;
+        pB += 4;
+        blkCnt--;
+    }
+
+    /*
+     * tail
+     * (will be merged thru tail predication)
+     */
+    blkCnt = blockSize & 3;                     /* 0..3 leftover elements */
+    if (blkCnt > 0U) {
+        mve_pred16_t    p0 = vctp32q(blkCnt);   /* predicate covering the first blkCnt lanes */
+
+        a = vldrwq_z_f32(pA, p0);               /* inactive lanes load as zero */
+        b = vldrwq_z_f32(pB, p0);
+
+        tempV = vsubq(a, b);
+        accumV = vfmaq_m(accumV, tempV, tempV, p0);   /* only active lanes are updated */
+    }
+    /* vecAddAcrossF32Mve: presumably the lane-wise sum of accumV (project helper) - confirm. */
+    arm_sqrt_f32(vecAddAcrossF32Mve(accumV), &tmp);
+    return (tmp);
+}
+#else
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32_t sumSq;          /* scalar sum of squared differences      */
+   float32_t delta;          /* difference of one leftover element     */
+   float32_t distance;
+   uint32_t remaining;
+   float32x4_t va, vb, vdiff, vsum;
+   float32x2_t pairSum;
+
+   /* NEON part: four squared differences are accumulated per iteration. */
+   vsum = vdupq_n_f32(0.0f);
+   for (remaining = blockSize >> 2; remaining > 0; remaining--)
+   {
+        va = vld1q_f32(pA);
+        vb = vld1q_f32(pB);
+
+        vdiff = vsubq_f32(va, vb);
+        vsum = vmlaq_f32(vsum, vdiff, vdiff);
+
+        pA += 4;
+        pB += 4;
+   }
+
+   /* Collapse the four lanes into one scalar. */
+   pairSum = vpadd_f32(vget_low_f32(vsum), vget_high_f32(vsum));
+   sumSq = vget_lane_f32(pairSum, 0) + vget_lane_f32(pairSum, 1);
+
+   /* Scalar part: the 0..3 elements that did not fill a vector. */
+   for (remaining = blockSize & 3; remaining > 0; remaining--)
+   {
+      delta = *pA++ - *pB++;
+      sumSq += SQ(delta);
+   }
+
+   arm_sqrt_f32(sumSq, &distance);
+   return distance;
+}
+
+#else
+float32_t arm_euclidean_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+   float32_t sumSq = 0.0f;   /* running sum of squared differences */
+   float32_t distance;
+   uint32_t i;
+
+   /* Portable scalar path: one element per iteration, in index order. */
+   for (i = 0U; i < blockSize; i++)
+   {
+      const float32_t delta = pA[i] - pB[i];
+
+      sumSq += SQ(delta);
+   }
+
+   arm_sqrt_f32(sumSq, &distance);
+   return distance;
+}
+#endif
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Euclidean group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f64.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_euclidean_distance_f64.c
@ -0,0 +1,66 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_euclidean_distance_f64.c
+ * Description:  Euclidean distance between two vectors
+ *
+ * $Date:        13 September 2021
+ * $Revision:    V1.10.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+/**
+  @addtogroup Euclidean
+  @{
+ */
+
+
+/**
+ * @brief        Euclidean distance between two vectors
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+float64_t arm_euclidean_distance_f64(const float64_t *pA,const float64_t *pB, uint32_t blockSize)
+{
+   float64_t sumSq = 0.;     /* running sum of squared differences */
+   float64_t distance;
+   uint32_t i;
+
+   /* One element per iteration, in index order. */
+   for (i = 0U; i < blockSize; i++)
+   {
+      const float64_t delta = pA[i] - pB[i];
+
+      sumSq += SQ(delta);
+   }
+
+   /* Double precision uses the C library square root directly. */
+   distance = sqrt(sumSq);
+   return distance;
+}
+
+/**
+ * @} end of Euclidean group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_hamming_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_hamming_distance.c
@ -0,0 +1,70 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_hamming_distance.c
+ * Description:  Hamming distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+extern void arm_boolean_distance_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+
+/**
+ * @brief        Hamming distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_hamming_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t onlyInA = 0;     /* positions set in pA only */
+    uint32_t onlyInB = 0;     /* positions set in pB only */
+    uint32_t mismatches;
+
+    arm_boolean_distance_TF_FT(pA, pB, numberOfBools, &onlyInA, &onlyInB);
+
+    /* Fraction of positions where the two vectors differ, evaluated in double. */
+    mismatches = onlyInA + onlyInB;
+
+    return (1.0 * mismatches) / numberOfBools;
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_jaccard_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_jaccard_distance.c
@ -0,0 +1,72 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_jaccard_distance.c
+ * Description:  Jaccard distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+extern void arm_boolean_distance_TT_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+/**
+ * @brief        Jaccard distance between two vectors
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_jaccard_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t bothTrue = 0;    /* positions set in pA and in pB   */
+    uint32_t onlyInA = 0;     /* positions set in pA only        */
+    uint32_t onlyInB = 0;     /* positions set in pB only        */
+    uint32_t mismatches;
+    uint32_t anyTrue;
+
+    arm_boolean_distance_TT_TF_FT(pA, pB, numberOfBools, &bothTrue, &onlyInA, &onlyInB);
+
+    /* Jaccard dissimilarity: (TF + FT) / (TT + FT + TF); the quotient is taken in double. */
+    mismatches = onlyInA + onlyInB;
+    anyTrue = bothTrue + onlyInB + onlyInA;
+
+    return (1.0 * mismatches) / anyTrue;
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f16.c
@ -0,0 +1,177 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_jensenshannon_distance_f16.c
+ * Description:  Jensen-Shannon distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup JensenShannon Jensen-Shannon distance
+
+  Jensen-Shannon distance
+ */
+
+
+/**
+  @addtogroup JensenShannon
+  @{
+ */
+
+#if !defined(ARM_MATH_MVE_FLOAT16) || defined(ARM_MATH_AUTOVECTORIZE)
+/// @private
+__STATIC_INLINE float16_t rel_entr(float16_t x, float16_t y)
+{   /* Returns x * log(x / y); the logarithm is taken in single precision via logf. No special-casing of x == 0 or y == 0. */
+    return ((_Float16)x * (_Float16)logf((float32_t)((_Float16)x / (_Float16)y)));
+}
+#endif
+
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{
+    /*
+     * Helium (MVE) variant, 8 half-precision lanes per step.
+     * Accumulates a * log(a / t) + b * log(b / t) with t = (a + b) / 2 and
+     * returns sqrt(sum / 2). The reciprocal of t is computed once per block
+     * and shared by both terms.
+     */
+    uint32_t        blkCnt;
+    float16_t       tmp;
+    f16x8_t         a, b, t, invT, tmpV, accumV;
+
+    accumV = vdupq_n_f16(0.0f);
+
+    blkCnt = blockSize >> 3;
+    while (blkCnt > 0U) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        /* t = (a + b) / 2 and its reciprocal */
+        t = vaddq(a, b);
+        t = vmulq(t, 0.5f);
+        invT = vrecip_medprec_f16(t);
+
+        /* accumV += a * log(a / t) */
+        tmpV = vmulq(a, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq(accumV, a, tmpV);
+
+        /* accumV += b * log(b / t) */
+        tmpV = vmulq_f16(b, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq(accumV, b, tmpV);
+
+        pA += 8;
+        pB += 8;
+        blkCnt--;
+    }
+
+    /*
+     * tail
+     * (will be merged thru tail predication)
+     */
+    blkCnt = blockSize & 7;
+    if (blkCnt > 0U) {
+        /* Predicate covering the first blkCnt lanes only. */
+        mve_pred16_t    p0 = vctp16q(blkCnt);
+
+        a = vldrhq_z_f16(pA, p0);
+        b = vldrhq_z_f16(pB, p0);
+
+        t = vaddq(a, b);
+        t = vmulq(t, 0.5f);
+        invT = vrecip_medprec_f16(t);
+
+        /* Predicated accumulation: inactive lanes keep their previous sum. */
+        tmpV = vmulq_f16(a, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq_m_f16(accumV, a, tmpV, p0);
+
+        tmpV = vmulq_f16(b, invT);
+        tmpV = vlogq_f16(tmpV);
+        accumV = vfmaq_m_f16(accumV, b, tmpV, p0);
+
+    }
+
+    arm_sqrt_f16((_Float16)vecAddAcrossF16Mve(accumV) / 2.0f16, &tmp);
+    return (tmp);
+}
+
+#else
+
+
+/**
+ * @brief        Jensen-Shannon distance between two vectors
+ *
+ * This function is assuming that elements of second vector are > 0
+ * and 0 only when the corresponding element of first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x == 0 and y == 0,
+ * it will compute the right result (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+
+float16_t arm_jensenshannon_distance_f16(const float16_t *pA,const float16_t *pB, uint32_t blockSize)
+{   /* Scalar fallback: returns sqrt((sum rel_entr(a, m) + sum rel_entr(b, m)) / 2) with m = (a + b) / 2. */
+    _Float16 left, right,sum, tmp;
+    float16_t result;
+    uint32_t i;
+    /* left / right accumulate the rel_entr terms of pA and pB separately. */
+    left = 0.0f16; 
+    right = 0.0f16;
+    for(i=0; i < blockSize; i++)
+    {
+      tmp = ((_Float16)pA[i] + (_Float16)pB[i]) / 2.0f16;   /* midpoint of the two elements */
+      left  += (_Float16)rel_entr(pA[i], tmp);
+      right += (_Float16)rel_entr(pB[i], tmp);
+    }
+
+    /* The two partial sums are combined only after the loop. */
+    sum = left + right;
+    arm_sqrt_f16((_Float16)sum/2.0f16, &result);
+    return(result);
+
+}
+
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of JensenShannon group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_jensenshannon_distance_f32.c
@ -0,0 +1,247 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_jensenshannon_distance_f32.c
+ * Description:  Jensen-Shannon distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup JensenShannon
+  @{
+ */
+
+#if !defined(ARM_MATH_MVEF) || defined(ARM_MATH_AUTOVECTORIZE)
+/// @private
+__STATIC_INLINE float32_t rel_entr(float32_t x, float32_t y)
+{
+    /* x * log(x / y); x == 0 and y == 0 are not special-cased. */
+    const float32_t ratio = x / y;
+
+    return x * logf(ratio);
+}
+#endif
+
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    /*
+     * Helium (MVE) variant, 4 single-precision lanes per step.
+     * Accumulates a * log(a / t) + b * log(b / t) with t = (a + b) / 2 and
+     * returns sqrt(sum / 2). The reciprocal of t is computed once per block
+     * and shared by both terms.
+     */
+    uint32_t        blkCnt;
+    float32_t       tmp;
+    f32x4_t         a, b, t, invT, tmpV, accumV;
+
+    accumV = vdupq_n_f32(0.0f);
+
+    blkCnt = blockSize >> 2;
+    while (blkCnt > 0U) {
+        a = vld1q(pA);
+        b = vld1q(pB);
+
+        /* t = (a + b) / 2 and its reciprocal */
+        t = vaddq(a, b);
+        t = vmulq(t, 0.5f);
+        invT = vrecip_medprec_f32(t);
+
+        /* accumV += a * log(a / t) */
+        tmpV = vmulq(a, invT);
+        tmpV = vlogq_f32(tmpV);
+        accumV = vfmaq(accumV, a, tmpV);
+
+        /* accumV += b * log(b / t) */
+        tmpV = vmulq_f32(b, invT);
+        tmpV = vlogq_f32(tmpV);
+        accumV = vfmaq(accumV, b, tmpV);
+
+        pA += 4;
+        pB += 4;
+        blkCnt--;
+    }
+
+    /*
+     * tail
+     * (will be merged thru tail predication)
+     */
+    blkCnt = blockSize & 3;
+    if (blkCnt > 0U) {
+        /* Predicate covering the first blkCnt lanes only. */
+        mve_pred16_t    p0 = vctp32q(blkCnt);
+
+        a = vldrwq_z_f32(pA, p0);
+        b = vldrwq_z_f32(pB, p0);
+
+        t = vaddq(a, b);
+        t = vmulq(t, 0.5f);
+        invT = vrecip_medprec_f32(t);
+
+        /* Predicated accumulation: inactive lanes keep their previous sum. */
+        tmpV = vmulq_f32(a, invT);
+        tmpV = vlogq_f32(tmpV);
+        accumV = vfmaq_m_f32(accumV, a, tmpV, p0);
+
+        tmpV = vmulq_f32(b, invT);
+        tmpV = vlogq_f32(tmpV);
+        accumV = vfmaq_m_f32(accumV, b, tmpV, p0);
+
+    }
+
+    arm_sqrt_f32(vecAddAcrossF32Mve(accumV) / 2.0f, &tmp);
+    return (tmp);
+}
+
+#else
+
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+
+/**
+ * @brief        Jensen-Shannon distance between two vectors
+ *
+ * This function is assuming that elements of second vector are > 0
+ * and 0 only when the corresponding element of first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x == 0 and y == 0,
+ * it will compute the right result (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+
+float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    /*
+     * NEON variant: four elements per vector iteration, then a scalar loop
+     * for the 0..3 leftover elements. Accumulates a * log(a / t) +
+     * b * log(b / t) with t = (a + b) / 2 and returns sqrt(sum / 2).
+     */
+    float32_t accum, result, tmp, a, b;
+    uint32_t blkCnt;
+    float32x4_t aV, bV, t, invT, tmpV, accumV;
+    float32x2_t accumV2;
+
+    accumV = vdupq_n_f32(0.0f);
+
+    blkCnt = blockSize >> 2;
+    while(blkCnt > 0)
+    {
+      aV = vld1q_f32(pA);
+      bV = vld1q_f32(pB);
+
+      /* t = (a + b) / 2; its reciprocal is shared by both terms below */
+      t = vaddq_f32(aV,bV);
+      t = vmulq_n_f32(t, 0.5f);
+      invT = vinvq_f32(t);
+
+      /* accumV += a * log(a / t) */
+      tmpV = vmulq_f32(aV, invT);
+      tmpV = vlogq_f32(tmpV);
+      accumV = vmlaq_f32(accumV, aV, tmpV);
+
+      /* accumV += b * log(b / t) */
+      tmpV = vmulq_f32(bV, invT);
+      tmpV = vlogq_f32(tmpV);
+      accumV = vmlaq_f32(accumV, bV, tmpV);
+
+      pA += 4;
+      pB += 4;
+
+      blkCnt --;
+    }
+
+    /* Collapse the four lanes into the scalar accumulator. */
+    accumV2 = vpadd_f32(vget_low_f32(accumV),vget_high_f32(accumV));
+    accum = vget_lane_f32(accumV2, 0) + vget_lane_f32(accumV2, 1);
+
+    /* Leftover elements go through the scalar rel_entr helper. */
+    blkCnt = blockSize & 3;
+    while(blkCnt > 0)
+    {
+      a = *pA;
+      b = *pB;
+      tmp = (a + b) / 2.0f;
+      accum += rel_entr(a, tmp);
+      accum += rel_entr(b, tmp);
+
+      pA++;
+      pB++;
+
+      blkCnt --;
+    }
+
+    arm_sqrt_f32(accum/2.0f, &result);
+    return(result);
+
+}
+
+#else
+
+
+/**
+ * @brief        Jensen-Shannon distance between two vectors
+ *
+ * This function is assuming that elements of second vector are > 0
+ * and 0 only when the corresponding element of first vector is 0.
+ * Otherwise the result of the computation does not make sense
+ * and for speed reasons, the cases returning NaN or Infinity are not
+ * managed.
+ *
+ * When the function is computing x log (x / y) with x == 0 and y == 0,
+ * it will compute the right result (0) but a division by zero will occur
+ * and should be ignored in client code.
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    blockSize  vector length
+ * @return distance
+ *
+ */
+
+
+float32_t arm_jensenshannon_distance_f32(const float32_t *pA,const float32_t *pB, uint32_t blockSize)
+{
+    float32_t entropyA = 0.0f;   /* sum of rel_entr(pA[i], midpoint) */
+    float32_t entropyB = 0.0f;   /* sum of rel_entr(pB[i], midpoint) */
+    float32_t total;
+    float32_t distance;
+    uint32_t remaining;
+
+    /* The two sums are kept separate and only combined after the loop. */
+    for (remaining = blockSize; remaining > 0U; remaining--)
+    {
+        const float32_t a = *pA++;
+        const float32_t b = *pB++;
+        const float32_t midpoint = (a + b) / 2.0f;
+
+        entropyA += rel_entr(a, midpoint);
+        entropyB += rel_entr(b, midpoint);
+    }
+
+    total = entropyA + entropyB;
+    arm_sqrt_f32(total / 2.0f, &distance);
+
+    return distance;
+}
+
+#endif
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+/**
+ * @} end of JensenShannon group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_kulsinski_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_kulsinski_distance.c
@ -0,0 +1,73 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_kulsinski_distance.c
+ * Description:  Kulsinski distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+extern void arm_boolean_distance_TT_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+
+/**
+ * @brief        Kulsinski distance between two vectors
+ *
+ * With cTT, cTF and cFT the counts reported by
+ * arm_boolean_distance_TT_TF_FT and n = numberOfBools, the value
+ * returned is (cTF + cFT - cTT + n) / (cFT + cTF + n).
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_kulsinski_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t bothTrue = 0;
+    uint32_t onlyA = 0;
+    uint32_t onlyB = 0;
+    uint32_t numerator, denominator;
+
+    arm_boolean_distance_TT_TF_FT(pA, pB, numberOfBools, &bothTrue, &onlyA, &onlyB);
+
+    /* Both terms are formed in unsigned 32-bit arithmetic */
+    numerator   = onlyA + onlyB - bothTrue + numberOfBools;
+    denominator = onlyB + onlyA + numberOfBools;
+
+    /* The quotient is taken in double precision, then narrowed on return */
+    return((float32_t)((double)numerator / (double)denominator));
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f16.c
@ -0,0 +1,137 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_minkowski_distance_f16.c
+ * Description:  Minkowski distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions_f16.h"
+
+#if defined(ARM_FLOAT16_SUPPORTED)
+
+#include <limits.h>
+#include <math.h>
+
+/**
+  @ingroup FloatDist
+ */
+
+/**
+  @defgroup Minkowski Minkowski distance
+
+  Minkowski distance
+ */
+
+/**
+  @addtogroup Minkowski
+  @{
+ */
+
+
+/**
+ * @brief        Minkowski distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    order      Distance order
+ * @param[in]    blockSize  Number of samples
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math_f16.h"
+
+/* Helium (MVE) variant: eight half-precision lanes per iteration, the
+   remaining 1..7 samples being handled by one predicated step. */
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
+{
+    const f16x8_t   orderV = vdupq_n_f16(order);   /* order replicated in every lane */
+    f16x8_t         accV = vdupq_n_f16(0.0f);      /* per-lane partial sums */
+    f16x8_t         vecA, vecB, powV;
+    uint32_t        fullVectors = blockSize >> 3;
+    const uint32_t  leftover = blockSize & 7;
+
+    for (; fullVectors > 0U; fullVectors--) {
+        vecA = vld1q(pA);
+        vecB = vld1q(pB);
+
+        /* |a - b| ^ order, accumulated lane by lane */
+        powV = vabdq(vecA, vecB);
+        powV = vpowq_f16(powV, orderV);
+        accV = vaddq(accV, powV);
+
+        pA += 8;
+        pB += 8;
+    }
+
+    /*
+     * tail
+     * (handled with a predicate limited to the leftover lanes)
+     */
+    if (leftover > 0U) {
+        const mve_pred16_t tailPred = vctp16q(leftover);
+
+        vecA = vldrhq_z_f16(pA, tailPred);
+        vecB = vldrhq_z_f16(pB, tailPred);
+
+        powV = vabdq(vecA, vecB);
+        powV = vpowq_f16(powV, orderV);
+
+        /* Predicated add: only the lanes enabled in tailPred are updated */
+        accV = vaddq_m(accV, accV, powV, tailPred);
+    }
+
+    /* Sum across the lanes, then take the order-th root in single precision */
+    return (powf((float32_t)vecAddAcrossF16Mve(accV), (1.0f / (float32_t) order)));
+}
+
+
+#else
+
+
+/* Scalar variant: the sum is accumulated in _Float16 while each power
+   is evaluated in single precision through powf. */
+float16_t arm_minkowski_distance_f16(const float16_t *pA,const float16_t *pB, int32_t order, uint32_t blockSize)
+{
+    _Float16 acc = 0.0f16;
+    uint32_t remaining = blockSize;
+
+    while(remaining > 0U)
+    {
+       /* |a - b| ^ order for the current pair of samples */
+       acc += (_Float16)powf(fabsf((float32_t)((_Float16)*pA - (_Float16)*pB)),order);
+
+       pA++;
+       pB++;
+       remaining--;
+    }
+
+    /* order-th root of the accumulated sum */
+    return(_Float16)(powf((float32_t)acc,(1.0f/(float32_t)order)));
+
+}
+
+#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Minkowski group
+ */
+
+#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */ 
+
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f32.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_minkowski_distance_f32.c
@ -0,0 +1,188 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_minkowski_distance_f32.c
+ * Description:  Minkowski distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+/**
+  @addtogroup Minkowski
+  @{
+ */
+
+/* 6.14 bug */
+#if defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6100100) && (__ARMCC_VERSION < 6150001)
+ 
+/*
+ * Integer power helper: returns a raised to the power b, using
+ * square-and-multiply on the bits of b.
+ *
+ * Only compiled for the __ARMCC_VERSION range selected by the enclosing
+ * #if. The definition is weak, so a non-weak __powisf2 provided
+ * elsewhere is expected to take precedence at link time.
+ *
+ * a : base
+ * b : exponent, may be negative (the reciprocal is then returned)
+ */
+__attribute__((weak)) float __powisf2(float a, int b)
+{ 
+    /* Remember the sign of the exponent; b itself is consumed by the loop */
+    const int recip = b < 0;
+    float r = 1;
+    while (1)
+    {
+        /* Current low bit set: fold the current power of a into the result */
+        if (b & 1)
+            r *= a;
+        /* Integer division truncates toward zero, so b reaches 0 for either sign */
+        b /= 2;
+        if (b == 0)
+            break;
+        /* Next bit corresponds to the square of the current power */
+        a *= a;
+    }
+    /* Negative exponent: invert the accumulated product */
+    return recip ? 1/r : r;
+}
+#endif 
+
+/**
+ * @brief        Minkowski distance between two vectors
+ *
+ * @param[in]    pA         First vector
+ * @param[in]    pB         Second vector
+ * @param[in]    order      Distance order
+ * @param[in]    blockSize  Number of samples
+ * @return distance
+ *
+ */
+
+#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
+
+#include "arm_helium_utils.h"
+#include "arm_vec_math.h"
+
+/* Helium (MVE) variant: four single-precision lanes per iteration, the
+   remaining 1..3 samples being handled by one predicated step. */
+float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
+{
+    const f32x4_t   orderV = vdupq_n_f32(order);   /* order replicated in every lane */
+    f32x4_t         accV = vdupq_n_f32(0.0f);      /* per-lane partial sums */
+    f32x4_t         vecA, vecB, powV;
+    uint32_t        fullVectors = blockSize >> 2;
+    const uint32_t  leftover = blockSize & 3;
+
+    for (; fullVectors > 0U; fullVectors--) {
+        vecA = vld1q(pA);
+        vecB = vld1q(pB);
+
+        /* |a - b| ^ order, accumulated lane by lane */
+        powV = vabdq(vecA, vecB);
+        powV = vpowq_f32(powV, orderV);
+        accV = vaddq(accV, powV);
+
+        pA += 4;
+        pB += 4;
+    }
+
+    /*
+     * tail
+     * (handled with a predicate limited to the leftover lanes)
+     */
+    if (leftover > 0U) {
+        const mve_pred16_t tailPred = vctp32q(leftover);
+
+        vecA = vldrwq_z_f32(pA, tailPred);
+        vecB = vldrwq_z_f32(pB, tailPred);
+
+        powV = vabdq(vecA, vecB);
+        powV = vpowq_f32(powV, orderV);
+
+        /* Predicated add: only the lanes enabled in tailPred are updated */
+        accV = vaddq_m(accV, accV, powV, tailPred);
+    }
+
+    /* Sum across the lanes, then take the order-th root */
+    return (powf(vecAddAcrossF32Mve(accV), (1.0f / (float32_t) order)));
+}
+
+#else
+#if defined(ARM_MATH_NEON)
+
+#include "NEMath.h"
+
+/* Neon variant: four lanes per iteration, followed by a scalar loop
+   for the last blockSize & 3 samples. */
+float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
+{
+    float32x4_t accV = vdupq_n_f32(0.0f);          /* per-lane partial sums */
+    const float32x4_t orderV = vdupq_n_f32(order); /* order replicated in every lane */
+    float32x4_t vecA, vecB, powV;
+    float32x2_t pairV;
+    float32_t acc;
+    uint32_t remaining;
+
+    for(remaining = blockSize >> 2; remaining > 0; remaining--)
+    {
+       vecA = vld1q_f32(pA);
+       vecB = vld1q_f32(pB);
+
+       /* |a - b| ^ order, accumulated lane by lane */
+       powV = vabdq_f32(vecA,vecB);
+       powV = vpowq_f32(powV,orderV);
+       accV = vaddq_f32(accV, powV);
+
+       pA += 4;
+       pB += 4;
+    }
+
+    /* Fold the four lanes into one scalar */
+    pairV = vpadd_f32(vget_low_f32(accV),vget_high_f32(accV));
+    acc = vget_lane_f32(pairV, 0) + vget_lane_f32(pairV, 1);
+
+    /* Scalar tail */
+    for(remaining = blockSize & 3; remaining > 0; remaining--)
+    {
+       acc += powf(fabsf(*pA - *pB),order);
+
+       pA++;
+       pB++;
+    }
+
+    /* order-th root of the accumulated sum */
+    return(powf(acc,(1.0f/order)));
+
+}
+
+#else
+
+
+/* Portable scalar variant: sum of |a - b| ^ order over all samples,
+   followed by the order-th root. */
+float32_t arm_minkowski_distance_f32(const float32_t *pA,const float32_t *pB, int32_t order, uint32_t blockSize)
+{
+    float32_t acc = 0.0f;
+    uint32_t remaining = blockSize;
+
+    while(remaining > 0U)
+    {
+       acc += powf(fabsf(*pA - *pB),order);
+
+       pA++;
+       pB++;
+       remaining--;
+    }
+
+    return(powf(acc,(1.0f/order)));
+
+}
+#endif
+#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
+
+
+/**
+ * @} end of Minkowski group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_rogerstanimoto_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_rogerstanimoto_distance.c
@ -0,0 +1,75 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_rogerstanimoto_distance.c
+ * Description:  Rogers-Tanimoto distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+extern void arm_boolean_distance_TT_FF_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       , uint32_t *cFF
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+/**
+ * @brief        Rogers Tanimoto distance between two vectors
+ *
+ * With cTT, cFF, cTF and cFT the counts reported by
+ * arm_boolean_distance_TT_FF_TF_FT and R = 2 * (cTF + cFT), the value
+ * returned is R / (R + cTT + cFF).
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_rogerstanimoto_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t bothTrue = 0;
+    uint32_t bothFalse = 0;
+    uint32_t onlyA = 0;
+    uint32_t onlyB = 0;
+    uint32_t twiceMismatch, denominator;
+
+    arm_boolean_distance_TT_FF_TF_FT(pA, pB, numberOfBools, &bothTrue, &bothFalse, &onlyA, &onlyB);
+
+    /* Both terms are formed in unsigned 32-bit arithmetic */
+    twiceMismatch = 2*(onlyA + onlyB);
+    denominator = twiceMismatch + bothTrue + bothFalse;
+
+    /* The quotient is taken in double precision, then narrowed on return */
+    return((float32_t)((double)twiceMismatch / (double)denominator));
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_russellrao_distance.c
@ -0,0 +1,72 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_russellrao_distance.c
+ * Description:  Russell-Rao distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+
+extern void arm_boolean_distance_TT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       );
+
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+/**
+ * @brief        Russell-Rao distance between two vectors
+ *
+ * With cTT the count reported by arm_boolean_distance_TT and
+ * n = numberOfBools, the value returned is (n - cTT) / n.
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_russellrao_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t bothTrue = 0;
+    float32_t notBothTrue, total;
+
+    arm_boolean_distance_TT(pA, pB, numberOfBools, &bothTrue);
+
+    /* The difference is taken on integers, the division in single precision */
+    notBothTrue = (float32_t)(numberOfBools - bothTrue);
+    total = (float32_t)numberOfBools;
+
+    return(notBothTrue / total);
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_sokalmichener_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_sokalmichener_distance.c
@ -0,0 +1,76 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_sokalmichener_distance.c
+ * Description:  Sokal-Michener distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+extern void arm_boolean_distance_TT_FF_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       , uint32_t *cFF
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+/**
+ * @brief        Sokal-Michener distance between two vectors
+ *
+ * With cTT, cFF, cTF and cFT the counts reported by
+ * arm_boolean_distance_TT_FF_TF_FT, R = 2 * (cTF + cFT) and
+ * S = cFF + cTT, the value returned is R / (S + R).
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_sokalmichener_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t bothTrue = 0;
+    uint32_t bothFalse = 0;
+    uint32_t onlyA = 0;
+    uint32_t onlyB = 0;
+    float32_t twiceMismatch, matches;
+
+    arm_boolean_distance_TT_FF_TF_FT(pA, pB, numberOfBools, &bothTrue, &bothFalse, &onlyA, &onlyB);
+
+    /* Integer sums first, scaled in double, then stored as float32_t */
+    twiceMismatch = (float32_t)(2.0 * (double)(onlyA + onlyB));
+    matches = (float32_t)(1.0 * (double)(bothFalse + bothTrue));
+
+    return(twiceMismatch / (matches + twiceMismatch));
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_sokalsneath_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_sokalsneath_distance.c
@ -0,0 +1,74 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_sokalsneath_distance.c
+ * Description:  Sokal-Sneath distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+extern void arm_boolean_distance_TT_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+/**
+ * @brief        Sokal-Sneath distance between two vectors
+ *
+ * With cTT, cTF and cFT the counts reported by
+ * arm_boolean_distance_TT_TF_FT and R = 2 * (cTF + cFT), the value
+ * returned is R / (R + cTT).
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_sokalsneath_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t bothTrue = 0;
+    uint32_t onlyA = 0;
+    uint32_t onlyB = 0;
+    float32_t twiceMismatch;
+
+    arm_boolean_distance_TT_TF_FT(pA, pB, numberOfBools, &bothTrue, &onlyA, &onlyB);
+
+    /* Integer sum first, scaled in double, then stored as float32_t */
+    twiceMismatch = (float32_t)(2.0 * (double)(onlyA + onlyB));
+
+    return(twiceMismatch / (twiceMismatch + (float32_t)bothTrue));
+}
+
+
+/**
+ * @} end of BoolDist group
+ */
--- a/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_yule_distance.c
+++ b/Drivers/CMSIS/DSP/Source/DistanceFunctions/arm_yule_distance.c
@ -0,0 +1,74 @@
+
+/* ----------------------------------------------------------------------
+ * Project:      CMSIS DSP Library
+ * Title:        arm_yule_distance.c
+ * Description:  Yule distance between two vectors
+ *
+ * $Date:        23 April 2021
+ * $Revision:    V1.9.0
+ *
+ * Target Processor: Cortex-M and Cortex-A cores
+ * -------------------------------------------------------------------- */
+/*
+ * Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "dsp/distance_functions.h"
+#include <limits.h>
+#include <math.h>
+
+
+extern void arm_boolean_distance_TT_FF_TF_FT(const uint32_t *pA
+       , const uint32_t *pB
+       , uint32_t numberOfBools
+       , uint32_t *cTT
+       , uint32_t *cFF
+       , uint32_t *cTF
+       , uint32_t *cFT
+       );
+
+/**
+  @addtogroup BoolDist
+  @{
+ */
+
+
+/**
+ * @brief        Yule distance between two vectors
+ *
+ * With cTT, cFF, cTF and cFT the counts reported by
+ * arm_boolean_distance_TT_FF_TF_FT, the value returned is
+ * 2 * cTF * cFT / (cTF * cFT + cTT * cFF).
+ *
+ * The two products are formed in 64-bit arithmetic. Done on uint32_t
+ * they wrap around silently for long vectors (the doubled product
+ * 2 * cTF * cFT already wraps once cTF * cFT reaches 2^31), which
+ * yields a wrong distance. Whenever the 32-bit computation did not
+ * wrap, the value returned is unchanged.
+ *
+ * When both products are 0 the expression is 0/0; no special value is
+ * substituted for that case.
+ *
+ * @param[in]    pA              First vector of packed booleans
+ * @param[in]    pB              Second vector of packed booleans
+ * @param[in]    numberOfBools   Number of booleans
+ * @return distance
+ *
+ */
+
+float32_t arm_yule_distance(const uint32_t *pA, const uint32_t *pB, uint32_t numberOfBools)
+{
+    uint32_t ctt=0,cff=0,ctf=0,cft=0;
+    uint64_t discordant, concordant;
+
+    arm_boolean_distance_TT_FF_TF_FT(pA, pB, numberOfBools, &ctt,&cff, &ctf, &cft);
+
+    /* Widen before multiplying so that the products cannot wrap */
+    discordant = (uint64_t)ctf * (uint64_t)cft;
+    concordant = (uint64_t)ctt * (uint64_t)cff;
+
+    /* Sum and quotient are evaluated in double, then narrowed to float32_t */
+    return((float32_t)((2.0 * (double)discordant) / ((double)discordant + (double)concordant)));
+}
+
+
+/**
+ * @} end of BoolDist group
+ */