Initial commit
This commit is contained in:
commit
ce3dd83b9f
1470 changed files with 1054449 additions and 0 deletions
28
Drivers/CMSIS/DSP/Source/SupportFunctions/CMakeLists.txt
Normal file
28
Drivers/CMSIS/DSP/Source/SupportFunctions/CMakeLists.txt
Normal file
|
@ -0,0 +1,28 @@
|
|||
cmake_minimum_required (VERSION 3.14)

project(CMSISDSPSupport)

include(configLib)
include(configDsp)

# Collect the per-function sources of this directory (every file whose name
# contains an underscore, e.g. arm_copy_f32.c). The aggregate file
# SupportFunctions.c has no underscore in its name and is not matched.
# CONFIGURE_DEPENDS makes the build system re-check the glob so that added or
# removed source files are noticed without a manual re-configure.
file(GLOB SRC CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*_*.c")

# The pattern above also matches the half-precision (f16) sources. Drop them
# here: they are added explicitly below, and only when float16 is enabled.
# Without this filter they would be built even with ARMAC5 / DISABLEFLOAT16
# set, and listed a second time otherwise.
# The regex only looks at the file name (last path component).
list(FILTER SRC EXCLUDE REGEX "/[^/]*_f16[_.][^/]*$")

add_library(CMSISDSPSupport STATIC ${SRC})

configLib(CMSISDSPSupport ${ROOT})
configDsp(CMSISDSPSupport ${ROOT})

### Includes
target_include_directories(CMSISDSPSupport PUBLIC "${DSP}/Include")

# Half-precision support functions: skipped for ARMAC5 and when the user
# disables float16 through DISABLEFLOAT16.
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
  target_sources(CMSISDSPSupport PRIVATE
    arm_copy_f16.c
    arm_fill_f16.c
    arm_f16_to_q15.c
    arm_q15_to_f16.c
    arm_float_to_f16.c
    arm_f16_to_float.c
    arm_weighted_sum_f16.c
    arm_barycenter_f16.c
  )
endif()
|
||||
|
63
Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctions.c
Normal file
63
Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctions.c
Normal file
|
@ -0,0 +1,63 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: SupportFunctions.c
|
||||
* Description: Combination of all support function source files.
|
||||
*
|
||||
* $Date: 16. March 2020
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_barycenter_f32.c"
|
||||
#include "arm_bitonic_sort_f32.c"
|
||||
#include "arm_bubble_sort_f32.c"
|
||||
#include "arm_copy_f32.c"
|
||||
#include "arm_copy_f64.c"
|
||||
#include "arm_copy_q15.c"
|
||||
#include "arm_copy_q31.c"
|
||||
#include "arm_copy_q7.c"
|
||||
#include "arm_fill_f32.c"
|
||||
#include "arm_fill_f64.c"
|
||||
#include "arm_fill_q15.c"
|
||||
#include "arm_fill_q31.c"
|
||||
#include "arm_fill_q7.c"
|
||||
#include "arm_heap_sort_f32.c"
|
||||
#include "arm_insertion_sort_f32.c"
|
||||
#include "arm_merge_sort_f32.c"
|
||||
#include "arm_merge_sort_init_f32.c"
|
||||
#include "arm_quick_sort_f32.c"
|
||||
#include "arm_selection_sort_f32.c"
|
||||
#include "arm_sort_f32.c"
|
||||
#include "arm_sort_init_f32.c"
|
||||
#include "arm_weighted_sum_f32.c"
|
||||
|
||||
#include "arm_float_to_q15.c"
|
||||
#include "arm_float_to_q31.c"
|
||||
#include "arm_float_to_q7.c"
|
||||
#include "arm_q15_to_float.c"
|
||||
#include "arm_q15_to_q31.c"
|
||||
#include "arm_q15_to_q7.c"
|
||||
#include "arm_q31_to_float.c"
|
||||
#include "arm_q31_to_q15.c"
|
||||
#include "arm_q31_to_q7.c"
|
||||
#include "arm_q7_to_float.c"
|
||||
#include "arm_q7_to_q15.c"
|
||||
#include "arm_q7_to_q31.c"
|
|
@ -0,0 +1,36 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: SupportFunctions.c
|
||||
* Description: Combination of all support function source files.
|
||||
*
|
||||
* $Date: 16. March 2020
|
||||
* $Revision: V1.1.0
|
||||
*
|
||||
* Target Processor: Cortex-M cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_copy_f16.c"
|
||||
#include "arm_fill_f16.c"
|
||||
#include "arm_f16_to_q15.c"
|
||||
#include "arm_f16_to_float.c"
|
||||
#include "arm_q15_to_f16.c"
|
||||
#include "arm_float_to_f16.c"
|
||||
#include "arm_weighted_sum_f16.c"
|
||||
#include "arm_barycenter_f16.c"
|
274
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
Normal file
274
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f16.c
Normal file
|
@ -0,0 +1,274 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_barycenter_f16.c
|
||||
* Description: Barycenter
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup barycenter Barycenter
|
||||
|
||||
Barycenter of weighted vectors
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup barycenter
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Barycenter
|
||||
*
|
||||
*
|
||||
* @param[in] *in List of vectors
|
||||
* @param[in] *weights Weights of the vectors
|
||||
* @param[out] *out Barycenter
|
||||
* @param[in] nbVectors Number of vectors
|
||||
* @param[in] vecDim Dimension of space (vector dimension)
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
/*
 * Helium (MVE) implementation.
 *
 * `in` is read as nbVectors consecutive vectors of vecDim samples each.
 * `out` is first cleared, then receives sum(weights[i] * vector[i]) and is
 * finally scaled by 1 / sum(weights[i]). The weight sum is not checked
 * against zero.
 */
void arm_barycenter_f16(const float16_t *in,
                        const float16_t *weights,
                        float16_t *out,
                        uint32_t nbVectors,
                        uint32_t vecDim)
{
    const float16_t *pWeight = weights;
    /* Four consecutive input vectors are processed per outer iteration. */
    const float16_t *pRowA = in;
    const float16_t *pRowB = pRowA + vecDim;
    const float16_t *pRowC = pRowB + vecDim;
    const float16_t *pRowD = pRowC + vecDim;
    const float16_t *pRow;
    float16_t       *pAcc;
    float16_t        weightSum = 0.0f;
    float16_t        invSum;
    uint32_t         vecCnt, cnt;

    arm_fill_f16(0.0f, out, vecDim);

    /* Sum: groups of four vectors */
    for (vecCnt = nbVectors >> 2; vecCnt > 0; vecCnt--)
    {
        f16x8_t   accV, rowVA, rowVB, rowVC, rowVD;
        float16_t wA, wB, wC, wD;

        pAcc = out;
        wA = *pWeight++;
        wB = *pWeight++;
        wC = *pWeight++;
        wD = *pWeight++;
        weightSum += (_Float16)wA + (_Float16)wB + (_Float16)wC + (_Float16)wD;

        /* Eight samples per iteration */
        for (cnt = vecDim >> 3; cnt > 0; cnt--)
        {
            accV  = vld1q((const float16_t *) pAcc);
            rowVA = vld1q(pRowA);
            rowVB = vld1q(pRowB);
            rowVC = vld1q(pRowC);
            rowVD = vld1q(pRowD);
            accV  = vfmaq(accV, rowVA, wA);
            accV  = vfmaq(accV, rowVB, wB);
            accV  = vfmaq(accV, rowVC, wC);
            accV  = vfmaq(accV, rowVD, wD);
            vst1q(pAcc, accV);

            pAcc  += 8;
            pRowA += 8;
            pRowB += 8;
            pRowC += 8;
            pRowD += 8;
        }

        /* Remaining (vecDim mod 8) samples, one at a time */
        for (cnt = vecDim & 7; cnt > 0; cnt--)
        {
            *pAcc = (_Float16)*pAcc + (_Float16)*pRowA++ * (_Float16)wA;
            *pAcc = (_Float16)*pAcc + (_Float16)*pRowB++ * (_Float16)wB;
            *pAcc = (_Float16)*pAcc + (_Float16)*pRowC++ * (_Float16)wC;
            *pAcc = (_Float16)*pAcc + (_Float16)*pRowD++ * (_Float16)wD;
            pAcc++;
        }

        /* Each pointer already advanced by one vector; skip the other three. */
        pRowA += 3 * vecDim;
        pRowB += 3 * vecDim;
        pRowC += 3 * vecDim;
        pRowD += 3 * vecDim;
    }

    /* Sum: the (nbVectors mod 4) vectors that are left */
    pRow = pRowA;

    for (vecCnt = nbVectors & 3; vecCnt > 0; vecCnt--)
    {
        f16x8_t   rowV, accV;
        float16_t wSingle;

        pAcc = out;
        wSingle = *pWeight++;
        weightSum += (_Float16)wSingle;

        for (cnt = vecDim >> 3; cnt > 0; cnt--)
        {
            accV = vld1q_f16(pAcc);
            rowV = vld1q_f16(pRow);
            accV = vfmaq(accV, rowV, wSingle);
            vst1q_f16(pAcc, accV);
            pAcc += 8;
            pRow += 8;
        }

        for (cnt = vecDim & 7; cnt > 0; cnt--)
        {
            *pAcc = (_Float16)*pAcc + (_Float16)*pRow++ * (_Float16)wSingle;
            pAcc++;
        }
    }

    /* Normalize: multiply by the reciprocal of the weight sum */
    pAcc = out;
    invSum = 1.0f16 / (_Float16)weightSum;

    for (cnt = vecDim >> 3; cnt > 0; cnt--)
    {
        f16x8_t scaled;

        scaled = vld1q((const float16_t *) pAcc);
        scaled = vmulq(scaled, invSum);
        vst1q(pAcc, scaled);
        pAcc += 8;
    }

    for (cnt = vecDim & 7; cnt > 0; cnt--)
    {
        *pAcc = (_Float16)*pAcc * (_Float16)invSum;
        pAcc++;
    }
}
|
||||
#else
|
||||
/*
 * Portable scalar implementation.
 *
 * `in` is read as nbVectors consecutive vectors of vecDim samples each.
 * `out` is cleared, accumulates weights[i] * vector[i] for every vector and
 * is then divided element-wise by the sum of the weights. The weight sum is
 * not checked against zero.
 */
void arm_barycenter_f16(const float16_t *in, const float16_t *weights, float16_t *out, uint32_t nbVectors,uint32_t vecDim)
{
    const float16_t *pSample = in;
    const float16_t *pWeight = weights;
    float16_t weightSum = 0.0f16;
    float16_t curWeight;
    uint32_t vecIdx, k;

    /* Clear the accumulator */
    for (k = 0; k < vecDim; k++)
    {
        out[k] = 0.0f16;
    }

    /* Sum */
    for (vecIdx = 0; vecIdx < nbVectors; vecIdx++)
    {
        curWeight = *pWeight++;
        weightSum += (_Float16)curWeight;

        for (k = 0; k < vecDim; k++)
        {
            out[k] = (_Float16)out[k] + (_Float16)*pSample++ * (_Float16)curWeight;
        }
    }

    /* Normalize */
    for (k = 0; k < vecDim; k++)
    {
        out[k] = (_Float16)out[k] / (_Float16)weightSum;
    }
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
* @} end of barycenter group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
414
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c
Normal file
414
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_barycenter_f32.c
Normal file
|
@ -0,0 +1,414 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_barycenter_f32.c
|
||||
* Description: Barycenter
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup barycenter
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Barycenter
|
||||
*
|
||||
*
|
||||
* @param[in] *in List of vectors
|
||||
* @param[in] *weights Weights of the vectors
|
||||
* @param[out] *out Barycenter
|
||||
* @param[in] nbVectors Number of vectors
|
||||
* @param[in] vecDim Dimension of space (vector dimension)
|
||||
* @return None
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
/*
 * Helium (MVE) implementation.
 *
 * `in` is read as nbVectors consecutive vectors of vecDim samples each.
 * `out` is first cleared, then receives sum(weights[i] * vector[i]) and is
 * finally scaled by 1 / sum(weights[i]). The weight sum is not checked
 * against zero.
 */
void arm_barycenter_f32(const float32_t *in,
                        const float32_t *weights,
                        float32_t *out,
                        uint32_t nbVectors,
                        uint32_t vecDim)
{
    const float32_t *pWeight = weights;
    /* Four consecutive input vectors are processed per outer iteration. */
    const float32_t *pRowA = in;
    const float32_t *pRowB = pRowA + vecDim;
    const float32_t *pRowC = pRowB + vecDim;
    const float32_t *pRowD = pRowC + vecDim;
    const float32_t *pRow;
    float32_t       *pAcc;
    float32_t        weightSum = 0.0f;
    float32_t        invSum;
    uint32_t         vecCnt, cnt;

    arm_fill_f32(0.0f, out, vecDim);

    /* Sum: groups of four vectors */
    for (vecCnt = nbVectors >> 2; vecCnt > 0; vecCnt--)
    {
        f32x4_t   accV, rowVA, rowVB, rowVC, rowVD;
        float32_t wA, wB, wC, wD;

        pAcc = out;
        wA = *pWeight++;
        wB = *pWeight++;
        wC = *pWeight++;
        wD = *pWeight++;
        weightSum += wA + wB + wC + wD;

        /* Four samples per iteration */
        for (cnt = vecDim >> 2; cnt > 0; cnt--)
        {
            accV  = vld1q((const float32_t *) pAcc);
            rowVA = vld1q(pRowA);
            rowVB = vld1q(pRowB);
            rowVC = vld1q(pRowC);
            rowVD = vld1q(pRowD);
            accV  = vfmaq(accV, rowVA, wA);
            accV  = vfmaq(accV, rowVB, wB);
            accV  = vfmaq(accV, rowVC, wC);
            accV  = vfmaq(accV, rowVD, wD);
            vst1q(pAcc, accV);

            pAcc  += 4;
            pRowA += 4;
            pRowB += 4;
            pRowC += 4;
            pRowD += 4;
        }

        /* Remaining (vecDim mod 4) samples, one at a time */
        for (cnt = vecDim & 3; cnt > 0; cnt--)
        {
            *pAcc = *pAcc + *pRowA++ * wA;
            *pAcc = *pAcc + *pRowB++ * wB;
            *pAcc = *pAcc + *pRowC++ * wC;
            *pAcc = *pAcc + *pRowD++ * wD;
            pAcc++;
        }

        /* Each pointer already advanced by one vector; skip the other three. */
        pRowA += 3 * vecDim;
        pRowB += 3 * vecDim;
        pRowC += 3 * vecDim;
        pRowD += 3 * vecDim;
    }

    /* Sum: the (nbVectors mod 4) vectors that are left */
    pRow = pRowA;

    for (vecCnt = nbVectors & 3; vecCnt > 0; vecCnt--)
    {
        f32x4_t   rowV, accV;
        float32_t wSingle;

        pAcc = out;
        wSingle = *pWeight++;
        weightSum += wSingle;

        for (cnt = vecDim >> 2; cnt > 0; cnt--)
        {
            accV = vld1q_f32(pAcc);
            rowV = vld1q_f32(pRow);
            accV = vfmaq(accV, rowV, wSingle);
            vst1q_f32(pAcc, accV);
            pAcc += 4;
            pRow += 4;
        }

        for (cnt = vecDim & 3; cnt > 0; cnt--)
        {
            *pAcc = *pAcc + *pRow++ * wSingle;
            pAcc++;
        }
    }

    /* Normalize: multiply by the reciprocal of the weight sum */
    pAcc = out;
    invSum = 1.0f / weightSum;

    for (cnt = vecDim >> 2; cnt > 0; cnt--)
    {
        f32x4_t scaled;

        scaled = vld1q((const float32_t *) pAcc);
        scaled = vmulq(scaled, invSum);
        vst1q(pAcc, scaled);
        pAcc += 4;
    }

    for (cnt = vecDim & 3; cnt > 0; cnt--)
    {
        *pAcc = *pAcc * invSum;
        pAcc++;
    }
}
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON)
|
||||
|
||||
#include "NEMath.h"
|
||||
/*
 * Neon implementation.
 *
 * `in` is read as nbVectors consecutive vectors of vecDim samples each.
 * `out` is first cleared, then receives sum(weights[i] * vector[i]) and is
 * finally scaled by 1 / sum(weights[i]). The weight sum is not checked
 * against zero.
 */
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
{
    const float32_t *pWeight = weights;
    const float32_t *pRow, *pRowA, *pRowB, *pRowC, *pRowD;
    float32_t *pAcc;
    float32_t weightSum = 0.0f;
    float32_t invSum;
    float32_t wSingle, wA, wB, wC, wD;
    float32x4_t zeroV, scaled, rowV, accV, rowVA, rowVB, rowVC, rowVD;
    uint32_t vecCnt, cnt;

    /* Clear the accumulator: four samples at a time, then the tail */
    zeroV = vdupq_n_f32(0.0f);
    pAcc = out;

    for (cnt = vecDim >> 2; cnt > 0; cnt--)
    {
        vst1q_f32(pAcc, zeroV);
        pAcc += 4;
    }

    for (cnt = vecDim & 3; cnt > 0; cnt--)
    {
        *pAcc = 0.0f;
        pAcc++;
    }

    /* Sum: groups of four consecutive input vectors */
    pRowA = in;
    pRowB = pRowA + vecDim;
    pRowC = pRowB + vecDim;
    pRowD = pRowC + vecDim;

    for (vecCnt = nbVectors >> 2; vecCnt > 0; vecCnt--)
    {
        pAcc = out;
        wA = *pWeight++;
        wB = *pWeight++;
        wC = *pWeight++;
        wD = *pWeight++;
        weightSum += wA + wB + wC + wD;

        for (cnt = vecDim >> 2; cnt > 0; cnt--)
        {
            accV  = vld1q_f32(pAcc);
            rowVA = vld1q_f32(pRowA);
            rowVB = vld1q_f32(pRowB);
            rowVC = vld1q_f32(pRowC);
            rowVD = vld1q_f32(pRowD);
            accV  = vmlaq_n_f32(accV, rowVA, wA);
            accV  = vmlaq_n_f32(accV, rowVB, wB);
            accV  = vmlaq_n_f32(accV, rowVC, wC);
            accV  = vmlaq_n_f32(accV, rowVD, wD);
            vst1q_f32(pAcc, accV);

            pAcc  += 4;
            pRowA += 4;
            pRowB += 4;
            pRowC += 4;
            pRowD += 4;
        }

        for (cnt = vecDim & 3; cnt > 0; cnt--)
        {
            *pAcc = *pAcc + *pRowA++ * wA;
            *pAcc = *pAcc + *pRowB++ * wB;
            *pAcc = *pAcc + *pRowC++ * wC;
            *pAcc = *pAcc + *pRowD++ * wD;
            pAcc++;
        }

        /* Each pointer already advanced by one vector; skip the other three. */
        pRowA += 3*vecDim;
        pRowB += 3*vecDim;
        pRowC += 3*vecDim;
        pRowD += 3*vecDim;
    }

    /* Sum: the (nbVectors mod 4) vectors that are left */
    pRow = pRowA;

    for (vecCnt = nbVectors & 3; vecCnt > 0; vecCnt--)
    {
        pAcc = out;
        wSingle = *pWeight++;
        weightSum += wSingle;

        for (cnt = vecDim >> 2; cnt > 0; cnt--)
        {
            accV = vld1q_f32(pAcc);
            rowV = vld1q_f32(pRow);
            accV = vmlaq_n_f32(accV, rowV, wSingle);
            vst1q_f32(pAcc, accV);
            pAcc += 4;
            pRow += 4;
        }

        for (cnt = vecDim & 3; cnt > 0; cnt--)
        {
            *pAcc = *pAcc + *pRow++ * wSingle;
            pAcc++;
        }
    }

    /* Normalize: multiply by the reciprocal of the weight sum */
    pAcc = out;
    invSum = 1.0f / weightSum;

    for (cnt = vecDim >> 2; cnt > 0; cnt--)
    {
        scaled = vld1q_f32(pAcc);
        scaled = vmulq_n_f32(scaled, invSum);
        vst1q_f32(pAcc, scaled);
        pAcc += 4;
    }

    for (cnt = vecDim & 3; cnt > 0; cnt--)
    {
        *pAcc = *pAcc * invSum;
        pAcc++;
    }
}
|
||||
#else
|
||||
/*
 * Portable scalar implementation.
 *
 * `in` is read as nbVectors consecutive vectors of vecDim samples each.
 * `out` is cleared, accumulates weights[i] * vector[i] for every vector and
 * is then divided element-wise by the sum of the weights. The weight sum is
 * not checked against zero.
 */
void arm_barycenter_f32(const float32_t *in, const float32_t *weights, float32_t *out, uint32_t nbVectors,uint32_t vecDim)
{
    const float32_t *pSample = in;
    const float32_t *pWeight = weights;
    float32_t weightSum = 0.0f;
    float32_t curWeight;
    uint32_t vecIdx, k;

    /* Clear the accumulator */
    for (k = 0; k < vecDim; k++)
    {
        out[k] = 0.0f;
    }

    /* Sum */
    for (vecIdx = 0; vecIdx < nbVectors; vecIdx++)
    {
        curWeight = *pWeight++;
        weightSum += curWeight;

        for (k = 0; k < vecDim; k++)
        {
            out[k] = out[k] + *pSample++ * curWeight;
        }
    }

    /* Normalize */
    for (k = 0; k < vecDim; k++)
    {
        out[k] = out[k] / weightSum;
    }
}
|
||||
#endif
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
* @} end of barycenter group
|
||||
*/
|
1039
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_bitonic_sort_f32.c
Normal file
1039
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_bitonic_sort_f32.c
Normal file
File diff suppressed because it is too large
Load diff
104
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_bubble_sort_f32.c
Normal file
104
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_bubble_sort_f32.c
Normal file
|
@ -0,0 +1,104 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_bubble_sort_f32.c
|
||||
* Description: Floating point bubble sort
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
#include "arm_sorting.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @private
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*
|
||||
* @par Algorithm
|
||||
* The bubble sort algorithm is a simple comparison algorithm that
|
||||
* reads the elements of a vector from the beginning to the end,
|
||||
* compares the adjacent ones and swaps them if they are in the
|
||||
* wrong order. The procedure is repeated until there is nothing
|
||||
* left to swap. Bubble sort is fast for input vectors that are
|
||||
* nearly sorted.
|
||||
*
|
||||
* @par It's an in-place algorithm. In order to obtain an out-of-place
|
||||
* function, a memcpy of the source vector is performed
|
||||
*/
|
||||
|
||||
void arm_bubble_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint8_t dir = S->dir;
|
||||
uint32_t i;
|
||||
uint8_t swapped =1;
|
||||
float32_t * pA;
|
||||
float32_t temp;
|
||||
|
||||
if(pSrc != pDst) // out-of-place
|
||||
{
|
||||
memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
|
||||
pA = pDst;
|
||||
}
|
||||
else
|
||||
pA = pSrc;
|
||||
|
||||
while(swapped==1) // If nothing has been swapped after one loop stop
|
||||
{
|
||||
swapped=0;
|
||||
|
||||
for(i=0; i<blockSize-1; i++)
|
||||
{
|
||||
if(dir==(pA[i]>pA[i+1]))
|
||||
{
|
||||
// Swap
|
||||
temp = pA[i];
|
||||
pA[i] = pA[i+1];
|
||||
pA[i+1] = temp;
|
||||
|
||||
// Update flag
|
||||
swapped = 1;
|
||||
}
|
||||
}
|
||||
|
||||
blockSize--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
130
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_f16.c
Normal file
130
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_f16.c
Normal file
|
@ -0,0 +1,130 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_copy_f16.c
|
||||
* Description: Copies the elements of a floating-point vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Copies the elements of a f16 vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
void arm_copy_f16(
|
||||
const float16_t * pSrc,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
do {
|
||||
mve_pred16_t p = vctp16q(blockSize);
|
||||
|
||||
vstrhq_p_f16(pDst,
|
||||
vldrhq_z_f16((float16_t const *) pSrc, p), p);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
blockSize -= 8;
|
||||
}
|
||||
while ((int32_t) blockSize > 0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void arm_copy_f16(
|
||||
const float16_t * pSrc,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of copy group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
192
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_f32.c
Normal file
192
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_f32.c
Normal file
|
@ -0,0 +1,192 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_copy_f32.c
|
||||
* Description: Copies the elements of a floating-point vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup copy Vector Copy
|
||||
|
||||
Copies sample by sample from source vector to destination vector.
|
||||
|
||||
<pre>
|
||||
pDst[n] = pSrc[n]; 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Copies the elements of a floating-point vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
void arm_copy_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vstrwq_f32(pDst, vldrwq_f32(pSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_copy_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
float32x4_t inV;
|
||||
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pSrc);
|
||||
vst1q_f32(pDst, inV);
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
/* Copy and then store the results in the destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_copy_f32(
|
||||
const float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of copy group
|
||||
*/
|
71
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_f64.c
Normal file
71
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_f64.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_copy_f64.c
|
||||
* Description: Copies the elements of a floating-point vector
|
||||
*
|
||||
* $Date: 13 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Copies the elements of a floating-point vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
void arm_copy_f64(
|
||||
const float64_t * pSrc,
|
||||
float64_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of copy group
|
||||
*/
|
130
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_q15.c
Normal file
130
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_q15.c
Normal file
|
@ -0,0 +1,130 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_copy_q15.c
|
||||
* Description: Copies the elements of a Q15 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Copies the elements of a Q15 vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_copy_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vstrhq_s16(pDst,vldrhq_s16(pSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 7;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_copy_q15(
|
||||
const q15_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* read 2 times 2 samples at a time */
|
||||
write_q15x2_ia (&pDst, read_q15x2_ia (&pSrc));
|
||||
write_q15x2_ia (&pDst, read_q15x2_ia (&pSrc));
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of copy group
|
||||
*/
|
135
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_q31.c
Normal file
135
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_q31.c
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_copy_q31.c
|
||||
* Description: Copies the elements of a Q31 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Copies the elements of a Q31 vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_copy_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
vstrwq_s32(pDst,vldrwq_s32(pSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_copy_q31(
|
||||
const q31_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of copy group
|
||||
*/
|
132
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_q7.c
Normal file
132
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_copy_q7.c
Normal file
|
@ -0,0 +1,132 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_copy_q7.c
|
||||
* Description: Copies the elements of a Q7 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup copy
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Copies the elements of a Q7 vector.
|
||||
@param[in] pSrc points to input vector
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_copy_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
|
||||
uint32_t blkCnt;
|
||||
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
vstrbq_s8(pDst,vldrbq_s8(pSrc));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 16;
|
||||
pDst += 16;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 0xF;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_copy_q7(
|
||||
const q7_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* read 4 samples at a time */
|
||||
write_q7x4_ia (&pDst, read_q7x4_ia (&pSrc));
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A */
|
||||
|
||||
/* Copy and store result in destination buffer */
|
||||
*pDst++ = *pSrc++;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of copy group
|
||||
*/
|
134
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_float.c
Normal file
134
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_float.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_f16_to_float.c
|
||||
* Description: Converts the elements of the f16 vector to f32 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup f16_to_x Convert 16-bit floating point value
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup f16_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the f16 vector to f32 vector.
|
||||
@param[in] pSrc points to the f16 input vector
|
||||
@param[out] pDst points to the f32 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H)
|
||||
#pragma GCC warning "Scalar version of arm_f16_to_float built. Helium version has build issues with gcc."
|
||||
#endif
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H)
|
||||
|
||||
void arm_f16_to_float(
|
||||
const float16_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int32_t blkCnt; /* loop counters */
|
||||
float16x8_t vecDst;
|
||||
float32x4x2_t tmp;
|
||||
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0)
|
||||
{
|
||||
vecDst = vldrhq_f16(pSrc);
|
||||
pSrc += 8;
|
||||
|
||||
tmp.val[0] = vcvtbq_f32_f16(vecDst);
|
||||
tmp.val[1] = vcvttq_f32_f16(vecDst);
|
||||
vst2q(pDst,tmp);
|
||||
|
||||
pDst += 8;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
* (will be merged thru tail predication)
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
while (blkCnt > 0)
|
||||
{
|
||||
|
||||
*pDst++ = (float32_t) *pSrc++;
|
||||
/*
|
||||
* Decrement the loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_f16_to_float(
|
||||
const float16_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const float16_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/*
|
||||
* Loop over blockSize number of values
|
||||
*/
|
||||
blkCnt = blockSize;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
*pDst++ = (float32_t) * pIn++;
|
||||
/*
|
||||
* Decrement the loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) */
|
||||
|
||||
/**
|
||||
@} end of f16_to_x group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
157
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
Normal file
157
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_f16_to_q15.c
Normal file
|
@ -0,0 +1,157 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_f16_to_q15.c
|
||||
* Description: Converts the elements of the f16 vector to Q15 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup f16_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the f16 vector to Q15 vector.
|
||||
@param[in] pSrc points to the f16 input vector
|
||||
@param[out] pDst points to the Q15 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q15_t)(pSrc[n] * 32768); 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
|
||||
@note
|
||||
In order to apply rounding in scalar version, the library should be rebuilt with the ARM_MATH_ROUNDING macro
|
||||
defined in the preprocessor section of project options.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
void arm_f16_to_q15(
|
||||
const float16_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float16_t maxQ = (float16_t) Q15_MAX;
|
||||
float16x8_t vecDst;
|
||||
|
||||
|
||||
do {
|
||||
mve_pred16_t p = vctp16q(blockSize);
|
||||
|
||||
vecDst = vldrhq_z_f16((float16_t const *) pSrc, p);
|
||||
/* C = A * 32767 */
|
||||
/* convert from float to Q15 and then store the results in the destination buffer */
|
||||
vecDst = vmulq_m(vuninitializedq_f16(), vecDst, maxQ, p);
|
||||
|
||||
vstrhq_p_s16(pDst,
|
||||
vcvtaq_m(vuninitializedq_s16(), vecDst, p), p);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 8;
|
||||
pDst += 8;
|
||||
blockSize -= 8;
|
||||
}
|
||||
while ((int32_t) blockSize > 0);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void arm_f16_to_q15(
|
||||
const float16_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const float16_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float16_t in;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/*
|
||||
* Loop over blockSize number of values
|
||||
*/
|
||||
blkCnt = blockSize;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
/*
|
||||
* C = A * 65536
|
||||
*/
|
||||
/*
|
||||
* convert from float to Q31 and then store the results in the destination buffer
|
||||
*/
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0);
|
||||
in += in > 0.0 ? 0.5 : -0.5;
|
||||
*pDst++ = clip_q31_to_q15((q31_t) (in));
|
||||
|
||||
#else
|
||||
|
||||
/*
|
||||
* C = A * 32768
|
||||
*/
|
||||
/*
|
||||
* convert from float to Q31 and then store the results in the destination buffer
|
||||
*/
|
||||
*pDst++ = clip_q31_to_q15((q31_t) ((_Float16)*pIn++ * 32768.0f16));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/*
|
||||
* Decrement the loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of f16_to_x group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
127
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f16.c
Normal file
127
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f16.c
Normal file
|
@ -0,0 +1,127 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_fill_f16.c
|
||||
* Description: Fills a constant value into a floating-point vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Fills a constant value into a f16 vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
void arm_fill_f16(
|
||||
float16_t value,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
do {
|
||||
mve_pred16_t p = vctp16q(blockSize);
|
||||
|
||||
vstrhq_p_f16(pDst,
|
||||
vdupq_m_n_f16(vuninitializedq_f16(), value, p), p);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pDst += 8;
|
||||
blockSize -= 8;
|
||||
}
|
||||
while ((int32_t) blockSize > 0);
|
||||
}
|
||||
#else
|
||||
void arm_fill_f16(
|
||||
float16_t value,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of Fill group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
189
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c
Normal file
189
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f32.c
Normal file
|
@ -0,0 +1,189 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_fill_f32.c
|
||||
* Description: Fills a constant value into a floating-point vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup Fill Vector Fill
|
||||
|
||||
Fills the destination vector with a constant value.
|
||||
|
||||
<pre>
|
||||
pDst[n] = value; 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
There are separate functions for floating point, Q31, Q15, and Q7 data types.
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Fills a constant value into a floating-point vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_fill_f32(
|
||||
float32_t value,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
vstrwq_f32(pDst,vdupq_n_f32(value));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pDst += 4;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_fill_f32(
|
||||
float32_t value,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
|
||||
float32x4_t inV = vdupq_n_f32(value);
|
||||
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
vst1q_f32(pDst, inV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
/* Fill the value in the destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_fill_f32(
|
||||
float32_t value,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of Fill group
|
||||
*/
|
71
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f64.c
Normal file
71
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_f64.c
Normal file
|
@ -0,0 +1,71 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_fill_f64.c
|
||||
* Description: Fills a constant value into a 64-bit floating-point vector
|
||||
*
|
||||
* $Date: 13 September 2021
|
||||
* $Revision: V1.10.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Fills a constant value into a 64-bit floating-point vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
void arm_fill_f64(
|
||||
float64_t value,
|
||||
float64_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of Fill group
|
||||
*/
|
134
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_q15.c
Normal file
134
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_q15.c
Normal file
|
@ -0,0 +1,134 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_fill_q15.c
|
||||
* Description: Fills a constant value into a Q15 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Fills a constant value into a Q15 vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_fill_q15(
|
||||
q15_t value,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
vstrhq_s16(pDst,vdupq_n_s16(value));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pDst += 8;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 7;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_fill_q15(
|
||||
q15_t value,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
q31_t packedValue; /* value packed to 32 bits */
|
||||
|
||||
/* Packing two 16 bit values to 32 bit value in order to use SIMD */
|
||||
packedValue = __PKHBT(value, value, 16U);
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* fill 2 times 2 samples at a time */
|
||||
write_q15x2_ia (&pDst, packedValue);
|
||||
write_q15x2_ia (&pDst, packedValue);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of Fill group
|
||||
*/
|
135
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_q31.c
Normal file
135
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_q31.c
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_fill_q31.c
|
||||
* Description: Fills a constant value into a Q31 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Fills a constant value into a Q31 vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_fill_q31(
|
||||
q31_t value,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
vstrwq_s32(pDst,vdupq_n_s32(value));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pDst += 4;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_fill_q31(
|
||||
q31_t value,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of Fill group
|
||||
*/
|
133
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_q7.c
Normal file
133
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_fill_q7.c
Normal file
|
@ -0,0 +1,133 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_fill_q7.c
|
||||
* Description: Fills a constant value into a Q7 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Fill
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Fills a constant value into a Q7 vector.
|
||||
@param[in] value input value to be filled
|
||||
@param[out] pDst points to output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_fill_q7(
|
||||
q7_t value,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
vstrbq_s8(pDst,vdupq_n_s8(value));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pDst += 16;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 0xF;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_fill_q7(
|
||||
q7_t value,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
q31_t packedValue; /* value packed to 32 bits */
|
||||
|
||||
/* Packing four 8 bit values to 32 bit value in order to use SIMD */
|
||||
packedValue = __PACKq7(value, value, value, value);
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* fill 4 samples at a time */
|
||||
write_q7x4_ia (&pDst, packedValue);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = value */
|
||||
|
||||
/* Fill value in destination buffer */
|
||||
*pDst++ = value;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of Fill group
|
||||
*/
|
131
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_f16.c
Normal file
131
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_f16.c
Normal file
|
@ -0,0 +1,131 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_float_to_f16.c
|
||||
* Description: Converts the elements of the floating-point vector to f16 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup float_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the floating-point vector to f16 vector.
|
||||
@param[in] pSrc points to the f32 input vector
|
||||
@param[out] pDst points to the f16 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && defined(__CMSIS_GCC_H)
|
||||
#pragma GCC warning "Scalar version of arm_float_to_f16 built. Helium version has build issues with gcc."
|
||||
#endif
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H)
|
||||
|
||||
void arm_float_to_f16(
|
||||
const float32_t * pSrc,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int32_t blkCnt; /* loop counters */
|
||||
float32x4x2_t tmp;
|
||||
float16x8_t vecDst;
|
||||
float32_t const *pSrcVec;
|
||||
|
||||
|
||||
pSrcVec = (float32_t const *) pSrc;
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0)
|
||||
{
|
||||
/* convert from float32 to float16 and then store the results in the destination buffer */
|
||||
tmp = vld2q(pSrcVec); pSrcVec += 8;
|
||||
/* narrow / merge */
|
||||
vecDst = vcvtbq_f16_f32(vecDst, tmp.val[0]);
|
||||
vecDst = vcvttq_f16_f32(vecDst, tmp.val[1]);
|
||||
vst1q(pDst, vecDst); pDst += 8;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
tmp = vld2q(pSrcVec);
|
||||
vecDst = vcvtbq_f16_f32(vecDst, tmp.val[0]);
|
||||
vecDst = vcvttq_f16_f32(vecDst, tmp.val[1]);
|
||||
vstrhq_p(pDst, vecDst, p0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void arm_float_to_f16(
|
||||
const float32_t * pSrc,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const float32_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
/*
|
||||
* Loop over blockSize number of values
|
||||
*/
|
||||
blkCnt = blockSize;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
*pDst++ = (float16_t) * pIn++;
|
||||
/*
|
||||
* Decrement the loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) && !defined(__CMSIS_GCC_H) */
|
||||
|
||||
/**
|
||||
@} end of float_to_x group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
308
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q15.c
Normal file
308
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q15.c
Normal file
|
@ -0,0 +1,308 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_float_to_q15.c
|
||||
* Description: Converts the elements of the floating-point vector to Q15 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup float_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the floating-point vector to Q15 vector.
|
||||
@param[in] pSrc points to the floating-point input vector
|
||||
@param[out] pDst points to the Q15 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q15_t)(pSrc[n] * 32768); 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||
|
||||
@note
|
||||
In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
|
||||
defined in the preprocessor section of project options.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_float_to_q15(
|
||||
const float32_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
float32_t maxQ = (float32_t) Q15_MAX;
|
||||
f32x4x2_t tmp;
|
||||
q15x8_t vecDst;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif
|
||||
|
||||
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 32768 */
|
||||
/* convert from float to q15 and then store the results in the destination buffer */
|
||||
tmp = vld2q(pSrc);
|
||||
|
||||
tmp.val[0] = vmulq(tmp.val[0], maxQ);
|
||||
tmp.val[1] = vmulq(tmp.val[1], maxQ);
|
||||
|
||||
vecDst = vqmovnbq(vecDst, vcvtaq_s32_f32(tmp.val[0]));
|
||||
vecDst = vqmovntq(vecDst, vcvtaq_s32_f32(tmp.val[1]));
|
||||
vst1q(pDst, vecDst);
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
pDst += 8;
|
||||
pSrc += 8;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 7;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 32768 */
|
||||
|
||||
/* convert from float to Q15 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pSrc++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pSrc++ * 32768.0f), 16);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_float_to_q15(
|
||||
const float32_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const float32_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
float32x4_t inV;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32x4_t zeroV = vdupq_n_f32(0.0f);
|
||||
float32x4_t pHalf = vdupq_n_f32(0.5f / 32768.0f);
|
||||
float32x4_t mHalf = vdupq_n_f32(-0.5f / 32768.0f);
|
||||
float32x4_t r;
|
||||
uint32x4_t cmp;
|
||||
float32_t in;
|
||||
#endif
|
||||
|
||||
int32x4_t cvt;
|
||||
int16x4_t outV;
|
||||
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 32768 */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
|
||||
pIn += 4;
|
||||
|
||||
cvt = vcvtq_n_s32_f32(inV,15);
|
||||
outV = vqmovn_s32(cvt);
|
||||
|
||||
vst1_s16(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
|
||||
cvt = vcvtq_n_s32_f32(inV,15);
|
||||
outV = vqmovn_s32(cvt);
|
||||
|
||||
vst1_s16(pDst, outV);
|
||||
pDst += 4;
|
||||
pIn += 4;
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 32768 */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_float_to_q15(
|
||||
const float32_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const float32_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 32768 */
|
||||
|
||||
/* convert from float to Q15 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 32768 */
|
||||
|
||||
/* convert from float to Q15 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 32768.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q15_t) (__SSAT((q31_t) (in), 16));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* Convert from float to q15 and then store the results in the destination buffer */
|
||||
*pDst++ = (q15_t) __SSAT((q31_t) (*pIn++ * 32768.0f), 16);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of float_to_x group
|
||||
*/
|
314
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q31.c
Normal file
314
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q31.c
Normal file
|
@ -0,0 +1,314 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_float_to_q31.c
|
||||
* Description: Converts the elements of the floating-point vector to Q31 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup float_to_x Convert 32-bit floating point value
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup float_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the floating-point vector to Q31 vector.
|
||||
@param[in] pSrc points to the floating-point input vector
|
||||
@param[out] pDst points to the Q31 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q31_t)(pSrc[n] * 2147483648); 0 <= n < blockSize.
|
||||
</pre>
|
||||
|
||||
@par Scaling and Overflow Behavior
|
||||
The function uses saturating arithmetic.
|
||||
Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] are saturated.
|
||||
|
||||
@note
|
||||
In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
|
||||
defined in the preprocessor section of project options.
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_float_to_q31(
|
||||
const float32_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
float32_t maxQ = (float32_t) Q31_MAX;
|
||||
f32x4_t vecDst;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif
|
||||
|
||||
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
vecDst = vldrwq_f32(pSrc);
|
||||
/* C = A * 2147483648 */
|
||||
/* convert from float to Q31 and then store the results in the destination buffer */
|
||||
vecDst = vmulq(vecDst, maxQ);
|
||||
|
||||
vstrwq_s32(pDst, vcvtaq_s32_f32(vecDst));
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pSrc += 4;
|
||||
pDst += 4;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 2147483648 */
|
||||
|
||||
/* convert from float to Q31 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pSrc++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pSrc++ * 2147483648.0f));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON)
|
||||
void arm_float_to_q31(
|
||||
const float32_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const float32_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
float32x4_t inV;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
float32x4_t zeroV = vdupq_n_f32(0.0f);
|
||||
float32x4_t pHalf = vdupq_n_f32(0.5f / 2147483648.0f);
|
||||
float32x4_t mHalf = vdupq_n_f32(-0.5f / 2147483648.0f);
|
||||
float32x4_t r;
|
||||
uint32x4_t cmp;
|
||||
#endif
|
||||
|
||||
int32x4_t outV;
|
||||
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
/* C = A * 32768 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
|
||||
pIn += 4;
|
||||
|
||||
outV = vcvtq_n_s32_f32(inV,31);
|
||||
|
||||
vst1q_s32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
|
||||
outV = vcvtq_n_s32_f32(inV,31);
|
||||
|
||||
vst1q_s32(pDst, outV);
|
||||
pDst += 4;
|
||||
pIn += 4;
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_float_to_q31(
|
||||
const float32_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const float32_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 2147483648 */
|
||||
|
||||
/* convert from float to Q31 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 2147483648 */
|
||||
|
||||
/* convert from float to Q31 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 2147483648.0f);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (in));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 2147483648 */
|
||||
/* Convert from float to Q31 and then store the results in the destination buffer */
|
||||
*pDst++ = clip_q63_to_q31((q63_t) (*pIn++ * 2147483648.0f));
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of float_to_x group
|
||||
*/
|
330
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q7.c
Normal file
330
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_float_to_q7.c
Normal file
|
@ -0,0 +1,330 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_float_to_q7.c
|
||||
* Description: Converts the elements of the floating-point vector to Q7 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup float_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @brief Converts the elements of the floating-point vector to Q7 vector.
|
||||
* @param[in] *pSrc points to the floating-point input vector
|
||||
* @param[out] *pDst points to the Q7 output vector
|
||||
* @param[in] blockSize length of the input vector
|
||||
* @return none.
|
||||
*
|
||||
*\par Description:
|
||||
* \par
|
||||
* The equation used for the conversion process is:
|
||||
* <pre>
|
||||
* pDst[n] = (q7_t)(pSrc[n] * 128); 0 <= n < blockSize.
|
||||
* </pre>
|
||||
* \par Scaling and Overflow Behavior:
|
||||
* \par
|
||||
* The function uses saturating arithmetic.
|
||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||
* \note
|
||||
* In order to apply rounding, the library should be rebuilt with the ARM_MATH_ROUNDING macro
|
||||
* defined in the preprocessor section of project options.
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_float_to_q7(
|
||||
const float32_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
float32_t maxQ = powf(2.0, 7);
|
||||
f32x4x4_t tmp;
|
||||
q15x8_t evVec, oddVec;
|
||||
q7x16_t vecDst;
|
||||
float32_t const *pSrcVec;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif
|
||||
|
||||
pSrcVec = (float32_t const *) pSrc;
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U) {
|
||||
tmp = vld4q(pSrcVec);
|
||||
pSrcVec += 16;
|
||||
/*
|
||||
* C = A * 128.0
|
||||
* convert from float to q7 and then store the results in the destination buffer
|
||||
*/
|
||||
tmp.val[0] = vmulq(tmp.val[0], maxQ);
|
||||
tmp.val[1] = vmulq(tmp.val[1], maxQ);
|
||||
tmp.val[2] = vmulq(tmp.val[2], maxQ);
|
||||
tmp.val[3] = vmulq(tmp.val[3], maxQ);
|
||||
|
||||
/*
|
||||
* convert and pack evens
|
||||
*/
|
||||
evVec = vqmovnbq(evVec, vcvtaq_s32_f32(tmp.val[0]));
|
||||
evVec = vqmovntq(evVec, vcvtaq_s32_f32(tmp.val[2]));
|
||||
/*
|
||||
* convert and pack odds
|
||||
*/
|
||||
oddVec = vqmovnbq(oddVec, vcvtaq_s32_f32(tmp.val[1]));
|
||||
oddVec = vqmovntq(oddVec, vcvtaq_s32_f32(tmp.val[3]));
|
||||
/*
|
||||
* merge
|
||||
*/
|
||||
vecDst = vqmovnbq(vecDst, evVec);
|
||||
vecDst = vqmovntq(vecDst, oddVec);
|
||||
|
||||
vst1q(pDst, vecDst);
|
||||
pDst += 16;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 0xF;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 128 */
|
||||
|
||||
/* Convert from float to q7 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pSrcVec++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q7_t) __SSAT((q31_t) (*pSrcVec++ * 128.0f), 8);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON)
|
||||
void arm_float_to_q7(
|
||||
const float32_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const float32_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
float32x4_t inV;
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
float32x4_t zeroV = vdupq_n_f32(0.0f);
|
||||
float32x4_t pHalf = vdupq_n_f32(0.5f / 128.0f);
|
||||
float32x4_t mHalf = vdupq_n_f32(-0.5f / 128.0f);
|
||||
float32x4_t r;
|
||||
uint32x4_t cmp;
|
||||
#endif
|
||||
|
||||
int16x4_t cvt1,cvt2;
|
||||
int8x8_t outV;
|
||||
|
||||
blkCnt = blockSize >> 3U;
|
||||
|
||||
/* Compute 8 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 7 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 128 */
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
inV = vld1q_f32(pIn);
|
||||
cmp = vcgtq_f32(inV,zeroV);
|
||||
r = vbslq_f32(cmp,pHalf,mHalf);
|
||||
inV = vaddq_f32(inV, r);
|
||||
cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
|
||||
vst1_s8(pDst, outV);
|
||||
pDst += 8;
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 128 */
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
inV = vld1q_f32(pIn);
|
||||
cvt1 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
inV = vld1q_f32(pIn);
|
||||
cvt2 = vqmovn_s32(vcvtq_n_s32_f32(inV,7));
|
||||
pIn += 4;
|
||||
|
||||
outV = vqmovn_s16(vcombine_s16(cvt1,cvt2));
|
||||
|
||||
vst1_s8(pDst, outV);
|
||||
pDst += 8;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 7;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
/* C = A * 128 */
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
in = *pIn++;
|
||||
in = (in * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
#else
|
||||
|
||||
/* C = A * 128 */
|
||||
/* Convert from float to q7 and then store the results in the destination buffer */
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_float_to_q7(
|
||||
const float32_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const float32_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
float32_t in;
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 128 */
|
||||
|
||||
/* Convert from float to q7 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
*pDst++ = __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = A * 128 */
|
||||
|
||||
/* Convert from float to q7 and store result in destination buffer */
|
||||
#ifdef ARM_MATH_ROUNDING
|
||||
|
||||
in = (*pIn++ * 128);
|
||||
in += in > 0.0f ? 0.5f : -0.5f;
|
||||
*pDst++ = (q7_t) (__SSAT((q15_t) (in), 8));
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q7_t) __SSAT((q31_t) (*pIn++ * 128.0f), 8);
|
||||
|
||||
#endif /* #ifdef ARM_MATH_ROUNDING */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of float_to_x group
|
||||
*/
|
119
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_heap_sort_f32.c
Normal file
119
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_heap_sort_f32.c
Normal file
|
@ -0,0 +1,119 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_heap_sort_f32.c
|
||||
* Description: Floating point heap sort
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
#include "arm_sorting.h"
|
||||
|
||||
|
||||
|
||||
static void arm_heapify(float32_t * pSrc, uint32_t n, uint32_t i, uint8_t dir)
{
    /*
     * Sift the element at index i down through the first n elements of pSrc
     * until neither child is selected over it. A child is selected when
     * dir == (child > current pick): with dir == 1 a strictly greater child
     * wins, with dir == 0 a child that is not greater wins.
     */
    uint32_t node = i;

    for (;;)
    {
        uint32_t  pick  = node;          /* Index that should sit at this node */
        uint32_t  left  = 2*node + 1;    /* Left child index */
        uint32_t  right = 2*node + 2;    /* Right child index */
        float32_t held;

        if (left < n && dir==(pSrc[left] > pSrc[pick]) )
            pick = left;

        /* The right child is compared against whichever index is picked so far. */
        if (right < n && dir==(pSrc[right] > pSrc[pick]) )
            pick = right;

        /* Nothing to move at this level: done. */
        if (pick == node)
            break;

        /* Exchange node and picked child, then continue from the child's slot. */
        held        = pSrc[node];
        pSrc[node]  = pSrc[pick];
        pSrc[pick]  = held;

        node = pick;
    }
}
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @private
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*
|
||||
* @par Algorithm
|
||||
* The heap sort algorithm is a comparison algorithm that
|
||||
* divides the input array into a sorted and an unsorted region,
|
||||
* and shrinks the unsorted region by extracting the largest
|
||||
* element and moving it to the sorted region. A heap data
|
||||
* structure is used to find the maximum.
|
||||
*
|
||||
* @par It's an in-place algorithm. In order to obtain an out-of-place
|
||||
* function, a memcpy of the source vector is performed.
|
||||
*/
|
||||
void arm_heap_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t * pA;
|
||||
int32_t i;
|
||||
float32_t temp;
|
||||
|
||||
if(pSrc != pDst) // out-of-place
|
||||
{
|
||||
memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
|
||||
pA = pDst;
|
||||
}
|
||||
else
|
||||
pA = pSrc;
|
||||
|
||||
// Build the heap array so that the largest value is the root
|
||||
for (i = blockSize/2 - 1; i >= 0; i--)
|
||||
arm_heapify(pA, blockSize, i, S->dir);
|
||||
|
||||
for (i = blockSize - 1; i >= 0; i--)
|
||||
{
|
||||
// Swap
|
||||
temp = pA[i];
|
||||
pA[i] = pA[0];
|
||||
pA[0] = temp;
|
||||
|
||||
// Restore heap order
|
||||
arm_heapify(pA, i, 0, S->dir);
|
||||
}
|
||||
}
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
|
@ -0,0 +1,93 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_insertion_sort_f32.c
|
||||
* Description: Floating point insertion sort
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
#include "arm_sorting.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @private
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*
|
||||
* @par Algorithm
|
||||
* The insertion sort is a simple sorting algorithm that
|
||||
* reads all the elements of the input array and removes one element
|
||||
* at a time, finds the location it belongs in the final sorted list,
|
||||
* and inserts it there.
|
||||
*
|
||||
* @par It's an in-place algorithm. In order to obtain an out-of-place
|
||||
* function, a memcpy of the source vector is performed.
|
||||
*/
|
||||
|
||||
void arm_insertion_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t *pSrc,
|
||||
float32_t* pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t * pA;
|
||||
uint8_t dir = S->dir;
|
||||
uint32_t i, j;
|
||||
float32_t temp;
|
||||
|
||||
if(pSrc != pDst) // out-of-place
|
||||
{
|
||||
memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
|
||||
pA = pDst;
|
||||
}
|
||||
else
|
||||
pA = pSrc;
|
||||
|
||||
// Real all the element of the input array
|
||||
for(i=0; i<blockSize; i++)
|
||||
{
|
||||
// Move the i-th element to the right position
|
||||
for (j = i; j>0 && dir==(pA[j]<pA[j-1]); j--)
|
||||
{
|
||||
// Swap
|
||||
temp = pA[j];
|
||||
pA[j] = pA[j-1];
|
||||
pA[j-1] = temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
127
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
Normal file
127
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_merge_sort_f32.c
Normal file
|
@ -0,0 +1,127 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_merge_sort_f32.c
|
||||
* Description: Floating point merge sort
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
#include "arm_sorting.h"
|
||||
|
||||
|
||||
static void topDownMerge(float32_t * pA, uint32_t begin, uint32_t middle, uint32_t end, float32_t * pB, uint8_t dir)
{
    /* Merge the two runs pA[begin:middle-1] (left) and pA[middle:end-1] (right)
     * into pB[begin:end-1].
     */
    uint32_t left  = begin;    /* Next unread element of the left run */
    uint32_t right = middle;   /* Next unread element of the right run */
    uint32_t out   = begin;    /* Next slot to write in pB */

    while (out < end)
    {
        uint8_t takeLeft;

        if (left >= middle)
            takeLeft = 0;      /* Left run exhausted */
        else if (right >= end)
            takeLeft = 1;      /* Right run exhausted */
        else
            takeLeft = (dir==(pA[left] <= pA[right]));

        if (takeLeft)
        {
            pB[out] = pA[left];
            left++;
        }
        else
        {
            pB[out] = pA[right];
            right++;
        }

        out++;
    }
}
|
||||
|
||||
static void arm_merge_sort_core_f32(float32_t * pB, uint32_t begin, uint32_t end, float32_t * pA, uint8_t dir)
{
    /* Sort the run [begin, end): both halves are first sorted into pB (the two
     * buffers swap roles on each recursion level), then merged from pB into pA.
     */
    int32_t split;

    /* Runs of fewer than two elements are left untouched. */
    if((int32_t)end - (int32_t)begin < 2 )
        return;

    split = (end + begin) / 2;                              /* Middle point of the run */

    arm_merge_sort_core_f32(pA, begin, split, pB, dir);     /* Left half  */
    arm_merge_sort_core_f32(pA, split, end, pB, dir);       /* Right half */

    topDownMerge(pB, begin, split, end, pA, dir);
}
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*
|
||||
* @par Algorithm
|
||||
* The merge sort algorithm is a comparison algorithm that
|
||||
* divides the input array into sublists and merges them to produce
|
||||
* longer sorted sublists until there is only one list remaining.
|
||||
*
|
||||
* @par A work array is always needed. It must be allocated by the user
|
||||
* linked to the instance at initialization time.
|
||||
*
|
||||
* @par It's an in-place algorithm. In order to obtain an out-of-place
|
||||
* function, a memcpy of the source vector is performed
|
||||
*/
|
||||
|
||||
|
||||
void arm_merge_sort_f32(
|
||||
const arm_merge_sort_instance_f32 * S,
|
||||
float32_t *pSrc,
|
||||
float32_t *pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t * pA;
|
||||
|
||||
/* Out-of-place */
|
||||
if(pSrc != pDst)
|
||||
{
|
||||
memcpy(pDst, pSrc, blockSize*sizeof(float32_t));
|
||||
pA = pDst;
|
||||
}
|
||||
else
|
||||
pA = pSrc;
|
||||
|
||||
/* A working buffer is needed */
|
||||
memcpy(S->buffer, pSrc, blockSize*sizeof(float32_t));
|
||||
|
||||
arm_merge_sort_core_f32(S->buffer, 0, blockSize, pA, S->dir);
|
||||
}
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
|
@ -0,0 +1,53 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_merge_sort_init_f32.c
|
||||
* Description: Floating point merge sort initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @param[in,out] S points to an instance of the sorting structure.
|
||||
* @param[in] dir Sorting order.
|
||||
* @param[in] buffer Working buffer.
|
||||
*/
|
||||
void arm_merge_sort_init_f32(arm_merge_sort_instance_f32 * S, arm_sort_dir dir, float32_t * buffer)
{
    /* Record the caller-supplied working buffer and the sorting order in the instance. */
    S->buffer = buffer;
    S->dir    = dir;
}
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
155
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
Normal file
155
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_f16.c
Normal file
|
@ -0,0 +1,155 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q15_to_f16.c
|
||||
* Description: Converts the elements of the Q15 vector to f16 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup q15_to_x Convert 16-bit fixed point value
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q15_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q15 vector to f16 vector.
|
||||
@param[in] pSrc points to the Q15 input vector
|
||||
@param[out] pDst points to the f16 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (float16_t) pSrc[n] / 32768; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
void arm_q15_to_f16(
|
||||
const q15_t * pSrc,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
int32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecDst;
|
||||
q15_t const *pSrcVec;
|
||||
|
||||
pSrcVec = (q15_t const *) pSrc;
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0)
|
||||
{
|
||||
/* C = (float16_t) A / 32768 */
|
||||
/* convert from q15 to float and then store the results in the destination buffer */
|
||||
vecDst = vld1q(pSrcVec); pSrcVec += 8;
|
||||
vstrhq(pDst, vcvtq_n_f16_s16(vecDst, 15)); pDst += 8;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
* (will be merged thru tail predication)
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
if (blkCnt > 0)
|
||||
{
|
||||
mve_pred16_t p0 = vctp16q(blkCnt);
|
||||
vecDst = vld1q(pSrcVec); pSrcVec += 8;
|
||||
vstrhq_p(pDst, vcvtq_n_f16_s16(vecDst, 15), p0);
|
||||
}
|
||||
}
|
||||
#else
|
||||
|
||||
void arm_q15_to_f16(
|
||||
const q15_t * pSrc,
|
||||
float16_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q15_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float16_t) A / 32768 */
|
||||
|
||||
/* Convert from q15 to float and store result in destination buffer */
|
||||
*pDst++ = ((_Float16) * pIn++ / 32768.0f16);
|
||||
*pDst++ = ((_Float16) * pIn++ / 32768.0f16);
|
||||
*pDst++ = ((_Float16) * pIn++ / 32768.0f16);
|
||||
*pDst++ = ((_Float16) * pIn++ / 32768.0f16);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float16_t) A / 32768 */
|
||||
|
||||
/* Convert from q15 to float and store result in destination buffer */
|
||||
*pDst++ = ((_Float16) *pIn++ / 32768.0f16);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of q15_to_x group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
207
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
Normal file
207
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_float.c
Normal file
|
@ -0,0 +1,207 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q15_to_float.c
|
||||
* Description: Converts the elements of the Q15 vector to floating-point vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup q15_to_x Convert 16-bit fixed point value
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q15_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q15 vector to floating-point vector.
|
||||
@param[in] pSrc points to the Q15 input vector
|
||||
@param[out] pDst points to the floating-point output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (float32_t) pSrc[n] / 32768; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q15_to_float(
|
||||
const q15_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
|
||||
q15x8_t vecDst;
|
||||
q15_t const *pSrcVec;
|
||||
|
||||
pSrcVec = (q15_t const *) pSrc;
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
/* convert from q15 to float and then store the results in the destination buffer */
|
||||
vecDst = vldrhq_s32(pSrcVec);
|
||||
pSrcVec += 4;
|
||||
vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 15));
|
||||
pDst += 4;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
|
||||
/* Convert from q15 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pSrcVec++ / 32768.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_q15_to_float(
|
||||
const q15_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const q15_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
int16x8_t inV;
|
||||
int32x4_t inV0, inV1;
|
||||
float32x4_t outV;
|
||||
|
||||
blkCnt = blockSize >> 3U;
|
||||
|
||||
/* Compute 8 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 7 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
/* convert from q15 to float and then store the results in the destination buffer */
|
||||
inV = vld1q_s16(pIn);
|
||||
pIn += 8;
|
||||
|
||||
inV0 = vmovl_s16(vget_low_s16(inV));
|
||||
inV1 = vmovl_s16(vget_high_s16(inV));
|
||||
|
||||
outV = vcvtq_n_f32_s32(inV0,15);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inV1,15);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 8, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 7;
|
||||
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
/* convert from q15 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q15_to_float(
|
||||
const q15_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q15_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
|
||||
/* Convert from q15 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 32768.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
|
||||
/* Convert from q15 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pIn++ / 32768.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of q15_to_x group
|
||||
*/
|
182
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q31.c
Normal file
182
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q31.c
Normal file
|
@ -0,0 +1,182 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q15_to_q31.c
|
||||
* Description: Converts the elements of the Q15 vector to Q31 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q15_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q15 vector to Q31 vector.
|
||||
@param[in] pSrc points to the Q15 input vector
|
||||
@param[out] pDst points to the Q31 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q31_t) pSrc[n] << 16; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q15_to_q31(
|
||||
const q15_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
|
||||
uint32_t blkCnt;
|
||||
|
||||
q31x4_t vecDst;
|
||||
|
||||
blkCnt = blockSize>> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
/* C = (q31_t)A << 16 */
|
||||
/* convert from q15 to q31 and then store the results in the destination buffer */
|
||||
/* load q15 + 32-bit widening */
|
||||
vecDst = vldrhq_s32((q15_t const *) pSrc);
|
||||
vecDst = vshlq_n(vecDst, 16);
|
||||
vstrwq_s32(pDst, vecDst);
|
||||
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pDst += 4;
|
||||
pSrc += 4;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t) A << 16 */
|
||||
|
||||
/* Convert from q15 to q31 and store result in destination buffer */
|
||||
*pDst++ = (q31_t) *pSrc++ << 16;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q15_to_q31(
|
||||
const q15_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q15_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2, out3, out4;
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t)A << 16 */
|
||||
|
||||
/* Convert from q15 to q31 and store result in destination buffer */
|
||||
in1 = read_q15x2_ia (&pIn);
|
||||
in2 = read_q15x2_ia (&pIn);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
/* extract lower 16 bits to 32 bit result */
|
||||
out1 = in1 << 16U;
|
||||
/* extract upper 16 bits to 32 bit result */
|
||||
out2 = in1 & 0xFFFF0000;
|
||||
/* extract lower 16 bits to 32 bit result */
|
||||
out3 = in2 << 16U;
|
||||
/* extract upper 16 bits to 32 bit result */
|
||||
out4 = in2 & 0xFFFF0000;
|
||||
|
||||
#else
|
||||
|
||||
/* extract upper 16 bits to 32 bit result */
|
||||
out1 = in1 & 0xFFFF0000;
|
||||
/* extract lower 16 bits to 32 bit result */
|
||||
out2 = in1 << 16U;
|
||||
/* extract upper 16 bits to 32 bit result */
|
||||
out3 = in2 & 0xFFFF0000;
|
||||
/* extract lower 16 bits to 32 bit result */
|
||||
out4 = in2 << 16U;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
*pDst++ = out1;
|
||||
*pDst++ = out2;
|
||||
*pDst++ = out3;
|
||||
*pDst++ = out4;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t) A << 16 */
|
||||
|
||||
/* Convert from q15 to q31 and store result in destination buffer */
|
||||
*pDst++ = (q31_t) *pIn++ << 16;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of q15_to_x group
|
||||
*/
|
190
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q7.c
Normal file
190
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q15_to_q7.c
Normal file
|
@ -0,0 +1,190 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q15_to_q7.c
|
||||
* Description: Converts the elements of the Q15 vector to Q7 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q15_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q15 vector to Q7 vector.
|
||||
@param[in] pSrc points to the Q15 input vector
|
||||
@param[out] pDst points to the Q7 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q7_t) (pSrc[n] >> 8); 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q15_to_q7(
|
||||
const q15_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8x2_t tmp;
|
||||
q15_t const *pSrcVec;
|
||||
q7x16_t vecDst;
|
||||
|
||||
|
||||
pSrcVec = (q15_t const *) pSrc;
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 8 */
|
||||
/* convert from q15 to q7 and then store the results in the destination buffer */
|
||||
tmp = vld2q(pSrcVec);
|
||||
pSrcVec += 16;
|
||||
vecDst = vqshrnbq_n_s16(vecDst, tmp.val[0], 8);
|
||||
vecDst = vqshrntq_n_s16(vecDst, tmp.val[1], 8);
|
||||
vst1q(pDst, vecDst);
|
||||
pDst += 16;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 0xF;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 8 */
|
||||
|
||||
/* Convert from q15 to q7 and store result in destination buffer */
|
||||
*pDst++ = (q7_t) (*pSrcVec++ >> 8);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q15_to_q7(
|
||||
const q15_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q15_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2;
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 8 */
|
||||
|
||||
/* Convert from q15 to q7 and store result in destination buffer */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
in1 = read_q15x2_ia (&pIn);
|
||||
in2 = read_q15x2_ia (&pIn);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
out1 = __PKHTB(in2, in1, 16);
|
||||
out2 = __PKHBT(in2, in1, 16);
|
||||
|
||||
#else
|
||||
|
||||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHBT(in1, in2, 16);
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* rotate packed value by 24 */
|
||||
out2 = ((uint32_t) out2 << 8) | ((uint32_t) out2 >> 24);
|
||||
|
||||
/* anding with 0xff00ff00 to get two 8 bit values */
|
||||
out1 = out1 & 0xFF00FF00;
|
||||
/* anding with 0x00ff00ff to get two 8 bit values */
|
||||
out2 = out2 & 0x00FF00FF;
|
||||
|
||||
/* oring two values(contains two 8 bit values) to get four packed 8 bit values */
|
||||
out1 = out1 | out2;
|
||||
|
||||
/* store 4 samples at a time to destiantion buffer */
|
||||
write_q7x4_ia (&pDst, out1);
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) A >> 8 */
|
||||
|
||||
/* Convert from q15 to q7 and store result in destination buffer */
|
||||
*pDst++ = (q7_t) (*pIn++ >> 8);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of q15_to_x group
|
||||
*/
|
202
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_float.c
Normal file
202
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_float.c
Normal file
|
@ -0,0 +1,202 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q31_to_float.c
|
||||
* Description: Converts the elements of the Q31 vector to floating-point vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup q31_to_x Convert 32-bit fixed point value
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q31_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q31 vector to floating-point vector.
|
||||
@param[in] pSrc points to the Q31 input vector
|
||||
@param[out] pDst points to the floating-point output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (float32_t) pSrc[n] / 2147483648; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q31_to_float(
|
||||
const q31_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4_t vecDst;
|
||||
q31_t const *pSrcVec;
|
||||
|
||||
pSrcVec = (q31_t const *) pSrc;
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
/* convert from q31 to float and then store the results in the destination buffer */
|
||||
vecDst = vld1q(pSrcVec);
|
||||
pSrcVec += 4;
|
||||
vstrwq(pDst, vcvtq_n_f32_s32(vecDst, 31));
|
||||
pDst += 4;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
* (will be merged thru tail predication)
|
||||
*/
|
||||
blkCnt = blockSize & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
|
||||
/* Convert from q31 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pSrcVec++ / 2147483648.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON_EXPERIMENTAL)
|
||||
void arm_q31_to_float(
|
||||
const q31_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const q31_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
int32x4_t inV;
|
||||
float32x4_t outV;
|
||||
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
/* Compute 4 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 3 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
/* Convert from q31 to float and then store the results in the destination buffer */
|
||||
inV = vld1q_s32(pIn);
|
||||
pIn += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inV,31);
|
||||
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 3;
|
||||
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
/* Convert from q31 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 2147483648.0f);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q31_to_float(
|
||||
const q31_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const q31_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
|
||||
/* Convert from q31 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 2147483648 */
|
||||
|
||||
/* Convert from q31 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) *pIn++ / 2147483648.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON_EXPERIMENTAL) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of q31_to_x group
|
||||
*/
|
181
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q15.c
Normal file
181
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q15.c
Normal file
|
@ -0,0 +1,181 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q31_to_q15.c
|
||||
* Description: Converts the elements of the Q31 vector to Q15 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q31_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q31 vector to Q15 vector.
|
||||
@param[in] pSrc points to the Q31 input vector
|
||||
@param[out] pDst points to the Q15 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q15_t) (pSrc[n] >> 16); 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q31_to_q15(
|
||||
const q31_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4x2_t tmp;
|
||||
q15x8_t vecDst;
|
||||
q31_t const *pSrcVec;
|
||||
|
||||
|
||||
pSrcVec = (q31_t const *) pSrc;
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A >> 16 */
|
||||
/* convert from q31 to q15 and then store the results in the destination buffer */
|
||||
tmp = vld2q(pSrcVec);
|
||||
pSrcVec += 8;
|
||||
vecDst = vshrnbq_n_s32(vecDst, tmp.val[0], 16);
|
||||
vecDst = vshrntq_n_s32(vecDst, tmp.val[1], 16);
|
||||
vst1q(pDst, vecDst);
|
||||
pDst += 8;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 7;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) (A >> 16) */
|
||||
|
||||
/* Convert from q31 to q15 and store result in destination buffer */
|
||||
*pDst++ = (q15_t) (*pSrcVec++ >> 16);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_q31_to_q15(
|
||||
const q31_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q31_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
|
||||
q31_t in1, in2, in3, in4;
|
||||
q31_t out1, out2;
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) (A >> 16) */
|
||||
|
||||
/* Convert from q31 to q15 and store result in destination buffer */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
in1 = *pIn++;
|
||||
in2 = *pIn++;
|
||||
in3 = *pIn++;
|
||||
in4 = *pIn++;
|
||||
|
||||
/* pack two higher 16-bit values from two 32-bit values */
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out1 = __PKHTB(in2, in1, 16);
|
||||
out2 = __PKHTB(in4, in3, 16);
|
||||
#else
|
||||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHTB(in3, in4, 16);
|
||||
#endif /* #ifdef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
write_q15x2_ia (&pDst, out1);
|
||||
write_q15x2_ia (&pDst, out2);
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) (A >> 16) */
|
||||
|
||||
/* Convert from q31 to q15 and store result in destination buffer */
|
||||
*pDst++ = (q15_t) (*pIn++ >> 16);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of q31_to_x group
|
||||
*/
|
169
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q7.c
Normal file
169
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q31_to_q7.c
Normal file
|
@ -0,0 +1,169 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q31_to_q7.c
|
||||
* Description: Converts the elements of the Q31 vector to Q7 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q31_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q31 vector to Q7 vector.
|
||||
@param[in] pSrc points to the Q31 input vector
|
||||
@param[out] pDst points to the Q7 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q7_t) pSrc[n] >> 24; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q31_to_q7(
|
||||
const q31_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q31x4x4_t tmp;
|
||||
q15x8_t evVec, oddVec;
|
||||
q7x16_t vecDst;
|
||||
q31_t const *pSrcVec;
|
||||
|
||||
pSrcVec = (q31_t const *) pSrc;
|
||||
blkCnt = blockSize >> 4;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
tmp = vld4q(pSrcVec);
|
||||
pSrcVec += 16;
|
||||
/* C = (q7_t) A >> 24 */
|
||||
/* convert from q31 to q7 and then store the results in the destination buffer */
|
||||
/*
|
||||
* narrow and pack evens
|
||||
*/
|
||||
evVec = vshrnbq_n_s32(evVec, tmp.val[0], 16);
|
||||
evVec = vshrntq_n_s32(evVec, tmp.val[2], 16);
|
||||
/*
|
||||
* narrow and pack odds
|
||||
*/
|
||||
oddVec = vshrnbq_n_s32(oddVec, tmp.val[1], 16);
|
||||
oddVec = vshrntq_n_s32(oddVec, tmp.val[3], 16);
|
||||
/*
|
||||
* narrow & merge
|
||||
*/
|
||||
vecDst = vshrnbq_n_s16(vecDst, evVec, 8);
|
||||
vecDst = vshrntq_n_s16(vecDst, oddVec, 8);
|
||||
|
||||
vst1q(pDst, vecDst);
|
||||
pDst += 16;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
/*
|
||||
* tail
|
||||
*/
|
||||
blkCnt = blockSize & 0xF;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) (A >> 24) */
|
||||
|
||||
/* Convert from q31 to q7 and store result in destination buffer */
|
||||
*pDst++ = (q7_t) (*pSrcVec++ >> 24);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q31_to_q7(
|
||||
const q31_t * pSrc,
|
||||
q7_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q31_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
q7_t out1, out2, out3, out4;
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) (A >> 24) */
|
||||
|
||||
/* Convert from q31 to q7 and store result in destination buffer */
|
||||
|
||||
out1 = (q7_t) (*pIn++ >> 24);
|
||||
out2 = (q7_t) (*pIn++ >> 24);
|
||||
out3 = (q7_t) (*pIn++ >> 24);
|
||||
out4 = (q7_t) (*pIn++ >> 24);
|
||||
write_q7x4_ia (&pDst, __PACKq7(out1, out2, out3, out4));
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q7_t) (A >> 24) */
|
||||
|
||||
/* Convert from q31 to q7 and store result in destination buffer */
|
||||
*pDst++ = (q7_t) (*pIn++ >> 24);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of q31_to_x group
|
||||
*/
|
218
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
Normal file
218
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_float.c
Normal file
|
@ -0,0 +1,218 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q7_to_float.c
|
||||
* Description: Converts the elements of the Q7 vector to floating-point vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
* @defgroup q7_to_x Convert 8-bit fixed point value
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q7_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q7 vector to floating-point vector.
|
||||
@param[in] pSrc points to the Q7 input vector
|
||||
@param[out] pDst points to the floating-point output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (float32_t) pSrc[n] / 128; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q7_to_float(
|
||||
const q7_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q7x16_t vecDst;
|
||||
q7_t const *pSrcVec;
|
||||
|
||||
pSrcVec = (q7_t const *) pSrc;
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 32768 */
|
||||
/* convert from q7 to float and then store the results in the destination buffer */
|
||||
vecDst = vldrbq_s32(pSrcVec);
|
||||
pSrcVec += 4;
|
||||
vstrwq(pDst, vcvtq_n_f32_s32((int32x4_t)vecDst, 7));
|
||||
pDst += 4;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
|
||||
/* Convert from q7 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) * pSrcVec++ / 128.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON)
|
||||
void arm_q7_to_float(
|
||||
const q7_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
const q7_t *pIn = pSrc; /* Src pointer */
|
||||
uint32_t blkCnt; /* loop counter */
|
||||
|
||||
int8x16_t inV;
|
||||
int16x8_t inVLO, inVHI;
|
||||
int32x4_t inVLL, inVLH, inVHL, inVHH;
|
||||
float32x4_t outV;
|
||||
|
||||
blkCnt = blockSize >> 4U;
|
||||
|
||||
/* Compute 16 outputs at a time.
|
||||
** a second loop below computes the remaining 1 to 15 samples. */
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
/* Convert from q7 to float and then store the results in the destination buffer */
|
||||
inV = vld1q_s8(pIn);
|
||||
pIn += 16;
|
||||
|
||||
inVLO = vmovl_s8(vget_low_s8(inV));
|
||||
inVHI = vmovl_s8(vget_high_s8(inV));
|
||||
|
||||
inVLL = vmovl_s16(vget_low_s16(inVLO));
|
||||
inVLH = vmovl_s16(vget_high_s16(inVLO));
|
||||
inVHL = vmovl_s16(vget_low_s16(inVHI));
|
||||
inVHH = vmovl_s16(vget_high_s16(inVHI));
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVLL,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVLH,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVHL,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
outV = vcvtq_n_f32_s32(inVHH,7);
|
||||
vst1q_f32(pDst, outV);
|
||||
pDst += 4;
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* If the blockSize is not a multiple of 16, compute any remaining output samples here.
|
||||
** No loop unrolling is used. */
|
||||
blkCnt = blockSize & 0xF;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
/* Convert from q7 to float and then store the results in the destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
|
||||
/* Decrement the loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
#else
|
||||
void arm_q7_to_float(
|
||||
const q7_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q7_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
|
||||
/* Convert from q7 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (float32_t) A / 128 */
|
||||
|
||||
/* Convert from q7 to float and store result in destination buffer */
|
||||
*pDst++ = ((float32_t) * pIn++ / 128.0f);
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* #if defined(ARM_MATH_NEON) */
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
@} end of q7_to_x group
|
||||
*/
|
188
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q15.c
Normal file
188
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q15.c
Normal file
|
@ -0,0 +1,188 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q7_to_q15.c
|
||||
* Description: Converts the elements of the Q7 vector to Q15 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q7_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q7 vector to Q15 vector.
|
||||
@param[in] pSrc points to the Q7 input vector
|
||||
@param[out] pDst points to the Q15 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q15_t) pSrc[n] << 8; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q7_to_q15(
|
||||
const q7_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
|
||||
uint32_t blkCnt; /* loop counters */
|
||||
q15x8_t vecDst;
|
||||
q7_t const *pSrcVec;
|
||||
|
||||
|
||||
pSrcVec = (q7_t const *) pSrc;
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A << 8 */
|
||||
/* convert from q7 to q15 and then store the results in the destination buffer */
|
||||
/* load q7 + 32-bit widening */
|
||||
vecDst = vldrbq_s16(pSrcVec);
|
||||
pSrcVec += 8;
|
||||
vecDst = vecDst << 8;
|
||||
vstrhq(pDst, vecDst);
|
||||
pDst += 8;
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
*/
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 7;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A << 8 */
|
||||
|
||||
/* Convert from q7 to q15 and store result in destination buffer */
|
||||
*pDst++ = (q15_t) * pSrcVec++ << 8;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#else
|
||||
void arm_q7_to_q15(
|
||||
const q7_t * pSrc,
|
||||
q15_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q7_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL) && defined (ARM_MATH_DSP)
|
||||
q31_t in;
|
||||
q31_t in1, in2;
|
||||
q31_t out1, out2;
|
||||
#endif
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A << 8 */
|
||||
|
||||
/* Convert from q7 to q15 and store result in destination buffer */
|
||||
#if defined (ARM_MATH_DSP)
|
||||
|
||||
in = read_q7x4_ia (&pIn);
|
||||
|
||||
/* rotatate in by 8 and extend two q7_t values to q15_t values */
|
||||
in1 = __SXTB16(__ROR(in, 8));
|
||||
|
||||
/* extend remainig two q7_t values to q15_t values */
|
||||
in2 = __SXTB16(in);
|
||||
|
||||
in1 = in1 << 8U;
|
||||
in2 = in2 << 8U;
|
||||
|
||||
in1 = in1 & 0xFF00FF00;
|
||||
in2 = in2 & 0xFF00FF00;
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
out2 = __PKHTB(in1, in2, 16);
|
||||
out1 = __PKHBT(in2, in1, 16);
|
||||
#else
|
||||
out1 = __PKHTB(in1, in2, 16);
|
||||
out2 = __PKHBT(in2, in1, 16);
|
||||
#endif
|
||||
|
||||
write_q15x2_ia (&pDst, out1);
|
||||
write_q15x2_ia (&pDst, out2);
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
*pDst++ = (q15_t) *pIn++ << 8;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_DSP) */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q15_t) A << 8 */
|
||||
|
||||
/* Convert from q7 to q15 and store result in destination buffer */
|
||||
*pDst++ = (q15_t) * pIn++ << 8;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of q7_to_x group
|
||||
*/
|
164
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q31.c
Normal file
164
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_q7_to_q31.c
Normal file
|
@ -0,0 +1,164 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_q7_to_q31.c
|
||||
* Description: Converts the elements of the Q7 vector to Q31 vector
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup q7_to_x
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
@brief Converts the elements of the Q7 vector to Q31 vector.
|
||||
@param[in] pSrc points to the Q7 input vector
|
||||
@param[out] pDst points to the Q31 output vector
|
||||
@param[in] blockSize number of samples in each vector
|
||||
@return none
|
||||
|
||||
@par Details
|
||||
The equation used for the conversion process is:
|
||||
<pre>
|
||||
pDst[n] = (q31_t) pSrc[n] << 24; 0 <= n < blockSize.
|
||||
</pre>
|
||||
*/
|
||||
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
void arm_q7_to_q31(
|
||||
const q7_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt;
|
||||
q31x4_t vecDst;
|
||||
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
|
||||
/* C = (q31_t)A << 16 */
|
||||
/* convert from q15 to q31 and then store the results in the destination buffer */
|
||||
/* load q15 + 32-bit widening */
|
||||
vecDst = vldrbq_s32((q7_t const *) pSrc);
|
||||
vecDst = vshlq_n(vecDst, 24);
|
||||
vstrwq_s32(pDst, vecDst);
|
||||
|
||||
/*
|
||||
* Decrement the blockSize loop counter
|
||||
* Advance vector source and destination pointers
|
||||
*/
|
||||
pDst += 4;
|
||||
pSrc += 4;
|
||||
blkCnt --;
|
||||
}
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t) A << 24 */
|
||||
|
||||
/* Convert from q7 to q31 and store result in destination buffer */
|
||||
*pDst++ = (q31_t) *pSrc++ << 24;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
void arm_q7_to_q31(
|
||||
const q7_t * pSrc,
|
||||
q31_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t blkCnt; /* Loop counter */
|
||||
const q7_t *pIn = pSrc; /* Source pointer */
|
||||
|
||||
#if defined (ARM_MATH_LOOPUNROLL)
|
||||
|
||||
q31_t in;
|
||||
|
||||
/* Loop unrolling: Compute 4 outputs at a time */
|
||||
blkCnt = blockSize >> 2U;
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t) A << 24 */
|
||||
|
||||
/* Convert from q7 to q31 and store result in destination buffer */
|
||||
in = read_q7x4_ia (&pIn);
|
||||
|
||||
#ifndef ARM_MATH_BIG_ENDIAN
|
||||
|
||||
*pDst++ = (__ROR(in, 8)) & 0xFF000000;
|
||||
*pDst++ = (__ROR(in, 16)) & 0xFF000000;
|
||||
*pDst++ = (__ROR(in, 24)) & 0xFF000000;
|
||||
*pDst++ = (in & 0xFF000000);
|
||||
|
||||
#else
|
||||
|
||||
*pDst++ = (in & 0xFF000000);
|
||||
*pDst++ = (__ROR(in, 24)) & 0xFF000000;
|
||||
*pDst++ = (__ROR(in, 16)) & 0xFF000000;
|
||||
*pDst++ = (__ROR(in, 8)) & 0xFF000000;
|
||||
|
||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
/* Loop unrolling: Compute remaining outputs */
|
||||
blkCnt = blockSize % 0x4U;
|
||||
|
||||
#else
|
||||
|
||||
/* Initialize blkCnt with number of samples */
|
||||
blkCnt = blockSize;
|
||||
|
||||
#endif /* #if defined (ARM_MATH_LOOPUNROLL) */
|
||||
|
||||
while (blkCnt > 0U)
|
||||
{
|
||||
/* C = (q31_t) A << 24 */
|
||||
|
||||
/* Convert from q7 to q31 and store result in destination buffer */
|
||||
*pDst++ = (q31_t) * pIn++ << 24;
|
||||
|
||||
/* Decrement loop counter */
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVEI) */
|
||||
|
||||
/**
|
||||
@} end of q7_to_x group
|
||||
*/
|
181
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_quick_sort_f32.c
Normal file
181
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_quick_sort_f32.c
Normal file
|
@ -0,0 +1,181 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_quick_sort_f32.c
|
||||
* Description: Floating point quick sort
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_sorting.h"
|
||||
|
||||
static uint32_t arm_quick_sort_partition_f32(float32_t *pSrc, int32_t first, int32_t last, uint8_t dir)
{
    /* Partition step of the quick sort over pSrc[first ... last].
     * The first element of the range is used as the pivot value.
     * Returns the index at which the two scanning indices met. */
    const float32_t pivotVal = pSrc[first];
    float32_t swapTmp;

    /* Both indices start one step outside the range because each scan
     * moves its index before looking at an element */
    int32_t lo = first - 1;
    int32_t hi = last + 1;

    /* Keep scanning and swapping until the indices meet or cross.
     * Each pass moves both indices by at least one position, and the pivot
     * value itself is an element of the range, so the scans cannot run
     * forever: an element that fails the comparison is always reached. */
    while (lo < hi)
    {
        if (dir)
        {
            /* Scan from the left past elements smaller than the pivot */
            lo++;
            while (pSrc[lo] < pivotVal && lo < last)
            {
                lo++;
            }

            /* Scan from the right past elements greater than the pivot */
            hi--;
            while (pSrc[hi] > pivotVal)
            {
                hi--;
            }
        }
        else
        {
            /* Scan from the left past elements greater than the pivot */
            lo++;
            while (pSrc[lo] > pivotVal && lo < last)
            {
                lo++;
            }

            /* Scan from the right past elements smaller than the pivot */
            hi--;
            while (pSrc[hi] < pivotVal)
            {
                hi--;
            }
        }

        /* Indices have not crossed: both elements are on the wrong side, swap them */
        if (lo < hi)
        {
            swapTmp   = pSrc[lo];
            pSrc[lo]  = pSrc[hi];
            pSrc[hi]  = swapTmp;
        }
    }

    return hi;
}
|
||||
|
||||
static void arm_quick_sort_core_f32(float32_t *pSrc, int32_t first, int32_t last, uint8_t dir)
{
    /* Recursive driver: sorts pSrc[first ... last] in place */
    int32_t split;

    /* A range with fewer than two elements needs no work */
    if (first >= last)
    {
        return;
    }

    /* Partition the range and get the index that separates the two parts */
    split = arm_quick_sort_partition_f32(pSrc, first, last, dir);

    /* Sort the sub-ranges [first ... split] and [split+1 ... last] */
    arm_quick_sort_core_f32(pSrc, first, split, dir);
    arm_quick_sort_core_f32(pSrc, split + 1, last, dir);
}
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @private
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in,out] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*
|
||||
* @par Algorithm
|
||||
* The quick sort algorithm is a comparison algorithm that
|
||||
* divides the input array into two smaller sub-arrays and
|
||||
* recursively sort them. An element of the array (the pivot)
|
||||
* is chosen, all the elements with values smaller than the
|
||||
* pivot are moved before the pivot, while all elements with
|
||||
* values greater than the pivot are moved after it (partition).
|
||||
*
|
||||
* @par
|
||||
* In this implementation the Hoare partition scheme has been
|
||||
* used [Hoare, C. A. R. (1 January 1962). "Quicksort". The Computer
|
||||
* Journal. 5 (1): 10...16.] The first element has always been chosen
|
||||
* as the pivot. The partition algorithm guarantees that the returned
|
||||
* pivot is never placed outside the vector, since it is returned only
|
||||
* when the pointers crossed each other. In this way it isn't
|
||||
* possible to obtain empty partitions and infinite recursion is avoided.
|
||||
*
|
||||
* @par
|
||||
* It's an in-place algorithm. In order to obtain an out-of-place
|
||||
* function, a memcpy of the source vector is performed.
|
||||
*/
|
||||
|
||||
void arm_quick_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
float32_t * pA;
|
||||
|
||||
/* Out-of-place */
|
||||
if(pSrc != pDst)
|
||||
{
|
||||
memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
|
||||
pA = pDst;
|
||||
}
|
||||
else
|
||||
pA = pSrc;
|
||||
|
||||
arm_quick_sort_core_f32(pA, 0, blockSize-1, S->dir);
|
||||
/* The previous function could be called recursively a maximum
|
||||
* of (blockSize-1) times, generating a stack consumption of 4*(blockSize-1) bytes. */
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
|
@ -0,0 +1,107 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_selection_sort_f32.c
|
||||
* Description: Floating point selection sort
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_sorting.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
/**
|
||||
* @private
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*
|
||||
* @par Algorithm
|
||||
* The Selection sort algorithm is a comparison algorithm that
|
||||
* divides the input array into a sorted and an unsorted sublist
|
||||
* (initially the sorted sublist is empty and the unsorted sublist
|
||||
* is the input array), looks for the smallest (or biggest)
|
||||
* element in the unsorted sublist, swapping it with the leftmost
|
||||
* one, and moving the sublists boundary one element to the right.
|
||||
*
|
||||
* @par It's an in-place algorithm. In order to obtain an out-of-place
|
||||
* function, a memcpy of the source vector is performed.
|
||||
*/
|
||||
|
||||
void arm_selection_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
uint32_t i, j, k;
|
||||
uint8_t dir = S->dir;
|
||||
float32_t temp;
|
||||
|
||||
float32_t * pA;
|
||||
|
||||
if(pSrc != pDst) // out-of-place
|
||||
{
|
||||
memcpy(pDst, pSrc, blockSize*sizeof(float32_t) );
|
||||
pA = pDst;
|
||||
}
|
||||
else
|
||||
pA = pSrc;
|
||||
|
||||
/* Move the boundary one element to the right */
|
||||
for (i=0; i<blockSize-1; i++)
|
||||
{
|
||||
/* Initialize the minimum/maximum as the first element */
|
||||
k = i;
|
||||
|
||||
/* Look in the unsorted list to find the minimum/maximum value */
|
||||
for (j=i+1; j<blockSize; j++)
|
||||
{
|
||||
if (dir==(pA[j] < pA[k]) )
|
||||
{
|
||||
/* Update value */
|
||||
k = j;
|
||||
}
|
||||
}
|
||||
|
||||
if (k != i)
|
||||
{
|
||||
/* Swap the minimum/maximum with the leftmost element */
|
||||
temp=pA[i];
|
||||
pA[i]=pA[k];
|
||||
pA[k]=temp;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
86
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
Normal file
86
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_sort_f32.c
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sort_f32.c
|
||||
* Description: Floating point sort
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_sorting.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Generic sorting function
|
||||
*
|
||||
* @param[in] S points to an instance of the sorting structure.
|
||||
* @param[in] pSrc points to the block of input data.
|
||||
* @param[out] pDst points to the block of output data.
|
||||
* @param[in] blockSize number of samples to process.
|
||||
*/
|
||||
|
||||
void arm_sort_f32(
|
||||
const arm_sort_instance_f32 * S,
|
||||
float32_t * pSrc,
|
||||
float32_t * pDst,
|
||||
uint32_t blockSize)
|
||||
{
|
||||
switch(S->alg)
|
||||
{
|
||||
case ARM_SORT_BITONIC:
|
||||
arm_bitonic_sort_f32(S, pSrc, pDst, blockSize);
|
||||
break;
|
||||
|
||||
case ARM_SORT_BUBBLE:
|
||||
arm_bubble_sort_f32(S, pSrc, pDst, blockSize);
|
||||
break;
|
||||
|
||||
case ARM_SORT_HEAP:
|
||||
arm_heap_sort_f32(S, pSrc, pDst, blockSize);
|
||||
break;
|
||||
|
||||
case ARM_SORT_INSERTION:
|
||||
arm_insertion_sort_f32(S, pSrc, pDst, blockSize);
|
||||
break;
|
||||
|
||||
case ARM_SORT_QUICK:
|
||||
arm_quick_sort_f32(S, pSrc, pDst, blockSize);
|
||||
break;
|
||||
|
||||
case ARM_SORT_SELECTION:
|
||||
arm_selection_sort_f32(S, pSrc, pDst, blockSize);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
|
@ -0,0 +1,54 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_sort_init_f32.c
|
||||
* Description: Floating point sort initialization function
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "arm_sorting.h"
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@addtogroup Sorting
|
||||
@{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @param[in,out] S points to an instance of the sorting structure.
|
||||
* @param[in] alg Selected algorithm.
|
||||
* @param[in] dir Sorting order.
|
||||
*/
|
||||
void arm_sort_init_f32(arm_sort_instance_f32 * S, arm_sort_alg alg, arm_sort_dir dir)
|
||||
{
|
||||
S->alg = alg;
|
||||
S->dir = dir;
|
||||
}
|
||||
|
||||
/**
|
||||
@} end of Sorting group
|
||||
*/
|
146
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f16.c
Normal file
146
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f16.c
Normal file
|
@ -0,0 +1,146 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_weighted_sum_f16.c
|
||||
* Description: Weighted Sum
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "dsp/support_functions_f16.h"
|
||||
|
||||
#if defined(ARM_FLOAT16_SUPPORTED)
|
||||
|
||||
/**
|
||||
@ingroup groupSupport
|
||||
*/
|
||||
|
||||
/**
|
||||
@defgroup weightedsum Weighted Sum
|
||||
|
||||
Weighted sum of values
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @addtogroup weightedsum
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Weighted sum
|
||||
*
|
||||
*
|
||||
* @param[in] *in Array of input values.
|
||||
* @param[in] *weigths Weights
|
||||
* @param[in] blockSize Number of samples in the input array.
|
||||
 * @return       Sum of the input values multiplied by their weights, divided by the sum of the weights
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
float16_t arm_weighted_sum_f16(const float16_t *in,const float16_t *weigths, uint32_t blockSize)
|
||||
{
|
||||
_Float16 accum1, accum2;
|
||||
float16x8_t accum1V, accum2V;
|
||||
float16x8_t inV, wV;
|
||||
const float16_t *pIn, *pW;
|
||||
uint32_t blkCnt;
|
||||
|
||||
|
||||
pIn = in;
|
||||
pW = weigths;
|
||||
|
||||
|
||||
accum1V = vdupq_n_f16(0.0f16);
|
||||
accum2V = vdupq_n_f16(0.0f16);
|
||||
|
||||
blkCnt = blockSize >> 3;
|
||||
while (blkCnt > 0)
|
||||
{
|
||||
inV = vld1q(pIn);
|
||||
wV = vld1q(pW);
|
||||
|
||||
pIn += 4;
|
||||
pW += 4;
|
||||
|
||||
accum1V = vfmaq(accum1V, inV, wV);
|
||||
accum2V = vaddq(accum2V, wV);
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
accum1 = vecAddAcrossF16Mve(accum1V);
|
||||
accum2 = vecAddAcrossF16Mve(accum2V);
|
||||
|
||||
blkCnt = blockSize & 7;
|
||||
while(blkCnt > 0)
|
||||
{
|
||||
accum1 += (_Float16)*pIn++ * (_Float16)*pW;
|
||||
accum2 += (_Float16)*pW++;
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
return (accum1 / accum2);
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
float16_t arm_weighted_sum_f16(const float16_t *in, const float16_t *weigths, uint32_t blockSize)
|
||||
{
|
||||
|
||||
_Float16 accum1, accum2;
|
||||
const float16_t *pIn, *pW;
|
||||
uint32_t blkCnt;
|
||||
|
||||
|
||||
pIn = in;
|
||||
pW = weigths;
|
||||
|
||||
accum1=0.0f16;
|
||||
accum2=0.0f16;
|
||||
|
||||
blkCnt = blockSize;
|
||||
while(blkCnt > 0)
|
||||
{
|
||||
accum1 += (_Float16)*pIn++ * (_Float16)*pW;
|
||||
accum2 += (_Float16)*pW++;
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
return(accum1 / accum2);
|
||||
}
|
||||
#endif /* defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
* @} end of weightedsum group
|
||||
*/
|
||||
|
||||
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
|
||||
|
187
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f32.c
Normal file
187
Drivers/CMSIS/DSP/Source/SupportFunctions/arm_weighted_sum_f32.c
Normal file
|
@ -0,0 +1,187 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
* Project: CMSIS DSP Library
|
||||
* Title: arm_weighted_sum_f32.c
|
||||
* Description: Weighted Sum
|
||||
*
|
||||
* $Date: 23 April 2021
|
||||
* $Revision: V1.9.0
|
||||
*
|
||||
* Target Processor: Cortex-M and Cortex-A cores
|
||||
* -------------------------------------------------------------------- */
|
||||
/*
|
||||
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
|
||||
*
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "dsp/support_functions.h"
|
||||
|
||||
/**
|
||||
* @addtogroup weightedsum
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @brief Weighted sum
|
||||
*
|
||||
*
|
||||
* @param[in] *in Array of input values.
|
||||
* @param[in] *weigths Weights
|
||||
* @param[in] blockSize Number of samples in the input array.
|
||||
 * @return       Sum of the input values multiplied by their weights, divided by the sum of the weights
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE)
|
||||
|
||||
#include "arm_helium_utils.h"
|
||||
|
||||
float32_t arm_weighted_sum_f32(const float32_t *in,const float32_t *weigths, uint32_t blockSize)
|
||||
{
|
||||
float32_t accum1, accum2;
|
||||
f32x4_t accum1V, accum2V;
|
||||
f32x4_t inV, wV;
|
||||
const float32_t *pIn, *pW;
|
||||
uint32_t blkCnt;
|
||||
|
||||
|
||||
pIn = in;
|
||||
pW = weigths;
|
||||
|
||||
|
||||
accum1V = vdupq_n_f32(0.0);
|
||||
accum2V = vdupq_n_f32(0.0);
|
||||
|
||||
blkCnt = blockSize >> 2;
|
||||
while (blkCnt > 0)
|
||||
{
|
||||
inV = vld1q(pIn);
|
||||
wV = vld1q(pW);
|
||||
|
||||
pIn += 4;
|
||||
pW += 4;
|
||||
|
||||
accum1V = vfmaq(accum1V, inV, wV);
|
||||
accum2V = vaddq(accum2V, wV);
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
accum1 = vecAddAcrossF32Mve(accum1V);
|
||||
accum2 = vecAddAcrossF32Mve(accum2V);
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while(blkCnt > 0)
|
||||
{
|
||||
accum1 += *pIn++ * *pW;
|
||||
accum2 += *pW++;
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
return (accum1 / accum2);
|
||||
}
|
||||
|
||||
#else
|
||||
#if defined(ARM_MATH_NEON)
|
||||
|
||||
#include "NEMath.h"
|
||||
float32_t arm_weighted_sum_f32(const float32_t *in,const float32_t *weigths, uint32_t blockSize)
|
||||
{
|
||||
|
||||
float32_t accum1, accum2;
|
||||
float32x4_t accum1V, accum2V;
|
||||
float32x2_t tempV;
|
||||
|
||||
float32x4_t inV,wV;
|
||||
|
||||
const float32_t *pIn, *pW;
|
||||
uint32_t blkCnt;
|
||||
|
||||
|
||||
pIn = in;
|
||||
pW = weigths;
|
||||
|
||||
accum1=0.0f;
|
||||
accum2=0.0f;
|
||||
|
||||
accum1V = vdupq_n_f32(0.0f);
|
||||
accum2V = vdupq_n_f32(0.0f);
|
||||
|
||||
blkCnt = blockSize >> 2;
|
||||
while(blkCnt > 0)
|
||||
{
|
||||
inV = vld1q_f32(pIn);
|
||||
wV = vld1q_f32(pW);
|
||||
|
||||
pIn += 4;
|
||||
pW += 4;
|
||||
|
||||
accum1V = vmlaq_f32(accum1V,inV,wV);
|
||||
accum2V = vaddq_f32(accum2V,wV);
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
tempV = vpadd_f32(vget_low_f32(accum1V),vget_high_f32(accum1V));
|
||||
accum1 = vget_lane_f32(tempV, 0) + vget_lane_f32(tempV, 1);
|
||||
|
||||
tempV = vpadd_f32(vget_low_f32(accum2V),vget_high_f32(accum2V));
|
||||
accum2 = vget_lane_f32(tempV, 0) + vget_lane_f32(tempV, 1);
|
||||
|
||||
blkCnt = blockSize & 3;
|
||||
while(blkCnt > 0)
|
||||
{
|
||||
accum1 += *pIn++ * *pW;
|
||||
accum2 += *pW++;
|
||||
blkCnt--;
|
||||
}
|
||||
|
||||
|
||||
return(accum1 / accum2);
|
||||
}
|
||||
#else
|
||||
/*
 * Portable scalar variant.
 * Walks both arrays once, accumulating sum(in[i] * weigths[i]) and
 * sum(weigths[i]), and returns the ratio of the two sums.
 */
float32_t arm_weighted_sum_f32(const float32_t *in, const float32_t *weigths, uint32_t blockSize)
{
    float32_t weightedAcc = 0.0f;   /* running sum of in[i] * weigths[i] */
    float32_t weightAcc = 0.0f;     /* running sum of weigths[i]         */
    uint32_t idx;

    for (idx = 0; idx < blockSize; idx++)
    {
        const float32_t w = weigths[idx];

        weightedAcc += in[idx] * w;
        weightAcc += w;
    }

    return (weightedAcc / weightAcc);
}
|
||||
#endif
|
||||
#endif /* defined(ARM_MATH_MVEF) && !defined(ARM_MATH_AUTOVECTORIZE) */
|
||||
|
||||
/**
|
||||
* @} end of weightedsum group
|
||||
*/
|
Loading…
Add table
Add a link
Reference in a new issue