Initial commit

This commit is contained in:
Attila Body 2025-06-09 18:06:36 +02:00
commit ce3dd83b9f
Signed by: abody
GPG key ID: BD0C6214E68FB5CF
1470 changed files with 1054449 additions and 0 deletions

View file

@ -0,0 +1,80 @@
cmake_minimum_required (VERSION 3.14)
project(CMSISDSPFastMath)
include(configLib)
include(configDsp)
add_library(CMSISDSPFastMath STATIC)
configLib(CMSISDSPFastMath ${ROOT})
configDsp(CMSISDSPFastMath ${ROOT})
include(interpol)
interpol(CMSISDSPFastMath)
if (CONFIGTABLE AND ALLFAST)
target_compile_definitions(CMSISDSPFastMath PUBLIC ARM_ALL_FAST_TABLES)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_F32)
target_sources(CMSISDSPFastMath PRIVATE arm_cos_f32.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_Q15)
target_sources(CMSISDSPFastMath PRIVATE arm_cos_q15.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_COS_Q31)
target_sources(CMSISDSPFastMath PRIVATE arm_cos_q31.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_F32)
target_sources(CMSISDSPFastMath PRIVATE arm_sin_f32.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_Q15)
target_sources(CMSISDSPFastMath PRIVATE arm_sin_q15.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SIN_Q31)
target_sources(CMSISDSPFastMath PRIVATE arm_sin_q31.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SQRT_Q31)
target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q31.c)
endif()
if (NOT CONFIGTABLE OR ALLFAST OR ARM_SQRT_Q15)
target_sources(CMSISDSPFastMath PRIVATE arm_sqrt_q15.c)
endif()
target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f32.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f64.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f32.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f64.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vlog_q31.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vlog_q15.c)
if ((NOT ARMAC5) AND (NOT DISABLEFLOAT16))
target_sources(CMSISDSPFastMath PRIVATE arm_vlog_f16.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vexp_f16.c)
target_sources(CMSISDSPFastMath PRIVATE arm_vinverse_f16.c)
target_sources(CMSISDSPFastMath PRIVATE arm_atan2_f16.c)
endif()
target_sources(CMSISDSPFastMath PRIVATE arm_divide_q15.c)
target_sources(CMSISDSPFastMath PRIVATE arm_divide_q31.c)
target_sources(CMSISDSPFastMath PRIVATE arm_atan2_f32.c)
target_sources(CMSISDSPFastMath PRIVATE arm_atan2_q31.c)
target_sources(CMSISDSPFastMath PRIVATE arm_atan2_q15.c)
### Includes
target_include_directories(CMSISDSPFastMath PUBLIC "${DSP}/Include")

View file

@ -0,0 +1,75 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: FastMathFunctions.c
* Description: Combination of all fast math function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_FAST_ALLOW_TABLES)
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32)
#include "arm_cos_f32.c"
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15)
#include "arm_cos_q15.c"
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31)
#include "arm_cos_q31.c"
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_F32)
#include "arm_sin_f32.c"
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q15)
#include "arm_sin_q15.c"
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SIN_Q31)
#include "arm_sin_q31.c"
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q31)
#include "arm_sqrt_q31.c"
#endif
#if !defined(ARM_DSP_CONFIG_TABLES) || defined(ARM_ALL_FAST_TABLES) || defined(ARM_TABLE_SQRT_Q15)
#include "arm_sqrt_q15.c"
#endif
#endif
#include "arm_vexp_f32.c"
#include "arm_vexp_f64.c"
#include "arm_vlog_f32.c"
#include "arm_vlog_f64.c"
#include "arm_divide_q15.c"
#include "arm_divide_q31.c"
#include "arm_vlog_q31.c"
#include "arm_vlog_q15.c"
#include "arm_atan2_f32.c"
#include "arm_atan2_q31.c"
#include "arm_atan2_q15.c"

View file

@ -0,0 +1,32 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: FastMathFunctions.c
* Description: Combination of all fast math function source files.
*
* $Date: 16. March 2020
* $Revision: V1.1.0
*
* Target Processor: Cortex-M cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2019-2020 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "arm_vexp_f16.c"
#include "arm_vlog_f16.c"
#include "arm_vinverse_f16.c"
#include "arm_atan2_f16.c"

View file

@ -0,0 +1,171 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_atan2_f16.c
* Description: float16 Arc tangent of y/x
*
* $Date: 22 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/*
atan for argument between in [0, 1.0]
*/
#define PIF16 3.14f16
#define PI16HALF 1.571f16
#define ATANHALFF16 0.463648f16
#define ATAN2_NB_COEFS_F16 5
static const float16_t atan2_coefs_f16[ATAN2_NB_COEFS_F16]={0.f16
,1.f16
,0.f16
,-0.367f16
,0.152f16
};
__STATIC_FORCEINLINE float16_t arm_atan_limited_f16(float16_t x)
{
float16_t res=atan2_coefs_f16[ATAN2_NB_COEFS_F16-1];
int i=1;
for(i=1;i<ATAN2_NB_COEFS_F16;i++)
{
res = (_Float16)x*(_Float16)res + (_Float16)atan2_coefs_f16[ATAN2_NB_COEFS_F16-1-i];
}
return(res);
}
__STATIC_FORCEINLINE float16_t arm_atan_f16(float16_t x)
{
int sign=0;
float16_t res=0.0f16;
if ((_Float16)x < 0.0f16)
{
sign=1;
x=-(_Float16)x;
}
if ((_Float16)x > 1.0f16)
{
x = 1.0f16 / (_Float16)x;
res = (_Float16)PI16HALF - (_Float16)arm_atan_limited_f16(x);
}
else
{
res += (_Float16)arm_atan_limited_f16(x);
}
if (sign)
{
res = -(_Float16)res;
}
return(res);
}
/**
@ingroup groupFastMath
*/
/**
@addtogroup atan2
@{
*/
/**
@brief Arc Tangent of y/x using sign of y and x to get right quadrant
@param[in] y y coordinate
@param[in] x x coordinate
@param[out] result Result
@return error status.
@par Compute the Arc tangent of y/x:
The sign of y and x are used to determine the right quadrant
and compute the right angle.
*/
arm_status arm_atan2_f16(float16_t y,float16_t x,float16_t *result)
{
if ((_Float16)x > 0.0f16)
{
*result=arm_atan_f16((_Float16)y/(_Float16)x);
return(ARM_MATH_SUCCESS);
}
if ((_Float16)x < 0.0f16)
{
if ((_Float16)y > 0.0f16)
{
*result=(_Float16)arm_atan_f16((_Float16)y/(_Float16)x) + (_Float16)PIF16;
}
else if ((_Float16)y < 0.0f16)
{
*result=(_Float16)arm_atan_f16((_Float16)y/(_Float16)x) - (_Float16)PIF16;
}
else
{
if (signbit(y))
{
*result= -(_Float16)PIF16;
}
else
{
*result= PIF16;
}
}
return(ARM_MATH_SUCCESS);
}
if ((_Float16)x == 0.0f16)
{
if ((_Float16)y > 0.0f16)
{
*result=PI16HALF;
return(ARM_MATH_SUCCESS);
}
if ((_Float16)y < 0.0f16)
{
*result=-(_Float16)PI16HALF;
return(ARM_MATH_SUCCESS);
}
}
return(ARM_MATH_NANINF);
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */
/**
@} end of atan2 group
*/

View file

@ -0,0 +1,183 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_atan2_f32.c
* Description: float32 Arc tangent of y/x
*
* $Date: 22 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
/*
atan for argument between in [0, 1.0]
*/
#define ATANHALFF32 0.463648f
#define PIHALFF32 1.5707963267948966192313f
#define ATAN2_NB_COEFS_F32 10
static const float32_t atan2_coefs_f32[ATAN2_NB_COEFS_F32]={0.0f
,1.0000001638308195518f
,-0.0000228941363602264f
,-0.3328086544578890873f
,-0.004404814619311061f
,0.2162217461808173258f
,-0.0207504842057097504f
,-0.1745263362250363339f
,0.1340557235283553386f
,-0.0323664125927477625f
};
__STATIC_FORCEINLINE float32_t arm_atan_limited_f32(float32_t x)
{
float32_t res=atan2_coefs_f32[ATAN2_NB_COEFS_F32-1];
int i=1;
for(i=1;i<ATAN2_NB_COEFS_F32;i++)
{
res = x*res + atan2_coefs_f32[ATAN2_NB_COEFS_F32-1-i];
}
return(res);
}
__STATIC_FORCEINLINE float32_t arm_atan_f32(float32_t x)
{
int sign=0;
float32_t res=0.0f;
if (x < 0.0f)
{
sign=1;
x=-x;
}
if (x > 1.0f)
{
x = 1.0f / x;
res = PIHALFF32 - arm_atan_limited_f32(x);
}
else
{
res += arm_atan_limited_f32(x);
}
if (sign)
{
res = -res;
}
return(res);
}
/**
@ingroup groupFastMath
*/
/**
@defgroup atan2 ArcTan2
Computing Arc tangent only using the ratio y/x is not enough to determine the angle
since there is an indeterminacy. Opposite quadrants are giving the same ratio.
ArcTan2 is not using y/x to compute the angle but y and x and use the sign of y and x
to determine the quadrant.
*/
/**
@addtogroup atan2
@{
*/
/**
@brief Arc Tangent of y/x using sign of y and x to get right quadrant
@param[in] y y coordinate
@param[in] x x coordinate
@param[out] result Result
@return error status.
@par Compute the Arc tangent of y/x:
The sign of y and x are used to determine the right quadrant
and compute the right angle.
*/
arm_status arm_atan2_f32(float32_t y,float32_t x,float32_t *result)
{
if (x > 0.0f)
{
*result=arm_atan_f32(y/x);
return(ARM_MATH_SUCCESS);
}
if (x < 0.0f)
{
if (y > 0.0f)
{
*result=arm_atan_f32(y/x) + PI;
}
else if (y < 0.0f)
{
*result=arm_atan_f32(y/x) - PI;
}
else
{
if (signbit(y))
{
*result= -PI;
}
else
{
*result= PI;
}
}
return(ARM_MATH_SUCCESS);
}
if (x == 0.0f)
{
if (y > 0.0f)
{
*result=PIHALFF32;
return(ARM_MATH_SUCCESS);
}
if (y < 0.0f)
{
*result=-PIHALFF32;
return(ARM_MATH_SUCCESS);
}
}
return(ARM_MATH_NANINF);
}
/**
@} end of atan2 group
*/

View file

@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_atan2_q15.c
* Description: float32 Arc tangent of y/x
*
* $Date: 22 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "dsp/utils.h"
/*
atan for argument between in [0, 1.0]
*/
/* Q2.13 */
#define ATANHALFQ13 0xed6
#define PIHALFQ13 0x3244
#define PIQ13 0x6488
#define ATAN2_NB_COEFS_Q15 10
static const q15_t atan2_coefs_q15[ATAN2_NB_COEFS_Q15]={0x0000
,0x7fff
,0xffff
,0xd567
,0xff70
,0x1bad
,0xfd58
,0xe9a9
,0x1129
,0xfbdb
};
__STATIC_FORCEINLINE q15_t arm_atan_limited_q15(q15_t x)
{
q31_t res=(q31_t)atan2_coefs_q15[ATAN2_NB_COEFS_Q15-1];
int i=1;
for(i=1;i<ATAN2_NB_COEFS_Q15;i++)
{
res = ((q31_t) x * res) >> 15U;
res = res + ((q31_t) atan2_coefs_q15[ATAN2_NB_COEFS_Q15-1-i]) ;
}
res = __SSAT(res>>2,16);
return(res);
}
__STATIC_FORCEINLINE q15_t arm_atan_q15(q15_t y,q15_t x)
{
int sign=0;
q15_t res=0;
if (y<0)
{
arm_negate_q15(&y,&y,1);
sign=1-sign;
}
if (x < 0)
{
sign=1 - sign;
arm_negate_q15(&x,&x,1);
}
if (y > x)
{
q15_t ratio;
int16_t shift;
arm_divide_q15(x,y,&ratio,&shift);
arm_shift_q15(&ratio,shift,&ratio,1);
res = PIHALFQ13 - arm_atan_limited_q15(ratio);
}
else
{
q15_t ratio;
int16_t shift;
arm_divide_q15(y,x,&ratio,&shift);
arm_shift_q15(&ratio,shift,&ratio,1);
res = arm_atan_limited_q15(ratio);
}
if (sign)
{
arm_negate_q15(&res,&res,1);
}
return(res);
}
/**
@ingroup groupFastMath
*/
/**
@addtogroup atan2
@{
*/
/**
@brief Arc Tangent of y/x using sign of y and x to get right quadrant
@param[in] y y coordinate
@param[in] x x coordinate
@param[out] result Result in Q2.13
@return error status.
@par Compute the Arc tangent of y/x:
The sign of y and x are used to determine the right quadrant
and compute the right angle.
*/
arm_status arm_atan2_q15(q15_t y,q15_t x,q15_t *result)
{
if (x > 0)
{
*result=arm_atan_q15(y,x);
return(ARM_MATH_SUCCESS);
}
if (x < 0)
{
if (y > 0)
{
*result=arm_atan_q15(y,x) + PIQ13;
}
else if (y < 0)
{
*result=arm_atan_q15(y,x) - PIQ13;
}
else
{
if (y<0)
{
*result= -PIQ13;
}
else
{
*result= PIQ13;
}
}
return(ARM_MATH_SUCCESS);
}
if (x == 0)
{
if (y > 0)
{
*result=PIHALFQ13;
return(ARM_MATH_SUCCESS);
}
if (y < 0)
{
*result=-PIHALFQ13;
return(ARM_MATH_SUCCESS);
}
}
return(ARM_MATH_NANINF);
}
/**
@} end of atan2 group
*/

View file

@ -0,0 +1,202 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_atan2_q31.c
* Description: float32 Arc tangent of y/x
*
* $Date: 22 April 2022
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2022 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "dsp/utils.h"
/*
atan for argument between in [0, 1.0]
*/
/* Q2.29 */
#define ATANHALF_Q29 0xed63383
#define PIHALF_Q29 0x3243f6a9
#define PIQ29 0x6487ed51
#define ATAN2_NB_COEFS_Q31 13
static const q31_t atan2_coefs_q31[ATAN2_NB_COEFS_Q31]={0x00000000
,0x7ffffffe
,0x000001b6
,0xd555158e
,0x00036463
,0x1985f617
,0x001992ae
,0xeed53a7f
,0xf8f15245
,0x2215a3a4
,0xe0fab004
,0x0cdd4825
,0xfddbc054
};
__STATIC_FORCEINLINE q31_t arm_atan_limited_q31(q31_t x)
{
q63_t res=(q63_t)atan2_coefs_q31[ATAN2_NB_COEFS_Q31-1];
int i=1;
for(i=1;i<ATAN2_NB_COEFS_Q31;i++)
{
res = ((q63_t) x * res) >> 31U;
res = res + ((q63_t) atan2_coefs_q31[ATAN2_NB_COEFS_Q31-1-i]) ;
}
return(clip_q63_to_q31(res>>2));
}
__STATIC_FORCEINLINE q31_t arm_atan_q31(q31_t y,q31_t x)
{
int sign=0;
q31_t res=0;
if (y<0)
{
arm_negate_q31(&y,&y,1);
sign=1-sign;
}
if (x < 0)
{
sign=1 - sign;
arm_negate_q31(&x,&x,1);
}
if (y > x)
{
q31_t ratio;
int16_t shift;
arm_divide_q31(x,y,&ratio,&shift);
arm_shift_q31(&ratio,shift,&ratio,1);
res = PIHALF_Q29 - arm_atan_limited_q31(ratio);
}
else
{
q31_t ratio;
int16_t shift;
arm_divide_q31(y,x,&ratio,&shift);
arm_shift_q31(&ratio,shift,&ratio,1);
res = arm_atan_limited_q31(ratio);
}
if (sign)
{
arm_negate_q31(&res,&res,1);
}
return(res);
}
/**
@ingroup groupFastMath
*/
/**
@addtogroup atan2
@{
*/
/**
@brief Arc Tangent of y/x using sign of y and x to get right quadrant
@param[in] y y coordinate
@param[in] x x coordinate
@param[out] result Result in Q2.29
@return error status.
@par Compute the Arc tangent of y/x:
The sign of y and x are used to determine the right quadrant
and compute the right angle.
*/
arm_status arm_atan2_q31(q31_t y,q31_t x,q31_t *result)
{
if (x > 0)
{
*result=arm_atan_q31(y,x);
return(ARM_MATH_SUCCESS);
}
if (x < 0)
{
if (y > 0)
{
*result=arm_atan_q31(y,x) + PIQ29;
}
else if (y < 0)
{
*result=arm_atan_q31(y,x) - PIQ29;
}
else
{
if (y<0)
{
*result= -PIQ29;
}
else
{
*result= PIQ29;
}
}
return(ARM_MATH_SUCCESS);
}
if (x == 0)
{
if (y > 0)
{
*result=PIHALF_Q29;
return(ARM_MATH_SUCCESS);
}
if (y < 0)
{
*result=-PIHALF_Q29;
return(ARM_MATH_SUCCESS);
}
}
return(ARM_MATH_NANINF);
}
/**
@} end of atan2 group
*/

View file

@ -0,0 +1,121 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_f32.c
* Description: Fast cosine calculation for floating-point values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@defgroup cos Cosine
Computes the trigonometric cosine function using a combination of table lookup
and linear interpolation. There are separate functions for
Q15, Q31, and floating-point data types.
The input to the floating-point version is in radians while the
fixed-point Q15 and Q31 have a scaled input with the range
[0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
value of 2*pi wraps around to 0.
The implementation is based on table lookup using 512 values together with linear interpolation.
The steps used are:
-# Calculation of the nearest integer table index
-# Compute the fractional portion (fract) of the table index.
-# The final result equals <code>(1.0f-fract)*a + fract*b;</code>
where
<pre>
a = Table[index];
b = Table[index+1];
</pre>
*/
/**
@addtogroup cos
@{
*/
/**
@brief Fast approximation to the trigonometric cosine function for floating-point data.
@param[in] x input value in radians
@return cos(x)
*/
float32_t arm_cos_f32(
float32_t x)
{
float32_t cosVal, fract, in; /* Temporary input, output variables */
uint16_t index; /* Index variable */
float32_t a, b; /* Two nearest output values */
int32_t n;
float32_t findex;
/* input x is in radians */
/* Scale input to [0 1] range from [0 2*PI] , divide input by 2*pi, add 0.25 (pi/2) to read sine table */
in = x * 0.159154943092f + 0.25f;
/* Calculation of floor value of input */
n = (int32_t) in;
/* Make negative values towards -infinity */
if (in < 0.0f)
{
n--;
}
/* Map input value to [0 1] */
in = in - (float32_t) n;
/* Calculation of index of the table */
findex = (float32_t)FAST_MATH_TABLE_SIZE * in;
index = (uint16_t)findex;
/* when "in" is exactly 1, we need to rotate the index down to 0 */
if (index >= FAST_MATH_TABLE_SIZE) {
index = 0;
findex -= (float32_t)FAST_MATH_TABLE_SIZE;
}
/* fractional value calculation */
fract = findex - (float32_t) index;
/* Read two nearest values of input value from the cos table */
a = sinTable_f32[index];
b = sinTable_f32[index+1];
/* Linear interpolation process */
cosVal = (1.0f - fract) * a + fract * b;
/* Return output value */
return (cosVal);
}
/**
@} end of cos group
*/

View file

@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_q15.c
* Description: Fast cosine calculation for Q15 values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup cos
@{
*/
/**
@brief Fast approximation to the trigonometric cosine function for Q15 data.
@param[in] x Scaled input value in radians
@return cos(x)
The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q15_t arm_cos_q15(
q15_t x)
{
q15_t cosVal; /* Temporary input, output variables */
int32_t index; /* Index variable */
q15_t a, b; /* Two nearest output values */
q15_t fract; /* Temporary values for fractional values */
/* add 0.25 (pi/2) to read sine table */
x = (uint16_t)x + 0x2000;
if (x < 0)
{ /* convert negative numbers to corresponding positive ones */
x = (uint16_t)x + 0x8000;
}
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q15[index];
b = sinTable_q15[index+1];
/* Linear interpolation process */
cosVal = (q31_t) (0x8000 - fract) * a >> 16;
cosVal = (q15_t) ((((q31_t) cosVal << 16) + ((q31_t) fract * b)) >> 16);
/* Return output value */
return (cosVal << 1);
}
/**
@} end of cos group
*/

View file

@ -0,0 +1,84 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_q31.c
* Description: Fast cosine calculation for Q31 values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup cos
@{
*/
/**
@brief Fast approximation to the trigonometric cosine function for Q31 data.
@param[in] x Scaled input value in radians
@return cos(x)
The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q31_t arm_cos_q31(
q31_t x)
{
q31_t cosVal; /* Temporary input, output variables */
int32_t index; /* Index variable */
q31_t a, b; /* Two nearest output values */
q31_t fract; /* Temporary values for fractional values */
/* add 0.25 (pi/2) to read sine table */
x = (uint32_t)x + 0x20000000;
if (x < 0)
{ /* convert negative numbers to corresponding positive ones */
x = (uint32_t)x + 0x80000000;
}
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q31[index];
b = sinTable_q31[index+1];
/* Linear interpolation process */
cosVal = (q63_t) (0x80000000 - fract) * a >> 32;
cosVal = (q31_t) ((((q63_t) cosVal << 32) + ((q63_t) fract * b)) >> 32);
/* Return output value */
return (cosVal << 1);
}
/**
@} end of cos group
*/

View file

@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_q15.c
* Description: Fast cosine calculation for Q15 values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
#include <stdlib.h>
/**
@ingroup groupFastMath
*/
/**
@defgroup divide Fixed point division
*/
/**
@addtogroup divide
@{
*/
/**
@brief Fixed point division
@param[in] numerator Numerator
@param[in] denominator Denominator
@param[out] quotient Quotient value normalized between -1.0 and 1.0
@param[out] shift Shift left value to get the unnormalized quotient
@return error status
When dividing by 0, an error ARM_MATH_NANINF is returned. And the quotient is forced
to the saturated negative or positive value.
*/
arm_status arm_divide_q15(q15_t numerator,
q15_t denominator,
q15_t *quotient,
int16_t *shift)
{
int16_t sign=0;
q31_t temp;
int16_t shiftForNormalizing;
*shift = 0;
sign = (numerator>>15) ^ (denominator>>15);
if (denominator == 0)
{
if (sign)
{
*quotient = 0x8000;
}
else
{
*quotient = 0x7FFF;
}
return(ARM_MATH_NANINF);
}
arm_abs_q15(&numerator,&numerator,1);
arm_abs_q15(&denominator,&denominator,1);
temp = ((q31_t)numerator << 15) / ((q31_t)denominator);
shiftForNormalizing= 17 - __CLZ(temp);
if (shiftForNormalizing > 0)
{
*shift = shiftForNormalizing;
temp = temp >> shiftForNormalizing;
}
if (sign)
{
temp = -temp;
}
*quotient=temp;
return(ARM_MATH_SUCCESS);
}
/**
@} end of divide group
*/

View file

@ -0,0 +1,110 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_cos_q31.c
* Description: Fast cosine calculation for Q31 values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
#include <stdlib.h>
/**
@ingroup groupFastMath
*/
/**
@defgroup divide Fixed point division
*/
/**
@addtogroup divide
@{
*/
/**
@brief Fixed point division
@param[in] numerator Numerator
@param[in] denominator Denominator
@param[out] quotient Quotient value normalized between -1.0 and 1.0
@param[out] shift Shift left value to get the unnormalized quotient
@return error status
When dividing by 0, an error ARM_MATH_NANINF is returned. And the quotient is forced
to the saturated negative or positive value.
*/
arm_status arm_divide_q31(q31_t numerator,
q31_t denominator,
q31_t *quotient,
int16_t *shift)
{
int16_t sign=0;
q63_t temp;
int16_t shiftForNormalizing;
*shift = 0;
sign = (numerator>>31) ^ (denominator>>31);
if (denominator == 0)
{
if (sign)
{
*quotient = 0x80000000;
}
else
{
*quotient = 0x7FFFFFFF;
}
return(ARM_MATH_NANINF);
}
arm_abs_q31(&numerator,&numerator,1);
arm_abs_q31(&denominator,&denominator,1);
temp = ((q63_t)numerator << 31) / ((q63_t)denominator);
shiftForNormalizing= 32 - __CLZ(temp >> 31);
if (shiftForNormalizing > 0)
{
*shift = shiftForNormalizing;
temp = temp >> shiftForNormalizing;
}
if (sign)
{
temp = -temp;
}
*quotient=(q31_t)temp;
return(ARM_MATH_SUCCESS);
}
/**
@} end of divide group
*/

View file

@ -0,0 +1,122 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_f32.c
* Description: Fast sine calculation for floating-point values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@defgroup sin Sine
Computes the trigonometric sine function using a combination of table lookup
and linear interpolation. There are separate functions for
Q15, Q31, and floating-point data types.
The input to the floating-point version is in radians while the
fixed-point Q15 and Q31 have a scaled input with the range
[0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
value of 2*pi wraps around to 0.
The implementation is based on table lookup using 512 values together with linear interpolation.
The steps used are:
-# Calculation of the nearest integer table index
-# Compute the fractional portion (fract) of the table index.
-# The final result equals <code>(1.0f-fract)*a + fract*b;</code>
where
<pre>
b = Table[index];
c = Table[index+1];
</pre>
*/
/**
@addtogroup sin
@{
*/
/**
@brief Fast approximation to the trigonometric sine function for floating-point data.
@param[in] x input value in radians.
@return sin(x)
*/
float32_t arm_sin_f32(
float32_t x)
{
float32_t sinVal, fract, in; /* Temporary input, output variables */
uint16_t index; /* Index variable */
float32_t a, b; /* Two nearest output values */
int32_t n;
float32_t findex;
/* input x is in radians */
/* Scale input to [0 1] range from [0 2*PI] , divide input by 2*pi */
in = x * 0.159154943092f;
/* Calculation of floor value of input */
n = (int32_t) in;
/* Make negative values towards -infinity */
if (in < 0.0f)
{
n--;
}
/* Map input value to [0 1] */
in = in - (float32_t) n;
/* Calculation of index of the table */
findex = (float32_t)FAST_MATH_TABLE_SIZE * in;
index = (uint16_t)findex;
/* when "in" is exactly 1, we need to rotate the index down to 0 */
if (index >= FAST_MATH_TABLE_SIZE) {
index = 0;
findex -= (float32_t)FAST_MATH_TABLE_SIZE;
}
/* fractional value calculation */
fract = findex - (float32_t) index;
/* Read two nearest values of input value from the sin table */
a = sinTable_f32[index];
b = sinTable_f32[index+1];
/* Linear interpolation process */
sinVal = (1.0f - fract) * a + fract * b;
/* Return output value */
return (sinVal);
}
/**
@} end of sin group
*/

View file

@ -0,0 +1,83 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_q15.c
* Description: Fast sine calculation for Q15 values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup sin
@{
*/
/**
@brief Fast approximation to the trigonometric sine function for Q15 data.
@param[in] x Scaled input value in radians
@return sin(x)
The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q15_t arm_sin_q15(
q15_t x)
{
q15_t sinVal; /* Temporary input, output variables */
int32_t index; /* Index variable */
q15_t a, b; /* Two nearest output values */
q15_t fract; /* Temporary values for fractional values */
if (x < 0)
{ /* convert negative numbers to corresponding positive ones */
x = (uint16_t)x + 0x8000;
}
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q15[index];
b = sinTable_q15[index+1];
/* Linear interpolation process */
sinVal = (q31_t) (0x8000 - fract) * a >> 16;
sinVal = (q15_t) ((((q31_t) sinVal << 16) + ((q31_t) fract * b)) >> 16);
/* Return output value */
return (sinVal << 1);
}
/**
@} end of sin group
*/

View file

@ -0,0 +1,82 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sin_q31.c
* Description: Fast sine calculation for Q31 values
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup sin
@{
*/
/**
@brief Fast approximation to the trigonometric sine function for Q31 data.
@param[in] x Scaled input value in radians
@return sin(x)
The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*PI).
*/
q31_t arm_sin_q31(
q31_t x)
{
q31_t sinVal; /* Temporary variables for input, output */
int32_t index; /* Index variable */
q31_t a, b; /* Two nearest output values */
q31_t fract; /* Temporary values for fractional values */
if (x < 0)
{ /* convert negative numbers to corresponding positive ones */
x = (uint32_t)x + 0x80000000;
}
/* Calculate the nearest index */
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
/* Calculation of fractional value */
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
/* Read two nearest values of input value from the sin table */
a = sinTable_q31[index];
b = sinTable_q31[index+1];
/* Linear interpolation process */
sinVal = (q63_t) (0x80000000 - fract) * a >> 32;
sinVal = (q31_t) ((((q63_t) sinVal << 32) + ((q63_t) fract * b)) >> 32);
/* Return output value */
return (sinVal << 1);
}
/**
@} end of sin group
*/

View file

@ -0,0 +1,123 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sqrt_q15.c
* Description: Q15 square root function
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup SQRT
@{
*/
/**
@brief Q15 square root function.
@param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF
@param[out] pOut points to square root of input value
@return execution status
- \ref ARM_MATH_SUCCESS : input value is positive
- \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
*/
#define Q12QUARTER 0x2000
arm_status arm_sqrt_q15(
q15_t in,
q15_t * pOut)
{
q15_t number, var1, signBits1,temp;
number = in;
/* If the input is a positive number then compute the signBits. */
if (number > 0)
{
signBits1 = __CLZ(number) - 17;
/* Shift by the number of signBits1 */
if ((signBits1 % 2) == 0)
{
number = number << signBits1;
}
else
{
number = number << (signBits1 - 1);
}
/* Start value for 1/sqrt(x) for the Newton iteration */
var1 = sqrt_initial_lut_q15[(number>> 11) - (Q12QUARTER >> 11)];
/* 0.5 var1 * (3 - number * var1 * var1) */
/* 1st iteration */
temp = ((q31_t) var1 * var1) >> 12;
temp = ((q31_t) number * temp) >> 15;
temp = 0x3000 - temp;
var1 = ((q31_t) var1 * temp) >> 13;
temp = ((q31_t) var1 * var1) >> 12;
temp = ((q31_t) number * temp) >> 15;
temp = 0x3000 - temp;
var1 = ((q31_t) var1 * temp) >> 13;
temp = ((q31_t) var1 * var1) >> 12;
temp = ((q31_t) number * temp) >> 15;
temp = 0x3000 - temp;
var1 = ((q31_t) var1 * temp) >> 13;
/* Multiply the inverse square root with the original value */
var1 = ((q15_t) (((q31_t) number * var1) >> 12));
/* Shift the output down accordingly */
if ((signBits1 % 2) == 0)
{
var1 = var1 >> (signBits1 / 2);
}
else
{
var1 = var1 >> ((signBits1 - 1) / 2);
}
*pOut = var1;
return (ARM_MATH_SUCCESS);
}
/* If the number is a negative number then store zero as its square root value */
else
{
*pOut = 0;
return (ARM_MATH_ARGUMENT_ERROR);
}
}
/**
@} end of SQRT group
*/

View file

@ -0,0 +1,126 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_sqrt_q31.c
* Description: Q31 square root function
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@addtogroup SQRT
@{
*/
/**
@brief Q31 square root function.
@param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF
@param[out] pOut points to square root of input value
@return execution status
- \ref ARM_MATH_SUCCESS : input value is positive
- \ref ARM_MATH_ARGUMENT_ERROR : input value is negative; *pOut is set to 0
*/
#define Q28QUARTER 0x20000000
arm_status arm_sqrt_q31(
q31_t in,
q31_t * pOut)
{
q31_t number, var1, signBits1 ,temp;
number = in;
/* If the input is a positive number then compute the signBits. */
if (number > 0)
{
signBits1 = __CLZ(number) - 1;
/* Shift by the number of signBits1 */
if ((signBits1 % 2) == 0)
{
number = number << signBits1;
}
else
{
number = number << (signBits1 - 1);
}
/* Start value for 1/sqrt(x) for the Newton iteration */
var1 = sqrt_initial_lut_q31[(number>> 26) - (Q28QUARTER >> 26)];
/* 0.5 var1 * (3 - number * var1 * var1) */
/* 1st iteration */
temp = ((q63_t) var1 * var1) >> 28;
temp = ((q63_t) number * temp) >> 31;
temp = 0x30000000 - temp;
var1 = ((q63_t) var1 * temp) >> 29;
/* 2nd iteration */
temp = ((q63_t) var1 * var1) >> 28;
temp = ((q63_t) number * temp) >> 31;
temp = 0x30000000 - temp;
var1 = ((q63_t) var1 * temp) >> 29;
/* 3nd iteration */
temp = ((q63_t) var1 * var1) >> 28;
temp = ((q63_t) number * temp) >> 31;
temp = 0x30000000 - temp;
var1 = ((q63_t) var1 * temp) >> 29;
/* Multiply the inverse square root with the original value */
var1 = ((q31_t) (((q63_t) number * var1) >> 28));
/* Shift the output down accordingly */
if ((signBits1 % 2) == 0)
{
var1 = var1 >> (signBits1 / 2);
}
else
{
var1 = var1 >> ((signBits1 - 1) / 2);
}
*pOut = var1;
return (ARM_MATH_SUCCESS);
}
/* If the number is a negative number then store zero as its square root value */
else
{
*pOut = 0;
return (ARM_MATH_ARGUMENT_ERROR);
}
}
/**
@} end of SQRT group
*/

View file

@ -0,0 +1,82 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_f16.c
* Description: Fast vectorized log
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include "arm_common_tables.h"
#include "arm_vec_math_f16.h"
void arm_vexp_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
f16x8_t src;
f16x8_t dst;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vexpq_f16(src);
vst1q(pDst, dst);
pSrc += 8;
pDst += 8;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 7;
#else
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
/* C = log(A) */
/* Calculate log and store result in destination buffer. */
*pDst++ = (_Float16)expf((float32_t)*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View file

@ -0,0 +1,97 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_f32.c
* Description: Fast vectorized log
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math.h"
#endif
void arm_vexp_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t src;
f32x4_t dst;
blkCnt = blockSize >> 2;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vexpq_f32(src);
vst1q(pDst, dst);
pSrc += 4;
pDst += 4;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 3;
#else
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t src;
f32x4_t dst;
blkCnt = blockSize >> 2;
while (blkCnt > 0U)
{
src = vld1q_f32(pSrc);
dst = vexpq_f32(src);
vst1q_f32(pDst, dst);
pSrc += 4;
pDst += 4;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 3;
#else
blkCnt = blockSize;
#endif
#endif
while (blkCnt > 0U)
{
/* C = log(A) */
/* Calculate log and store result in destination buffer. */
*pDst++ = expf(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}

View file

@ -0,0 +1,51 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_f64.c
* Description: Fast vectorized log
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
void arm_vexp_f64(
const float64_t * pSrc,
float64_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
blkCnt = blockSize;
while (blkCnt > 0U)
{
/* C = log(A) */
/* Calculate log and store result in destination buffer. */
*pDst++ = exp(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}

View file

@ -0,0 +1,79 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vinverse_f16.c
* Description: Fast vectorized inverse
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
#include "arm_common_tables.h"
#include "arm_vec_math_f16.h"
void arm_vinverse_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
f16x8_t src;
f16x8_t dst;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vrecip_hiprec_f16(src);
vst1q(pDst, dst);
pSrc += 8;
pDst += 8;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 7;
#else
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
*pDst++ = 1.0f16 / (_Float16)*pSrc++;
/* Decrement loop counter */
blkCnt--;
}
}
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View file

@ -0,0 +1,222 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_f16.c
* Description: Fast vectorized log
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions_f16.h"
#include "dsp/support_functions_f16.h"
#if defined(ARM_FLOAT16_SUPPORTED)
/* Degree of the polynomial approximation */
#define NB_DEG_LOGF16 3
/*
Related to the Log2 of the number of approximations.
For instance, with 3 there are 1 + 2^3 polynomials
*/
#define NB_DIV_LOGF16 3
/* Length of the LUT table */
#define NB_LUT_LOGF16 (NB_DEG_LOGF16+1)*(1 + (1<<NB_DIV_LOGF16))
/*
LUT of polynomial approximations.
Could be generated with:
ClearAll[lut, coefs, nb, deg];
nb = 3;
deg = 3;
lut = Table[
MiniMaxApproximation[
Log[x/2^nb + i], {x, {10^-6, 1.0/2^nb}, deg, 0},
MaxIterations -> 1000][[2, 1]], {i, 1, 2, (1.0/2^nb)}];
coefs = Chop@Flatten[CoefficientList[lut, x]];
*/
static float16_t lut_logf16[NB_LUT_LOGF16]={
0,0.125,-0.00781197,0.00063974,0.117783,
0.111111,-0.00617212,0.000447935,0.223144,
0.1,-0.00499952,0.000327193,0.318454,0.0909091,
-0.00413191,0.000246234,0.405465,0.0833333,
-0.00347199,0.000189928,0.485508,0.0769231,
-0.00295841,0.00014956,0.559616,0.0714286,
-0.0025509,0.000119868,0.628609,0.0666667,
-0.00222213,0.0000975436,0.693147,
0.0625,-0.00195305,0.0000804357};
float16_t logf16_scalar(float16_t x)
{
int16_t i = arm_typecast_s16_f16(x);
int32_t vecExpUnBiased = (i >> 10) - 15;
i = i - (vecExpUnBiased << 10);
float16_t vecTmpFlt1 = arm_typecast_f16_s16(i);
float16_t *lut;
int n;
float16_t tmp,v;
tmp = ((_Float16)vecTmpFlt1 - 1.0f16) * (1 << NB_DIV_LOGF16);
n = (int)floor((double)tmp);
v = (_Float16)tmp - (_Float16)n;
lut = lut_logf16 + n * (1+NB_DEG_LOGF16);
float16_t res = lut[NB_DEG_LOGF16-1];
for(int j=NB_DEG_LOGF16-2; j >=0 ; j--)
{
res = (_Float16)lut[j] + (_Float16)v * (_Float16)res;
}
res = (_Float16)res + 0.693147f16 * (_Float16)vecExpUnBiased;
return(res);
}
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_common_tables.h"
#include "arm_vec_math_f16.h"
float16x8_t vlogq_lut_f16(float16x8_t vecIn)
{
int16x8_t i = vreinterpretq_s16_f16(vecIn);
int16x8_t vecExpUnBiased = vsubq_n_s16(vshrq_n_s16(i,10), 15);
i = vsubq_s16(i,vshlq_n_s16(vecExpUnBiased,10));
float16x8_t vecTmpFlt1 = vreinterpretq_f16_s16(i);
float16x8_t lutV;
int16x8_t n;
int16x8_t offset;
float16x8_t tmp,v,res;
tmp = vmulq_n_f16(vsubq_n_f16(vecTmpFlt1,1.0f16),(_Float16)(1 << NB_DIV_LOGF16));
n = vcvtq_s16_f16(tmp);
v = vsubq_f16(tmp,vcvtq_f16_s16(n));
offset = vmulq_n_s16(n,(1+NB_DEG_LOGF16));
offset = vaddq_n_s16(offset,NB_DEG_LOGF16-1);
res = vldrhq_gather_shifted_offset_f16(lut_logf16,(uint16x8_t)offset);
offset = vsubq_n_s16(offset,1);
for(int j=NB_DEG_LOGF16-2; j >=0 ; j--)
{
lutV = vldrhq_gather_shifted_offset_f16(lut_logf16,(uint16x8_t)offset);
res = vfmaq_f16(lutV,v,res);
offset = vsubq_n_s16(offset,1);
}
res = vfmaq_n_f16(res,vcvtq_f16_s16(vecExpUnBiased),0.693147f16);
return(res);
}
#endif
/**
@ingroup groupFastMath
*/
/**
@addtogroup vlog
@{
*/
/**
@brief Floating-point vector of log values.
@param[in] pSrc points to the input vector
@param[out] pDst points to the output vector
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_vlog_f16(
const float16_t * pSrc,
float16_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
#if defined(ARM_MATH_MVE_FLOAT16) && !defined(ARM_MATH_AUTOVECTORIZE)
f16x8_t src;
f16x8_t dst;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vlogq_lut_f16(src);
vst1q(pDst, dst);
pSrc += 8;
pDst += 8;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 7;
#else
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
/* C = log(A) */
/* Calculate log and store result in destination buffer. */
*pDst++ = logf16_scalar(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of vlog group
*/
#endif /* #if defined(ARM_FLOAT16_SUPPORTED) */

View file

@ -0,0 +1,119 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_f32.c
* Description: Fast vectorized log
*
* $Date: 23 April 2021
* $Revision: V1.9.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
/**
@ingroup groupFastMath
*/
/**
@defgroup vlog Vector Log
Compute the log values of a vector of samples.
*/
/**
@addtogroup vlog
@{
*/
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM) || defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
#include "arm_vec_math.h"
#endif
void arm_vlog_f32(
const float32_t * pSrc,
float32_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
#if (defined(ARM_MATH_MVEF) || defined(ARM_MATH_HELIUM)) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t src;
f32x4_t dst;
blkCnt = blockSize >> 2;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vlogq_f32(src);
vst1q(pDst, dst);
pSrc += 4;
pDst += 4;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 3;
#else
#if (defined(ARM_MATH_NEON) || defined(ARM_MATH_NEON_EXPERIMENTAL)) && !defined(ARM_MATH_AUTOVECTORIZE)
f32x4_t src;
f32x4_t dst;
blkCnt = blockSize >> 2;
while (blkCnt > 0U)
{
src = vld1q_f32(pSrc);
dst = vlogq_f32(src);
vst1q_f32(pDst, dst);
pSrc += 4;
pDst += 4;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 3;
#else
blkCnt = blockSize;
#endif
#endif
while (blkCnt > 0U)
{
/* C = log(A) */
/* Calculate log and store result in destination buffer. */
*pDst++ = logf(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of vlog group
*/

View file

@ -0,0 +1,51 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_f64.c
* Description: Fast vectorized log
*
* $Date: 13 September 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#include "arm_common_tables.h"
void arm_vlog_f64(
const float64_t * pSrc,
float64_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt;
blkCnt = blockSize;
while (blkCnt > 0U)
{
/* C = log(A) */
/* Calculate log and store result in destination buffer. */
*pDst++ = log(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}

View file

@ -0,0 +1,264 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_q15
* Description: Q15 vector log
*
* $Date: 19 July 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#define LOG_Q15_ACCURACY 15
/* Bit to represent the normalization factor
It is Ceiling[Log2[LOG_Q15_ACCURACY]] of the previous value.
The Log2 algorithm is assuming that the value x is
1 <= x < 2.
But input value could be as small a 2^-LOG_Q15_ACCURACY
which would give an integer part of -15.
*/
#define LOG_Q15_INTEGER_PART 4
/* 2.0 in q14 */
#define LOQ_Q15_THRESHOLD (1u << LOG_Q15_ACCURACY)
/* HALF */
#define LOQ_Q15_Q16_HALF LOQ_Q15_THRESHOLD
#define LOQ_Q15_Q14_HALF (LOQ_Q15_Q16_HALF >> 2)
/* 1.0 / Log2[Exp[1]] in q15 */
#define LOG_Q15_INVLOG2EXP 0x58b9u
/* Clay Turner algorithm */
static uint16_t arm_scalar_log_q15(uint16_t src)
{
int i;
int16_t c = __CLZ(src)-16;
int16_t normalization=0;
/* 0.5 in q11 */
uint16_t inc = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1);
/* Will compute y = log2(x) for 1 <= x < 2.0 */
uint16_t x;
/* q11 */
uint16_t y=0;
/* q11 */
int16_t tmp;
/* Normalize and convert to q14 format */
x = src;
if ((c-1) < 0)
{
x = x >> (1-c);
}
else
{
x = x << (c-1);
}
normalization = c;
/* Compute the Log2. Result is in q11 instead of q16
because we know 0 <= y < 1.0 but
we want a result allowing to do a
product on int16 rather than having to go
through int32
*/
for(i = 0; i < LOG_Q15_ACCURACY ; i++)
{
x = (((int32_t)x*x)) >> (LOG_Q15_ACCURACY - 1);
if (x >= LOQ_Q15_THRESHOLD)
{
y += inc ;
x = x >> 1;
}
inc = inc >> 1;
}
/*
Convert the Log2 to Log and apply normalization.
We compute (y - normalisation) * (1 / Log2[e]).
*/
/* q11 */
//tmp = y - ((int32_t)normalization << (LOG_Q15_ACCURACY + 1));
tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
/* q4.11 */
y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
return(y);
}
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
q15x8_t vlogq_q15(q15x8_t src)
{
int i;
int16x8_t c = vclzq_s16(src);
int16x8_t normalization = c;
/* 0.5 in q11 */
uint16_t inc = LOQ_Q15_Q16_HALF >> (LOG_Q15_INTEGER_PART + 1);
/* Will compute y = log2(x) for 1 <= x < 2.0 */
uint16x8_t x;
/* q11 */
uint16x8_t y = vdupq_n_u16(0);
/* q11 */
int16x8_t vtmp;
mve_pred16_t p;
/* Normalize and convert to q14 format */
vtmp = vsubq_n_s16(c,1);
x = vshlq_u16((uint16x8_t)src,vtmp);
/* Compute the Log2. Result is in q11 instead of q16
because we know 0 <= y < 1.0 but
we want a result allowing to do a
product on int16 rather than having to go
through int32
*/
for(i = 0; i < LOG_Q15_ACCURACY ; i++)
{
x = vmulhq_u16(x,x);
x = vshlq_n_u16(x,2);
p = vcmphiq_u16(x,vdupq_n_u16(LOQ_Q15_THRESHOLD));
y = vaddq_m_n_u16(y, y,inc,p);
x = vshrq_m_n_u16(x,x,1,p);
inc = inc >> 1;
}
/*
Convert the Log2 to Log and apply normalization.
We compute (y - normalisation) * (1 / Log2[e]).
*/
/* q11 */
// tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
vtmp = vshlq_n_s16(normalization,LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART);
vtmp = vsubq_s16((int16x8_t)y,vtmp);
/* q4.11 */
// y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
vtmp = vqdmulhq_n_s16(vtmp,LOG_Q15_INVLOG2EXP);
return(vtmp);
}
#endif
/**
@ingroup groupFastMath
*/
/**
@addtogroup vlog
@{
*/
/**
@brief q15 vector of log values.
@param[in] pSrc points to the input vector in q15
@param[out] pDst points to the output vector in q4.11
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_vlog_q15(
const q15_t * pSrc,
q15_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counters */
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
q15x8_t src;
q15x8_t dst;
blkCnt = blockSize >> 3;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vlogq_q15(src);
vst1q(pDst, dst);
pSrc += 8;
pDst += 8;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 7;
#else
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
*pDst++ = arm_scalar_log_q15(*pSrc++);
/* Decrement loop counter */
blkCnt--;
}
}
/**
@} end of vlog group
*/

View file

@ -0,0 +1,258 @@
/* ----------------------------------------------------------------------
* Project: CMSIS DSP Library
* Title: arm_vlog_q31
* Description: Q31 vector log
*
* $Date: 19 July 2021
* $Revision: V1.10.0
*
* Target Processor: Cortex-M and Cortex-A cores
* -------------------------------------------------------------------- */
/*
* Copyright (C) 2010-2021 ARM Limited or its affiliates. All rights reserved.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 (the License); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "dsp/fast_math_functions.h"
#define LOG_Q31_ACCURACY 31
/* Bit to represent the normalization factor
It is Ceiling[Log2[LOG_Q31_ACCURACY]] of the previous value.
The Log2 algorithm is assuming that the value x is
1 <= x < 2.
But input value could be as small a 2^-LOG_Q31_ACCURACY
which would give an integer part of -31.
*/
#define LOG_Q31_INTEGER_PART 5
/* 2.0 in Q30 */
#define LOQ_Q31_THRESHOLD (1u << LOG_Q31_ACCURACY)
/* HALF */
#define LOQ_Q31_Q32_HALF LOQ_Q31_THRESHOLD
#define LOQ_Q31_Q30_HALF (LOQ_Q31_Q32_HALF >> 2)
/* 1.0 / Log2[Exp[1]] in Q31 */
#define LOG_Q31_INVLOG2EXP 0x58b90bfbuL
/* Clay Turner algorithm */
static uint32_t arm_scalar_log_q31(uint32_t src)
{
int32_t i;
int32_t c = __CLZ(src);
int32_t normalization=0;
/* 0.5 in q26 */
uint32_t inc = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1);
/* Will compute y = log2(x) for 1 <= x < 2.0 */
uint32_t x;
/* q26 */
uint32_t y=0;
/* q26 */
int32_t tmp;
/* Normalize and convert to q30 format */
x = src;
if ((c-1) < 0)
{
x = x >> (1-c);
}
else
{
x = x << (c-1);
}
normalization = c;
/* Compute the Log2. Result is in q26
because we know 0 <= y < 1.0 but
do not want to use q32 to allow
following computation with less instructions.
*/
for(i = 0; i < LOG_Q31_ACCURACY ; i++)
{
x = ((int64_t)x*x) >> (LOG_Q31_ACCURACY - 1);
if (x >= LOQ_Q31_THRESHOLD)
{
y += inc ;
x = x >> 1;
}
inc = inc >> 1;
}
/*
Convert the Log2 to Log and apply normalization.
We compute (y - normalisation) * (1 / Log2[e]).
*/
/* q26 */
tmp = (int32_t)y - (normalization << (LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART));
/* q5.26 */
y = ((int64_t)tmp * LOG_Q31_INVLOG2EXP) >> 31;
return(y);
}
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
q31x4_t vlogq_q31(q31x4_t src)
{
int32_t i;
int32x4_t c = vclzq_s32(src);
int32x4_t normalization = c;
/* 0.5 in q11 */
uint32_t inc = LOQ_Q31_Q32_HALF >> (LOG_Q31_INTEGER_PART + 1);
/* Will compute y = log2(x) for 1 <= x < 2.0 */
uint32x4_t x;
/* q11 */
uint32x4_t y = vdupq_n_u32(0);
/* q11 */
int32x4_t vtmp;
mve_pred16_t p;
/* Normalize and convert to q14 format */
vtmp = vsubq_n_s32(c,1);
x = vshlq_u32((uint32x4_t)src,vtmp);
/* Compute the Log2. Result is in Q26
because we know 0 <= y < 1.0 but
do not want to use Q32 to allow
following computation with less instructions.
*/
for(i = 0; i < LOG_Q31_ACCURACY ; i++)
{
x = vmulhq_u32(x,x);
x = vshlq_n_u32(x,2);
p = vcmphiq_u32(x,vdupq_n_u32(LOQ_Q31_THRESHOLD));
y = vaddq_m_n_u32(y, y,inc,p);
x = vshrq_m_n_u32(x,x,1,p);
inc = inc >> 1;
}
/*
Convert the Log2 to Log and apply normalization.
We compute (y - normalisation) * (1 / Log2[e]).
*/
/* q11 */
// tmp = (int16_t)y - (normalization << (LOG_Q15_ACCURACY - LOG_Q15_INTEGER_PART));
vtmp = vshlq_n_s32(normalization,LOG_Q31_ACCURACY - LOG_Q31_INTEGER_PART);
vtmp = vsubq_s32((int32x4_t)y,vtmp);
/* q4.11 */
// y = ((int32_t)tmp * LOG_Q15_INVLOG2EXP) >> 15;
vtmp = vqdmulhq_n_s32(vtmp,LOG_Q31_INVLOG2EXP);
return(vtmp);
}
#endif
/**
@ingroup groupFastMath
*/
/**
@addtogroup vlog
@{
*/
/**
@brief q31 vector of log values.
@param[in] pSrc points to the input vector in q31
@param[out] pDst points to the output vector q5.26
@param[in] blockSize number of samples in each vector
@return none
*/
void arm_vlog_q31(
const q31_t * pSrc,
q31_t * pDst,
uint32_t blockSize)
{
uint32_t blkCnt; /* loop counters */
#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE)
q31x4_t src;
q31x4_t dst;
blkCnt = blockSize >> 2;
while (blkCnt > 0U)
{
src = vld1q(pSrc);
dst = vlogq_q31(src);
vst1q(pDst, dst);
pSrc += 4;
pDst += 4;
/* Decrement loop counter */
blkCnt--;
}
blkCnt = blockSize & 3;
#else
blkCnt = blockSize;
#endif
while (blkCnt > 0U)
{
*pDst++=arm_scalar_log_q31(*pSrc++);
blkCnt--;
}
}
/**
@} end of vlog group
*/