GNU Radio 3.6.4.2 C++ API
|
#ifndef INCLUDED_volk_8i_s32f_convert_32f_u_H
#define INCLUDED_volk_8i_s32f_convert_32f_u_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
  \brief Converts the input 8 bit integer data into floating point data, and divides each floating point output data point by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 8 bit input data buffer
  \param scalar The value divided against each point in the output buffer
  \param num_points The number of data values to be converted
  \note Neither buffer needs to be aligned: only unaligned loads and stores are used
*/
static inline void volk_8i_s32f_convert_32f_u_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  /* The division is performed once; every sample is then multiplied by the reciprocal. */
  const float iScalar = 1.0 / scalar;
  __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  /* Each iteration consumes 16 input bytes and produces 16 floats, four at a time. */
  for(; number < sixteenthPoints; number++){
    inputVal = _mm_loadu_si128((const __m128i*)inputVectorPtr);

    /* Bytes 0..3: sign-extend to 32 bit, convert to float, scale, store. */
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Shift the register right by four bytes so the next group sits in the low lanes. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_storeu_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
#endif /* LV_HAVE_SSE4_1 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Converts the input 8 bit integer data into floating point data, and divides each floating point output data point by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 8 bit input data buffer
  \param scalar The value divided against each point in the output buffer
  \param num_points The number of data values to be converted
  \note Output buffer does NOT need to be properly aligned
*/
static inline void volk_8i_s32f_convert_32f_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
  float* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number;
  /* The division is performed once; every sample is then multiplied by the reciprocal. */
  const float iScalar = 1.0 / scalar;

  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
  }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_8i_s32f_convert_32f_u_H */

#ifndef INCLUDED_volk_8i_s32f_convert_32f_a_H
#define INCLUDED_volk_8i_s32f_convert_32f_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE4_1
#include <smmintrin.h>

/*!
  \brief Converts the input 8 bit integer data into floating point data, and divides each floating point output data point by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 8 bit input data buffer
  \param scalar The value divided against each point in the output buffer
  \param num_points The number of data values to be converted
  \note Both buffers must be 16-byte aligned: the loop uses the aligned load/store intrinsics
*/
static inline void volk_8i_s32f_convert_32f_a_sse4_1(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int sixteenthPoints = num_points / 16;

  float* outputVectorPtr = outputVector;
  /* The division is performed once; every sample is then multiplied by the reciprocal. */
  const float iScalar = 1.0 / scalar;
  __m128 invScalar = _mm_set_ps1(iScalar);
  const int8_t* inputVectorPtr = inputVector;
  __m128 ret;
  __m128i inputVal;
  __m128i interimVal;

  /* Each iteration consumes 16 input bytes and produces 16 floats, four at a time. */
  for(; number < sixteenthPoints; number++){
    inputVal = _mm_load_si128((const __m128i*)inputVectorPtr);

    /* Bytes 0..3: sign-extend to 32 bit, convert to float, scale, store. */
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    /* Shift the register right by four bytes so the next group sits in the low lanes. */
    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVal = _mm_srli_si128(inputVal, 4);
    interimVal = _mm_cvtepi8_epi32(inputVal);
    ret = _mm_cvtepi32_ps(interimVal);
    ret = _mm_mul_ps(ret, invScalar);
    _mm_store_ps(outputVectorPtr, ret);
    outputVectorPtr += 4;

    inputVectorPtr += 16;
  }

  /* Scalar tail for the num_points % 16 samples the vector loop did not cover. */
  number = sixteenthPoints * 16;
  for(; number < num_points; number++){
    outputVector[number] = (float)(inputVector[number]) * iScalar;
  }
}
#endif /* LV_HAVE_SSE4_1 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Converts the input 8 bit integer data into floating point data, and divides each floating point output data point by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 8 bit input data buffer
  \param scalar The value divided against each point in the output buffer
  \param num_points The number of data values to be converted
*/
static inline void volk_8i_s32f_convert_32f_a_generic(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
  float* outputVectorPtr = outputVector;
  const int8_t* inputVectorPtr = inputVector;
  unsigned int number;
  /* The division is performed once; every sample is then multiplied by the reciprocal. */
  const float iScalar = 1.0 / scalar;

  for(number = 0; number < num_points; number++){
    *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
  }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_ORC
/*!
  \brief Converts the input 8 bit integer data into floating point data, and divides each floating point output data point by the scalar value
  \param outputVector The floating point output data buffer
  \param inputVector The 8 bit input data buffer
  \param scalar The value divided against each point in the output buffer
  \param num_points The number of data values to be converted
  \note The externally defined ORC kernel is handed 1/scalar, although its prototype
        names that argument "scalar". Presumably the kernel multiplies by it; confirm
        against the ORC source.
  \note NOTE(review): this wrapper carries the _u_ name although it lives in the aligned
        header. The name is kept unchanged so existing callers keep working.
*/
extern void volk_8i_s32f_convert_32f_a_orc_impl(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points);
static inline void volk_8i_s32f_convert_32f_u_orc(float* outputVector, const int8_t* inputVector, const float scalar, unsigned int num_points){
  float invscalar = 1.0 / scalar;
  volk_8i_s32f_convert_32f_a_orc_impl(outputVector, inputVector, invscalar, num_points);
}
#endif /* LV_HAVE_ORC */

#endif /* INCLUDED_volk_8i_s32f_convert_32f_a_H */