GNU Radio 3.6.4.2 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H 00002 #define INCLUDED_volk_32f_s32f_convert_32i_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 00007 #ifdef LV_HAVE_SSE2 00008 #include <emmintrin.h> 00009 /*! 00010 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value 00011 \param inputVector The floating point input data buffer 00012 \param outputVector The 32 bit output data buffer 00013 \param scalar The value multiplied against each point in the input buffer 00014 \param num_points The number of data values to be converted 00015 \note Input buffer does NOT need to be properly aligned 00016 */ 00017 static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00018 unsigned int number = 0; 00019 00020 const unsigned int quarterPoints = num_points / 4; 00021 00022 const float* inputVectorPtr = (const float*)inputVector; 00023 int32_t* outputVectorPtr = outputVector; 00024 00025 float min_val = -2147483647; 00026 float max_val = 2147483647; 00027 float r; 00028 00029 __m128 vScalar = _mm_set_ps1(scalar); 00030 __m128 inputVal1; 00031 __m128i intInputVal1; 00032 __m128 vmin_val = _mm_set_ps1(min_val); 00033 __m128 vmax_val = _mm_set_ps1(max_val); 00034 00035 for(;number < quarterPoints; number++){ 00036 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00037 00038 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00039 intInputVal1 = _mm_cvtps_epi32(inputVal1); 00040 00041 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); 00042 outputVectorPtr += 4; 00043 } 00044 00045 number = quarterPoints * 4; 00046 for(; number < num_points; number++){ 00047 r = inputVector[number] * scalar; 00048 if(r > max_val) 00049 r = max_val; 00050 else if(r < min_val) 00051 r = min_val; 00052 outputVector[number] = (int32_t)(r); 00053 } 00054 } 00055 #endif /* LV_HAVE_SSE2 */ 00056 00057 #ifdef LV_HAVE_SSE 00058 #include <xmmintrin.h> 00059 /*! 00060 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value 00061 \param inputVector The floating point input data buffer 00062 \param outputVector The 32 bit output data buffer 00063 \param scalar The value multiplied against each point in the input buffer 00064 \param num_points The number of data values to be converted 00065 \note Input buffer does NOT need to be properly aligned 00066 */ 00067 static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00068 unsigned int number = 0; 00069 00070 const unsigned int quarterPoints = num_points / 4; 00071 00072 const float* inputVectorPtr = (const float*)inputVector; 00073 int32_t* outputVectorPtr = outputVector; 00074 00075 float min_val = -2147483647; 00076 float max_val = 2147483647; 00077 float r; 00078 00079 __m128 vScalar = _mm_set_ps1(scalar); 00080 __m128 ret; 00081 __m128 vmin_val = _mm_set_ps1(min_val); 00082 __m128 vmax_val = _mm_set_ps1(max_val); 00083 00084 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00085 00086 for(;number < quarterPoints; number++){ 00087 ret = _mm_loadu_ps(inputVectorPtr); 00088 inputVectorPtr += 4; 00089 00090 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00091 00092 _mm_store_ps(outputFloatBuffer, ret); 00093 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]); 00094 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]); 00095 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]); 00096 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]); 00097 } 00098 00099 number = quarterPoints * 4; 00100 for(; number < num_points; number++){ 00101 r = inputVector[number] * scalar; 00102 if(r > max_val) 00103 r = max_val; 00104 else if(r < min_val) 00105 r = min_val; 00106 outputVector[number] = (int32_t)(r); 00107 } 00108 } 00109 #endif /* LV_HAVE_SSE */ 00110 00111 #ifdef LV_HAVE_GENERIC 00112 /*! 00113 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value 00114 \param inputVector The floating point input data buffer 00115 \param outputVector The 32 bit output data buffer 00116 \param scalar The value multiplied against each point in the input buffer 00117 \param num_points The number of data values to be converted 00118 \note Input buffer does NOT need to be properly aligned 00119 */ 00120 static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00121 int32_t* outputVectorPtr = outputVector; 00122 const float* inputVectorPtr = inputVector; 00123 unsigned int number = 0; 00124 float min_val = -2147483647; 00125 float max_val = 2147483647; 00126 float r; 00127 00128 for(number = 0; number < num_points; number++){ 00129 r = *inputVectorPtr++ * scalar; 00130 if(r > max_val) 00131 r = max_val; 00132 else if(r < min_val) 00133 r = min_val; 00134 *outputVectorPtr++ = (int32_t)(r); 00135 } 00136 } 00137 #endif /* LV_HAVE_GENERIC */ 00138 00139 00140 00141 00142 #endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */ 00143 #ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H 00144 #define INCLUDED_volk_32f_s32f_convert_32i_a_H 00145 00146 #include <volk/volk_common.h> 00147 #include <inttypes.h> 00148 #include <stdio.h> 00149 00150 #ifdef LV_HAVE_AVX 00151 #include <immintrin.h> 00152 /*! 00153 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value 00154 \param inputVector The floating point input data buffer 00155 \param outputVector The 32 bit output data buffer 00156 \param scalar The value multiplied against each point in the input buffer 00157 \param num_points The number of data values to be converted 00158 */ 00159 static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00160 unsigned int number = 0; 00161 00162 const unsigned int eighthPoints = num_points / 8; 00163 00164 const float* inputVectorPtr = (const float*)inputVector; 00165 int32_t* outputVectorPtr = outputVector; 00166 00167 float min_val = -2147483647; 00168 float max_val = 2147483647; 00169 float r; 00170 00171 __m256 vScalar = _mm256_set1_ps(scalar); 00172 __m256 inputVal1; 00173 __m256i intInputVal1; 00174 __m256 vmin_val = _mm256_set1_ps(min_val); 00175 __m256 vmax_val = _mm256_set1_ps(max_val); 00176 00177 for(;number < eighthPoints; number++){ 00178 inputVal1 = _mm256_load_ps(inputVectorPtr); inputVectorPtr += 8; 00179 00180 inputVal1 = _mm256_max_ps(_mm256_min_ps(_mm256_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00181 intInputVal1 = _mm256_cvtps_epi32(inputVal1); 00182 00183 _mm256_store_si256((__m256i*)outputVectorPtr, intInputVal1); 00184 outputVectorPtr += 8; 00185 } 00186 00187 number = eighthPoints * 8; 00188 for(; number < num_points; number++){ 00189 r = inputVector[number] * scalar; 00190 if(r > max_val) 00191 r = max_val; 00192 else if(r < min_val) 00193 r = min_val; 00194 outputVector[number] = (int32_t)(r); 00195 } 00196 } 00197 #endif /* LV_HAVE_AVX */ 00198 00199 #ifdef LV_HAVE_SSE2 00200 #include <emmintrin.h> 00201 /*! 00202 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value 00203 \param inputVector The floating point input data buffer 00204 \param outputVector The 32 bit output data buffer 00205 \param scalar The value multiplied against each point in the input buffer 00206 \param num_points The number of data values to be converted 00207 */ 00208 static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00209 unsigned int number = 0; 00210 00211 const unsigned int quarterPoints = num_points / 4; 00212 00213 const float* inputVectorPtr = (const float*)inputVector; 00214 int32_t* outputVectorPtr = outputVector; 00215 00216 float min_val = -2147483647; 00217 float max_val = 2147483647; 00218 float r; 00219 00220 __m128 vScalar = _mm_set_ps1(scalar); 00221 __m128 inputVal1; 00222 __m128i intInputVal1; 00223 __m128 vmin_val = _mm_set_ps1(min_val); 00224 __m128 vmax_val = _mm_set_ps1(max_val); 00225 00226 for(;number < quarterPoints; number++){ 00227 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00228 00229 inputVal1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00230 intInputVal1 = _mm_cvtps_epi32(inputVal1); 00231 00232 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); 00233 outputVectorPtr += 4; 00234 } 00235 00236 number = quarterPoints * 4; 00237 for(; number < num_points; number++){ 00238 r = inputVector[number] * scalar; 00239 if(r > max_val) 00240 r = max_val; 00241 else if(r < min_val) 00242 r = min_val; 00243 outputVector[number] = (int32_t)(r); 00244 } 00245 } 00246 #endif /* LV_HAVE_SSE2 */ 00247 00248 #ifdef LV_HAVE_SSE 00249 #include <xmmintrin.h> 00250 /*! 00251 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value 00252 \param inputVector The floating point input data buffer 00253 \param outputVector The 32 bit output data buffer 00254 \param scalar The value multiplied against each point in the input buffer 00255 \param num_points The number of data values to be converted 00256 */ 00257 static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00258 unsigned int number = 0; 00259 00260 const unsigned int quarterPoints = num_points / 4; 00261 00262 const float* inputVectorPtr = (const float*)inputVector; 00263 int32_t* outputVectorPtr = outputVector; 00264 00265 float min_val = -2147483647; 00266 float max_val = 2147483647; 00267 float r; 00268 00269 __m128 vScalar = _mm_set_ps1(scalar); 00270 __m128 ret; 00271 __m128 vmin_val = _mm_set_ps1(min_val); 00272 __m128 vmax_val = _mm_set_ps1(max_val); 00273 00274 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00275 00276 for(;number < quarterPoints; number++){ 00277 ret = _mm_load_ps(inputVectorPtr); 00278 inputVectorPtr += 4; 00279 00280 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00281 00282 _mm_store_ps(outputFloatBuffer, ret); 00283 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[0]); 00284 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[1]); 00285 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[2]); 00286 *outputVectorPtr++ = (int32_t)(outputFloatBuffer[3]); 00287 } 00288 00289 number = quarterPoints * 4; 00290 for(; number < num_points; number++){ 00291 r = inputVector[number] * scalar; 00292 if(r > max_val) 00293 r = max_val; 00294 else if(r < min_val) 00295 r = min_val; 00296 outputVector[number] = (int32_t)(r); 00297 } 00298 } 00299 #endif /* LV_HAVE_SSE */ 00300 00301 #ifdef LV_HAVE_GENERIC 00302 /*! 00303 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 32 bit integer value 00304 \param inputVector The floating point input data buffer 00305 \param outputVector The 32 bit output data buffer 00306 \param scalar The value multiplied against each point in the input buffer 00307 \param num_points The number of data values to be converted 00308 */ 00309 static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00310 int32_t* outputVectorPtr = outputVector; 00311 const float* inputVectorPtr = inputVector; 00312 unsigned int number = 0; 00313 float min_val = -2147483647; 00314 float max_val = 2147483647; 00315 float r; 00316 00317 for(number = 0; number < num_points; number++){ 00318 r = *inputVectorPtr++ * scalar; 00319 if(r > max_val) 00320 r = max_val; 00321 else if(r < min_val) 00322 r = min_val; 00323 *outputVectorPtr++ = (int32_t)(r); 00324 } 00325 } 00326 #endif /* LV_HAVE_GENERIC */ 00327 00328 00329 00330 00331 #endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */