GNU Radio 3.6.4.2 C++ API
|
00001 #ifndef INCLUDED_volk_32f_s32f_convert_16i_u_H 00002 #define INCLUDED_volk_32f_s32f_convert_16i_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <math.h> 00007 00008 #ifdef LV_HAVE_SSE2 00009 #include <emmintrin.h> 00010 /*! 00011 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00012 \param inputVector The floating point input data buffer 00013 \param outputVector The 16 bit output data buffer 00014 \param scalar The value multiplied against each point in the input buffer 00015 \param num_points The number of data values to be converted 00016 \note Input buffer does NOT need to be properly aligned 00017 */ 00018 static inline void volk_32f_s32f_convert_16i_u_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00019 unsigned int number = 0; 00020 00021 const unsigned int eighthPoints = num_points / 8; 00022 00023 const float* inputVectorPtr = (const float*)inputVector; 00024 int16_t* outputVectorPtr = outputVector; 00025 00026 float min_val = -32768; 00027 float max_val = 32767; 00028 float r; 00029 00030 __m128 vScalar = _mm_set_ps1(scalar); 00031 __m128 inputVal1, inputVal2; 00032 __m128i intInputVal1, intInputVal2; 00033 __m128 ret1, ret2; 00034 __m128 vmin_val = _mm_set_ps1(min_val); 00035 __m128 vmax_val = _mm_set_ps1(max_val); 00036 00037 for(;number < eighthPoints; number++){ 00038 inputVal1 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00039 inputVal2 = _mm_loadu_ps(inputVectorPtr); inputVectorPtr += 4; 00040 00041 // Scale and clip 00042 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00043 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); 00044 00045 intInputVal1 = _mm_cvtps_epi32(ret1); 00046 intInputVal2 = _mm_cvtps_epi32(ret2); 00047 00048 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00049 00050 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1); 00051 outputVectorPtr += 8; 00052 } 00053 00054 number = eighthPoints * 8; 00055 for(; number < num_points; number++){ 00056 r = inputVector[number] * scalar; 00057 if(r > max_val) 00058 r = max_val; 00059 else if(r < min_val) 00060 r = min_val; 00061 outputVector[number] = (int16_t)rintf(r); 00062 } 00063 } 00064 #endif /* LV_HAVE_SSE2 */ 00065 00066 #ifdef LV_HAVE_SSE 00067 #include <xmmintrin.h> 00068 /*! 00069 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00070 \param inputVector The floating point input data buffer 00071 \param outputVector The 16 bit output data buffer 00072 \param scalar The value multiplied against each point in the input buffer 00073 \param num_points The number of data values to be converted 00074 \note Input buffer does NOT need to be properly aligned 00075 */ 00076 static inline void volk_32f_s32f_convert_16i_u_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00077 unsigned int number = 0; 00078 00079 const unsigned int quarterPoints = num_points / 4; 00080 00081 const float* inputVectorPtr = (const float*)inputVector; 00082 int16_t* outputVectorPtr = outputVector; 00083 00084 float min_val = -32768; 00085 float max_val = 32767; 00086 float r; 00087 00088 __m128 vScalar = _mm_set_ps1(scalar); 00089 __m128 ret; 00090 __m128 vmin_val = _mm_set_ps1(min_val); 00091 __m128 vmax_val = _mm_set_ps1(max_val); 00092 00093 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00094 00095 for(;number < quarterPoints; number++){ 00096 ret = _mm_loadu_ps(inputVectorPtr); 00097 inputVectorPtr += 4; 00098 00099 // Scale and clip 00100 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00101 00102 _mm_store_ps(outputFloatBuffer, ret); 00103 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]); 00104 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]); 00105 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]); 00106 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]); 00107 } 00108 00109 number = quarterPoints * 4; 00110 for(; number < num_points; number++){ 00111 r = inputVector[number] * scalar; 00112 if(r > max_val) 00113 r = max_val; 00114 else if(r < min_val) 00115 r = min_val; 00116 outputVector[number] = (int16_t)rintf(r); 00117 } 00118 } 00119 #endif /* LV_HAVE_SSE */ 00120 00121 #ifdef LV_HAVE_GENERIC 00122 /*! 00123 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00124 \param inputVector The floating point input data buffer 00125 \param outputVector The 16 bit output data buffer 00126 \param scalar The value multiplied against each point in the input buffer 00127 \param num_points The number of data values to be converted 00128 \note Input buffer does NOT need to be properly aligned 00129 */ 00130 static inline void volk_32f_s32f_convert_16i_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00131 int16_t* outputVectorPtr = outputVector; 00132 const float* inputVectorPtr = inputVector; 00133 unsigned int number = 0; 00134 float min_val = -32768; 00135 float max_val = 32767; 00136 float r; 00137 00138 for(number = 0; number < num_points; number++){ 00139 r = *inputVectorPtr++ * scalar; 00140 if(r > max_val) 00141 r = max_val; 00142 else if(r < min_val) 00143 r = min_val; 00144 *outputVectorPtr++ = (int16_t)rintf(r); 00145 } 00146 } 00147 #endif /* LV_HAVE_GENERIC */ 00148 00149 00150 00151 00152 #endif /* INCLUDED_volk_32f_s32f_convert_16i_u_H */ 00153 #ifndef INCLUDED_volk_32f_s32f_convert_16i_a_H 00154 #define INCLUDED_volk_32f_s32f_convert_16i_a_H 00155 00156 #include <volk/volk_common.h> 00157 #include <inttypes.h> 00158 #include <stdio.h> 00159 #include <math.h> 00160 00161 #ifdef LV_HAVE_SSE2 00162 #include <emmintrin.h> 00163 /*! 00164 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00165 \param inputVector The floating point input data buffer 00166 \param outputVector The 16 bit output data buffer 00167 \param scalar The value multiplied against each point in the input buffer 00168 \param num_points The number of data values to be converted 00169 */ 00170 static inline void volk_32f_s32f_convert_16i_a_sse2(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00171 unsigned int number = 0; 00172 00173 const unsigned int eighthPoints = num_points / 8; 00174 00175 const float* inputVectorPtr = (const float*)inputVector; 00176 int16_t* outputVectorPtr = outputVector; 00177 00178 float min_val = -32768; 00179 float max_val = 32767; 00180 float r; 00181 00182 __m128 vScalar = _mm_set_ps1(scalar); 00183 __m128 inputVal1, inputVal2; 00184 __m128i intInputVal1, intInputVal2; 00185 __m128 ret1, ret2; 00186 __m128 vmin_val = _mm_set_ps1(min_val); 00187 __m128 vmax_val = _mm_set_ps1(max_val); 00188 00189 for(;number < eighthPoints; number++){ 00190 inputVal1 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00191 inputVal2 = _mm_load_ps(inputVectorPtr); inputVectorPtr += 4; 00192 00193 // Scale and clip 00194 ret1 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val); 00195 ret2 = _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal2, vScalar), vmax_val), vmin_val); 00196 00197 intInputVal1 = _mm_cvtps_epi32(ret1); 00198 intInputVal2 = _mm_cvtps_epi32(ret2); 00199 00200 intInputVal1 = _mm_packs_epi32(intInputVal1, intInputVal2); 00201 00202 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1); 00203 outputVectorPtr += 8; 00204 } 00205 00206 number = eighthPoints * 8; 00207 for(; number < num_points; number++){ 00208 r = inputVector[number] * scalar; 00209 if(r > max_val) 00210 r = max_val; 00211 else if(r < min_val) 00212 r = min_val; 00213 outputVector[number] = (int16_t)rintf(r); 00214 } 00215 } 00216 #endif /* LV_HAVE_SSE2 */ 00217 00218 #ifdef LV_HAVE_SSE 00219 #include <xmmintrin.h> 00220 /*! 00221 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00222 \param inputVector The floating point input data buffer 00223 \param outputVector The 16 bit output data buffer 00224 \param scalar The value multiplied against each point in the input buffer 00225 \param num_points The number of data values to be converted 00226 */ 00227 static inline void volk_32f_s32f_convert_16i_a_sse(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00228 unsigned int number = 0; 00229 00230 const unsigned int quarterPoints = num_points / 4; 00231 00232 const float* inputVectorPtr = (const float*)inputVector; 00233 int16_t* outputVectorPtr = outputVector; 00234 00235 float min_val = -32768; 00236 float max_val = 32767; 00237 float r; 00238 00239 __m128 vScalar = _mm_set_ps1(scalar); 00240 __m128 ret; 00241 __m128 vmin_val = _mm_set_ps1(min_val); 00242 __m128 vmax_val = _mm_set_ps1(max_val); 00243 00244 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4]; 00245 00246 for(;number < quarterPoints; number++){ 00247 ret = _mm_load_ps(inputVectorPtr); 00248 inputVectorPtr += 4; 00249 00250 // Scale and clip 00251 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val); 00252 00253 _mm_store_ps(outputFloatBuffer, ret); 00254 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[0]); 00255 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[1]); 00256 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[2]); 00257 *outputVectorPtr++ = (int16_t)rintf(outputFloatBuffer[3]); 00258 } 00259 00260 number = quarterPoints * 4; 00261 for(; number < num_points; number++){ 00262 r = inputVector[number] * scalar; 00263 if(r > max_val) 00264 r = max_val; 00265 else if(r < min_val) 00266 r = min_val; 00267 outputVector[number] = (int16_t)rintf(r); 00268 } 00269 } 00270 #endif /* LV_HAVE_SSE */ 00271 00272 #ifdef LV_HAVE_GENERIC 00273 /*! 00274 \brief Multiplies each point in the input buffer by the scalar value, then converts the result into a 16 bit integer value 00275 \param inputVector The floating point input data buffer 00276 \param outputVector The 16 bit output data buffer 00277 \param scalar The value multiplied against each point in the input buffer 00278 \param num_points The number of data values to be converted 00279 */ 00280 static inline void volk_32f_s32f_convert_16i_a_generic(int16_t* outputVector, const float* inputVector, const float scalar, unsigned int num_points){ 00281 int16_t* outputVectorPtr = outputVector; 00282 const float* inputVectorPtr = inputVector; 00283 unsigned int number = 0; 00284 float min_val = -32768; 00285 float max_val = 32767; 00286 float r; 00287 00288 for(number = 0; number < num_points; number++){ 00289 r = *inputVectorPtr++ * scalar; 00290 if(r < min_val) 00291 r = min_val; 00292 else if(r > max_val) 00293 r = max_val; 00294 *outputVectorPtr++ = (int16_t)rintf(r); 00295 } 00296 } 00297 #endif /* LV_HAVE_GENERIC */ 00298 00299 00300 00301 00302 #endif /* INCLUDED_volk_32f_s32f_convert_16i_a_H */