GNU Radio 3.6.4.2 C++ API
|
/*
 * Scalar-times-vector float kernels.
 *
 * Two guarded sections follow: the "_u" kernels use unaligned SIMD
 * loads/stores, the "_a" kernels use aligned SIMD loads/stores.
 * Each implementation is only compiled when its LV_HAVE_* macro is defined.
 */
#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
#define INCLUDED_volk_32f_s32f_multiply_32f_u_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Scalar float multiply (SSE, unaligned loads and stores)
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  // Broadcast the scalar into all four lanes once, outside the loop
  const __m128 bVal = _mm_set_ps1(scalar);

  // Four floats per iteration; the *u intrinsics place no alignment
  // requirement on either buffer
  for(number = 0; number < quarterPoints; number++){
    _mm_storeu_ps(cPtr, _mm_mul_ps(_mm_loadu_ps(aPtr), bVal));
    aPtr += 4;
    cPtr += 4;
  }

  // Scalar tail: the remaining (num_points % 4) values
  for(number = quarterPoints * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
  \brief Scalar float multiply (AVX, unaligned loads and stores)
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int eighthPoints = num_points / 8;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  // Broadcast the scalar into all eight lanes once, outside the loop
  const __m256 bVal = _mm256_set1_ps(scalar);

  // Eight floats per iteration; the *u intrinsics place no alignment
  // requirement on either buffer
  for(number = 0; number < eighthPoints; number++){
    _mm256_storeu_ps(cPtr, _mm256_mul_ps(_mm256_loadu_ps(aPtr), bVal));
    aPtr += 8;
    cPtr += 8;
  }

  // Scalar tail: the remaining (num_points % 8) values
  for(number = eighthPoints * 8; number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Scalar float multiply (portable C, one value per iteration)
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number;
  for(number = 0; number < num_points; number++){
    cVector[number] = aVector[number] * scalar;
  }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
#ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
#define INCLUDED_volk_32f_s32f_multiply_32f_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Scalar float multiply (SSE, aligned loads and stores)
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  // Broadcast the scalar into all four lanes once, outside the loop
  const __m128 bVal = _mm_set_ps1(scalar);

  // Four floats per iteration; _mm_load_ps/_mm_store_ps require both
  // buffers to be 16-byte aligned
  for(number = 0; number < quarterPoints; number++){
    _mm_store_ps(cPtr, _mm_mul_ps(_mm_load_ps(aPtr), bVal));
    aPtr += 4;
    cPtr += 4;
  }

  // Scalar tail: the remaining (num_points % 4) values
  for(number = quarterPoints * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
  \brief Scalar float multiply (AVX, aligned loads and stores)
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  const unsigned int eighthPoints = num_points / 8;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;

  // Broadcast the scalar into all eight lanes once, outside the loop
  const __m256 bVal = _mm256_set1_ps(scalar);

  // Eight floats per iteration; _mm256_load_ps/_mm256_store_ps require
  // both buffers to be 32-byte aligned
  for(number = 0; number < eighthPoints; number++){
    _mm256_store_ps(cPtr, _mm256_mul_ps(_mm256_load_ps(aPtr), bVal));
    aPtr += 8;
    cPtr += 8;
  }

  // Scalar tail: the remaining (num_points % 8) values
  for(number = eighthPoints * 8; number < num_points; number++){
    *cPtr++ = (*aPtr++) * scalar;
  }
}
#endif /* LV_HAVE_AVX */


#ifdef LV_HAVE_GENERIC
/*!
  \brief Scalar float multiply (portable C, one value per iteration)
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int number;
  for(number = 0; number < num_points; number++){
    cVector[number] = aVector[number] * scalar;
  }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_ORC
/*!
  \brief Scalar float multiply (forwards to the externally defined ORC implementation)
  \param cVector The vector where the results will be stored
  \param aVector The vector to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values in aVector to be multiplied by scalar and stored into cVector
*/
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst, const float* src, const float scalar, unsigned int num_points);
// NOTE(review): the wrapper carries a "_u_" name while forwarding to the
// "_a_" ORC implementation; the name is kept unchanged for existing callers.
static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC */




#endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */