GNU Radio 3.6.4.2 C++ API
|
#ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
#define INCLUDED_volk_32f_x2_multiply_32f_u_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the results in the third vector (SSE, unaligned loads and stores)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  __m128 aVal, bVal, cVal;

  /* Vector loop: four floats per iteration; loadu/storeu accept any alignment. */
  for(; number < quarterPoints; number++){
    aVal = _mm_loadu_ps(aPtr);
    bVal = _mm_loadu_ps(bPtr);

    cVal = _mm_mul_ps(aVal, bVal);

    _mm_storeu_ps(cPtr, cVal); /* Store the results back into the C container */

    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the num_points % 4 values the vector loop did not cover. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the results in the third vector (AVX, unaligned loads and stores)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  __m256 aVal, bVal, cVal;

  /* Vector loop: eight floats per iteration; loadu/storeu accept any alignment. */
  for(; number < eighthPoints; number++){
    aVal = _mm256_loadu_ps(aPtr);
    bVal = _mm256_loadu_ps(bPtr);

    cVal = _mm256_mul_ps(aVal, bVal);

    _mm256_storeu_ps(cPtr, cVal); /* Store the results back into the C container */

    aPtr += 8;
    bPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail: the num_points % 8 values the vector loop did not cover. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies the two input vectors element by element and stores the results in the third vector (portable scalar version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
#endif /* LV_HAVE_GENERIC */


#endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
#ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
#define INCLUDED_volk_32f_x2_multiply_32f_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the results in the third vector (SSE, aligned loads and stores)

  Uses _mm_load_ps/_mm_store_ps, so all three buffers must be 16-byte aligned.

  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int quarterPoints = num_points / 4;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  __m128 aVal, bVal, cVal;

  /* Vector loop: four floats per iteration using aligned accesses. */
  for(; number < quarterPoints; number++){
    aVal = _mm_load_ps(aPtr);
    bVal = _mm_load_ps(bPtr);

    cVal = _mm_mul_ps(aVal, bVal);

    _mm_store_ps(cPtr, cVal); /* Store the results back into the C container */

    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the num_points % 4 values the vector loop did not cover. */
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_AVX
#include <immintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the results in the third vector (AVX, aligned loads and stores)

  Uses _mm256_load_ps/_mm256_store_ps, so all three buffers must be 32-byte aligned.

  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  unsigned int number = 0;
  const unsigned int eighthPoints = num_points / 8;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  __m256 aVal, bVal, cVal;

  /* Vector loop: eight floats per iteration using aligned accesses. */
  for(; number < eighthPoints; number++){
    aVal = _mm256_load_ps(aPtr);
    bVal = _mm256_load_ps(bPtr);

    cVal = _mm256_mul_ps(aVal, bVal);

    _mm256_store_ps(cPtr, cVal); /* Store the results back into the C container */

    aPtr += 8;
    bPtr += 8;
    cPtr += 8;
  }

  /* Scalar tail: the num_points % 8 values the vector loop did not cover. */
  number = eighthPoints * 8;
  for(; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
#endif /* LV_HAVE_AVX */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies the two input vectors element by element and stores the results in the third vector (portable scalar version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number = 0;

  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) * (*bPtr++);
  }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_ORC
/*!
  \brief Multiplies the two input vectors and stores the results in the third vector by forwarding to the externally defined ORC implementation

  Note: the wrapper is named with the _u_ infix but calls
  volk_32f_x2_multiply_32f_a_orc_impl; the name is kept as-is for callers.

  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
*/
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */