GNU Radio 3.6.4.2 C++ API
volk_32f_x2_multiply_32f.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_x2_multiply_32f_u_H
00002 #define INCLUDED_volk_32f_x2_multiply_32f_u_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector (unaligned SSE version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  Four floats are processed per iteration with unaligned loads/stores
  (_mm_loadu_ps / _mm_storeu_ps); the remaining num_points % 4 values are
  multiplied one at a time. Nothing is read or written when num_points is 0.
*/
static inline void volk_32f_x2_multiply_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m128 aVal, bVal, cVal;

    /* Vector part: one 4-float multiply per iteration */
    for(; number < quarterPoints; number++){
      aVal = _mm_loadu_ps(aPtr);
      bVal = _mm_loadu_ps(bPtr);

      cVal = _mm_mul_ps(aVal, bVal);

      _mm_storeu_ps(cPtr, cVal); /* Store the results back into the C container */

      aPtr += 4;
      bPtr += 4;
      cPtr += 4;
    }

    /* Scalar tail: the pointers already sit just past the last full group of four */
    for(number = quarterPoints * 4; number < num_points; number++){
      *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
00044 #endif /* LV_HAVE_SSE */
00045 
00046 #ifdef LV_HAVE_AVX
00047 #include <immintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector (unaligned AVX version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  Eight floats are processed per iteration with unaligned loads/stores
  (_mm256_loadu_ps / _mm256_storeu_ps); the remaining num_points % 8 values
  are multiplied one at a time. Nothing is read or written when num_points is 0.
*/
static inline void volk_32f_x2_multiply_32f_u_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m256 aVal, bVal, cVal;

    /* Vector part: one 8-float multiply per iteration */
    for(; number < eighthPoints; number++){
      aVal = _mm256_loadu_ps(aPtr);
      bVal = _mm256_loadu_ps(bPtr);

      cVal = _mm256_mul_ps(aVal, bVal);

      _mm256_storeu_ps(cPtr, cVal); /* Store the results back into the C container */

      aPtr += 8;
      bPtr += 8;
      cPtr += 8;
    }

    /* Scalar tail: the pointers already sit just past the last full group of eight */
    for(number = eighthPoints * 8; number < num_points; number++){
      *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
00083 #endif /* LV_HAVE_AVX */
00084 
00085 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector (portable C version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  Plain scalar loop with no SIMD intrinsics. Nothing is read or written when
  num_points is 0.
*/
static inline void volk_32f_x2_multiply_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    for(number = 0; number < num_points; number++){
      *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
00103 #endif /* LV_HAVE_GENERIC */
00104 
00105 
00106 #endif /* INCLUDED_volk_32f_x2_multiply_32f_u_H */
00107 #ifndef INCLUDED_volk_32f_x2_multiply_32f_a_H
00108 #define INCLUDED_volk_32f_x2_multiply_32f_a_H
00109 
00110 #include <inttypes.h>
00111 #include <stdio.h>
00112 
00113 #ifdef LV_HAVE_SSE
00114 #include <xmmintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector (aligned SSE version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  Four floats are processed per iteration with aligned loads/stores
  (_mm_load_ps / _mm_store_ps), so cVector, aVector and bVector must all be
  16-byte aligned. The remaining num_points % 4 values are multiplied one at
  a time. Nothing is read or written when num_points is 0.
*/
static inline void volk_32f_x2_multiply_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m128 aVal, bVal, cVal;

    /* Vector part: one 4-float multiply per iteration */
    for(; number < quarterPoints; number++){
      aVal = _mm_load_ps(aPtr);
      bVal = _mm_load_ps(bPtr);

      cVal = _mm_mul_ps(aVal, bVal);

      _mm_store_ps(cPtr, cVal); /* Store the results back into the C container */

      aPtr += 4;
      bPtr += 4;
      cPtr += 4;
    }

    /* Scalar tail: the pointers already sit just past the last full group of four */
    for(number = quarterPoints * 4; number < num_points; number++){
      *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
00150 #endif /* LV_HAVE_SSE */
00151 
00152 #ifdef LV_HAVE_AVX
00153 #include <immintrin.h>
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector (aligned AVX version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  Eight floats are processed per iteration with aligned loads/stores
  (_mm256_load_ps / _mm256_store_ps), so cVector, aVector and bVector must
  all be 32-byte aligned. The remaining num_points % 8 values are multiplied
  one at a time. Nothing is read or written when num_points is 0.
*/
static inline void volk_32f_x2_multiply_32f_a_avx(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;

    __m256 aVal, bVal, cVal;

    /* Vector part: one 8-float multiply per iteration */
    for(; number < eighthPoints; number++){
      aVal = _mm256_load_ps(aPtr);
      bVal = _mm256_load_ps(bPtr);

      cVal = _mm256_mul_ps(aVal, bVal);

      _mm256_store_ps(cPtr, cVal); /* Store the results back into the C container */

      aPtr += 8;
      bPtr += 8;
      cPtr += 8;
    }

    /* Scalar tail: the pointers already sit just past the last full group of eight */
    for(number = eighthPoints * 8; number < num_points; number++){
      *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
00189 #endif /* LV_HAVE_AVX */
00190 
00191 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies the two input vectors element by element and stores the products in the third vector (portable C version, aligned-kernel entry)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  Plain scalar loop with no SIMD intrinsics; the code itself places no
  alignment requirement on the pointers. Nothing is read or written when
  num_points is 0.
*/
static inline void volk_32f_x2_multiply_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    float* cPtr = cVector;
    const float* aPtr = aVector;
    const float* bPtr = bVector;
    unsigned int number;

    for(number = 0; number < num_points; number++){
      *cPtr++ = (*aPtr++) * (*bPtr++);
    }
}
00209 #endif /* LV_HAVE_GENERIC */
00210 
00211 #ifdef LV_HAVE_ORC
/*!
  \brief Multiplies the two input vectors and stores their results in the third vector (ORC version)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be multiplied
  \param bVector One of the vectors to be multiplied
  \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector

  The work is done by volk_32f_x2_multiply_32f_a_orc_impl, which is only
  declared here and must be provided by another translation unit.
*/
extern void volk_32f_x2_multiply_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);

/* Thin wrapper that forwards all arguments unchanged to the ORC implementation.
   NOTE(review): the wrapper is named _u_orc although it forwards to the _a_
   implementation -- confirm the naming is intended; it is kept as-is so that
   existing callers keep working. */
static inline void volk_32f_x2_multiply_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    volk_32f_x2_multiply_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
00223 #endif /* LV_HAVE_ORC */
00224 
00225 
00226 #endif /* INCLUDED_volk_32f_x2_multiply_32f_a_H */