GNU Radio 3.6.4.2 C++ API
volk_32f_x2_add_32f.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_x2_add_32f_u_H
00002 #define INCLUDED_volk_32f_x2_add_32f_u_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
/*!
  \brief Adds two input vectors element by element and stores the sums in a third vector (SSE, unaligned access)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector

  Four floats are processed per iteration with unaligned loads and stores;
  the remaining (num_points % 4) values are added one at a time.
*/
static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int simdEnd = (num_points / 4) * 4;
    unsigned int idx;

    /* Vector part: one 128-bit register (four floats) per pass. */
    for(idx = 0; idx < simdEnd; idx += 4){
      const __m128 lhs = _mm_loadu_ps(aVector + idx);
      const __m128 rhs = _mm_loadu_ps(bVector + idx);
      const __m128 sum = _mm_add_ps(lhs, rhs);

      _mm_storeu_ps(cVector + idx, sum);
    }

    /* Scalar part: whatever did not fill a complete group of four. */
    for(idx = simdEnd; idx < num_points; idx++){
      cVector[idx] = aVector[idx] + bVector[idx];
    }
}
00044 #endif /* LV_HAVE_SSE */
00045 
00046 #ifdef LV_HAVE_GENERIC
/*!
  \brief Adds two input vectors element by element and stores the sums in a third vector (portable C)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    /* Plain scalar loop: one addition per output element. */
    for(idx = 0; idx < num_points; idx++){
      cVector[idx] = aVector[idx] + bVector[idx];
    }
}
00064 #endif /* LV_HAVE_GENERIC */
00065 
00066 #endif /* INCLUDED_volk_32f_x2_add_32f_u_H */
00067 #ifndef INCLUDED_volk_32f_x2_add_32f_a_H
00068 #define INCLUDED_volk_32f_x2_add_32f_a_H
00069 
00070 #include <inttypes.h>
00071 #include <stdio.h>
00072 
00073 #ifdef LV_HAVE_SSE
00074 #include <xmmintrin.h>
/*!
  \brief Adds two input vectors element by element and stores the sums in a third vector (SSE, aligned access)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector

  Four floats are processed per iteration with _mm_load_ps / _mm_store_ps,
  so all three pointers must be 16-byte aligned. The remaining
  (num_points % 4) values are added one at a time.
*/
static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int simdEnd = (num_points / 4) * 4;
    unsigned int idx;

    /* Vector part: one 128-bit register (four floats) per pass. */
    for(idx = 0; idx < simdEnd; idx += 4){
      const __m128 lhs = _mm_load_ps(aVector + idx);
      const __m128 rhs = _mm_load_ps(bVector + idx);
      const __m128 sum = _mm_add_ps(lhs, rhs);

      _mm_store_ps(cVector + idx, sum);
    }

    /* Scalar part: whatever did not fill a complete group of four. */
    for(idx = simdEnd; idx < num_points; idx++){
      cVector[idx] = aVector[idx] + bVector[idx];
    }
}
00110 #endif /* LV_HAVE_SSE */
00111 
00112 #ifdef LV_HAVE_GENERIC
/*!
  \brief Adds two input vectors element by element and stores the sums in a third vector (portable C)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
    unsigned int idx;

    /* Plain scalar loop: one addition per output element. */
    for(idx = 0; idx < num_points; idx++){
      cVector[idx] = aVector[idx] + bVector[idx];
    }
}
00130 #endif /* LV_HAVE_GENERIC */
00131 
00132 #ifdef LV_HAVE_ORC
/*!
  \brief Adds two input vectors element by element and stores the sums in a third vector (ORC back end)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector

  The implementation symbol is only declared here; its definition lives
  outside this header.
*/
extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*
 * Thin pass-through to the ORC implementation; arguments are forwarded
 * unchanged.
 * NOTE(review): this wrapper is named "_u_orc" although it sits in the
 * "_a_H" guard and forwards to "_a_orc_impl" -- confirm the naming is
 * intentional before relying on it.
 */
static inline void volk_32f_x2_add_32f_u_orc(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
00144 #endif /* LV_HAVE_ORC */
00145 
00146 
00147 #endif /* INCLUDED_volk_32f_x2_add_32f_a_H */