GNU Radio 3.6.4.2 C++ API
|
#ifndef INCLUDED_volk_32f_x2_add_32f_u_H
#define INCLUDED_volk_32f_x2_add_32f_u_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (SSE, unaligned loads and stores)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_u_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  /* Vector part: each pass adds four floats at once. */
  for(number = 0; number < quarterPoints; number++){
    const __m128 aVal = _mm_loadu_ps(aPtr);
    const __m128 bVal = _mm_loadu_ps(bPtr);
    const __m128 cVal = _mm_add_ps(aVal, bVal);

    _mm_storeu_ps(cPtr, cVal); /* Store the results back into the C container */

    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the num_points % 4 values that do not fill a whole register. */
  for(number = quarterPoints * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) + (*bPtr++);
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (portable C)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) + (*bPtr++);
  }
}
#endif /* LV_HAVE_GENERIC */

#endif /* INCLUDED_volk_32f_x2_add_32f_u_H */
#ifndef INCLUDED_volk_32f_x2_add_32f_a_H
#define INCLUDED_volk_32f_x2_add_32f_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (SSE, aligned loads and stores)

  Uses _mm_load_ps / _mm_store_ps, so all three pointers must be 16-byte
  aligned whenever num_points is at least 4.

  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_a_sse(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  const unsigned int quarterPoints = num_points / 4;
  unsigned int number;

  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;

  /* Vector part: each pass adds four floats at once. */
  for(number = 0; number < quarterPoints; number++){
    const __m128 aVal = _mm_load_ps(aPtr);
    const __m128 bVal = _mm_load_ps(bPtr);
    const __m128 cVal = _mm_add_ps(aVal, bVal);

    _mm_store_ps(cPtr, cVal); /* Store the results back into the C container */

    aPtr += 4;
    bPtr += 4;
    cPtr += 4;
  }

  /* Scalar tail: the num_points % 4 values that do not fill a whole register. */
  for(number = quarterPoints * 4; number < num_points; number++){
    *cPtr++ = (*aPtr++) + (*bPtr++);
  }
}
#endif /* LV_HAVE_SSE */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Adds the two input vectors element by element and stores the sums in a third vector (portable C)
  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_a_generic(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  float* cPtr = cVector;
  const float* aPtr = aVector;
  const float* bPtr = bVector;
  unsigned int number;

  for(number = 0; number < num_points; number++){
    *cPtr++ = (*aPtr++) + (*bPtr++);
  }
}
#endif /* LV_HAVE_GENERIC */

#ifdef LV_HAVE_ORC
/* Implemented outside this header; only the declaration is visible here. */
extern void volk_32f_x2_add_32f_a_orc_impl(float* cVector, const float* aVector, const float* bVector, unsigned int num_points);
/*!
  \brief Adds the two input vectors and stores their results in the third vector by forwarding to the ORC implementation

  NOTE(review): this wrapper carries the "_u_" name although it sits in the
  aligned section and forwards to the "_a_" implementation; confirm this is
  the name the kernel dispatcher expects before renaming anything.

  \param cVector The vector where the results will be stored
  \param aVector One of the vectors to be added
  \param bVector One of the vectors to be added
  \param num_points The number of values in aVector and bVector to be added together and stored into cVector
*/
static inline void volk_32f_x2_add_32f_u_orc(float* cVector, const float* aVector, const float* bVector, unsigned int num_points){
  volk_32f_x2_add_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
#endif /* LV_HAVE_ORC */


#endif /* INCLUDED_volk_32f_x2_add_32f_a_H */