GNU Radio 3.6.4.2 C++ API
|
00001 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H 00002 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H 00003 00004 #include <inttypes.h> 00005 #include <stdio.h> 00006 #include <volk/volk_complex.h> 00007 #include <float.h> 00008 00009 #ifdef LV_HAVE_SSE3 00010 #include <pmmintrin.h> 00011 /*! 00012 \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector 00013 \param cVector The vector where the results will be stored 00014 \param aVector First vector to be multiplied 00015 \param bVector Second vector that is conjugated before being multiplied 00016 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00017 */ 00018 static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00019 unsigned int number = 0; 00020 const unsigned int halfPoints = num_points / 2; 00021 00022 __m128 x, y, yl, yh, z, tmp1, tmp2; 00023 lv_32fc_t* c = cVector; 00024 const lv_32fc_t* a = aVector; 00025 const lv_32fc_t* b = bVector; 00026 00027 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); 00028 00029 for(;number < halfPoints; number++){ 00030 00031 x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi 00032 y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di 00033 00034 y = _mm_xor_ps(y, conjugator); // conjugate y 00035 00036 yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr 00037 yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di 00038 00039 tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr 00040 00041 x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br 00042 00043 tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di 00044 00045 z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di 00046 00047 _mm_storeu_ps((float*)c,z); // Store the results back into the C container 00048 00049 a += 2; 00050 b += 2; 00051 c += 2; 00052 } 00053 00054 if((num_points % 2) != 0) { 00055 *c = (*a) * lv_conj(*b); 00056 } 00057 } 00058 #endif /* LV_HAVE_SSE */ 00059 00060 #ifdef LV_HAVE_GENERIC 00061 /*! 00062 \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector 00063 \param cVector The vector where the results will be stored 00064 \param aVector First vector to be multiplied 00065 \param bVector Second vector that is conjugated before being multiplied 00066 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00067 */ 00068 static inline void volk_32fc_x2_multiply_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00069 lv_32fc_t* cPtr = cVector; 00070 const lv_32fc_t* aPtr = aVector; 00071 const lv_32fc_t* bPtr= bVector; 00072 unsigned int number = 0; 00073 00074 for(number = 0; number < num_points; number++){ 00075 *cPtr++ = (*aPtr++) * lv_conj(*bPtr++); 00076 } 00077 } 00078 #endif /* LV_HAVE_GENERIC */ 00079 00080 00081 #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H */ 00082 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H 00083 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H 00084 00085 #include <inttypes.h> 00086 #include <stdio.h> 00087 #include <volk/volk_complex.h> 00088 #include <float.h> 00089 00090 #ifdef LV_HAVE_SSE3 00091 #include <pmmintrin.h> 00092 /*! 00093 \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector 00094 \param cVector The vector where the results will be stored 00095 \param aVector First vector to be multiplied 00096 \param bVector Second vector that is conjugated before being multiplied 00097 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00098 */ 00099 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00100 unsigned int number = 0; 00101 const unsigned int halfPoints = num_points / 2; 00102 00103 __m128 x, y, yl, yh, z, tmp1, tmp2; 00104 lv_32fc_t* c = cVector; 00105 const lv_32fc_t* a = aVector; 00106 const lv_32fc_t* b = bVector; 00107 00108 __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f); 00109 00110 for(;number < halfPoints; number++){ 00111 00112 x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi 00113 y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di 00114 00115 y = _mm_xor_ps(y, conjugator); // conjugate y 00116 00117 yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr 00118 yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di 00119 00120 tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr 00121 00122 x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br 00123 00124 tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di 00125 00126 z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di 00127 00128 _mm_store_ps((float*)c,z); // Store the results back into the C container 00129 00130 a += 2; 00131 b += 2; 00132 c += 2; 00133 } 00134 00135 if((num_points % 2) != 0) { 00136 *c = (*a) * lv_conj(*b); 00137 } 00138 } 00139 #endif /* LV_HAVE_SSE */ 00140 00141 #ifdef LV_HAVE_GENERIC 00142 /*! 00143 \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector 00144 \param cVector The vector where the results will be stored 00145 \param aVector First vector to be multiplied 00146 \param bVector Second vector that is conjugated before being multiplied 00147 \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector 00148 */ 00149 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){ 00150 lv_32fc_t* cPtr = cVector; 00151 const lv_32fc_t* aPtr = aVector; 00152 const lv_32fc_t* bPtr= bVector; 00153 unsigned int number = 0; 00154 00155 for(number = 0; number < num_points; number++){ 00156 *cPtr++ = (*aPtr++) * lv_conj(*bPtr++); 00157 } 00158 } 00159 #endif /* LV_HAVE_GENERIC */ 00160 00161 00162 #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */