GNU Radio 3.6.4.2 C++ API
volk_32fc_x2_multiply_conjugate_32fc.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
00002 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 #include <volk/volk_complex.h>
00007 #include <float.h>
00008 
00009 #ifdef LV_HAVE_SSE3
00010 #include <pmmintrin.h>
00011   /*!
00012     \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
00013     \param cVector The vector where the results will be stored
00014     \param aVector First vector to be multiplied
00015     \param bVector Second vector that is conjugated before being multiplied
00016     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00017   */
00018 static inline void volk_32fc_x2_multiply_conjugate_32fc_u_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00019   unsigned int number = 0;
00020     const unsigned int halfPoints = num_points / 2;
00021 
00022     __m128 x, y, yl, yh, z, tmp1, tmp2;
00023     lv_32fc_t* c = cVector;
00024     const lv_32fc_t* a = aVector;
00025     const lv_32fc_t* b = bVector;
00026 
00027     __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
00028 
00029     for(;number < halfPoints; number++){
00030 
00031       x = _mm_loadu_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
00032       y = _mm_loadu_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
00033 
00034       y = _mm_xor_ps(y, conjugator); // conjugate y
00035 
00036       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
00037       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
00038 
00039       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
00040 
00041       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
00042 
00043       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
00044 
00045       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
00046 
00047       _mm_storeu_ps((float*)c,z); // Store the results back into the C container
00048 
00049       a += 2;
00050       b += 2;
00051       c += 2;
00052     }
00053 
00054     if((num_points % 2) != 0) {
00055       *c = (*a) * lv_conj(*b);
00056     }
00057 }
00058 #endif /* LV_HAVE_SSE3 */
00059 
00060 #ifdef LV_HAVE_GENERIC
00061   /*!
00062     \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
00063     \param cVector The vector where the results will be stored
00064     \param aVector First vector to be multiplied
00065     \param bVector Second vector that is conjugated before being multiplied
00066     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00067   */
00068 static inline void volk_32fc_x2_multiply_conjugate_32fc_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00069     lv_32fc_t* cPtr = cVector;
00070     const lv_32fc_t* aPtr = aVector;
00071     const lv_32fc_t* bPtr=  bVector;
00072     unsigned int number = 0;
00073 
00074     for(number = 0; number < num_points; number++){
00075       *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
00076     }
00077 }
00078 #endif /* LV_HAVE_GENERIC */
00079 
00080 
00081 #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_u_H */
00082 #ifndef INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
00083 #define INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H
00084 
00085 #include <inttypes.h>
00086 #include <stdio.h>
00087 #include <volk/volk_complex.h>
00088 #include <float.h>
00089 
00090 #ifdef LV_HAVE_SSE3
00091 #include <pmmintrin.h>
00092   /*!
00093     \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
00094     \param cVector The vector where the results will be stored
00095     \param aVector First vector to be multiplied
00096     \param bVector Second vector that is conjugated before being multiplied
00097     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00098   */
00099 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_sse3(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00100   unsigned int number = 0;
00101     const unsigned int halfPoints = num_points / 2;
00102 
00103     __m128 x, y, yl, yh, z, tmp1, tmp2;
00104     lv_32fc_t* c = cVector;
00105     const lv_32fc_t* a = aVector;
00106     const lv_32fc_t* b = bVector;
00107 
00108     __m128 conjugator = _mm_setr_ps(0, -0.f, 0, -0.f);
00109 
00110     for(;number < halfPoints; number++){
00111 
00112       x = _mm_load_ps((float*)a); // Load the ar + ai, br + bi as ar,ai,br,bi
00113       y = _mm_load_ps((float*)b); // Load the cr + ci, dr + di as cr,ci,dr,di
00114 
00115       y = _mm_xor_ps(y, conjugator); // conjugate y
00116 
00117       yl = _mm_moveldup_ps(y); // Load yl with cr,cr,dr,dr
00118       yh = _mm_movehdup_ps(y); // Load yh with ci,ci,di,di
00119 
00120       tmp1 = _mm_mul_ps(x,yl); // tmp1 = ar*cr,ai*cr,br*dr,bi*dr
00121 
00122       x = _mm_shuffle_ps(x,x,0xB1); // Re-arrange x to be ai,ar,bi,br
00123 
00124       tmp2 = _mm_mul_ps(x,yh); // tmp2 = ai*ci,ar*ci,bi*di,br*di
00125 
00126       z = _mm_addsub_ps(tmp1,tmp2); // ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
00127 
00128       _mm_store_ps((float*)c,z); // Store the results back into the C container
00129 
00130       a += 2;
00131       b += 2;
00132       c += 2;
00133     }
00134 
00135     if((num_points % 2) != 0) {
00136       *c = (*a) * lv_conj(*b);
00137     }
00138 }
00139 #endif /* LV_HAVE_SSE3 */
00140 
00141 #ifdef LV_HAVE_GENERIC
00142   /*!
00143     \brief Multiplies vector a by the conjugate of vector b and stores the results in the third vector
00144     \param cVector The vector where the results will be stored
00145     \param aVector First vector to be multiplied
00146     \param bVector Second vector that is conjugated before being multiplied
00147     \param num_points The number of complex values in aVector and bVector to be multiplied together and stored into cVector
00148   */
00149 static inline void volk_32fc_x2_multiply_conjugate_32fc_a_generic(lv_32fc_t* cVector, const lv_32fc_t* aVector, const lv_32fc_t* bVector, unsigned int num_points){
00150     lv_32fc_t* cPtr = cVector;
00151     const lv_32fc_t* aPtr = aVector;
00152     const lv_32fc_t* bPtr=  bVector;
00153     unsigned int number = 0;
00154 
00155     for(number = 0; number < num_points; number++){
00156       *cPtr++ = (*aPtr++) * lv_conj(*bPtr++);
00157     }
00158 }
00159 #endif /* LV_HAVE_GENERIC */
00160 
00161 
00162 #endif /* INCLUDED_volk_32fc_x2_multiply_conjugate_32fc_a_H */