GNU Radio 3.6.4.2 C++ API
volk_32f_s32f_multiply_32f.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
00002 #define INCLUDED_volk_32f_s32f_multiply_32f_u_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE
00008 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE, unaligned access)

  The bulk of the data is processed four floats at a time with unaligned
  loads and stores; the remaining (num_points % 4) elements are handled
  one by one.

  \param cVector Output buffer that receives the num_points products
  \param aVector Input buffer holding the num_points values to be scaled
  \param scalar The factor every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int simdEnd = (num_points / 4) * 4;
    /* The scalar replicated into all four lanes. */
    const __m128 factor = _mm_set1_ps(scalar);
    unsigned int i = 0;

    /* Vector part: four products per iteration. */
    for(; i < simdEnd; i += 4){
        const __m128 in = _mm_loadu_ps(aVector + i);
        _mm_storeu_ps(cVector + i, _mm_mul_ps(in, factor));
    }

    /* Scalar part: whatever did not fill a complete group of four. */
    for(; i < num_points; i++){
        cVector[i] = aVector[i] * scalar;
    }
}
00042 #endif /* LV_HAVE_SSE */
00043 
00044 #ifdef LV_HAVE_AVX
00045 #include <immintrin.h>
00046 /*!
00047   \brief Scalar float multiply
00048   \param cVector The vector where the results will be stored
00049   \param aVector One of the vectors to be multiplied
00050   \param scalar the scalar value
00051   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00052 */
00053 static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
00054     unsigned int number = 0;
00055     const unsigned int eighthPoints = num_points / 8;
00056 
00057     float* cPtr = cVector;
00058     const float* aPtr = aVector;
00059 
00060     __m256 aVal, bVal, cVal;
00061     bVal = _mm256_set1_ps(scalar);
00062     for(;number < eighthPoints; number++){
00063 
00064       aVal = _mm256_loadu_ps(aPtr);
00065 
00066       cVal = _mm256_mul_ps(aVal, bVal);
00067 
00068       _mm256_storeu_ps(cPtr,cVal); // Store the results back into the C container
00069 
00070       aPtr += 8;
00071       cPtr += 8;
00072     }
00073 
00074     number = eighthPoints * 8;
00075     for(;number < num_points; number++){
00076       *cPtr++ = (*aPtr++) * scalar;
00077     }
00078 }
00079 #endif /* LV_HAVE_AVX */
00080 
00081 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable C version)
  \param cVector Output buffer that receives the num_points products
  \param aVector Input buffer holding the num_points values to be scaled
  \param scalar The factor every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int i;

  /* One product per iteration; nothing is touched when num_points is 0. */
  for(i = 0; i < num_points; i++){
    cVector[i] = aVector[i] * scalar;
  }
}
00099 #endif /* LV_HAVE_GENERIC */
00100 
00101 
00102 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
00103 #ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
00104 #define INCLUDED_volk_32f_s32f_multiply_32f_a_H
00105 
00106 #include <inttypes.h>
00107 #include <stdio.h>
00108 
00109 #ifdef LV_HAVE_SSE
00110 #include <xmmintrin.h>
/*!
  \brief Multiplies each element of a float vector by a scalar (SSE, aligned access)

  The bulk of the data is processed four floats at a time with aligned
  loads and stores, so cVector and aVector must both be 16-byte aligned.
  The remaining (num_points % 4) elements are handled one by one.

  \param cVector Output buffer (16-byte aligned) that receives the num_points products
  \param aVector Input buffer (16-byte aligned) holding the num_points values to be scaled
  \param scalar The factor every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
    /* Largest multiple of four that does not exceed num_points. */
    const unsigned int simdEnd = (num_points / 4) * 4;
    /* The scalar replicated into all four lanes. */
    const __m128 factor = _mm_set1_ps(scalar);
    unsigned int i = 0;

    /* Vector part: four products per iteration, aligned access. */
    for(; i < simdEnd; i += 4){
        const __m128 in = _mm_load_ps(aVector + i);
        _mm_store_ps(cVector + i, _mm_mul_ps(in, factor));
    }

    /* Scalar part: whatever did not fill a complete group of four. */
    for(; i < num_points; i++){
        cVector[i] = aVector[i] * scalar;
    }
}
00144 #endif /* LV_HAVE_SSE */
00145 
00146 #ifdef LV_HAVE_AVX
00147 #include <immintrin.h>
00148 /*!
00149   \brief Scalar float multiply
00150   \param cVector The vector where the results will be stored
00151   \param aVector One of the vectors to be multiplied
00152   \param scalar the scalar value
00153   \param num_points The number of values in aVector and bVector to be multiplied together and stored into cVector
00154 */
00155 static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
00156     unsigned int number = 0;
00157     const unsigned int eighthPoints = num_points / 8;
00158 
00159     float* cPtr = cVector;
00160     const float* aPtr = aVector;
00161 
00162     __m256 aVal, bVal, cVal;
00163     bVal = _mm256_set1_ps(scalar);
00164     for(;number < eighthPoints; number++){
00165 
00166       aVal = _mm256_load_ps(aPtr);
00167 
00168       cVal = _mm256_mul_ps(aVal, bVal);
00169 
00170       _mm256_store_ps(cPtr,cVal); // Store the results back into the C container
00171 
00172       aPtr += 8;
00173       cPtr += 8;
00174     }
00175 
00176     number = eighthPoints * 8;
00177     for(;number < num_points; number++){
00178       *cPtr++ = (*aPtr++) * scalar;
00179     }
00180 }
00181 #endif /* LV_HAVE_AVX */
00182 
00183 
00184 #ifdef LV_HAVE_GENERIC
/*!
  \brief Multiplies each element of a float vector by a scalar (portable C version)

  Uses only plain element-wise accesses, so the code itself places no
  alignment requirement on the buffers beyond that of float.

  \param cVector Output buffer that receives the num_points products
  \param aVector Input buffer holding the num_points values to be scaled
  \param scalar The factor every input element is multiplied by
  \param num_points The number of floats read from aVector and written to cVector
*/
static inline void volk_32f_s32f_multiply_32f_a_generic(float* cVector, const float* aVector, const float scalar, unsigned int num_points){
  unsigned int i;

  /* One product per iteration; nothing is touched when num_points is 0. */
  for(i = 0; i < num_points; i++){
    cVector[i] = aVector[i] * scalar;
  }
}
00202 #endif /* LV_HAVE_GENERIC */
00203 
00204 #ifdef LV_HAVE_ORC
/*!
  \brief Scalar float multiply, delegated to the ORC-backed implementation

  This wrapper does no work of its own: it hands all four arguments,
  unchanged and in the same order, to volk_32f_s32f_multiply_32f_a_orc_impl,
  which is declared here and defined elsewhere.

  NOTE(review): the wrapper carries the "_u_" name while forwarding to the
  "_a_orc_impl" symbol from inside the "_a_H" include guard -- confirm that
  this naming is intentional.

  \param cVector The vector where the results will be stored
  \param aVector The vector whose elements are to be multiplied by the scalar
  \param scalar the scalar value
  \param num_points The number of values to process (passed through to the implementation)
*/
extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* out, const float* in, const float scale, unsigned int count);

static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector, const float* aVector, const float scalar, unsigned int num_points)
{
    volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
00216 #endif /* LV_HAVE_ORC */
00217 
00218 
00219 
00220 
00221 #endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */