GNU Radio 3.6.4.2 C++ API
volk_32u_byteswap.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_32u_byteswap_u_H
00002 #define INCLUDED_volk_32u_byteswap_u_H
00003 
00004 #include <inttypes.h>
00005 #include <stdio.h>
00006 
00007 #ifdef LV_HAVE_SSE2
00008 #include <emmintrin.h>
00009 
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using SSE2 unaligned loads and stores.
  \param intsToSwap The vector of data to byte swap; each 32-bit word has its byte order reversed
  \param num_points The number of 32-bit data points in the vector
*/
static inline void volk_32u_byteswap_u_sse2(uint32_t* intsToSwap, unsigned int num_points){
  const unsigned int fullVectors = num_points / 4;
  const __m128i upperMidMask = _mm_set1_epi32(0x00FF0000);
  const __m128i lowerMidMask = _mm_set1_epi32(0x0000FF00);
  unsigned int idx;

  // Vector part: four 32-bit words per iteration, overwritten in place.
  for(idx = 0; idx < fullVectors; idx++){
    uint32_t* lanes = intsToSwap + 4 * idx;
    const __m128i words = _mm_loadu_si128((__m128i*)lanes);
    // The outermost bytes trade places; plain shifts already clear everything else.
    const __m128i outer = _mm_or_si128(_mm_slli_epi32(words, 24), _mm_srli_epi32(words, 24));
    // The two middle bytes trade places; the masks drop their shifted neighbours.
    const __m128i upperMid = _mm_and_si128(_mm_slli_epi32(words, 8), upperMidMask);
    const __m128i lowerMid = _mm_and_si128(_mm_srli_epi32(words, 8), lowerMidMask);
    const __m128i swapped = _mm_or_si128(_mm_or_si128(outer, upperMid), lowerMid);
    _mm_storeu_si128((__m128i*)lanes, swapped);
  }

  // Scalar part: the zero to three words that do not fill a whole vector.
  for(idx = 4 * fullVectors; idx < num_points; idx++){
    const uint32_t word = intsToSwap[idx];
    intsToSwap[idx] = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8) | ((word >> 8) & 0x0000ff00u) | ((word >> 24) & 0x000000ffu);
  }
}
00052 #endif /* LV_HAVE_SSE2 */
00053 
00054 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using plain scalar code.
  \param intsToSwap The vector of data to byte swap; each 32-bit word has its byte order reversed
  \param num_points The number of 32-bit data points in the vector
*/
static inline void volk_32u_byteswap_generic(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    const uint32_t word = intsToSwap[idx];
    // Isolate each byte, move it to its mirrored position, and recombine.
    intsToSwap[idx] = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8) | ((word >> 8) & 0x0000ff00u) | ((word >> 24) & 0x000000ffu);
  }
}
00072 #endif /* LV_HAVE_GENERIC */
00073 
00074 
00075 
00076 
00077 #endif /* INCLUDED_volk_32u_byteswap_u_H */
00078 #ifndef INCLUDED_volk_32u_byteswap_a_H
00079 #define INCLUDED_volk_32u_byteswap_a_H
00080 
00081 #include <inttypes.h>
00082 #include <stdio.h>
00083 
00084 #ifdef LV_HAVE_SSE2
00085 #include <emmintrin.h>
00086 
/*!
  \brief Byteswaps (in-place) an aligned vector of uint32_t's using SSE2 aligned loads and stores.
  \param intsToSwap The vector of data to byte swap; accessed with the aligned 128-bit load/store intrinsics, so it must be 16-byte aligned
  \param num_points The number of 32-bit data points in the vector
*/
static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){
  const unsigned int fullVectors = num_points / 4;
  const __m128i upperMidMask = _mm_set1_epi32(0x00FF0000);
  const __m128i lowerMidMask = _mm_set1_epi32(0x0000FF00);
  unsigned int idx;

  // Vector part: four 32-bit words per iteration, overwritten in place.
  for(idx = 0; idx < fullVectors; idx++){
    uint32_t* lanes = intsToSwap + 4 * idx;
    const __m128i words = _mm_load_si128((__m128i*)lanes);
    // The outermost bytes trade places; plain shifts already clear everything else.
    const __m128i outer = _mm_or_si128(_mm_slli_epi32(words, 24), _mm_srli_epi32(words, 24));
    // The two middle bytes trade places; the masks drop their shifted neighbours.
    const __m128i upperMid = _mm_and_si128(_mm_slli_epi32(words, 8), upperMidMask);
    const __m128i lowerMid = _mm_and_si128(_mm_srli_epi32(words, 8), lowerMidMask);
    const __m128i swapped = _mm_or_si128(_mm_or_si128(outer, upperMid), lowerMid);
    _mm_store_si128((__m128i*)lanes, swapped);
  }

  // Scalar part: the zero to three words that do not fill a whole vector.
  for(idx = 4 * fullVectors; idx < num_points; idx++){
    const uint32_t word = intsToSwap[idx];
    intsToSwap[idx] = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8) | ((word >> 8) & 0x0000ff00u) | ((word >> 24) & 0x000000ffu);
  }
}
00129 #endif /* LV_HAVE_SSE2 */
00130 
00131 #ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using plain scalar code.
  \param intsToSwap The vector of data to byte swap; each 32-bit word has its byte order reversed
  \param num_points The number of 32-bit data points in the vector
*/
static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int idx;

  for(idx = 0; idx < num_points; idx++){
    const uint32_t word = intsToSwap[idx];
    // Isolate each byte, move it to its mirrored position, and recombine.
    intsToSwap[idx] = ((word & 0x000000ffu) << 24) | ((word & 0x0000ff00u) << 8) | ((word >> 8) & 0x0000ff00u) | ((word >> 24) & 0x000000ffu);
  }
}
00149 #endif /* LV_HAVE_GENERIC */
00150 
00151 
00152 
00153 
00154 #endif /* INCLUDED_volk_32u_byteswap_a_H */