GNU Radio 3.6.4.2 C++ API
|
#ifndef INCLUDED_volk_32u_byteswap_u_H
#define INCLUDED_volk_32u_byteswap_u_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
  \brief Byteswaps (in-place) a vector of uint32_t's; the buffer does not need to be 16-byte aligned.
  \param intsToSwap The vector of data to byte swap; overwritten with the swapped words
  \param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_u_sse2(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int number = 0;

  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  // Same width as the loop counter, so the comparison and the tail
  // computation below involve no implicit widening or narrowing.
  const unsigned int quarterPoints = num_points / 4;
  for(; number < quarterPoints; number++){
    // Load four 32-bit words; inputPtr is advanced after the store because
    // the operation is in-place. Unaligned load: no alignment requirement.
    input = _mm_loadu_si128((__m128i*)inputPtr);
    // Move every source byte to its destination position within each word
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_slli_epi32(input, 8);
    byte3 = _mm_srli_epi32(input, 8);
    byte4 = _mm_srli_epi32(input, 24);
    // The 24-bit shifts leave exactly one byte each; the 8-bit shifts must be
    // masked down to the single byte they contribute before being OR'ed in.
    output = _mm_or_si128(byte1, byte4);
    byte2 = _mm_and_si128(byte2, byte2mask);
    output = _mm_or_si128(output, byte2);
    byte3 = _mm_and_si128(byte3, byte3mask);
    output = _mm_or_si128(output, byte3);
    // Store the results back over the input
    _mm_storeu_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  // Byteswap the (at most three) words that did not fill a whole vector
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    uint32_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) | ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using plain scalar code.
  \param intsToSwap The vector of data to byte swap; overwritten with the swapped words
  \param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_generic(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;

  unsigned int point;
  for(point = 0; point < num_points; point++){
    uint32_t output = *inputPtr;
    // Reverse the four bytes of the word
    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));

    *inputPtr = output;
    inputPtr++;
  }
}
#endif /* LV_HAVE_GENERIC */




#endif /* INCLUDED_volk_32u_byteswap_u_H */
#ifndef INCLUDED_volk_32u_byteswap_a_H
#define INCLUDED_volk_32u_byteswap_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
  \brief Byteswaps (in-place) a 16-byte aligned vector of uint32_t's.
  \param intsToSwap The vector of data to byte swap; must be 16-byte aligned because aligned SSE2 loads/stores are used; overwritten with the swapped words
  \param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_a_sse2(uint32_t* intsToSwap, unsigned int num_points){
  unsigned int number = 0;

  uint32_t* inputPtr = intsToSwap;
  __m128i input, byte1, byte2, byte3, byte4, output;
  __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  __m128i byte3mask = _mm_set1_epi32(0x0000FF00);

  // Same width as the loop counter, so the comparison and the tail
  // computation below involve no implicit widening or narrowing.
  const unsigned int quarterPoints = num_points / 4;
  for(; number < quarterPoints; number++){
    // Load four 32-bit words; inputPtr is advanced after the store because
    // the operation is in-place. Aligned load: inputPtr must be 16-byte aligned.
    input = _mm_load_si128((__m128i*)inputPtr);
    // Move every source byte to its destination position within each word
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_slli_epi32(input, 8);
    byte3 = _mm_srli_epi32(input, 8);
    byte4 = _mm_srli_epi32(input, 24);
    // The 24-bit shifts leave exactly one byte each; the 8-bit shifts must be
    // masked down to the single byte they contribute before being OR'ed in.
    output = _mm_or_si128(byte1, byte4);
    byte2 = _mm_and_si128(byte2, byte2mask);
    output = _mm_or_si128(output, byte2);
    byte3 = _mm_and_si128(byte3, byte3mask);
    output = _mm_or_si128(output, byte3);
    // Store the results back over the input
    _mm_store_si128((__m128i*)inputPtr, output);
    inputPtr += 4;
  }

  // Byteswap the (at most three) words that did not fill a whole vector
  number = quarterPoints * 4;
  for(; number < num_points; number++){
    uint32_t outputVal = *inputPtr;
    outputVal = (((outputVal >> 24) & 0xff) | ((outputVal >> 8) & 0x0000ff00) | ((outputVal << 8) & 0x00ff0000) | ((outputVal << 24) & 0xff000000));
    *inputPtr = outputVal;
    inputPtr++;
  }
}
#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) a vector of uint32_t's using plain scalar code.
  \param intsToSwap The vector of data to byte swap; overwritten with the swapped words
  \param num_points The number of 32-bit words in intsToSwap
*/
static inline void volk_32u_byteswap_a_generic(uint32_t* intsToSwap, unsigned int num_points){
  uint32_t* inputPtr = intsToSwap;

  unsigned int point;
  for(point = 0; point < num_points; point++){
    uint32_t output = *inputPtr;
    // Reverse the four bytes of the word
    output = (((output >> 24) & 0xff) | ((output >> 8) & 0x0000ff00) | ((output << 8) & 0x00ff0000) | ((output << 24) & 0xff000000));

    *inputPtr = output;
    inputPtr++;
  }
}
#endif /* LV_HAVE_GENERIC */




#endif /* INCLUDED_volk_32u_byteswap_a_H */