/* GNU Radio 3.6.4.2 C++ API */
#ifndef INCLUDED_volk_64u_byteswap_u_H
#define INCLUDED_volk_64u_byteswap_u_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
  \brief Byteswaps (in-place) an unaligned vector of uint64_t's.

  Two points are processed per iteration with unaligned 128-bit loads and
  stores; if num_points is odd, the last point is swapped with scalar code.

  \param intsToSwap The vector of data to byte swap
  \param num_points The number of data points
*/
static inline void volk_64u_byteswap_u_sse2(uint64_t* intsToSwap, unsigned int num_points){
  uint64_t* dataPtr = intsToSwap;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  for(number = 0; number < halfPoints; number++){
    __m128i input, byte1, byte2, byte3, byte4, output;

    // Load two 64-bit points; the pointer is advanced after the store
    // because the operation is done in-place.
    input = _mm_loadu_si128((const __m128i*)dataPtr);

    // Byteswap each 32-bit lane: the outer bytes need only a shift,
    // the inner bytes need a shift and a mask.
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    byte4 = _mm_srli_epi32(input, 24);
    output = _mm_or_si128(_mm_or_si128(byte1, byte4), _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit words of each 64-bit point to finish the swap.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_storeu_si128((__m128i*)dataPtr, output);
    dataPtr += 2;
  }

  // Byteswap the single remaining point when num_points is odd.
  // The element is accessed as a uint64_t (not through a uint32_t pointer)
  // so the access does not violate the strict-aliasing rules.
  if(num_points & 1u){
    const uint64_t value = *dataPtr;
    uint32_t lowWord  = (uint32_t)value;
    uint32_t highWord = (uint32_t)(value >> 32);

    lowWord  = (((lowWord  >> 24) & 0xff) | ((lowWord  >> 8) & 0x0000ff00) | ((lowWord  << 8) & 0x00ff0000) | ((lowWord  << 24) & 0xff000000));
    highWord = (((highWord >> 24) & 0xff) | ((highWord >> 8) & 0x0000ff00) | ((highWord << 8) & 0x00ff0000) | ((highWord << 24) & 0xff000000));

    // The swapped low word becomes the high word and vice versa.
    *dataPtr = ((uint64_t)lowWord << 32) | (uint64_t)highWord;
  }
}
#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) an unaligned vector of uint64_t's.

  Portable scalar implementation: every point has the order of its eight
  bytes reversed.

  \param intsToSwap The vector of data to byte swap
  \param num_points The number of data points
*/
static inline void volk_64u_byteswap_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;

  for(point = 0; point < num_points; point++){
    // Access the element as a uint64_t (not through a uint32_t pointer)
    // so the access does not violate the strict-aliasing rules.
    const uint64_t value = intsToSwap[point];
    uint32_t lowWord  = (uint32_t)value;
    uint32_t highWord = (uint32_t)(value >> 32);

    lowWord  = (((lowWord  >> 24) & 0xff) | ((lowWord  >> 8) & 0x0000ff00) | ((lowWord  << 8) & 0x00ff0000) | ((lowWord  << 24) & 0xff000000));
    highWord = (((highWord >> 24) & 0xff) | ((highWord >> 8) & 0x0000ff00) | ((highWord << 8) & 0x00ff0000) | ((highWord << 24) & 0xff000000));

    // The swapped low word becomes the high word and vice versa.
    intsToSwap[point] = ((uint64_t)lowWord << 32) | (uint64_t)highWord;
  }
}
#endif /* LV_HAVE_GENERIC */




#endif /* INCLUDED_volk_64u_byteswap_u_H */
#ifndef INCLUDED_volk_64u_byteswap_a_H
#define INCLUDED_volk_64u_byteswap_a_H

#include <inttypes.h>
#include <stdio.h>

#ifdef LV_HAVE_SSE2
#include <emmintrin.h>

/*!
  \brief Byteswaps (in-place) an aligned vector of uint64_t's.

  Two points are processed per iteration with aligned 128-bit loads and
  stores (_mm_load_si128 / _mm_store_si128), so intsToSwap must be 16-byte
  aligned; if num_points is odd, the last point is swapped with scalar code.

  \param intsToSwap The vector of data to byte swap
  \param num_points The number of data points
*/
static inline void volk_64u_byteswap_a_sse2(uint64_t* intsToSwap, unsigned int num_points){
  uint64_t* dataPtr = intsToSwap;
  const __m128i byte2mask = _mm_set1_epi32(0x00FF0000);
  const __m128i byte3mask = _mm_set1_epi32(0x0000FF00);
  const unsigned int halfPoints = num_points / 2;
  unsigned int number;

  for(number = 0; number < halfPoints; number++){
    __m128i input, byte1, byte2, byte3, byte4, output;

    // Load two 64-bit points; the pointer is advanced after the store
    // because the operation is done in-place.
    input = _mm_load_si128((const __m128i*)dataPtr);

    // Byteswap each 32-bit lane: the outer bytes need only a shift,
    // the inner bytes need a shift and a mask.
    byte1 = _mm_slli_epi32(input, 24);
    byte2 = _mm_and_si128(_mm_slli_epi32(input, 8), byte2mask);
    byte3 = _mm_and_si128(_mm_srli_epi32(input, 8), byte3mask);
    byte4 = _mm_srli_epi32(input, 24);
    output = _mm_or_si128(_mm_or_si128(byte1, byte4), _mm_or_si128(byte2, byte3));

    // Exchange the two 32-bit words of each 64-bit point to finish the swap.
    output = _mm_shuffle_epi32(output, _MM_SHUFFLE(2, 3, 0, 1));

    _mm_store_si128((__m128i*)dataPtr, output);
    dataPtr += 2;
  }

  // Byteswap the single remaining point when num_points is odd.
  // The element is accessed as a uint64_t (not through a uint32_t pointer)
  // so the access does not violate the strict-aliasing rules.
  if(num_points & 1u){
    const uint64_t value = *dataPtr;
    uint32_t lowWord  = (uint32_t)value;
    uint32_t highWord = (uint32_t)(value >> 32);

    lowWord  = (((lowWord  >> 24) & 0xff) | ((lowWord  >> 8) & 0x0000ff00) | ((lowWord  << 8) & 0x00ff0000) | ((lowWord  << 24) & 0xff000000));
    highWord = (((highWord >> 24) & 0xff) | ((highWord >> 8) & 0x0000ff00) | ((highWord << 8) & 0x00ff0000) | ((highWord << 24) & 0xff000000));

    // The swapped low word becomes the high word and vice versa.
    *dataPtr = ((uint64_t)lowWord << 32) | (uint64_t)highWord;
  }
}
#endif /* LV_HAVE_SSE2 */

#ifdef LV_HAVE_GENERIC
/*!
  \brief Byteswaps (in-place) an aligned vector of uint64_t's.

  Portable scalar implementation: every point has the order of its eight
  bytes reversed.

  \param intsToSwap The vector of data to byte swap
  \param num_points The number of data points
*/
static inline void volk_64u_byteswap_a_generic(uint64_t* intsToSwap, unsigned int num_points){
  unsigned int point;

  for(point = 0; point < num_points; point++){
    // Access the element as a uint64_t (not through a uint32_t pointer)
    // so the access does not violate the strict-aliasing rules.
    const uint64_t value = intsToSwap[point];
    uint32_t lowWord  = (uint32_t)value;
    uint32_t highWord = (uint32_t)(value >> 32);

    lowWord  = (((lowWord  >> 24) & 0xff) | ((lowWord  >> 8) & 0x0000ff00) | ((lowWord  << 8) & 0x00ff0000) | ((lowWord  << 24) & 0xff000000));
    highWord = (((highWord >> 24) & 0xff) | ((highWord >> 8) & 0x0000ff00) | ((highWord << 8) & 0x00ff0000) | ((highWord << 24) & 0xff000000));

    // The swapped low word becomes the high word and vice versa.
    intsToSwap[point] = ((uint64_t)lowWord << 32) | (uint64_t)highWord;
  }
}
#endif /* LV_HAVE_GENERIC */




#endif /* INCLUDED_volk_64u_byteswap_a_H */