GNU Radio 3.6.4.2 C++ API
volk_16i_max_star_horizontal_16i.h
Go to the documentation of this file.
00001 #ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H
00002 #define INCLUDED_volk_16i_max_star_horizontal_16i_a_H
00003 
00004 #include <volk/volk_common.h>
00005 
00006 #include<inttypes.h>
00007 #include<stdio.h>
00008 
00009 
00010 #ifdef LV_HAVE_SSSE3
00011 
00012 #include<xmmintrin.h>
00013 #include<emmintrin.h>
00014 #include<tmmintrin.h>
00015 
/*
 * Pairwise ("horizontal") selection over 16-bit samples, SSSE3 kernel using
 * aligned loads and stores.
 *
 * For each adjacent input pair (src0[2k], src0[2k+1]) one value is written to
 * target[k]: the second element when the difference first - second, wrapped
 * to 16 bits, is negative, and the first element otherwise.  Because the
 * comparison is done on the wrapped difference, pairs whose true difference
 * does not fit in int16_t are decided by the wrapped sign.
 *
 * target      output buffer; accessed with _mm_store_si128, so it must be
 *             16-byte aligned.
 * src0        input buffer; accessed with _mm_load_si128, so it must be
 *             16-byte aligned.
 * num_points  number of input samples.  Inputs are consumed two at a time; if
 *             num_points is odd the final scalar step also reads
 *             src0[num_points].
 */
static inline  void volk_16i_max_star_horizontal_16i_a_ssse3(int16_t* target, int16_t* src0, unsigned int num_points) {

  const unsigned int num_bytes = num_points*2;

  /* pshufb controls that gather the first element of every pair into the low
     (shufmask0) or high (shufmask1) half of the result; 0xff zeroes a byte. */
  static const uint8_t shufmask0[16] = {0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
  static const uint8_t shufmask1[16] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d};
  /* Byte offset (+2) added to a shuffle control to pick the second element of
     a pair instead of the first, restricted to the low / high result half. */
  static const uint8_t andmask0[16] = {0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
  static const uint8_t andmask1[16] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02};

  /* The tables carry no alignment guarantee, so use unaligned loads. */
  const __m128i shuf_lo = _mm_loadu_si128((const __m128i*)shufmask0);
  const __m128i shuf_hi = _mm_loadu_si128((const __m128i*)shufmask1);
  const __m128i bump_lo = _mm_loadu_si128((const __m128i*)andmask0);
  const __m128i bump_hi = _mm_loadu_si128((const __m128i*)andmask1);
  const __m128i zero = _mm_setzero_si128();

  __m128i *p_target = (__m128i*)target;
  __m128i *p_src0 = (__m128i*)src0;

  const int bound = num_bytes >> 5;               /* full 32-byte (16-sample) input chunks */
  const int intermediate = (num_bytes >> 4) & 1;  /* one extra 16-byte (8-sample) chunk?   */
  const int leftovers = (num_bytes >> 1) & 7;     /* samples left for the scalar tail      */
  const int tail_begin = (bound << 4) + (intermediate << 3);

  int i = 0;

  /* 16 input samples -> 8 output samples per iteration. */
  for(i = 0; i < bound; ++i) {
    __m128i in_lo = _mm_load_si128(p_src0);
    __m128i in_hi = _mm_load_si128(p_src0 + 1);
    p_src0 += 2;

    /* Lanes 0..3: pair differences of in_lo; lanes 4..7: those of in_hi. */
    const __m128i diff = _mm_hsub_epi16(in_lo, in_hi);

    /* All-ones in every lane whose wrapped difference is negative. */
    const __m128i second_wins = _mm_cmpgt_epi16(zero, diff);

    /* Shift the shuffle index by one sample where the second element wins. */
    const __m128i ctl_lo = _mm_add_epi8(_mm_and_si128(second_wins, bump_lo), shuf_lo);
    const __m128i ctl_hi = _mm_add_epi8(_mm_and_si128(second_wins, bump_hi), shuf_hi);

    in_lo = _mm_shuffle_epi8(in_lo, ctl_lo);
    in_hi = _mm_shuffle_epi8(in_hi, ctl_hi);

    /* The halves are disjoint (the other half is zeroed), so add merges them. */
    _mm_store_si128(p_target, _mm_add_epi16(in_lo, in_hi));
    p_target += 1;
  }

  /* 8 input samples -> 4 output samples. */
  if(intermediate) {
    const __m128i in = _mm_load_si128(p_src0);

    /* Only lanes 0..3 of the difference are consumed below (bump_lo is zero
       in the upper half), so the second operand merely has to be defined. */
    const __m128i diff = _mm_hsub_epi16(in, in);
    const __m128i second_wins = _mm_cmpgt_epi16(zero, diff);
    const __m128i ctl = _mm_add_epi8(_mm_and_si128(second_wins, bump_lo), shuf_lo);

    /* Write just the low 64 bits (four results). */
    _mm_storel_epi64(p_target, _mm_shuffle_epi8(in, ctl));
  }

  /* Scalar tail; i indexes input samples, i >> 1 the matching output. */
  for(i = tail_begin; i < tail_begin + leftovers; i += 2) {
    target[i>>1] = ((int16_t)(src0[i] - src0[i + 1]) > 0) ? src0[i] : src0[i + 1];
  }

}
00110 
00111 #endif /*LV_HAVE_SSSE3*/
00112 
00113 
00114 #ifdef LV_HAVE_GENERIC
/*
 * Pairwise ("horizontal") selection over 16-bit samples, portable C kernel.
 *
 * For each adjacent input pair (src0[2k], src0[2k+1]) writes to target[k] the
 * first element when the difference first - second, converted to int16_t, is
 * greater than zero, and the second element otherwise.
 *
 * target      output buffer, one value per input pair.
 * src0        input buffer.
 * num_points  number of input samples.  Inputs are consumed two at a time; if
 *             num_points is odd the last step also reads src0[num_points].
 */
static inline void volk_16i_max_star_horizontal_16i_generic(int16_t* target, int16_t* src0, unsigned int num_points) {

        /* Sample count derived through the byte count, as in the SIMD kernel. */
        const unsigned int byte_count = num_points*2;
        const int sample_count = byte_count >> 1;

        const int16_t *in = src0;
        int16_t *out = target;
        int remaining;

        for(remaining = sample_count; remaining > 0; remaining -= 2) {
          const int16_t first = in[0];
          const int16_t second = in[1];
          /* The difference is deliberately narrowed to 16 bits before the test. */
          const int16_t narrowed_diff = (int16_t) (first - second);

          if(narrowed_diff > 0) {
            *out = first;
          } else {
            *out = second;
          }

          in += 2;
          out += 1;
        }

}
00129 
00130 
00131 
00132 #endif /*LV_HAVE_GENERIC*/
00133 
00134 #endif /*INCLUDED_volk_16i_max_star_horizontal_16i_a_H*/