00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifndef EIGEN_COMPLEX_NEON_H
00026 #define EIGEN_COMPLEX_NEON_H
00027
00028 namespace internal {
00029
00030 static uint32x4_t p4ui_CONJ_XOR = { 0x00000000, 0x80000000, 0x00000000, 0x80000000 };
00031 static uint32x2_t p2ui_CONJ_XOR = { 0x00000000, 0x80000000 };
00032
00033
00034 struct Packet2cf
00035 {
00036 EIGEN_STRONG_INLINE Packet2cf() {}
00037 EIGEN_STRONG_INLINE explicit Packet2cf(const Packet4f& a) : v(a) {}
00038 Packet4f v;
00039 };
00040
00041 template<> struct packet_traits<std::complex<float> > : default_packet_traits
00042 {
00043 typedef Packet2cf type;
00044 enum {
00045 Vectorizable = 1,
00046 size = 2,
00047
00048 HasAdd = 1,
00049 HasSub = 1,
00050 HasMul = 1,
00051 HasDiv = 1,
00052 HasNegate = 1,
00053 HasAbs = 0,
00054 HasAbs2 = 0,
00055 HasMin = 0,
00056 HasMax = 0,
00057 HasSetLinear = 0
00058 };
00059 };
00060
00061 template<> struct unpacket_traits<Packet2cf> { typedef std::complex<float> type; enum {size=2}; };
00062
00063 template<> EIGEN_STRONG_INLINE Packet2cf pset1<Packet2cf>(const std::complex<float>& from)
00064 {
00065 float32x2_t r64;
00066 r64 = vld1_f32((float *)&from);
00067
00068 return Packet2cf(vcombine_f32(r64, r64));
00069 }
00070
00071 template<> EIGEN_STRONG_INLINE Packet2cf padd<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(padd<Packet4f>(a.v,b.v)); }
00072 template<> EIGEN_STRONG_INLINE Packet2cf psub<Packet2cf>(const Packet2cf& a, const Packet2cf& b) { return Packet2cf(psub<Packet4f>(a.v,b.v)); }
00073 template<> EIGEN_STRONG_INLINE Packet2cf pnegate(const Packet2cf& a) { return Packet2cf(pnegate<Packet4f>(a.v)); }
00074 template<> EIGEN_STRONG_INLINE Packet2cf pconj(const Packet2cf& a)
00075 {
00076 Packet4ui b = vreinterpretq_u32_f32(a.v);
00077 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(b, p4ui_CONJ_XOR)));
00078 }
00079
00080 template<> EIGEN_STRONG_INLINE Packet2cf pmul<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00081 {
00082 Packet4f v1, v2;
00083 float32x2_t a_lo, a_hi;
00084
00085
00086 v1 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 0), vdup_lane_f32(vget_high_f32(a.v), 0));
00087
00088 v2 = vcombine_f32(vdup_lane_f32(vget_low_f32(a.v), 1), vdup_lane_f32(vget_high_f32(a.v), 1));
00089
00090 v1 = vmulq_f32(v1, b.v);
00091
00092 v2 = vmulq_f32(v2, b.v);
00093
00094 v2 = vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(v2), p4ui_CONJ_XOR));
00095
00096 a_lo = vrev64_f32(vget_low_f32(v2));
00097 a_hi = vrev64_f32(vget_high_f32(v2));
00098 v2 = vcombine_f32(a_lo, a_hi);
00099
00100 return Packet2cf(vaddq_f32(v1, v2));
00101 }
00102
00103 template<> EIGEN_STRONG_INLINE Packet2cf pand <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00104 {
00105 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
00106 }
00107 template<> EIGEN_STRONG_INLINE Packet2cf por <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00108 {
00109 return Packet2cf(vreinterpretq_f32_u32(vorrq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
00110 }
00111 template<> EIGEN_STRONG_INLINE Packet2cf pxor <Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00112 {
00113 return Packet2cf(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
00114 }
00115 template<> EIGEN_STRONG_INLINE Packet2cf pandnot<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00116 {
00117 return Packet2cf(vreinterpretq_f32_u32(vbicq_u32(vreinterpretq_u32_f32(a.v),vreinterpretq_u32_f32(b.v))));
00118 }
00119
00120 template<> EIGEN_STRONG_INLINE Packet2cf pload<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_ALIGNED_LOAD return Packet2cf(pload<Packet4f>((const float*)from)); }
00121 template<> EIGEN_STRONG_INLINE Packet2cf ploadu<Packet2cf>(const std::complex<float>* from) { EIGEN_DEBUG_UNALIGNED_LOAD return Packet2cf(ploadu<Packet4f>((const float*)from)); }
00122
00123 template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<float>* from) { return pset1<Packet2cf>(*from); }
00124
00125 template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
00126 template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
00127
00128 template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); }
00129
00130 template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
00131 {
00132 std::complex<float> EIGEN_ALIGN16 x[2];
00133 vst1q_f32((float *)x, a.v);
00134 return x[0];
00135 }
00136
00137 template<> EIGEN_STRONG_INLINE Packet2cf preverse(const Packet2cf& a)
00138 {
00139 float32x2_t a_lo, a_hi;
00140 Packet4f a_r128;
00141
00142 a_lo = vget_low_f32(a.v);
00143 a_hi = vget_high_f32(a.v);
00144 a_r128 = vcombine_f32(a_hi, a_lo);
00145
00146 return Packet2cf(a_r128);
00147 }
00148
00149 template<> EIGEN_STRONG_INLINE Packet2cf pcplxflip<Packet2cf>(const Packet2cf& a)
00150 {
00151 return Packet2cf(vrev64q_f32(a.v));
00152 }
00153
00154 template<> EIGEN_STRONG_INLINE std::complex<float> predux<Packet2cf>(const Packet2cf& a)
00155 {
00156 float32x2_t a1, a2;
00157 std::complex<float> s;
00158
00159 a1 = vget_low_f32(a.v);
00160 a2 = vget_high_f32(a.v);
00161 a2 = vadd_f32(a1, a2);
00162 vst1_f32((float *)&s, a2);
00163
00164 return s;
00165 }
00166
00167 template<> EIGEN_STRONG_INLINE Packet2cf preduxp<Packet2cf>(const Packet2cf* vecs)
00168 {
00169 Packet4f sum1, sum2, sum;
00170
00171
00172 sum1 = vcombine_f32(vget_low_f32(vecs[0].v), vget_low_f32(vecs[1].v));
00173 sum2 = vcombine_f32(vget_high_f32(vecs[0].v), vget_high_f32(vecs[1].v));
00174 sum = vaddq_f32(sum1, sum2);
00175
00176 return Packet2cf(sum);
00177 }
00178
00179 template<> EIGEN_STRONG_INLINE std::complex<float> predux_mul<Packet2cf>(const Packet2cf& a)
00180 {
00181 float32x2_t a1, a2, v1, v2, prod;
00182 std::complex<float> s;
00183
00184 a1 = vget_low_f32(a.v);
00185 a2 = vget_high_f32(a.v);
00186
00187 v1 = vdup_lane_f32(a1, 0);
00188
00189 v2 = vdup_lane_f32(a1, 1);
00190
00191 v1 = vmul_f32(v1, a2);
00192
00193 v2 = vmul_f32(v2, a2);
00194
00195 v2 = vreinterpret_f32_u32(veor_u32(vreinterpret_u32_f32(v2), p2ui_CONJ_XOR));
00196
00197 v2 = vrev64_f32(v2);
00198
00199 prod = vadd_f32(v1, v2);
00200
00201 vst1_f32((float *)&s, prod);
00202
00203 return s;
00204 }
00205
00206 template<int Offset>
00207 struct palign_impl<Offset,Packet2cf>
00208 {
00209 EIGEN_STRONG_INLINE static void run(Packet2cf& first, const Packet2cf& second)
00210 {
00211 if (Offset==1)
00212 {
00213 first.v = vextq_f32(first.v, second.v, 2);
00214 }
00215 }
00216 };
00217
00218 template<> struct conj_helper<Packet2cf, Packet2cf, false,true>
00219 {
00220 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
00221 { return padd(pmul(x,y),c); }
00222
00223 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
00224 {
00225 return internal::pmul(a, pconj(b));
00226 }
00227 };
00228
00229 template<> struct conj_helper<Packet2cf, Packet2cf, true,false>
00230 {
00231 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
00232 { return padd(pmul(x,y),c); }
00233
00234 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
00235 {
00236 return internal::pmul(pconj(a), b);
00237 }
00238 };
00239
00240 template<> struct conj_helper<Packet2cf, Packet2cf, true,true>
00241 {
00242 EIGEN_STRONG_INLINE Packet2cf pmadd(const Packet2cf& x, const Packet2cf& y, const Packet2cf& c) const
00243 { return padd(pmul(x,y),c); }
00244
00245 EIGEN_STRONG_INLINE Packet2cf pmul(const Packet2cf& a, const Packet2cf& b) const
00246 {
00247 return pconj(internal::pmul(a, b));
00248 }
00249 };
00250
00251 template<> EIGEN_STRONG_INLINE Packet2cf pdiv<Packet2cf>(const Packet2cf& a, const Packet2cf& b)
00252 {
00253
00254 Packet2cf res = conj_helper<Packet2cf,Packet2cf,false,true>().pmul(a,b);
00255 Packet4f s, rev_s;
00256 float32x2_t a_lo, a_hi;
00257
00258
00259 s = vmulq_f32(b.v, b.v);
00260 a_lo = vrev64_f32(vget_low_f32(s));
00261 a_hi = vrev64_f32(vget_high_f32(s));
00262 rev_s = vcombine_f32(a_lo, a_hi);
00263
00264 return Packet2cf(pdiv(res.v, vaddq_f32(s,rev_s)));
00265 }
00266
00267 }
00268
00269 #endif // EIGEN_COMPLEX_NEON_H