00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #ifndef EIGEN_COEFFBASED_PRODUCT_H
00027 #define EIGEN_COEFFBASED_PRODUCT_H
00028
00029 namespace internal {
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044 template<int Traversal, int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00045 struct product_coeff_impl;
00046
00047 template<int StorageOrder, int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00048 struct product_packet_impl;
00049
00050 template<typename LhsNested, typename RhsNested, int NestingFlags>
00051 struct traits<CoeffBasedProduct<LhsNested,RhsNested,NestingFlags> >
00052 {
00053 typedef MatrixXpr XprKind;
00054 typedef typename remove_all<LhsNested>::type _LhsNested;
00055 typedef typename remove_all<RhsNested>::type _RhsNested;
00056 typedef typename scalar_product_traits<typename _LhsNested::Scalar, typename _RhsNested::Scalar>::ReturnType Scalar;
00057 typedef typename promote_storage_type<typename traits<_LhsNested>::StorageKind,
00058 typename traits<_RhsNested>::StorageKind>::ret StorageKind;
00059 typedef typename promote_index_type<typename traits<_LhsNested>::Index,
00060 typename traits<_RhsNested>::Index>::type Index;
00061
00062 enum {
00063 LhsCoeffReadCost = _LhsNested::CoeffReadCost,
00064 RhsCoeffReadCost = _RhsNested::CoeffReadCost,
00065 LhsFlags = _LhsNested::Flags,
00066 RhsFlags = _RhsNested::Flags,
00067
00068 RowsAtCompileTime = _LhsNested::RowsAtCompileTime,
00069 ColsAtCompileTime = _RhsNested::ColsAtCompileTime,
00070 InnerSize = EIGEN_SIZE_MIN_PREFER_FIXED(_LhsNested::ColsAtCompileTime, _RhsNested::RowsAtCompileTime),
00071
00072 MaxRowsAtCompileTime = _LhsNested::MaxRowsAtCompileTime,
00073 MaxColsAtCompileTime = _RhsNested::MaxColsAtCompileTime,
00074
00075 LhsRowMajor = LhsFlags & RowMajorBit,
00076 RhsRowMajor = RhsFlags & RowMajorBit,
00077
00078 SameType = is_same<typename _LhsNested::Scalar,typename _RhsNested::Scalar>::value,
00079
00080 CanVectorizeRhs = RhsRowMajor && (RhsFlags & PacketAccessBit)
00081 && (ColsAtCompileTime == Dynamic
00082 || ( (ColsAtCompileTime % packet_traits<Scalar>::size) == 0
00083 && (RhsFlags&AlignedBit)
00084 )
00085 ),
00086
00087 CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit)
00088 && (RowsAtCompileTime == Dynamic
00089 || ( (RowsAtCompileTime % packet_traits<Scalar>::size) == 0
00090 && (LhsFlags&AlignedBit)
00091 )
00092 ),
00093
00094 EvalToRowMajor = (MaxRowsAtCompileTime==1&&MaxColsAtCompileTime!=1) ? 1
00095 : (MaxColsAtCompileTime==1&&MaxRowsAtCompileTime!=1) ? 0
00096 : (RhsRowMajor && !CanVectorizeLhs),
00097
00098 Flags = ((unsigned int)(LhsFlags | RhsFlags) & HereditaryBits & ~RowMajorBit)
00099 | (EvalToRowMajor ? RowMajorBit : 0)
00100 | NestingFlags
00101 | (LhsFlags & RhsFlags & AlignedBit)
00102
00103 | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0),
00104
00105 CoeffReadCost = InnerSize == Dynamic ? Dynamic
00106 : InnerSize * (NumTraits<Scalar>::MulCost + LhsCoeffReadCost + RhsCoeffReadCost)
00107 + (InnerSize - 1) * NumTraits<Scalar>::AddCost,
00108
00109
00110
00111
00112
00113
00114 CanVectorizeInner = SameType
00115 && LhsRowMajor
00116 && (!RhsRowMajor)
00117 && (LhsFlags & RhsFlags & ActualPacketAccessBit)
00118 && (LhsFlags & RhsFlags & AlignedBit)
00119 && (InnerSize % packet_traits<Scalar>::size == 0)
00120 };
00121 };
00122
00123 }
00124
00125 template<typename LhsNested, typename RhsNested, int NestingFlags>
00126 class CoeffBasedProduct
00127 : internal::no_assignment_operator,
00128 public MatrixBase<CoeffBasedProduct<LhsNested, RhsNested, NestingFlags> >
00129 {
00130 public:
00131
00132 typedef MatrixBase<CoeffBasedProduct> Base;
00133 EIGEN_DENSE_PUBLIC_INTERFACE(CoeffBasedProduct)
00134 typedef typename Base::PlainObject PlainObject;
00135
00136 private:
00137
00138 typedef typename internal::traits<CoeffBasedProduct>::_LhsNested _LhsNested;
00139 typedef typename internal::traits<CoeffBasedProduct>::_RhsNested _RhsNested;
00140
00141 enum {
00142 PacketSize = internal::packet_traits<Scalar>::size,
00143 InnerSize = internal::traits<CoeffBasedProduct>::InnerSize,
00144 Unroll = CoeffReadCost != Dynamic && CoeffReadCost <= EIGEN_UNROLLING_LIMIT,
00145 CanVectorizeInner = internal::traits<CoeffBasedProduct>::CanVectorizeInner
00146 };
00147
00148 typedef internal::product_coeff_impl<CanVectorizeInner ? InnerVectorizedTraversal : DefaultTraversal,
00149 Unroll ? InnerSize-1 : Dynamic,
00150 _LhsNested, _RhsNested, Scalar> ScalarCoeffImpl;
00151
00152 typedef CoeffBasedProduct<LhsNested,RhsNested,NestByRefBit> LazyCoeffBasedProductType;
00153
00154 public:
00155
00156 inline CoeffBasedProduct(const CoeffBasedProduct& other)
00157 : Base(), m_lhs(other.m_lhs), m_rhs(other.m_rhs)
00158 {}
00159
00160 template<typename Lhs, typename Rhs>
00161 inline CoeffBasedProduct(const Lhs& lhs, const Rhs& rhs)
00162 : m_lhs(lhs), m_rhs(rhs)
00163 {
00164
00165
00166 EIGEN_STATIC_ASSERT((internal::is_same<typename Lhs::RealScalar, typename Rhs::RealScalar>::value),
00167 YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY)
00168 eigen_assert(lhs.cols() == rhs.rows()
00169 && "invalid matrix product"
00170 && "if you wanted a coeff-wise or a dot product use the respective explicit functions");
00171 }
00172
00173 EIGEN_STRONG_INLINE Index rows() const { return m_lhs.rows(); }
00174 EIGEN_STRONG_INLINE Index cols() const { return m_rhs.cols(); }
00175
00176 EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const
00177 {
00178 Scalar res;
00179 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00180 return res;
00181 }
00182
00183
00184
00185
00186 EIGEN_STRONG_INLINE const Scalar coeff(Index index) const
00187 {
00188 Scalar res;
00189 const Index row = RowsAtCompileTime == 1 ? 0 : index;
00190 const Index col = RowsAtCompileTime == 1 ? index : 0;
00191 ScalarCoeffImpl::run(row, col, m_lhs, m_rhs, res);
00192 return res;
00193 }
00194
00195 template<int LoadMode>
00196 EIGEN_STRONG_INLINE const PacketScalar packet(Index row, Index col) const
00197 {
00198 PacketScalar res;
00199 internal::product_packet_impl<Flags&RowMajorBit ? RowMajor : ColMajor,
00200 Unroll ? InnerSize-1 : Dynamic,
00201 _LhsNested, _RhsNested, PacketScalar, LoadMode>
00202 ::run(row, col, m_lhs, m_rhs, res);
00203 return res;
00204 }
00205
00206
00207 EIGEN_STRONG_INLINE operator const PlainObject& () const
00208 {
00209 m_result.lazyAssign(*this);
00210 return m_result;
00211 }
00212
00213 const _LhsNested& lhs() const { return m_lhs; }
00214 const _RhsNested& rhs() const { return m_rhs; }
00215
00216 const Diagonal<const LazyCoeffBasedProductType,0> diagonal() const
00217 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00218
00219 template<int DiagonalIndex>
00220 const Diagonal<const LazyCoeffBasedProductType,DiagonalIndex> diagonal() const
00221 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this); }
00222
00223 const Diagonal<const LazyCoeffBasedProductType,Dynamic> diagonal(Index index) const
00224 { return reinterpret_cast<const LazyCoeffBasedProductType&>(*this).diagonal(index); }
00225
00226 protected:
00227 const LhsNested m_lhs;
00228 const RhsNested m_rhs;
00229
00230 mutable PlainObject m_result;
00231 };
00232
00233 namespace internal {
00234
00235
00236
00237 template<typename Lhs, typename Rhs, int N, typename PlainObject>
00238 struct nested<CoeffBasedProduct<Lhs,Rhs,EvalBeforeNestingBit|EvalBeforeAssigningBit>, N, PlainObject>
00239 {
00240 typedef PlainObject const& type;
00241 };
00242
00243
00244
00245
00246
00247
00248
00249
00250
00251 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00252 struct product_coeff_impl<DefaultTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00253 {
00254 typedef typename Lhs::Index Index;
00255 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00256 {
00257 product_coeff_impl<DefaultTraversal, UnrollingIndex-1, Lhs, Rhs, RetScalar>::run(row, col, lhs, rhs, res);
00258 res += lhs.coeff(row, UnrollingIndex) * rhs.coeff(UnrollingIndex, col);
00259 }
00260 };
00261
00262 template<typename Lhs, typename Rhs, typename RetScalar>
00263 struct product_coeff_impl<DefaultTraversal, 0, Lhs, Rhs, RetScalar>
00264 {
00265 typedef typename Lhs::Index Index;
00266 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00267 {
00268 res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00269 }
00270 };
00271
00272 template<typename Lhs, typename Rhs, typename RetScalar>
00273 struct product_coeff_impl<DefaultTraversal, Dynamic, Lhs, Rhs, RetScalar>
00274 {
00275 typedef typename Lhs::Index Index;
00276 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar& res)
00277 {
00278 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00279 res = lhs.coeff(row, 0) * rhs.coeff(0, col);
00280 for(Index i = 1; i < lhs.cols(); ++i)
00281 res += lhs.coeff(row, i) * rhs.coeff(i, col);
00282 }
00283 };
00284
00285
00286
00287
00288
00289 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet>
00290 struct product_coeff_vectorized_unroller
00291 {
00292 typedef typename Lhs::Index Index;
00293 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00294 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00295 {
00296 product_coeff_vectorized_unroller<UnrollingIndex-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00297 pres = padd(pres, pmul( lhs.template packet<Aligned>(row, UnrollingIndex) , rhs.template packet<Aligned>(UnrollingIndex, col) ));
00298 }
00299 };
00300
00301 template<typename Lhs, typename Rhs, typename Packet>
00302 struct product_coeff_vectorized_unroller<0, Lhs, Rhs, Packet>
00303 {
00304 typedef typename Lhs::Index Index;
00305 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::PacketScalar &pres)
00306 {
00307 pres = pmul(lhs.template packet<Aligned>(row, 0) , rhs.template packet<Aligned>(0, col));
00308 }
00309 };
00310
00311 template<int UnrollingIndex, typename Lhs, typename Rhs, typename RetScalar>
00312 struct product_coeff_impl<InnerVectorizedTraversal, UnrollingIndex, Lhs, Rhs, RetScalar>
00313 {
00314 typedef typename Lhs::PacketScalar Packet;
00315 typedef typename Lhs::Index Index;
00316 enum { PacketSize = packet_traits<typename Lhs::Scalar>::size };
00317 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, RetScalar &res)
00318 {
00319 Packet pres;
00320 product_coeff_vectorized_unroller<UnrollingIndex+1-PacketSize, Lhs, Rhs, Packet>::run(row, col, lhs, rhs, pres);
00321 product_coeff_impl<DefaultTraversal,UnrollingIndex,Lhs,Rhs,RetScalar>::run(row, col, lhs, rhs, res);
00322 res = predux(pres);
00323 }
00324 };
00325
00326 template<typename Lhs, typename Rhs, int LhsRows = Lhs::RowsAtCompileTime, int RhsCols = Rhs::ColsAtCompileTime>
00327 struct product_coeff_vectorized_dyn_selector
00328 {
00329 typedef typename Lhs::Index Index;
00330 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00331 {
00332 res = lhs.row(row).transpose().cwiseProduct(rhs.col(col)).sum();
00333 }
00334 };
00335
00336
00337
00338 template<typename Lhs, typename Rhs, int RhsCols>
00339 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,RhsCols>
00340 {
00341 typedef typename Lhs::Index Index;
00342 EIGEN_STRONG_INLINE static void run(Index , Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00343 {
00344 res = lhs.transpose().cwiseProduct(rhs.col(col)).sum();
00345 }
00346 };
00347
00348 template<typename Lhs, typename Rhs, int LhsRows>
00349 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,LhsRows,1>
00350 {
00351 typedef typename Lhs::Index Index;
00352 EIGEN_STRONG_INLINE static void run(Index row, Index , const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00353 {
00354 res = lhs.row(row).transpose().cwiseProduct(rhs).sum();
00355 }
00356 };
00357
00358 template<typename Lhs, typename Rhs>
00359 struct product_coeff_vectorized_dyn_selector<Lhs,Rhs,1,1>
00360 {
00361 typedef typename Lhs::Index Index;
00362 EIGEN_STRONG_INLINE static void run(Index , Index , const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00363 {
00364 res = lhs.transpose().cwiseProduct(rhs).sum();
00365 }
00366 };
00367
00368 template<typename Lhs, typename Rhs, typename RetScalar>
00369 struct product_coeff_impl<InnerVectorizedTraversal, Dynamic, Lhs, Rhs, RetScalar>
00370 {
00371 typedef typename Lhs::Index Index;
00372 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, typename Lhs::Scalar &res)
00373 {
00374 product_coeff_vectorized_dyn_selector<Lhs,Rhs>::run(row, col, lhs, rhs, res);
00375 }
00376 };
00377
00378
00379
00380
00381
00382 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00383 struct product_packet_impl<RowMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00384 {
00385 typedef typename Lhs::Index Index;
00386 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00387 {
00388 product_packet_impl<RowMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00389 res = pmadd(pset1<Packet>(lhs.coeff(row, UnrollingIndex)), rhs.template packet<LoadMode>(UnrollingIndex, col), res);
00390 }
00391 };
00392
00393 template<int UnrollingIndex, typename Lhs, typename Rhs, typename Packet, int LoadMode>
00394 struct product_packet_impl<ColMajor, UnrollingIndex, Lhs, Rhs, Packet, LoadMode>
00395 {
00396 typedef typename Lhs::Index Index;
00397 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00398 {
00399 product_packet_impl<ColMajor, UnrollingIndex-1, Lhs, Rhs, Packet, LoadMode>::run(row, col, lhs, rhs, res);
00400 res = pmadd(lhs.template packet<LoadMode>(row, UnrollingIndex), pset1<Packet>(rhs.coeff(UnrollingIndex, col)), res);
00401 }
00402 };
00403
00404 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00405 struct product_packet_impl<RowMajor, 0, Lhs, Rhs, Packet, LoadMode>
00406 {
00407 typedef typename Lhs::Index Index;
00408 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00409 {
00410 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00411 }
00412 };
00413
00414 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00415 struct product_packet_impl<ColMajor, 0, Lhs, Rhs, Packet, LoadMode>
00416 {
00417 typedef typename Lhs::Index Index;
00418 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet &res)
00419 {
00420 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00421 }
00422 };
00423
00424 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00425 struct product_packet_impl<RowMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00426 {
00427 typedef typename Lhs::Index Index;
00428 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00429 {
00430 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00431 res = pmul(pset1<Packet>(lhs.coeff(row, 0)),rhs.template packet<LoadMode>(0, col));
00432 for(Index i = 1; i < lhs.cols(); ++i)
00433 res = pmadd(pset1<Packet>(lhs.coeff(row, i)), rhs.template packet<LoadMode>(i, col), res);
00434 }
00435 };
00436
00437 template<typename Lhs, typename Rhs, typename Packet, int LoadMode>
00438 struct product_packet_impl<ColMajor, Dynamic, Lhs, Rhs, Packet, LoadMode>
00439 {
00440 typedef typename Lhs::Index Index;
00441 EIGEN_STRONG_INLINE static void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, Packet& res)
00442 {
00443 eigen_assert(lhs.cols()>0 && "you are using a non initialized matrix");
00444 res = pmul(lhs.template packet<LoadMode>(row, 0), pset1<Packet>(rhs.coeff(0, col)));
00445 for(Index i = 1; i < lhs.cols(); ++i)
00446 res = pmadd(lhs.template packet<LoadMode>(row, i), pset1<Packet>(rhs.coeff(i, col)), res);
00447 }
00448 };
00449
00450 }
00451
00452 #endif // EIGEN_COEFFBASED_PRODUCT_H