32 #ifndef REMORA_KERNELS_DEFAULT_SYEV_HPP 33 #define REMORA_KERNELS_DEFAULT_SYEV_HPP 35 #include "../../detail/traits.hpp" 37 namespace remora{
namespace bindings {
39 template <
typename MatA,
typename V>
42 matrix_expression<MatA, cpu_tag>& matA,
43 vector_expression<V, cpu_tag>& eigenValues
45 REMORA_SIZE_CHECK(eigenValues().size() == matA().size1());
46 REMORA_SIZE_CHECK(matA().size1() == matA().size2());
48 std::size_t n = eigenValues().size();
50 for (std::size_t i = 0; i < n - 1; i++)
52 std::size_t l = arg_max(subrange(eigenValues,i,n))+i;
55 std::swap(eigenValues()( l ),eigenValues()( i ));
57 for (std::size_t j = 0; j < n; j++) {
58 std::swap(matA()( j , l ), matA()( j , i ));
64 template <
typename MatA,
typename V>
66 matrix_expression<MatA, cpu_tag>& vmatA,
67 vector_expression<V, cpu_tag>& dvecA
69 REMORA_SIZE_CHECK(vmatA().size1() == vmatA().size2());
70 REMORA_SIZE_CHECK(vmatA().size1() == dvecA().size());
72 const std::size_t maxIterC = 50;
73 std::size_t n = vmatA().size1();
75 vector<double> odvecA(n,0.0);
77 std::size_t j, k, l, m;
78 double b, c, f, g,
h, hh, p, r, s, scale;
84 for (std::size_t i = n; i-- > 1;) {
90 for (std::size_t k = 0; k < i; k++) {
91 scale += std::fabs(vmatA()(i, k));
96 odvecA(i) = vmatA()(i, i-1);
99 for (k = 0; k < i; k++) {
100 vmatA()(i, k) /= scale;
101 h += vmatA()(i, k) * vmatA()(i, k);
105 g = f > 0 ? -std::sqrt(h) :
std::sqrt(h);
106 odvecA(i) = scale * g;
108 vmatA()(i, i-1) = f - g;
111 for (j = 0; j < i; j++) {
112 vmatA()(j, i) = vmatA()(i, j) / (scale * h);
116 for (k = 0; k <= j; k++) {
117 g += vmatA()(j, k) * vmatA()(i, k);
120 for (k = j + 1; k < i; k++) {
121 g += vmatA()(k, j) * vmatA()(i, k);
125 f += (odvecA(j) = g /
h) * vmatA()(i, j);
131 for (j = 0; j < i; j++) {
133 g = odvecA(j) - hh * f;
136 for (k = 0; k <= j; k++) {
137 vmatA()(j, k) -= f * odvecA(k) + g * vmatA()(i, k);
142 vmatA()(i, k) *= scale;
149 dvecA()(0) = odvecA(0) = 0.0;
152 for (std::size_t i = 0; i < n; i++) {
154 for (j = 0; j < i; j++) {
157 for (k = 0; k < i; k++) {
158 g += vmatA()(i, k) * vmatA()(k, j);
161 for (k = 0; k < i; k++) {
162 vmatA()(k, j) -= g * vmatA()(k, i);
167 dvecA()(i) = vmatA()(i, i);
170 for (j = 0; j < i; j++) {
171 vmatA()(i, j) = vmatA()(j, i) = 0.0;
182 for (std::size_t i = 1; i < n; i++) {
183 odvecA(i-1) = odvecA(i);
188 for (l = 0; l < n; l++) {
193 for (m = l; m < n - 1; m++) {
194 s = std::fabs(dvecA()(m)) + std::fabs(dvecA()(m+1));
195 if (std::fabs(odvecA(m)) + s == s) {
204 throw std::invalid_argument(
"too many iterations in eigendecomposition");
207 g = (dvecA()(l+1) - p) / (2.0 * odvecA(l));
208 r = std::sqrt(g * g + 1.0);
209 g = dvecA()(m) - p + odvecA(l) / (g + ((g) > 0 ? std::fabs(r) : -
std::fabs(r)));
213 for (std::size_t i = m; i-- > l;) {
217 if (std::fabs(f) >= std::fabs(g)) {
219 r = std::sqrt(c * c + 1.0);
226 r = std::sqrt(s * s + 1.0);
232 g = dvecA()(i+1) - p;
233 r = (dvecA()(i) - g) * s + 2.0 * c * b;
235 dvecA()(i+1) = g + p;
239 for (k = 0; k < n; k++) {
241 vmatA()(k, i+1) = s * vmatA()(k, i) + c * f;
242 vmatA()(k, i ) = c * vmatA()(k, i) - s * f;
257 eigensort(vmatA, dvecA);
262 for (std::size_t j = n; j--;) {
264 for (std::size_t i = n; i--;) {
265 s += vmatA()(i, j) * vmatA()(i, j);
269 for (std::size_t i = n; i--;) {