00001 #ifndef OPENTISSUE_CORE_MATH_MATH_KMEANS_H
00002 #define OPENTISSUE_CORE_MATH_MATH_KMEANS_H
00003
00004
00005
00006
00007
00008
00009
00010 #include <OpenTissue/configuration.h>
00011
00012 #include <OpenTissue/core/math/math_random.h>
00013 #include <OpenTissue/core/math/math_matrix3x3.h>
00014 #include <OpenTissue/core/math/math_covariance.h>
00015 #include <OpenTissue/core/math/math_constants.h>
00016 #include <OpenTissue/core/math/math_value_traits.h>
00017
00018 #include <boost/iterator/indirect_iterator.hpp>
00019
00020 #include <algorithm>
00021 #include <list>
00022 #include <vector>
00023 #include <cassert>
00024
00025 namespace OpenTissue
00026 {
00027 namespace math
00028 {
00029 namespace detail
00030 {
00037 template< typename V, typename M >
00038 class KMeans
00039 {
00040 protected:
00041
00042 typedef V vector_type;
00043 typedef typename V::value_type real_type;
00044 typedef M matrix_type;
00045
00046 typedef OpenTissue::math::ValueTraits<real_type> value_traits;
00047
00048 typedef std::list< vector_type * > feature_ptr_container;
00049 typedef typename feature_ptr_container::iterator feature_ptr_iterator;
00050 typedef boost::indirect_iterator<feature_ptr_iterator,vector_type> feature_iterator;
00051
00052 protected:
00053
00060 class cluster_type
00061 {
00062 public:
00063
00064 vector_type m_mean;
00065 matrix_type m_C;
00066 matrix_type m_invC;
00067 size_t m_index;
00068 feature_ptr_container m_features;
00069
00070 public:
00071
00072 feature_iterator begin() { return m_features.begin(); }
00073 feature_iterator end() { return m_features.end(); }
00074
00075 public:
00076
00077 cluster_type()
00078 : m_mean(value_traits::zero(),value_traits::zero(),value_traits::zero())
00079 , m_C( value_traits::one(), value_traits::zero(), value_traits::zero(), value_traits::zero(), value_traits::one(), value_traits::zero(), value_traits::zero(), value_traits::zero(), value_traits::one() )
00080 , m_invC( value_traits::one(), value_traits::zero(), value_traits::zero(), value_traits::zero(), value_traits::one(), value_traits::zero() , value_traits::zero(), value_traits::zero(), value_traits::one() )
00081 , m_index(0u)
00082 {}
00083
00084 void update()
00085 {
00086 using OpenTissue::math::covariance;
00087 using OpenTissue::math::inverse;
00088
00089 covariance( begin(), end(), m_mean, m_C);
00090 m_invC = inverse(m_C);
00091 }
00092
00093 };
00094
00095
00101 class membership_info
00102 {
00103 public:
00104
00105 size_t m_index;
00106 cluster_type * m_cluster;
00107 vector_type * m_p;
00108
00109 public:
00110
00111 membership_info()
00112 : m_index(0)
00113 , m_cluster(0)
00114 , m_p(0)
00115 {}
00116
00117 membership_info(size_t const & idx, vector_type * p)
00118 : m_index(idx)
00119 , m_cluster(0)
00120 , m_p(p)
00121 {}
00122
00123 membership_info(membership_info const & info)
00124 : m_index(info.m_index)
00125 , m_cluster(info.m_cluster)
00126 , m_p(info.m_p)
00127 {}
00128
00129 };
00130
00131
00132 typedef typename std::vector< membership_info > membership_container;
00133 typedef typename std::list<cluster_type> cluster_container;
00134
00135 cluster_container m_clusters;
00136 membership_container m_memberships;
00137
00138 public:
00139
00140 typedef typename cluster_container::iterator cluster_iterator;
00141 typedef typename membership_container::iterator membership_iterator;
00142
00148 cluster_iterator cluster_begin() { return m_clusters.begin(); }
00149
00155 cluster_iterator cluster_end() { return m_clusters.end(); }
00156
00162 membership_iterator membership_begin() { return m_memberships.begin(); }
00163
00169 membership_iterator membership_end() { return m_memberships.end(); }
00170
00176 size_t feature_size() const { return m_memberships.size(); }
00177
00178 protected:
00179
00187 template<typename vector_iterator>
00188 void initialize(
00189 vector_iterator const & begin
00190 , vector_iterator const & end
00191 , size_t const & K
00192 )
00193 {
00194 using std::min;
00195 using std::max;
00196 using OpenTissue::math::random;
00197
00198
00199
00200 size_t N = std::distance( begin, end );
00201 assert(N > K || !"KMeans::initialize() There must be more feature points than number of clusters.");
00202
00203 {
00204 m_memberships.resize(N);
00205 size_t index = 0u;
00206 vector_iterator p = begin;
00207 for( ; p != end; ++p, ++index)
00208 m_memberships[index] = membership_info( index, &(*p) );
00209 }
00210
00211
00212 vector_type min_coord = (*begin);
00213 vector_type max_coord = (*begin);
00214 for( vector_iterator p = begin; p != end; ++p)
00215 {
00216 min_coord = min( min_coord, (*p) );
00217 max_coord = max( max_coord, (*p) );
00218 }
00219
00220
00221 std::random_shuffle(m_memberships.begin(), m_memberships.end());
00222
00223
00224 m_clusters.clear();
00225 for(size_t i=0;i<K;++i)
00226 {
00227
00228 m_clusters.push_back(cluster_type());
00229 cluster_type & cluster = m_clusters.back();
00230
00231 random( cluster.m_mean, min_coord, max_coord );
00232 cluster.m_index = i;
00233 }
00234
00235
00236 distribute_features();
00237 }
00238
00249 bool const distribute_features( )
00250 {
00251 using OpenTissue::math::inner_prod;
00252
00253
00254 {
00255 cluster_iterator end = m_clusters.end();
00256 cluster_iterator cluster = m_clusters.begin();
00257 for(;cluster!=end;++cluster)
00258 cluster->m_features.clear();
00259 }
00260
00261 bool changed = false;
00262
00263 {
00264 membership_iterator m = m_memberships.begin();
00265 membership_iterator m_end = m_memberships.end();
00266 for(;m != m_end; ++m)
00267 {
00268 vector_type const * p = m->m_p;
00269 cluster_type * owner = 0;
00270
00271 real_type min_squared_distance = value_traits::infinity();
00272 cluster_iterator c = m_clusters.begin();
00273 cluster_iterator c_end = m_clusters.end();
00274 for(;c!=c_end;++c)
00275 {
00276 vector_type diff = *p - c->m_mean;
00277
00278
00279
00280 real_type squared_distance = inner_prod( diff, diff );
00281 if(squared_distance < min_squared_distance)
00282 {
00283 min_squared_distance = squared_distance;
00284 owner = &(*c);
00285 }
00286 }
00287 assert(owner || !"distribute_features() Fatal error could not find a cluster owner for feature point");
00288 changed = (m->m_cluster != owner) ? true : changed;
00289 m->m_cluster = owner;
00290 owner->m_features.push_back( const_cast<vector_type*>( p ) );
00291 }
00292 }
00293
00294
00295 for(cluster_iterator cluster=m_clusters.begin();cluster!=m_clusters.end();++cluster)
00296 {
00297 if(! cluster->m_features.empty())
00298 cluster->update();
00299 }
00300 return changed;
00301 }
00302
00303 public:
00304
00315 template<typename vector_iterator>
00316 void run(
00317 vector_iterator const & begin
00318 , vector_iterator const & end
00319 , size_t const & K
00320 , size_t & iteration
00321 , size_t const & max_iterations
00322 )
00323 {
00324 iteration = 0u;
00325 initialize(begin,end,K);
00326 bool changed = false;
00327 do
00328 {
00329 ++iteration;
00330 changed = distribute_features();
00331 if(iteration >= max_iterations)
00332 return;
00333 }while(changed);
00334 }
00335 };
00336
00337 }
00338
00339
00357 template<typename vector_iterator, typename vector_container, typename index_container>
00358 inline void kmeans(
00359 vector_iterator const & begin
00360 , vector_iterator const & end
00361 , vector_container & centers
00362 , index_container & membership
00363 , size_t K
00364 , size_t & iteration
00365 , size_t const & max_iterations
00366 )
00367 {
00368 typedef typename vector_container::value_type vector_type;
00369 typedef typename vector_type::value_type real_type;
00370 typedef OpenTissue::math::Matrix3x3<real_type> matrix_type;
00371
00372 typedef OpenTissue::math::detail::KMeans<vector_type, matrix_type> kmeans_algorithm;
00373
00374 typedef typename kmeans_algorithm::cluster_iterator cluster_iterator;
00375 typedef typename kmeans_algorithm::membership_iterator membership_iterator;
00376
00377 kmeans_algorithm kmeans;
00378
00379 kmeans.run( begin , end, K, iteration, max_iterations );
00380
00381 centers.clear();
00382 centers.resize(K);
00383
00384 cluster_iterator c_end = kmeans.cluster_end();
00385 cluster_iterator c = kmeans.cluster_begin();
00386 for(; c!=c_end; ++c)
00387 centers[c->m_index] = c->m_mean;
00388
00389 membership.clear();
00390 membership.resize( kmeans.feature_size() );
00391
00392 membership_iterator m_end = kmeans.membership_end();
00393 membership_iterator m = kmeans.membership_begin();
00394 for(;m!=m_end;++m)
00395 membership[m->m_index] = m->m_cluster->m_index;
00396 }
00397
00398 }
00399 }
00400
00401
00402 #endif