36 #ifndef SHARK_ALGORITHMS_KMEANS_H 37 #define SHARK_ALGORITHMS_KMEANS_H 78 SHARK_EXPORT_SYMBOL std::size_t
kMeans(Data<RealVector>
const& data, std::size_t k, Centroids& centroids, std::size_t maxIterations = 0);
140 template<
class InputType>
143 maxIterations = std::numeric_limits<std::size_t>::max();
147 UIntVector clusterMembership(n);
148 UIntVector clusterSizes(k,0);
149 RealVector ckck(k,0);
152 for(
unsigned int i = 0; i != n; ++i){
153 clusterMembership(i) = i % k;
156 for(std::size_t i = 0; i != n; ++i){
157 ++clusterSizes(clusterMembership(i));
161 std::size_t iter = 0;
163 for(; iter != maxIterations && !equal; ++iter) {
169 for(std::size_t i = 0; i != n; ++i){
170 std::size_t c1 = clusterMembership(i);
171 for(std::size_t j = 0; j != n; ++j){
172 std::size_t c2 = clusterMembership(j);
173 if(c1 != c2)
continue;
174 ckck(c1) += kernelMatrix(i,j);
177 noalias(ckck) = safe_div(ckck,
sqr(clusterSizes),0);
179 UIntVector newClusterMembership(n);
180 RealVector currentDistances(k);
181 for(std::size_t i = 0; i != n; ++i){
184 noalias(currentDistances) = ckck;
185 for(std::size_t j = 0; j != n; ++j){
186 std::size_t c = clusterMembership(j);
187 currentDistances(c) -= 2* kernelMatrix(i,j)/clusterSizes(c);
190 newClusterMembership(i) = (
unsigned int) arg_min(currentDistances);
192 equal = boost::equal(
193 newClusterMembership,
196 noalias(clusterMembership) = newClusterMembership;
198 clusterSizes.clear();
199 for(std::size_t i = 0; i != n; ++i){
200 ++clusterSizes(clusterMembership(i));
204 for(
unsigned int i = 0; i != k; ++i){
205 if(clusterSizes(i) == 0){
207 --clusterSizes(clusterMembership(elem));
208 clusterMembership(elem)=i;
217 expansion.
offset() = -ckck;
218 expansion.
alpha().clear();
219 for(std::size_t i = 0; i != n; ++i){
220 std::size_t c = clusterMembership(i);
221 expansion.
alpha()(i,c) = 2.0 / clusterSizes(c);