AbstractClustering.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Super class for clustering definitions.
6  *
7  *
8  *
9  * \author T. Glasmachers
10  * \date 2011
11  *
12  *
13  * \par Copyright 1995-2017 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://shark-ml.org/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 #ifndef SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
36 #define SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
37 
38 
40 #include <shark/Core/Flags.h>
41 #include <shark/Core/INameable.h>
44 #include <shark/Core/Shape.h>
45 
46 namespace shark {
47 
48 
49 ///
50 /// \brief Base class for clustering.
51 ///
52 /// \par
53 /// Clustering algorithms vary widely in the data structures
54 /// on which they operate. For example, simple centroid-based
55 /// approaches such as k-means are mutually incompatible with
56 /// tree-based hierarchical clustering. This interface class
57 /// is the attempt to cast different cluster descriptions into
58 /// a minimal common interface that is useful for prediction.
59 ///
60 /// \par
61 /// There are at least two common types of predictions made
62 /// with clusterings. The first one is the assignment of the
63 /// best matching cluster to a patters, such as in vector
64 /// quantization or unsupervised clustering. This is often
65 /// referred to as "hard clustering". The second one is the
66 /// computation of a membership function ("soft clustering").
67 /// The membership of a pattern to a cluster is non-negative,
68 /// and the total membership should add to one.
69 ///
70 /// \par
71 /// This interface makes minimal assumptions to allow for
72 /// these types of predictions. It assumes that clusters can
73 /// be enumbered (identified by an index), and that at least
74 /// a membership test can be made (hard clustering). It is
75 /// optional to provide a membership function. Only one of
76 /// the two interfaces for best matching cluster or membership
77 /// function need to be implemented, since the best matching
78 /// cluster can be deduced from the membership function.
79 /// However, often the best matching cluster can be computed
80 /// more efficiently than the membership function. In these
81 /// cases both interface functions should be implemented.
82 ///
83 /// \par
84 /// The purpose of this interface is to act as a common
85 /// super class to different data structures describing the
86 /// outcome of a clustering operation. The interface allows
87 /// all of these data structures to be used in the two
88 /// clustering model classes: HardClusteringModel and
89 /// SoftClusteringModel.
90 ///
91 template <class InputT>
93 {
94 public:
95  typedef InputT InputType;
96  typedef unsigned int OutputType;
99 
100  enum Feature {
102  };
104 
105  /// Tests whether the clustering can compute a (soft)
106  /// member ship function, describing the membership
107  /// of a sample to the different clusters.
110  }
111 
112  /// return the number of clusters
113  virtual std::size_t numberOfClusters() const = 0;
114 
115  virtual Shape inputShape() const = 0;
116 
117  /// \brief Compute best matching cluster.
118  ///
119  /// \par
120  /// This function should be overriden by sub-classes to
121  /// compute the cluster best matching the input pattern.
122  /// The (typically slow) default implementation is to
123  /// create a batch of size 1 and return the result of the batch call to hardMembership
124  virtual unsigned int hardMembership(InputType const& pattern) const{
126  getBatchElement(b,0) = pattern;
127  return hardMembership(b)(0);
128  }
129 
130  /// \brief Compute best matching cluster for a batch of inputs.
131  ///
132  /// \par
133  /// This function should be overriden by sub-classes to
134  /// compute the cluster best matching the input pattern.
135  /// The (typically slow) default implementation is to
136  /// return the arg max of the soft membership function for every pattern.
137  virtual BatchOutputType hardMembership(BatchInputType const& patterns) const{
138  std::size_t numPatterns = batchSize(patterns);
139  RealMatrix f = softMembership(patterns);
140  SHARK_ASSERT(f.size2() > 0);
141  SHARK_ASSERT(f.size1() == numPatterns);
142  BatchOutputType outputs(numPatterns);
143  for(std::size_t i = 0; i != numPatterns;++i){
144  auto membership = row(f,i);
145  outputs(i) = (unsigned int)(std::max_element(membership.begin(),membership.end())-membership.begin());
146  }
147  return outputs;
148  }
149 
150  /// \brief Compute cluster membership function.
151  ///
152  /// \par
153  /// This function should be overriden by sub-classes to
154  /// compute the membership of a pattern to the clusters.
155  /// The default implementation creates a batch of size 1
156  /// and calls the batched version. If this is not overriden, an xception is thrown.
157  virtual RealVector softMembership(InputType const& pattern) const{
158  return row(softMembership(Batch<InputType>::createBatch(pattern)),0);
159  }
160 
161  /// \brief Compute cluster membership function.
162  ///
163  /// \par
164  /// This function should be overriden by sub-classes to
165  /// compute the membership of a pattern to the clusters.
166  /// This default implementation throws an exception.
167  virtual RealMatrix softMembership(BatchInputType const& patterns) const{
169  }
170 
171  /// empty default implementation of ISerializable::read
172  void read(InArchive& archive) {}
173 
174  /// empty default implementation of ISerializable::write
175  void write(OutArchive& archive) const {}
176 };
177 
178 
179 }
180 #endif