Centroids.h
Go to the documentation of this file.
1 //===========================================================================
2 /*!
3  *
4  *
5  * \brief Clusters defined by centroids.
6  *
7  *
8  *
9  * \author T. Glasmachers
10  * \date 2011
11  *
12  *
13  * \par Copyright 1995-2017 Shark Development Team
14  *
15  * <BR><HR>
16  * This file is part of Shark.
17  * <http://shark-ml.org/>
18  *
19  * Shark is free software: you can redistribute it and/or modify
20  * it under the terms of the GNU Lesser General Public License as published
21  * by the Free Software Foundation, either version 3 of the License, or
22  * (at your option) any later version.
23  *
24  * Shark is distributed in the hope that it will be useful,
25  * but WITHOUT ANY WARRANTY; without even the implied warranty of
26  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27  * GNU Lesser General Public License for more details.
28  *
29  * You should have received a copy of the GNU Lesser General Public License
30  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
31  *
32  */
33 //===========================================================================
34 
35 #ifndef SHARK_MODELS_CLUSTERING_CENTROIDS_H
36 #define SHARK_MODELS_CLUSTERING_CENTROIDS_H
37 
38 #include <shark/Core/DLLSupport.h>
40 #include <shark/Data/Dataset.h>
41 
42 
43 namespace shark {
44 
45 
46 /// \brief Clusters defined by centroids.
47 ///
48 /// \par
49 /// Centroids are an elementary way to define clusters by means
50 /// of the one-nearest-neighbor rule. This rule defines a hard
51 /// clustering decision.
52 ///
53 /// \par
54 /// The Centroids class uses inverse distances to compute soft
55 /// clustering memberships. This is arbitrary and can be changed
56 /// by overriding the membershipKernel function.
57 /// NOTE(review): the comment on membershipKernel states exp(-distance) rather than an inverse distance; confirm against the implementation.
58 class Centroids : public AbstractClustering<RealVector>
59 {
61 
62 public:
63  /// Default constructor. NOTE(review): the declaration (listing line 64) appears to be missing from this extract.
65 
66  /// Constructor
67  ///
68  /// \param centroids number of centroids in the model (initially zero)
69  /// \param dimension dimension of the input space, and thus of the centroids
70  SHARK_EXPORT_SYMBOL Centroids(std::size_t centroids, std::size_t dimension);
71 
72  /// Constructor. NOTE(review): the declaration (listing line 75) appears to be missing from this extract.
73  ///
74  /// \param centroids centroid vectors
76 
77  /// \brief From INameable: return the class name.
78  std::string name() const
79  { return "Centroids"; }
80 
81  /// From IParameterizable: returns the parameter vector of the model.
82  SHARK_EXPORT_SYMBOL RealVector parameterVector() const;
83 
84  /// From IParameterizable: sets the parameter vector of the model.
85  SHARK_EXPORT_SYMBOL void setParameterVector(RealVector const& newParameters);
86 
87  /// From IParameterizable: returns the number of parameters of the model.
88  SHARK_EXPORT_SYMBOL std::size_t numberOfParameters() const;
89 
90  /// Returns the shape of the inputs, obtained as the data dimension of the stored centroids.
91  Shape inputShape() const{
92  return dataDimension(m_centroids);
93  }
94 
95  /// Returns the number of centroids in the model.
96  SHARK_EXPORT_SYMBOL std::size_t numberOfClusters() const;
97 
98  /// Read-only access to the centroid vectors.
99  Data<RealVector> const& centroids() const{
100  return m_centroids;
101  }
102 
103  /// Overwrites the stored centroid vectors by assigning newCentroids to them.
104  void setCentroids(Data<RealVector> const& newCentroids){
105  m_centroids = newCentroids;
106  }
107 
108  /// From ISerializable: reads the model from an archive.
109  SHARK_EXPORT_SYMBOL void read(InArchive& archive);
110 
111  /// From ISerializable: writes the model to an archive.
112  SHARK_EXPORT_SYMBOL void write(OutArchive& archive) const;
113 
114  /// From AbstractClustering: Compute cluster memberships for a single pattern.
115  SHARK_EXPORT_SYMBOL RealVector softMembership(RealVector const& pattern) const;
116  /// From AbstractClustering: Compute cluster memberships for a batch of patterns.
117  SHARK_EXPORT_SYMBOL RealMatrix softMembership(BatchInputType const& patterns) const;
118 
119  /// Computes the distances of each pattern in the batch to all cluster centers.
120  SHARK_EXPORT_SYMBOL RealMatrix distances(BatchInputType const& patterns) const;
121 
122 
123  /// Initialize centroids from labeled data: take the first
124  /// data points with different labels; if there are more
125  /// centroids than classes, the remaining centroids are filled
126  /// with the first elements in the data set.
127  ///
128  /// \param data dataset from which to take the centroids
129  /// \param noClusters number of centroids in the model, default 0 is mapped to the number of classes in the data set
130  /// \param noClasses number of classes in the dataset, default 0 means that the number is computed
131  SHARK_EXPORT_SYMBOL void initFromData(ClassificationDataset const& data, std::size_t noClusters = 0, std::size_t noClasses = 0);
132 
133  /// Initialize centroids from unlabeled data:
134  /// take a random subset of data points.
135  ///
136  /// \param dataset dataset from which to take the centroids
137  /// \param noClusters number of centroids in the model
138  SHARK_EXPORT_SYMBOL void initFromData(Data<RealVector> const& dataset, std::size_t noClusters);
139 
140 protected:
141  /// Compute unnormalized membership from distance.
142  /// The default implementation is to return exp(-distance)
143  SHARK_EXPORT_SYMBOL virtual double membershipKernel(double dist) const;
144 
145  /// Centroid vectors. NOTE(review): the member declaration (listing line 146, presumably m_centroids) appears to be missing from this extract.
147 };
148 
149 
150 }
151 #endif