Shark machine learning library
About Shark
News!
Contribute
Credits and copyright
Downloads
Getting Started
Installation
Using the docs
Documentation
Tutorials
Quick references
Class list
Global functions
FAQ
Showroom
include
shark
Models
Clustering
Centroids.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Clusters defined by centroids.
6
*
7
*
8
*
9
* \author T. Glasmachers
10
* \date 2011
11
*
12
*
13
* \par Copyright 1995-2017 Shark Development Team
14
*
15
* <BR><HR>
16
* This file is part of Shark.
17
* <http://shark-ml.org/>
18
*
19
* Shark is free software: you can redistribute it and/or modify
20
* it under the terms of the GNU Lesser General Public License as published
21
* by the Free Software Foundation, either version 3 of the License, or
22
* (at your option) any later version.
23
*
24
* Shark is distributed in the hope that it will be useful,
25
* but WITHOUT ANY WARRANTY; without even the implied warranty of
26
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
* GNU Lesser General Public License for more details.
28
*
29
* You should have received a copy of the GNU Lesser General Public License
30
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
31
*
32
*/
33
//===========================================================================
34
35
#ifndef SHARK_MODELS_CLUSTERING_CENTROIDS_H
36
#define SHARK_MODELS_CLUSTERING_CENTROIDS_H
37
38
#include <
shark/Core/DLLSupport.h
>
39
#include <
shark/Models/Clustering/AbstractClustering.h
>
40
#include <
shark/Data/Dataset.h
>
41
42
43
namespace
shark
{
44
45
46
/// \brief Clusters defined by centroids.
47
///
48
/// \par
49
/// Centroids are an elementary way to define clusters by means
50
/// of the one-nearest-neighbor rule. This rule defines a hard
51
/// clustering decision.
52
///
53
/// \par
54
/// The Centroids class uses inverse distances to compute soft
55
/// clustering memberships. This is arbitrary and can be changed
56
/// by overriding the membershipKernel function.
57
///
58
class
Centroids
:
public
AbstractClustering
<RealVector>
59
{
60
typedef
AbstractClustering<RealVector>
base_type
;
61
62
public
:
63
/// Default constructor
64
SHARK_EXPORT_SYMBOL
Centroids
();
65
66
/// Constructor
67
///
68
/// \param centroids number of centroids in the model (initially zero)
69
/// \param dimension dimension of the input space, and thus of the centroids
70
SHARK_EXPORT_SYMBOL
Centroids
(std::size_t
centroids
, std::size_t dimension);
71
72
/// Constructor
73
///
74
/// \param centroids centroid vectors
75
SHARK_EXPORT_SYMBOL
Centroids
(
Data<RealVector>
const
& centroids);
76
77
/// \brief From INameable: return the class name.
78
std::string
name
()
const
79
{
return
"Centroids"
; }
80
81
/// from IParameterizable
82
SHARK_EXPORT_SYMBOL
RealVector
parameterVector
()
const
;
83
84
/// from IParameterizable
85
SHARK_EXPORT_SYMBOL
void
setParameterVector
(RealVector
const
& newParameters);
86
87
/// from IParameterizable
88
SHARK_EXPORT_SYMBOL
std::size_t
numberOfParameters
()
const
;
89
90
/// return the dimension of the inputs
91
Shape
inputShape
()
const
{
92
return
dataDimension
(
m_centroids
);
93
}
94
95
/// return the number of centroids in the model
96
SHARK_EXPORT_SYMBOL
std::size_t
numberOfClusters
()
const
;
97
98
/// read access to the centroid vectors
99
Data<RealVector>
const
&
centroids
()
const
{
100
return
m_centroids
;
101
}
102
103
/// overwrite the centroid vectors
104
void
setCentroids
(
Data<RealVector>
const
& newCentroids){
105
m_centroids
= newCentroids;
106
}
107
108
/// from ISerializable
109
SHARK_EXPORT_SYMBOL
void
read
(
InArchive
& archive);
110
111
/// from ISerializable
112
SHARK_EXPORT_SYMBOL
void
write
(
OutArchive
& archive)
const
;
113
114
/// from AbstractClustering: Compute cluster memberships.
115
SHARK_EXPORT_SYMBOL
RealVector
softMembership
(RealVector
const
& pattern)
const
;
116
/// From AbstractClustering: Compute cluster memberships for a batch of patterns.
117
SHARK_EXPORT_SYMBOL
RealMatrix
softMembership
(
BatchInputType
const
& patterns)
const
;
118
119
/// Computes the distances of each pattern to all cluster centers
120
SHARK_EXPORT_SYMBOL
RealMatrix
distances
(
BatchInputType
const
& patterns)
const
;
121
122
123
/// initialize centroids from labeled data: take the first
124
/// data points with different labels; if there are more
125
/// centroids than classes, the remaining centroids are filled
126
/// with the first elements in the data set
127
///
128
/// \param data dataset from which to take the centroids
129
/// \param noClusters number of centroids in the model, default 0 is mapped to the number of classes in the data set
130
/// \param noClasses number of clases in the dataset, default 0 means that the number is computed
131
SHARK_EXPORT_SYMBOL
void
initFromData
(
ClassificationDataset
const
& data, std::size_t noClusters = 0, std::size_t noClasses = 0);
132
133
/// initialize centroids from unlabeled data:
134
/// take a random subset of data points
135
///
136
/// \param dataset dataset from which to take the centroids
137
/// \param noClusters number of centroids in the model
138
SHARK_EXPORT_SYMBOL
void
initFromData
(
Data<RealVector>
const
& dataset, std::size_t noClusters);
139
140
protected
:
141
/// Compute unnormalized membership from distance.
142
/// The default implementation is to return exp(-distance)
143
SHARK_EXPORT_SYMBOL
virtual
double
membershipKernel
(
double
dist)
const
;
144
145
/// centroid vectors
146
Data<RealVector>
m_centroids
;
147
};
148
149
150
}
151
#endif