Shark machine learning library
About Shark
News!
Contribute
Credits and copyright
Downloads
Getting Started
Installation
Using the docs
Documentation
Tutorials
Quick references
Class list
Global functions
FAQ
Showroom
include
shark
Models
Clustering
AbstractClustering.h
Go to the documentation of this file.
1
//===========================================================================
2
/*!
3
*
4
*
5
* \brief Super class for clustering definitions.
6
*
7
*
8
*
9
* \author T. Glasmachers
10
* \date 2011
11
*
12
*
13
* \par Copyright 1995-2017 Shark Development Team
14
*
15
* <BR><HR>
16
* This file is part of Shark.
17
* <http://shark-ml.org/>
18
*
19
* Shark is free software: you can redistribute it and/or modify
20
* it under the terms of the GNU Lesser General Public License as published
21
* by the Free Software Foundation, either version 3 of the License, or
22
* (at your option) any later version.
23
*
24
* Shark is distributed in the hope that it will be useful,
25
* but WITHOUT ANY WARRANTY; without even the implied warranty of
26
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27
* GNU Lesser General Public License for more details.
28
*
29
* You should have received a copy of the GNU Lesser General Public License
30
* along with Shark. If not, see <http://www.gnu.org/licenses/>.
31
*
32
*/
33
//===========================================================================
34
35
#ifndef SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
36
#define SHARK_MODELS_CLUSTERING_ABSTRACTCLUSTERING_H
37
38
39
#include <
shark/Data/BatchInterface.h
>
40
#include <
shark/Core/Flags.h
>
41
#include <
shark/Core/INameable.h
>
42
#include <
shark/Core/IParameterizable.h
>
43
#include <
shark/Core/ISerializable.h
>
44
#include <
shark/Core/Shape.h
>
45
46
namespace
shark
{
47
48
49
///
50
/// \brief Base class for clustering.
51
///
52
/// \par
53
/// Clustering algorithms vary widely in the data structures
54
/// on which they operate. For example, simple centroid-based
55
/// approaches such as k-means are mutually incompatible with
56
/// tree-based hierarchical clustering. This interface class
57
/// is the attempt to cast different cluster descriptions into
58
/// a minimal common interface that is useful for prediction.
59
///
60
/// \par
61
/// There are at least two common types of predictions made
62
/// with clusterings. The first one is the assignment of the
63
/// best matching cluster to a patters, such as in vector
64
/// quantization or unsupervised clustering. This is often
65
/// referred to as "hard clustering". The second one is the
66
/// computation of a membership function ("soft clustering").
67
/// The membership of a pattern to a cluster is non-negative,
68
/// and the total membership should add to one.
69
///
70
/// \par
71
/// This interface makes minimal assumptions to allow for
72
/// these types of predictions. It assumes that clusters can
73
/// be enumbered (identified by an index), and that at least
74
/// a membership test can be made (hard clustering). It is
75
/// optional to provide a membership function. Only one of
76
/// the two interfaces for best matching cluster or membership
77
/// function need to be implemented, since the best matching
78
/// cluster can be deduced from the membership function.
79
/// However, often the best matching cluster can be computed
80
/// more efficiently than the membership function. In these
81
/// cases both interface functions should be implemented.
82
///
83
/// \par
84
/// The purpose of this interface is to act as a common
85
/// super class to different data structures describing the
86
/// outcome of a clustering operation. The interface allows
87
/// all of these data structures to be used in the two
88
/// clustering model classes: HardClusteringModel and
89
/// SoftClusteringModel.
90
///
91
template
<
class
InputT>
92
class
AbstractClustering
:
public
INameable
,
public
IParameterizable
<>,
public
ISerializable
93
{
94
public
:
95
typedef
InputT
InputType
;
96
typedef
unsigned
int
OutputType
;
97
typedef
typename
Batch<InputType>::type
BatchInputType
;
98
typedef
Batch<OutputType>::type
BatchOutputType
;
99
100
enum
Feature
{
101
HAS_SOFT_MEMBERSHIP
= 1,
102
};
103
SHARK_FEATURE_INTERFACE
;
104
105
/// Tests whether the clustering can compute a (soft)
106
/// member ship function, describing the membership
107
/// of a sample to the different clusters.
108
bool
hasSoftMembershipFunction
()
const
{
109
return
m_features
&
HAS_SOFT_MEMBERSHIP
;
110
}
111
112
/// return the number of clusters
113
virtual
std::size_t
numberOfClusters
()
const
= 0;
114
115
virtual
Shape
inputShape
()
const
= 0;
116
117
/// \brief Compute best matching cluster.
118
///
119
/// \par
120
/// This function should be overriden by sub-classes to
121
/// compute the cluster best matching the input pattern.
122
/// The (typically slow) default implementation is to
123
/// create a batch of size 1 and return the result of the batch call to hardMembership
124
virtual
unsigned
int
hardMembership
(InputType
const
& pattern)
const
{
125
typename
Batch<InputType>::type
b =
Batch<InputType>::createBatch
(pattern);
126
getBatchElement
(b,0) = pattern;
127
return
hardMembership
(b)(0);
128
}
129
130
/// \brief Compute best matching cluster for a batch of inputs.
131
///
132
/// \par
133
/// This function should be overriden by sub-classes to
134
/// compute the cluster best matching the input pattern.
135
/// The (typically slow) default implementation is to
136
/// return the arg max of the soft membership function for every pattern.
137
virtual
BatchOutputType
hardMembership
(BatchInputType
const
& patterns)
const
{
138
std::size_t numPatterns =
batchSize
(patterns);
139
RealMatrix f =
softMembership
(patterns);
140
SHARK_ASSERT
(f.size2() > 0);
141
SHARK_ASSERT
(f.size1() == numPatterns);
142
BatchOutputType outputs(numPatterns);
143
for
(std::size_t i = 0; i != numPatterns;++i){
144
auto
membership = row(f,i);
145
outputs(i) = (
unsigned
int)(std::max_element(membership.begin(),membership.end())-membership.begin());
146
}
147
return
outputs;
148
}
149
150
/// \brief Compute cluster membership function.
151
///
152
/// \par
153
/// This function should be overriden by sub-classes to
154
/// compute the membership of a pattern to the clusters.
155
/// The default implementation creates a batch of size 1
156
/// and calls the batched version. If this is not overriden, an xception is thrown.
157
virtual
RealVector
softMembership
(InputType
const
& pattern)
const
{
158
return
row(
softMembership
(
Batch<InputType>::createBatch
(pattern)),0);
159
}
160
161
/// \brief Compute cluster membership function.
162
///
163
/// \par
164
/// This function should be overriden by sub-classes to
165
/// compute the membership of a pattern to the clusters.
166
/// This default implementation throws an exception.
167
virtual
RealMatrix
softMembership
(BatchInputType
const
& patterns)
const
{
168
SHARK_FEATURE_EXCEPTION
(
HAS_SOFT_MEMBERSHIP
);
169
}
170
171
/// empty default implementation of ISerializable::read
172
void
read
(
InArchive
& archive) {}
173
174
/// empty default implementation of ISerializable::write
175
void
write
(
OutArchive
& archive)
const
{}
176
};
177
178
179
}
180
#endif