DistantModes.h
Go to the documentation of this file.
1 /*!
2  * \brief Implements the DistantModes/ArtificialModes benchmark problem
3  *
4  * \author O. Krause, A.Fischer, K.Bruegge
5  * \date 2012
6  *
7  *
8  * \par Copyright 1995-2017 Shark Development Team
9  *
10  * <BR><HR>
11  * This file is part of Shark.
12  * <http://shark-ml.org/>
13  *
14  * Shark is free software: you can redistribute it and/or modify
15  * it under the terms of the GNU Lesser General Public License as published
16  * by the Free Software Foundation, either version 3 of the License, or
17  * (at your option) any later version.
18  *
19  * Shark is distributed in the hope that it will be useful,
20  * but WITHOUT ANY WARRANTY; without even the implied warranty of
21  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22  * GNU Lesser General Public License for more details.
23  *
24  * You should have received a copy of the GNU Lesser General Public License
25  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
26  *
27  */
28 #ifndef UNSUPERVISED_RBM_PROBLEMS_DISTANTMODES_H
29 #define UNSUPERVISED_RBM_PROBLEMS_DISTANTMODES_H
30 
31 #include <shark/Data/Dataset.h>
32 #include <shark/LinAlg/Base.h>
33 #include <shark/Core/Random.h>
35 
36 namespace shark{
37 
38 
39 ///\brief Creates a set of patterns (each later representing a mode)
40 /// which are then randomly perturbed to create the data set.
41 /// The dataset was introduced in Desjardins et al. (2010) (Parallel Tempering for training restricted Boltzmann machines, AISTATS 2010)
42 ///
43 ///The higher the perturbation, the harder the data is to classify,
44 ///but the closer the modes are and thus the easier the data distribution is to learn.
46 private:
48 
49  double m_p; ///< probability value handed to perturbate() for every generated pattern
50  unsigned m_dim; ///< length of each generated vector
51  unsigned m_modes; ///< number of distinct mode prototypes; sample i uses mode i % m_modes
52  unsigned m_copies; ///< samples generated per mode (m_modes * m_copies samples in total)
53  std::size_t m_batchSize; ///< batch size forwarded to createDataFromRange
54 
55  //Generates a basic pattern representing the "center" of a mode.
56  void modePrototype(RealVector& pattern, unsigned mode) const {
57  for (std::size_t i = 0; i != pattern.size(); ++i){
58  pattern(i) = (mode % 2) ^ (i * (mode / 2 + 1) / pattern.size()) % 2;
59  }
60  }
61 
62 
63  ///Perturbates the pattern by randomly flipping pixels
64  ///@param pattern the pattern
65  ///@param p the flipping probability
66  void perturbate(RealVector& pattern, double p)const{
67  for (std::size_t i = 0; i < pattern.size(); ++i){
68  if (random::uni(random::globalRng, 0,1) > p){
69  pattern(i) = !pattern(i);
70  }
71  }
72  }
73 
74  void init() {
75  std::vector<RealVector> data(m_modes * m_copies,RealVector(m_dim));
76  for (std::size_t i = 0; i != data.size(); ++i) {
77  RealVector& element=data[i];
78  unsigned mode = i % m_modes;
79  modePrototype(element, mode);
80  perturbate(element, m_p);
81  }
82  m_data = createDataFromRange(data, m_batchSize);
83  }
84 
85 public:
86  ///generates the DistantModes distribution.
87  ///
88  ///\param p the probability of changing a input neuron
89  ///\param dim the dimensionality of the data.
90  ///\param modes the number of modes, should be a multiple of 2
91  ///\param copies the number of disturbed copies for each mode
92  ///\param batchSize the size of the batches in which the generated data set is organized
93  DistantModes(double p = 0, unsigned dim = 16, unsigned modes=4, unsigned copies =2500, size_t batchSize=0)
94  :m_p(p), m_dim(dim), m_modes(modes), m_copies(copies), m_batchSize(batchSize) {
95  init();
96  }
97 
98  ///returns the generated dataset
100  return m_data;
101  };
102 
103  ///returns the dimensionality of the data
104  std::size_t inputDimension() const {
105  return m_dim;
106  }
107 };
108 
109 }
110 #endif