Energy.h
Go to the documentation of this file.
1 /*!
2  *
3  *
4  * \brief -
5  *
6  * \author -
7  * \date -
8  *
9  *
10  * \par Copyright 1995-2017 Shark Development Team
11  *
12  * <BR><HR>
13  * This file is part of Shark.
14  * <http://shark-ml.org/>
15  *
16  * Shark is free software: you can redistribute it and/or modify
17  * it under the terms of the GNU Lesser General Public License as published
18  * by the Free Software Foundation, either version 3 of the License, or
19  * (at your option) any later version.
20  *
21  * Shark is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24  * GNU Lesser General Public License for more details.
25  *
26  * You should have received a copy of the GNU Lesser General Public License
27  * along with Shark. If not, see <http://www.gnu.org/licenses/>.
28  *
29  */
30 #ifndef SHARK_UNSUPERVISED_RBm_ENERGY_H
31 #define SHARK_UNSUPERVISED_RBm_ENERGY_H
32 
33 #include <shark/LinAlg/Base.h>
35 
36 namespace shark{
37 
38 /// \brief The Energy function determining the Gibbs distribution of an RBM.
39 ///
40 ///General Energy function which uses the information given by the neurons to automatize
41 ///the calculation of the value of the energy for certain states, the derivative of the energy
42 ///and the factorization of the probability.
43 ///
44 /// Following (but slightly simplifying from the formulas given by)
45 /// Welling et al., a general form of an RBM's Energy function is given by
46 /// \f$ E(\vec v,\vec h)= f_h(\vec h) + f_v(\vec v) + \sum_{k,l} \phi_{hk}(\vec h) W_{k,l} \phi_{vl}(\vec v) \f$
47 /// We call \f$ f_h(\vec h) \f$ and \f$ f_v(\vec v) \f$ the term of the Energy (energy term)
48 /// associated to the hidden or the visible neurons respectively.
49 /// \f$ \sum_{k,l} \phi_{hk}(\vec h) W_{k,l} \phi_{vl}(\vec v) \f$ is called the interaction term.
50 /// In the standard case of a binary RBM we have \f$ f_h(\vec h) = \vec h \vec c \f$
51 /// and \f$ f_v(\vec v) = \vec v \vec b \f$, where \f$ \vec c \f$ and \f$ \vec b \f$
52 /// are the vectors of the bias parameters for the hidden and the visible neurons respectively.
53 /// Furthermore, the interaction term simplifies to \f$ \vec h W \vec v \f$, so we have just
54 /// one single 'phi-function' for each layer that is the identity function.
55 
56 template<class RBM>
57 struct Energy{
58  typedef typename RBM::HiddenType HiddenType; //< type of the hidden layer
59  typedef typename RBM::VisibleType VisibleType; //< type of the visible layer
60 
61  //typedefs for single element
62  typedef typename HiddenType::SufficientStatistics HiddenStatistics;
63  typedef typename VisibleType::SufficientStatistics VisibleStatistics;
64 
65  //batch typedefs
66  typedef typename HiddenType::StatisticsBatch HiddenStatisticsBatch;
67  typedef typename VisibleType::StatisticsBatch VisibleStatisticsBatch;
68 
69  Energy(RBM const& rbm)
70  : m_rbm(rbm)
71  , m_hiddenNeurons(rbm.hiddenNeurons())
72  , m_visibleNeurons(rbm.visibleNeurons()){}
73 
74  ///\brief Calculates the Energy given the states of batches of hidden and visible variables .
75  RealVector energy(RealMatrix const& hidden, RealMatrix const& visible)const{
76  SIZE_CHECK(visible.size1() == hidden.size1());
77 
78  std::size_t batchSize = visible.size1();
79  RealMatrix input(batchSize,m_hiddenNeurons.size());
80  inputHidden( input, visible);
81 
82  return energyFromHiddenInput( input, hidden, visible);
83  }
84 
85  ///\brief Calculates the input of the hidden neurons given the state of the visible in a batch-vise fassion.
86  ///
87  ///@param inputs the batch of vectors the input of the hidden neurons is stored in
88  ///@param visibleStates the batch of states of the visible neurons@
89  ///@todo Remove this and replace fully by the rbm method if possible
90  void inputHidden(RealMatrix& inputs, RealMatrix const& visibleStates)const{
91  m_rbm.inputHidden(inputs,visibleStates);
92  }
93 
94 
95  ///\brief Calculates the input of the visible neurons given the state of the hidden.
96  ///
97  ///@param inputs the vector the input of the visible neurons is stored in
98  ///@param hiddenStates the state of the hidden neurons
99  ///@todo Remove this and replace fully by the rbm method if possible
100  void inputVisible(RealMatrix& inputs, RealMatrix const& hiddenStates)const{
101  m_rbm.inputVisible(inputs,hiddenStates);
102  }
103 
104  ///\brief Computes the logarithm of the unnormalized probability of each state of the
105  /// hidden neurons in a batch by using the precomputed input/activation of the visible neurons.
106  ///
107  ///@param hiddenState the batch of states of the hidden neurons
108  ///@param visibleInput the batch of current inputs for he visible units given hiddenState
109  ///@param beta the inverse temperature
110  ///@return the unnormalized probability
111  template<class BetaVector>
113  RealMatrix const& hiddenState,
114  RealMatrix const& visibleInput,
115  BetaVector const& beta
116  )const{
117  SIZE_CHECK(hiddenState.size1()==visibleInput.size1());
118  SIZE_CHECK(hiddenState.size1()==beta.size());
119  std::size_t batchSize = hiddenState.size1();
120 
121  //calculate the energy terms of the hidden neurons for the whole batch
122  RealVector energyTerms = m_hiddenNeurons.energyTerm(hiddenState,beta);
123 
124  //calculate resulting probabilities in sequence
125  RealVector p(batchSize);
126  for(std::size_t i = 0; i != batchSize; ++i){
127  p(i) = m_visibleNeurons.logMarginalize(row(visibleInput,i),beta(i))+energyTerms(i);
128  }
129  return p;
130  }
131 
132 
133  ///\brief Computes the logarithm of the unnormalized probability of each state of the
134  /// visible neurons in a batch by using the precomputed input/activation of the hidden neurons.
135  ///
136  ///@param visibleState the batch of states of the hidden neurons
137  ///@param hiddenInput the batch of current inputs for he visible units given visibleState
138  ///@param beta the inverse temperature
139  ///@return the unnormalized probability
140  template<class BetaVector>
142  RealMatrix const& visibleState,
143  RealMatrix const& hiddenInput,
144  BetaVector const& beta
145  )const{
146  SIZE_CHECK(visibleState.size1()==hiddenInput.size1());
147  SIZE_CHECK(visibleState.size1()==beta.size());
148  std::size_t batchSize = visibleState.size1();
149 
150  //calculate the energy terms of the visible neurons for the whole batch
151  RealVector energyTerms = m_visibleNeurons.energyTerm(visibleState,beta);
152 
153  RealVector p(batchSize);
154  for(std::size_t i = 0; i != batchSize; ++i){
155  p(i) = m_hiddenNeurons.logMarginalize(row(hiddenInput,i),beta(i))+energyTerms(i);
156  }
157  return p;
158  }
159 
160 
161  ///\brief Computes the logarithm of the unnormalized probability for each state of the visible neurons from a batch.
162  ///
163  ///@param visibleStates the batch of states of the hidden neurons
164  ///@param beta the inverse temperature
165  template<class BetaVector>
166  RealVector logUnnormalizedProbabilityVisible(RealMatrix const& visibleStates, BetaVector const& beta)const{
167  SIZE_CHECK(visibleStates.size1() == beta.size());
168 
169  RealMatrix hiddenInputs(beta.size(),m_hiddenNeurons.size());
170  inputHidden(hiddenInputs,visibleStates);
171  return logUnnormalizedProbabilityVisible(visibleStates, hiddenInputs, beta);
172  }
173 
174  ///\brief Computes the logarithm of the unnormalized probability of each state of the hidden neurons from a batch.
175  ///
176  ///@param hiddenStates a batch of states of the hidden neurons
177  ///@param beta the inverse temperature
178  template<class BetaVector>
179  RealVector logUnnormalizedProbabilityHidden(RealMatrix const& hiddenStates, BetaVector const& beta)const{
180  SIZE_CHECK(hiddenStates.size1() == beta.size());
181 
182  RealMatrix visibleInputs(beta.size(),m_visibleNeurons.size());
183  inputVisible(visibleInputs,hiddenStates);
184  return logUnnormalizedProbabilityHidden(hiddenStates, visibleInputs, beta);
185  }
186 
187  ///\brief Optimization of the calculation of the energy, when the input of the hidden units is already available.
188  ///@param hiddenInput the vector of inputs of the hidden neurons
189  ///@param hidden the states of the hidden neurons
190  ///@param visible the states of the visible neurons
191  ///@return the value of the energy function
193  RealMatrix const& hiddenInput,
194  RealMatrix const& hidden,
195  RealMatrix const& visible
196  )const{
197  RealMatrix const& phiOfH = m_hiddenNeurons.phi(hidden);
198  std::size_t batchSize = hiddenInput.size1();
199  RealVector energies(batchSize);
200  for(std::size_t i = 0; i != batchSize; ++i){
201  energies(i) = -inner_prod(row(hiddenInput,i),row(phiOfH,i));
202  }
203  energies -= m_hiddenNeurons.energyTerm(hidden,blas::repeat(1.0,batchSize));
204  energies -= m_visibleNeurons.energyTerm(visible,blas::repeat(1.0,batchSize));
205  return energies;
206  }
207 
208 
209  ///\brief Optimization of the calculation of the energy, when the input of the visible units is already available.
210  ///@param visibleInput the vector of inputs of the visible neurons
211  ///@param hidden the states of the hidden neurons
212  ///@param visible the states of the visible neurons
213  ///@return the value of the energy function
215  RealMatrix const& visibleInput,
216  RealMatrix const& hidden,
217  RealMatrix const& visible
218  )const{
219  RealMatrix const& phiOfV = m_visibleNeurons.phi(visible);
220  std::size_t batchSize = visibleInput.size1();
221  RealVector energies(batchSize);
222  for(std::size_t i = 0; i != batchSize; ++i){
223  energies(i) = -inner_prod(row(phiOfV,i),row(visibleInput,i));
224  }
225  energies -= m_hiddenNeurons.energyTerm(hidden,blas::repeat(1.0,batchSize));
226  energies -= m_visibleNeurons.energyTerm(visible,blas::repeat(1.0,batchSize));
227  return energies;
228  }
229 private:
230  RBM const& m_rbm;
231  HiddenType const& m_hiddenNeurons;
232  VisibleType const& m_visibleNeurons;
233 };
234 
235 }
236 
237 #endif