LBFGS.h
//===========================================================================
/*!
 *
 *
 * \brief LBFGS
 *
 * The Limited-Memory Broyden, Fletcher, Goldfarb, Shanno (BFGS) algorithm
 * is a quasi-Newton method for unconstrained real-valued optimization.
 * See: http://en.wikipedia.org/wiki/LBFGS for details.
 *
 *
 *
 * \author S. Dahlgaard, O.Krause
 * \date 2013
 *
 *
 * \par Copyright 1995-2017 Shark Development Team
 *
 * <BR><HR>
 * This file is part of Shark.
 * <http://shark-ml.org/>
 *
 * Shark is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Shark is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Shark. If not, see <http://www.gnu.org/licenses/>.
 *
 */
//===========================================================================


#ifndef SHARK_ML_OPTIMIZER_LBFGS_H
#define SHARK_ML_OPTIMIZER_LBFGS_H

#include <shark/Core/DLLSupport.h>
#include <shark/Algorithms/GradientDescent/AbstractLineSearchOptimizer.h>
#include <deque>

namespace shark {

/// \brief Limited-Memory Broyden, Fletcher, Goldfarb, Shanno algorithm.
///
/// BFGS is one of the best performing quasi-Newton methods. However, for large scale
/// optimization, storing the full Hessian approximation is infeasible due to its O(n^2) memory requirement.
/// The L-BFGS algorithm does not store the full Hessian approximation but only the
/// data used for updating it over the last steps. The matrix itself is then regenerated on-the-fly in
/// an implicit matrix scheme. This brings the runtime and memory requirements
/// of a single step down to O(n*hist_size).
///
/// The number of steps stored can be set with setHistCount. The default is 100.
///
/// The algorithm is implemented for unconstrained optimization as well as constrained
/// optimization with box constraints. When box constraints are present and the algorithm
/// encounters a constraint, a dog-leg style algorithm is used:
///
/// First, all variables with active constraints (e.g. x_i = l_i and g_i > 0)
/// are fixed, i.e. p_i = 0. For the remaining variables, the unconstrained optimization
/// problem is solved. If the solution does not satisfy the box constraints, the
/// Cauchy point is computed in the next step. If the Cauchy point is feasible, we search for the point
/// along the line between the unconstrained optimum and the Cauchy point that lies exactly on the constraint.
/// This is the point with the smallest value along that path.
/// This does not find the truly optimal step, but it yields a cheap and reasonably good one
/// that often improves over naive coordinate descent.
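///
/// \par Example
/// A minimal usage sketch. It assumes the standard Shark single-objective optimizer
/// interface (init/step/solution) provided by the optimizer base classes;
/// 'MyDifferentiableObjective' is a hypothetical placeholder for a user-supplied
/// differentiable objective function, not a Shark class.
/// \code
/// MyDifferentiableObjective f;       // hypothetical differentiable objective
/// f.init();                          // objective functions are initialized before use
///
/// LBFGS optimizer;
/// optimizer.setHistCount(20);        // optional: keep only the last 20 update pairs
/// optimizer.init(f);                 // start from a point proposed by the objective
/// for (std::size_t i = 0; i != 100; ++i)
///     optimizer.step(f);             // one quasi-Newton step including a line search
///
/// RealVector best = optimizer.solution().point;  // best point found so far
/// double value = optimizer.solution().value;     // corresponding objective value
/// \endcode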
class LBFGS : public AbstractLineSearchOptimizer{
public:
    LBFGS() :m_numHist(100){
    }

    /// \brief From INameable: return the class name.
    std::string name() const
    { return "LBFGS"; }

    /// \brief Specify the number of steps to be memorized and used to find the L-BFGS direction.
    ///
    /// \param numhist The number of steps to use.
    void setHistCount(unsigned int numhist) {
        SHARK_RUNTIME_CHECK(numhist > 0, "An empty history is not allowed");
        m_numHist = numhist;
    }

    //from ISerializable
    SHARK_EXPORT_SYMBOL void read(InArchive &archive);
    SHARK_EXPORT_SYMBOL void write(OutArchive &archive) const;
protected: // Methods inherited from AbstractLineSearchOptimizer
private:
    /// \brief Stores another step and gradient difference, discarding the oldest pair if necessary.
    ///
    /// \param step Last performed step
    /// \param y Difference in gradients
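    ///
    /// A sketch of the intended bookkeeping (illustrative only; the exact acceptance
    /// test used by the implementation is an assumption here): a pair is only stored
    /// if the curvature product y^T s is large enough, and at most m_numHist pairs are kept.
    /// \code
    /// if (inner_prod(y, step) > m_updThres) {      // assumed curvature test
    ///     if (m_steps.size() >= m_numHist) {       // history full: drop the oldest pair
    ///         m_steps.pop_front();
    ///         m_gradientDifferences.pop_front();
    ///     }
    ///     m_steps.push_back(step);
    ///     m_gradientDifferences.push_back(y);
    /// }
    /// \endcode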
    SHARK_EXPORT_SYMBOL void updateHist(RealVector& y, RealVector& step);
    /// \brief Compute B^{-1}x
    ///
    /// The history is used to define B, which is easy to invert.
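    ///
    /// A common way to apply B^{-1} from the stored pairs is the two-loop recursion.
    /// The following is only a sketch of that scheme, assuming B_0 = m_bdiag * I
    /// (so H_0 = (1/m_bdiag) * I); it is not necessarily the exact code of the implementation:
    /// \code
    /// RealVector q = searchDirection;                       // vector to be multiplied by B^{-1}
    /// std::vector<double> alpha(m_steps.size());
    /// for (std::size_t i = m_steps.size(); i != 0; --i) {   // newest pair first
    ///     double rho = 1.0 / inner_prod(m_gradientDifferences[i-1], m_steps[i-1]);
    ///     alpha[i-1] = rho * inner_prod(m_steps[i-1], q);
    ///     q -= alpha[i-1] * m_gradientDifferences[i-1];
    /// }
    /// RealVector r = q * (1.0 / m_bdiag);                   // apply the initial inverse Hessian H_0
    /// for (std::size_t i = 0; i != m_steps.size(); ++i) {   // oldest pair first
    ///     double rho = 1.0 / inner_prod(m_gradientDifferences[i], m_steps[i]);
    ///     double beta = rho * inner_prod(m_gradientDifferences[i], r);
    ///     r += (alpha[i] - beta) * m_steps[i];
    /// }
    /// searchDirection = r;                                  // now holds B^{-1} times the input
    /// \endcode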
    SHARK_EXPORT_SYMBOL void multBInv(RealVector& searchDirection) const;

    /// \brief Compute Bx
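    ///
    /// B is never formed explicitly; it is defined by applying the standard BFGS update
    /// to the stored pairs, starting from B_0 = m_bdiag * I (standard background, stated for reference):
    /// B_{k+1} = B_k - (B_k s_k)(B_k s_k)^T / (s_k^T B_k s_k) + (y_k y_k^T) / (y_k^T s_k)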
    SHARK_EXPORT_SYMBOL void multB(RealVector& searchDirection) const;

    /// \brief Get the box-constrained LBFGS direction.
    ///
    /// Approximately solves the constrained optimization problem
    ///     min_p 1/2 p^T B p + g^T p
    ///     s.t. l_i <= x_i + p_i <= u_i
    /// This is done using a constrained dogleg approach.
    ///
    /// First, all variables with active constraints (e.g. x_i = l_i and g_i > 0)
    /// are fixed, i.e. p_i = 0. For the remaining variables, the unconstrained optimization
    /// problem is solved. If the solution does not satisfy the box constraints, the
    /// Cauchy point is computed in the next step. If the Cauchy point is feasible, we search for the point
    /// along the line between the unconstrained optimum and the Cauchy point that lies exactly on the constraint.
    /// This is the point with the smallest value along that path.
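    ///
    /// The last step, finding the point on the segment from the Cauchy point towards the
    /// unconstrained optimum that lies on a bound, can be sketched as follows (illustrative
    /// only; c, p and dim are hypothetical local names for the Cauchy point, the unconstrained
    /// optimum and the dimension, with both points given in x-space):
    /// \code
    /// // largest t in [0,1] such that c + t*(p - c) stays inside [lower, upper]
    /// double t = 1.0;
    /// for (std::size_t i = 0; i != dim; ++i) {
    ///     double d = p(i) - c(i);
    ///     if (d > 0.0)      t = std::min(t, (upper(i) - c(i)) / d);
    ///     else if (d < 0.0) t = std::min(t, (lower(i) - c(i)) / d);
    /// }
    /// RealVector onConstraint = c + t * (p - c);
    /// \endcode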
    SHARK_EXPORT_SYMBOL void getBoxConstrainedDirection(
        RealVector& searchDirection,
        RealVector const& lower,
        RealVector const& upper
    ) const;

    double m_updThres; ///< Threshold for when to update the history.
    unsigned int m_numHist; ///< Number of steps to use for LBFGS.
    // Initial Hessian approximation. We use a diagonal matrix, where each element is
    // the same, so we only need to store one double.
    double m_bdiag;

    // Saved steps for creating the approximation.
    // Use a deque as it gives fast pop_front, push_back and random access.
    // m_steps holds the values x_(k+1) - x_k
    // m_gradientDifferences holds the values g_(k+1) - g_k
    std::deque<RealVector> m_steps;
    std::deque<RealVector> m_gradientDifferences;
};

}
#endif