GetFEM  5.5
gmm_solver_bfgs.h
Go to the documentation of this file.
1 /* -*- c++ -*- (enables emacs c++ mode) */
2 /*===========================================================================
3 
4  Copyright (C) 2004-2026 Yves Renard
5 
6  This file is a part of GetFEM
7 
8  GetFEM is free software; you can redistribute it and/or modify it
9  under the terms of the GNU Lesser General Public License as published
10  by the Free Software Foundation; either version 3 of the License, or
11  (at your option) any later version along with the GCC Runtime Library
12  Exception either version 3.1 or (at your option) any later version.
13  This program is distributed in the hope that it will be useful, but
14  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15  or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
16  License and GCC Runtime Library Exception for more details.
17  You should have received a copy of the GNU Lesser General Public License
18  along with this program. If not, see https://www.gnu.org/licenses/.
19 
20  As a special exception, you may use this file as it is a part of a free
21  software library without restriction. Specifically, if other files
22  instantiate templates or use macros or inline functions from this file,
23  or you compile this file and link it with other files to produce an
24  executable, this file does not by itself cause the resulting executable
25  to be covered by the GNU Lesser General Public License. This exception
26  does not however invalidate any other reasons why the executable file
27  might be covered by the GNU Lesser General Public License.
28 
29 ===========================================================================*/
30 
31 /**@file gmm_solver_bfgs.h
32  @author Yves Renard <Yves.Renard@insa-lyon.fr>
33  @date October 14 2004.
34  @brief Implements BFGS (Broyden, Fletcher, Goldfarb, Shanno) algorithm.
35  */
36 #ifndef GMM_BFGS_H
37 #define GMM_BFGS_H
38 
39 #include "gmm_kernel.h"
40 #include "gmm_iter.h"
41 
42 namespace gmm {
43 
44  // BFGS algorithm (Broyden, Fletcher, Goldfarb, Shanno)
45  // Quasi Newton method for optimization problems.
46  // with Wolfe Line search.
47 
48 
49  // delta[k] = x[k+1] - x[k]
50  // gamma[k] = grad f(x[k+1]) - grad f(x[k])
51  // H[0] = I
52  // BFGS : zeta[k] = delta[k] - H[k] gamma[k]
53  // DFP : zeta[k] = H[k] gamma[k]
54  // tau[k] = gamma[k]^T zeta[k]
55  // rho[k] = 1 / gamma[k]^T delta[k]
56  // BFGS : H[k+1] = H[k] + rho[k](zeta[k] delta[k]^T + delta[k] zeta[k]^T)
57  // - rho[k]^2 tau[k] delta[k] delta[k]^T
58  // DFP : H[k+1] = H[k] + rho[k] delta[k] delta[k]^T
59  // - (1/tau[k])zeta[k] zeta[k]^T
60 
61  // Object representing the inverse of the Hessian
62  template <typename VECTOR> struct bfgs_invhessian {
63 
64  typedef typename linalg_traits<VECTOR>::value_type T;
65  typedef typename number_traits<T>::magnitude_type R;
66 
67  std::vector<VECTOR> delta, gamma, zeta;
68  std::vector<T> tau, rho;
69  int version;
70 
71  template<typename VEC1, typename VEC2> void hmult(const VEC1 &X, VEC2 &Y) {
72  copy(X, Y);
73  for (size_type k = 0 ; k < delta.size(); ++k) {
74  T xdelta = vect_sp(X, delta[k]), xzeta = vect_sp(X, zeta[k]);
75  switch (version) {
76  case 0 : // BFGS
77  add(scaled(zeta[k], rho[k]*xdelta), Y);
78  add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
79  break;
80  case 1 : // DFP
81  add(scaled(delta[k], rho[k]*xdelta), Y);
82  add(scaled(zeta[k], -xzeta/tau[k]), Y);
83  break;
84  }
85  }
86  }
87 
88  void restart(void) {
89  delta.resize(0); gamma.resize(0); zeta.resize(0);
90  tau.resize(0); rho.resize(0);
91  }
92 
93  template<typename VECT1, typename VECT2>
94  void update(const VECT1 &deltak, const VECT2 &gammak) {
95  T vsp = vect_sp(deltak, gammak);
96  if (vsp == T(0)) return;
97  size_type N = vect_size(deltak), k = delta.size();
98  VECTOR Y(N);
99  hmult(gammak, Y);
100  delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
101  tau.resize(k+1); rho.resize(k+1);
102  resize(delta[k], N); resize(gamma[k], N); resize(zeta[k], N);
103  gmm::copy(deltak, delta[k]);
104  gmm::copy(gammak, gamma[k]);
105  rho[k] = R(1) / vsp;
106  if (version == 0)
107  add(delta[k], scaled(Y, -1), zeta[k]);
108  else
109  gmm::copy(Y, zeta[k]);
110  tau[k] = vect_sp(gammak, zeta[k]);
111  }
112 
113  bfgs_invhessian(int v = 0) { version = v; }
114  };
115 
116 
117  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
118  void bfgs(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
119  int restart, iteration& iter, int version = 0,
120  double lambda_init=0.001, double print_norm=1.0) {
121 
122  typedef typename linalg_traits<VECTOR>::value_type T;
123  typedef typename number_traits<T>::magnitude_type R;
124 
125  bfgs_invhessian<VECTOR> invhessian(version);
126  VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
127  grad(x, r);
128  R lambda = lambda_init, valx = f(x), valy;
129  int nb_restart(0);
130 
131  if (iter.get_noisy() >= 1) cout << "value " << valx / print_norm << " ";
132  while (! iter.finished_vect(r)) {
133 
134  invhessian.hmult(r, d); gmm::scale(d, T(-1));
135 
136  // Wolfe Line search
137  R derivative = gmm::vect_sp(r, d);
138  R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
139  bool unbounded = true, blocked = false, grad_computed = false;
140 
141  for(;;) {
142  add(x, scaled(d, lambda), y);
143  valy = f(y);
144  if (iter.get_noisy() >= 2) {
145  cout.precision(15);
146  cout << "Wolfe line search, lambda = " << lambda
147  << " value = " << valy /print_norm << endl;
148 // << " derivative = " << derivative
149 // << " lambda min = " << lambda_min << " lambda max = "
150 // << lambda_max << endl; getchar();
151  }
152  if (valy <= valx + m1 * lambda * derivative) {
153  grad(y, r2); grad_computed = true;
154  T derivative2 = gmm::vect_sp(r2, d);
155  if (derivative2 >= m2*derivative) break;
156  lambda_min = lambda;
157  }
158  else {
159  lambda_max = lambda;
160  unbounded = false;
161  }
162  if (unbounded) lambda *= R(10);
163  else lambda = (lambda_max + lambda_min) / R(2);
164  if (lambda == lambda_max || lambda == lambda_min) break;
165  // valy <= R(2)*valx replaced by
166  // valy <= valx + gmm::abs(derivative)*lambda_init
167  // for compatibility with negative values (08.24.07).
168  if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
169  (lambda < R(lambda_init*1E-8) ||
170  (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
171  { blocked = true; lambda = lambda_init; break; }
172  }
173 
174  // Rank two update
175  ++iter;
176  if (!grad_computed) grad(y, r2);
177  gmm::add(scaled(r2, -1), r);
178  if ((iter.get_iteration() % restart) == 0 || blocked) {
179  if (iter.get_noisy() >= 1) cout << "Restart\n";
180  invhessian.restart();
181  if (++nb_restart > 10) {
182  if (iter.get_noisy() >= 1) cout << "BFGS is blocked, exiting\n";
183  return;
184  }
185  }
186  else {
187  invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
188  nb_restart = 0;
189  }
190  copy(r2, r); copy(y, x); valx = valy;
191  if (iter.get_noisy() >= 1)
192  cout << "BFGS value " << valx/print_norm << "\t";
193  }
194 
195  }
196 
197 
198  template <typename FUNCTION, typename DERIVATIVE, typename VECTOR>
199  inline void dfp(const FUNCTION &f, const DERIVATIVE &grad, VECTOR &x,
200  int restart, iteration& iter, int version = 1) {
201  bfgs(f, grad, x, restart, iter, version);
202 
203  }
204 
205 
206 }
207 
208 #endif
209 
void copy(const L1 &l1, L2 &l2)
*‍/
Definition: gmm_blas.h:976
void resize(V &v, size_type n)
*‍/
Definition: gmm_blas.h:209
strongest_value_type< V1, V2 >::value_type vect_sp(const V1 &v1, const V2 &v2)
*‍/
Definition: gmm_blas.h:263
void add(const L1 &l1, L2 &l2)
*‍/
Definition: gmm_blas.h:1275
Iteration object.
Include the base gmm files.
size_t size_type
used as the common size type in the library
Definition: bgeot_poly.h:48