62 template <
typename VECTOR>
struct bfgs_invhessian {
64 typedef typename linalg_traits<VECTOR>::value_type T;
65 typedef typename number_traits<T>::magnitude_type R;
67 std::vector<VECTOR> delta, gamma, zeta;
68 std::vector<T> tau, rho;
71 template<
typename VEC1,
typename VEC2>
void hmult(
const VEC1 &X, VEC2 &Y) {
73 for (
size_type k = 0 ; k < delta.size(); ++k) {
77 add(scaled(zeta[k], rho[k]*xdelta), Y);
78 add(scaled(delta[k], rho[k]*(xzeta-rho[k]*tau[k]*xdelta)), Y);
81 add(scaled(delta[k], rho[k]*xdelta), Y);
82 add(scaled(zeta[k], -xzeta/tau[k]), Y);
89 delta.resize(0); gamma.resize(0); zeta.resize(0);
90 tau.resize(0); rho.resize(0);
93 template<
typename VECT1,
typename VECT2>
94 void update(
const VECT1 &deltak,
const VECT2 &gammak) {
95 T vsp =
vect_sp(deltak, gammak);
96 if (vsp == T(0))
return;
97 size_type N = vect_size(deltak), k = delta.size();
100 delta.resize(k+1); gamma.resize(k+1); zeta.resize(k+1);
101 tau.resize(k+1); rho.resize(k+1);
107 add(delta[k], scaled(Y, -1), zeta[k]);
110 tau[k] =
vect_sp(gammak, zeta[k]);
113 bfgs_invhessian(
int v = 0) { version = v; }
117 template <
typename FUNCTION,
typename DERIVATIVE,
typename VECTOR>
118 void bfgs(
const FUNCTION &f,
const DERIVATIVE &grad, VECTOR &x,
119 int restart, iteration& iter,
int version = 0,
120 double lambda_init=0.001,
double print_norm=1.0) {
122 typedef typename linalg_traits<VECTOR>::value_type T;
123 typedef typename number_traits<T>::magnitude_type R;
125 bfgs_invhessian<VECTOR> invhessian(version);
126 VECTOR r(vect_size(x)), d(vect_size(x)), y(vect_size(x)), r2(vect_size(x));
128 R lambda = lambda_init, valx = f(x), valy;
131 if (iter.get_noisy() >= 1) cout <<
"value " << valx / print_norm <<
" ";
132 while (! iter.finished_vect(r)) {
134 invhessian.hmult(r, d); gmm::scale(d, T(-1));
138 R lambda_min(0), lambda_max(0), m1 = 0.27, m2 = 0.57;
139 bool unbounded =
true, blocked =
false, grad_computed =
false;
142 add(x, scaled(d, lambda), y);
144 if (iter.get_noisy() >= 2) {
146 cout <<
"Wolfe line search, lambda = " << lambda
147 <<
" value = " << valy /print_norm << endl;
152 if (valy <= valx + m1 * lambda * derivative) {
153 grad(y, r2); grad_computed =
true;
155 if (derivative2 >= m2*derivative)
break;
162 if (unbounded) lambda *= R(10);
163 else lambda = (lambda_max + lambda_min) / R(2);
164 if (lambda == lambda_max || lambda == lambda_min)
break;
168 if (valy <= valx + R(2)*gmm::abs(derivative)*lambda &&
169 (lambda < R(lambda_init*1E-8) ||
170 (!unbounded && lambda_max-lambda_min < R(lambda_init*1E-8))))
171 { blocked =
true; lambda = lambda_init;
break; }
176 if (!grad_computed) grad(y, r2);
178 if ((iter.get_iteration() % restart) == 0 || blocked) {
179 if (iter.get_noisy() >= 1) cout <<
"Restart\n";
180 invhessian.restart();
181 if (++nb_restart > 10) {
182 if (iter.get_noisy() >= 1) cout <<
"BFGS is blocked, exiting\n";
187 invhessian.update(gmm::scaled(d,lambda), gmm::scaled(r,-1));
190 copy(r2, r);
copy(y, x); valx = valy;
191 if (iter.get_noisy() >= 1)
192 cout <<
"BFGS value " << valx/print_norm <<
"\t";
198 template <
typename FUNCTION,
typename DERIVATIVE,
typename VECTOR>
199 inline void dfp(
const FUNCTION &f,
const DERIVATIVE &grad, VECTOR &x,
200 int restart, iteration& iter,
int version = 1) {
201 bfgs(f, grad, x, restart, iter, version);
void copy(const L1 &l1, L2 &l2)
*/
void resize(V &v, size_type n)
*/
strongest_value_type< V1, V2 >::value_type vect_sp(const V1 &v1, const V2 &v2)
*/
void add(const L1 &l1, L2 &l2)
*/
Include the base gmm files.
size_t size_type
used as the common size type in the library