port remaining multimin methods from numerics.optimize.multimin
This probably involves creating a nice API to SELECT these.
Note that #108 rounds out the unimin options.
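One possible shape for that selection API, sketched purely as a hypothetical (the name multimin-select and the dispatch-on-a-symbol design are invented here, not anything that exists yet): a single entry point that routes a common argument list to whichever of the minimizers below the caller names.

(define (multimin-select method f g x est ftol maxiter)
  ;; hypothetical sketch only -- name and shape are up for discussion
  (case method
    ((dfp)       (dfp f g x est ftol maxiter))
    ((dfp-brent) (dfp-brent f g x est ftol maxiter))
    ((bfgs)      (bfgs f g x est ftol maxiter))
    (else (error "Unknown multimin method:" method))))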
Here are the methods for multimin:
;;; Variable Metric Methods:
;;; Fletcher-Powell (choice of line search)
;;; Broyden-Fletcher-Goldfarb-Shanno (Davidon's line search)
;;; The following utility procedure returns a gradient function given a
;;; differentiable function f of a vector of length n. In general, a gradient
;;; function accepts an n-vector and returns a vector of derivatives. In this
;;; case, the derivatives are estimated by richardson-extrapolation of a
;;; central difference quotient, with the convergence tolerance being a
;;; specified parameter.
(define (generate-gradient-procedure f n tol)
  (lambda (x)
    (generate-vector
     n
     (lambda (i)
       (richardson-limit
        (let ((fi (lambda (t)
                    (f (vector+vector x
                                      (scalar*vector t
                                                     (v:make-basis-unit n i)))))))
          (lambda (h) (/ (- (fi h) (fi (- h))) 2 h)))
        (max 0.1 (* 0.1 (abs (vector-ref x i)))) ;starting h
        2                                        ;ord -- see doc for RE.SCM
        2                                        ;inc
        tol)))))
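Reading the code above off into a formula: each gradient component is estimated from a central difference quotient along the i-th basis direction,

$$\frac{\partial f}{\partial x_i}(x) \approx \frac{f(x + h\,e_i) - f(x - h\,e_i)}{2h},$$

with starting step $h = \max(0.1,\; 0.1\,|x_i|)$ and richardson-limit extrapolating $h \to 0$ until the estimate is within the requested tol.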
;;; The following line-minimization procedure is Davidon's original
;;; recommendation. It does a bracketing search using gradients, and
;;; then interpolates the straddled minimum with a cubic.
(define (line-min-davidon f g x v est)
  (define (t->x t) (vector+vector x (scalar*vector t v)))
  (define (linef t) (f (t->x t)))
  (define (lineg t) (g (t->x t)))
  (define f0 (linef 0))
  (define g0 (lineg 0))
  (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
  (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
             (iter 0))
    (if (> iter 100)
        (list 'no-min)
        (let ((ft (linef t))
              (gt (lineg t)))
          (if (or (>= ft f0)
                  (>= (v:dot-product v gt) 0))
              (let* ((vg0 (v:dot-product v g0))
                     (vgt (v:dot-product v gt))
                     (z (+ (* 3 (- f0 ft) (/ 1 t)) vg0 vgt))
                     (w (sqrt (- (* z z) (* vg0 vgt))))
                     (tstar (* t (- 1 (/ (+ vgt w (- z))
                                         (+ vgt (- vg0) (* 2 w))))))
                     (fstar (linef tstar)))
                (if (< fstar f0)
                    (list 'ok (t->x tstar) fstar)
                    (loop tstar (+ iter 1))))
              (loop (* t 2) (+ iter 1)))))))
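Written out, the interpolation step above works like this: once the loop finds a t where f(t) >= f(0) or the directional derivative v . g(t) is non-negative, the cubic fit through the two endpoint values and slopes gives

$$z = \frac{3\,(f_0 - f_t)}{t} + v\cdot g_0 + v\cdot g_t, \qquad w = \sqrt{z^2 - (v\cdot g_0)(v\cdot g_t)},$$

$$t^{*} = t\left(1 - \frac{v\cdot g_t + w - z}{v\cdot g_t - v\cdot g_0 + 2w}\right),$$

and the step is accepted only if f(t*) actually improves on f(0); otherwise the loop retries from t*.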
;;; The following line-minimization procedure is based on Brent's
;;; algorithm.
(define (line-min-brent f g x v est)
  (define (t->x t) (vector+vector x (scalar*vector t v)))
  (define (linef t) (f (t->x t)))
  (define (lineg t) (g (t->x t)))
  (define f0 (linef 0))
  (define g0 (lineg 0))
  (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
  (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
             (iter 0))
    (if (> iter 100)
        (list 'no-min)
        (let ((ft (linef t))
              (gt (lineg t)))
          (if (or (>= ft f0)
                  (>= (v:dot-product v gt) 0))
              (let* ((result (brent-min linef 0 t *sqrt-machine-epsilon*))
                     (tstar (car result))
                     (fstar (cadr result)))
                (list 'ok (t->x tstar) fstar))
              (loop (* t 2) (+ iter 1)))))))
;;; In the following implementation of the Davidon-Fletcher-Powell
;;; algorithm, f is a function of a single vector argument that returns
;;; a real value to be minimized, g is the vector-valued gradient and
;;; x is a (vector) starting point, and est is an estimate of the minimum
;;; function value. If g is '(), then a numerical approximation is
;;; substituted using GENERATE-GRADIENT-PROCEDURE. ftol is the convergence
;;; criterion: the search is stopped when the relative change in f falls
;;; below ftol.
(define fletcher-powell-wallp? false)

(define (fletcher-powell line-search f g x est ftol maxiter)
  (let ((n (vector-length x)))
    (if (null? g) (set! g (generate-gradient-procedure
                           f n (* 1000 *machine-epsilon*))))
    (let loop ((H (m:make-identity n))
               (x x)
               (fx (f x))
               (gx (g x))
               (count 0))
      (if fletcher-powell-wallp? (print (list x fx gx)))
      (let ((v (matrix*vector H (scalar*vector -1 gx))))
        (if (positive? (v:dot-product v gx))
            (begin
              (if fletcher-powell-wallp?
                  (display (list "H reset to Identity at iteration" count)))
              (loop (m:make-identity n) x fx gx count))
            (let ((r (line-search f g x v est)))
              (if (eq? (car r) 'no-min)
                  (list 'no-min (cons x fx) count)
                  (let ((newx (cadr r))
                        (newfx (caddr r)))
                    (if (close-enuf? newfx fx ftol) ;convergence criterion
                        (list 'ok (cons newx newfx) count)
                        (if (fix:= count maxiter)
                            (list 'maxcount (cons newx newfx) count)
                            (let* ((newgx (g newx))
                                   (dx (vector-vector newx x))
                                   (dg (vector-vector newgx gx))
                                   (Hdg (matrix*vector H dg))
                                   (A (matrix*scalar
                                       (m:outer-product (vector->column-matrix dx)
                                                        (vector->row-matrix dx))
                                       (/ 1 (v:dot-product dx dg))))
                                   (B (matrix*scalar
                                       (m:outer-product (vector->column-matrix Hdg)
                                                        (vector->row-matrix Hdg))
                                       (/ -1 (v:dot-product dg Hdg))))
                                   (newH (matrix+matrix H (matrix+matrix A B))))
                              (loop newH newx newfx newgx (fix:+ count 1)))))))))))))
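The A and B terms above are the classic DFP rank-two update of the inverse-Hessian approximation H. With $\Delta x$ = dx = newx - x and $\Delta g$ = dg = newgx - gx as in the code,

$$H_{k+1} = H_k + \frac{\Delta x\,\Delta x^{\mathsf T}}{\Delta x \cdot \Delta g} - \frac{(H_k \Delta g)(H_k \Delta g)^{\mathsf T}}{\Delta g \cdot H_k \Delta g}.$$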
;;; The following procedures, DFP and DFP-BRENT, call directly upon
;;; FLETCHER-POWELL. The first uses Davidon's line search which is
;;; efficient, and would be the normal choice. The second uses Brent's
;;; line search, which is less efficient but more reliable.
(define (dfp f g x est ftol maxiter)
  (fletcher-powell line-min-davidon f g x est ftol maxiter))

(define (dfp-brent f g x est ftol maxiter)
  (fletcher-powell line-min-brent f g x est ftol maxiter))
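A hypothetical usage sketch (the quadratic test function, tolerances, and iteration limit are made up for illustration): minimize f(x, y) = (x - 1)^2 + 2 (y + 3)^2 from the origin, passing '() for g so the numerical gradient is generated automatically.

(dfp (lambda (v)
       (let ((x (vector-ref v 0))
             (y (vector-ref v 1)))
         (+ (expt (- x 1) 2)
            (* 2 (expt (+ y 3) 2)))))
     '()       ; no analytic gradient supplied
     '#(0 0)   ; starting point
     0         ; est: guess at the minimum value
     1e-10     ; ftol
     100)      ; maxiter
;; result should have the shape (ok (<x-vector> . <f-value>) <iterations>),
;; with <x-vector> near #(1 -3) and <f-value> near 0.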
;;; The following is a variation on DFP, due (independently, we are told)
;;; to Broyden, Fletcher, Goldfarb, and Shanno. It differs in the formula
;;; used to update H, and is said to be more immune than DFP to imprecise
;;; line-search. Consequently, it is offered with Davidon's line search
;;; wired in.
(define bfgs-wallp? false)

(define (bfgs f g x est ftol maxiter)
  (let ((n (vector-length x)))
    (if (null? g) (set! g (generate-gradient-procedure
                           f n (* 1000 *machine-epsilon*))))
    (let loop ((H (m:make-identity n))
               (x x)
               (fx (f x))
               (gx (g x))
               (count 0))
      (if bfgs-wallp? (print (list x fx gx)))
      (let ((v (matrix*vector H (scalar*vector -1 gx))))
        (if (positive? (v:dot-product v gx))
            (begin
              (if bfgs-wallp?
                  (display (list "H reset to Identity at iteration" count)))
              (loop (m:make-identity n) x fx gx count))
            (let ((r (line-min-davidon f g x v est)))
              (if (eq? (car r) 'no-min)
                  (list 'no-min (cons x fx) count)
                  (let ((newx (cadr r))
                        (newfx (caddr r)))
                    (if (close-enuf? newfx fx ftol) ;convergence criterion
                        (list 'ok (cons newx newfx) count)
                        (if (fix:= count maxiter)
                            (list 'maxcount (cons newx newfx) count)
                            (let* ((newgx (g newx))
                                   (dx (vector-vector newx x))
                                   (dg (vector-vector newgx gx))
                                   (Hdg (matrix*vector H dg))
                                   (dxdg (v:dot-product dx dg))
                                   (dgHdg (v:dot-product dg Hdg))
                                   (u (vector-vector (scalar*vector (/ 1 dxdg) dx)
                                                     (scalar*vector (/ 1 dgHdg) Hdg)))
                                   (A (matrix*scalar
                                       (m:outer-product (vector->column-matrix dx)
                                                        (vector->row-matrix dx))
                                       (/ 1 dxdg)))
                                   (B (matrix*scalar
                                       (m:outer-product (vector->column-matrix Hdg)
                                                        (vector->row-matrix Hdg))
                                       (/ -1 dgHdg)))
                                   (C (matrix*scalar
                                       (m:outer-product (vector->column-matrix u)
                                                        (vector->row-matrix u))
                                       dgHdg))
                                   (newH
                                    (matrix+matrix (matrix+matrix H A)
                                                   (matrix+matrix B C))))
                              (loop newH newx newfx newgx (fix:+ count 1)))))))))))))
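For comparison with the DFP update, BFGS adds one more rank-one correction built from the vector u computed above. With the same $\Delta x$ and $\Delta g$,

$$u = \frac{\Delta x}{\Delta x \cdot \Delta g} - \frac{H_k \Delta g}{\Delta g \cdot H_k \Delta g},$$

$$H_{k+1} = H_k + \frac{\Delta x\,\Delta x^{\mathsf T}}{\Delta x \cdot \Delta g} - \frac{(H_k \Delta g)(H_k \Delta g)^{\mathsf T}}{\Delta g \cdot H_k \Delta g} + (\Delta g \cdot H_k \Delta g)\, u\, u^{\mathsf T},$$

which is exactly the H + A + B + C sum formed in the code.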