port remaining multimin methods from numerics.optimize.multimin

Open sritchie opened this issue 4 years ago • 0 comments
This probably involves creating a nice API to SELECT these.
Note that #108 rounds out the unimin options.
Here are the methods for multimin:
;;; Variable Metric Methods:
;;;    Fletcher-Powell (choice of line search)
;;;    Broyden-Fletcher-Goldfarb-Shanno (Davidon's line search)


;;; The following utility procedure returns a gradient function given a
;;; differentiable function f of a vector of length n. In general, a gradient
;;; function accepts an n-vector and returns a vector of derivatives. In this
;;; case, the derivatives are estimated by richardson-extrapolation of a
;;; central difference quotient, with the convergence tolerance being a
;;; specified parameter.

(define (generate-gradient-procedure f n tol)
  (lambda (x)
    (generate-vector
      n
      (lambda (i)
        (richardson-limit
          (let ((fi (lambda (t)
                      (f (vector+vector x
         (scalar*vector t
           (v:make-basis-unit n i)))))))
            (lambda (h) (/ (- (fi h) (fi (- h))) 2 h)))
          (max 0.1 (* 0.1 (abs (vector-ref x i)))) ;starting h
          2  ;ord -- see doc for RE.SCM
          2  ;inc
          tol)))))



;;; The following line-minimization procedure is Davidon's original
;;; recommendation. It does a bracketing search using gradients, and
;;; then interpolates the straddled minimum with a cubic.

(define (line-min-davidon f g x v est)
  (define (t->x t) (vector+vector x (scalar*vector t v)))
  (define (linef t) (f (t->x t)))
  (define (lineg t) (g (t->x t)))
  (define f0 (linef 0))
  (define g0 (lineg 0))
  (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
  (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
             (iter 0))
    (if (> iter 100)
        (list 'no-min)
        (let ((ft (linef t))
              (gt (lineg t)))
          (if (or (>= ft f0)
                  (>= (v:dot-product v gt) 0))
              (let* ((vg0 (v:dot-product v g0))
                     (vgt (v:dot-product v gt))
                     (z (+ (* 3 (- f0 ft) (/ 1 t)) vg0 vgt))
                     (w (sqrt (- (* z z) (* vg0 vgt))))
                     (tstar (* t (- 1 (/ (+ vgt w (- z))
                                         (+ vgt (- vg0) (* 2 w))))))
                     (fstar (linef tstar)))
                 (if (< fstar f0)
                     (list 'ok (t->x tstar) fstar)
                     (loop tstar (+ iter 1))))
              (loop (* t 2) (+ iter 1)))))))



;;; The following line-minimization procedure is based on Brent's
;;; algorithm.

(define (line-min-brent f g x v est)
  (define (t->x t) (vector+vector x (scalar*vector t v)))
  (define (linef t) (f (t->x t)))
  (define (lineg t) (g (t->x t)))
  (define f0 (linef 0))
  (define g0 (lineg 0))
  (define s0 (/ (- f0 est) -.5 (v:dot-product g0 v)))
  (let loop ((t (if (and (positive? s0) (< s0 1)) s0 1))
             (iter 0))
    (if (> iter 100)
        (list 'no-min)
        (let ((ft (linef t))
              (gt (lineg t)))
          (if (or (>= ft f0)
                  (>= (v:dot-product v gt) 0))
              (let* ((result (brent-min linef 0 t *sqrt-machine-epsilon*))
                     (tstar (car result))
                     (fstar (cadr result)))
                (list 'ok (t->x tstar) fstar))
              (loop (* t 2) (+ iter 1)))))))


;;; In the following implementation of the Davidon-Fletcher-Powell
;;;  algorithm, f is a function of a single vector argument that returns
;;;  a real value to be minimized, g is the vector-valued gradient and
;;;  x is a (vector) starting point, and est is an estimate of the minimum
;;;  function value. If g is '(), then a numerical approximation is
;;;  substituted using GENERATE-GRADIENT-PROCEDURE. ftol is the convergence
;;;  criterion: the search is stopped when the relative change in f falls
;;;  below ftol.

(define fletcher-powell-wallp? false)

(define (fletcher-powell line-search f g x est ftol maxiter)
  (let ((n (vector-length x)))
    (if (null? g) (set! g (generate-gradient-procedure
                           f n (* 1000 *machine-epsilon*))))
    (let loop ((H (m:make-identity n))
               (x x)
               (fx (f x))
               (gx (g x))
               (count 0))
      (if fletcher-powell-wallp? (print (list x fx gx)))
      (let ((v (matrix*vector H (scalar*vector -1 gx))))
        (if (positive? (v:dot-product v gx))
            (begin
              (if fletcher-powell-wallp?
                  (display (list "H reset to Identity at iteration" count)))
              (loop (m:make-identity n) x fx gx count))
            (let ((r (line-search f g x v est)))
              (if (eq? (car r) 'no-min)
                  (list 'no-min (cons x fx) count)
                  (let ((newx (cadr r))
                        (newfx (caddr r)))
                    (if (close-enuf? newfx fx ftol) ;convergence criterion
                        (list 'ok (cons newx newfx) count)
                        (if (fix:= count maxiter)
                            (list 'maxcount (cons newx newfx) count)
                            (let* ((newgx (g newx))
                                   (dx (vector-vector newx x))
                                   (dg (vector-vector newgx gx))
                                   (Hdg (matrix*vector H dg))
                                   (A (matrix*scalar
                                       (m:outer-product (vector->column-matrix dx)
                                                        (vector->row-matrix dx))
                                       (/ 1 (v:dot-product dx dg))))
                                   (B (matrix*scalar
                                       (m:outer-product (vector->column-matrix Hdg)
                                                        (vector->row-matrix Hdg))
                                       (/ -1 (v:dot-product dg Hdg))))
                                   (newH (matrix+matrix H (matrix+matrix A B))))
                              (loop newH newx newfx newgx (fix:+ count 1)))))))))))))


;;; The following procedures, DFP and DFP-BRENT, call directly upon
;;; FLETCHER-POWELL. The first uses Davidon's line search which is
;;; efficient, and would be the normal choice. The second uses Brent's
;;; line search, which is less efficient but more reliable.

(define (dfp f g x est ftol maxiter)
  (fletcher-powell line-min-davidon f g x est ftol maxiter))

(define (dfp-brent f g x est ftol maxiter)
  (fletcher-powell line-min-brent f g x est ftol maxiter))

;;; The following is a variation on DFP, due (independently, we are told)
;;; to Broyden, Fletcher, Goldfarb, and Shanno. It differs in the formula
;;; used to update H, and is said to be more immune than DFP to imprecise
;;; line-search. Consequently, it is offered with Davidon's line search
;;; wired in.

(define bfgs-wallp? false)

(define (bfgs f g x est ftol maxiter)
  (let ((n (vector-length x)))
    (if (null? g) (set! g (generate-gradient-procedure
                           f n (* 1000 *machine-epsilon*))))
    (let loop ((H (m:make-identity n))
               (x x)
               (fx (f x))
               (gx (g x))
               (count 0))
      (if bfgs-wallp? (print (list x fx gx)))
      (let ((v (matrix*vector H (scalar*vector -1 gx))))
        (if (positive? (v:dot-product v gx))
            (begin
              (if bfgs-wallp?
                  (display (list "H reset to Identity at iteration" count)))
              (loop (m:make-identity n) x fx gx count))
            (let ((r (line-min-davidon f g x v est)))
              (if (eq? (car r) 'no-min)
                  (list 'no-min (cons x fx) count)
                  (let ((newx (cadr r))
                        (newfx (caddr r)))
                    (if (close-enuf? newfx fx ftol) ;convergence criterion
                        (list 'ok (cons newx newfx) count)
                        (if (fix:= count maxiter)
                            (list 'maxcount (cons newx newfx) count)
                            (let* ((newgx (g newx))
                                   (dx (vector-vector newx x))
                                   (dg (vector-vector newgx gx))
                                   (Hdg (matrix*vector H dg))
                                   (dxdg (v:dot-product dx dg))
                                   (dgHdg (v:dot-product dg Hdg))
                                   (u (vector-vector (scalar*vector (/ 1 dxdg) dx)
                                                     (scalar*vector (/ 1 dgHdg) Hdg)))
                                   (A (matrix*scalar
                                       (m:outer-product (vector->column-matrix dx)
                                                        (vector->row-matrix dx))
                                       (/ 1 dxdg)))
                                   (B (matrix*scalar
                                       (m:outer-product (vector->column-matrix Hdg)
                                                        (vector->row-matrix Hdg))
                                       (/ -1 dgHdg)))
                                   (C (matrix*scalar
                                       (m:outer-product (vector->column-matrix u)
                                                        (vector->row-matrix u))
                                       dgHdg))
                                   (newH
                                    (matrix+matrix (matrix+matrix H A)
                                                   (matrix+matrix B C))))
                              (loop newH newx newfx newgx (fix:+ count 1)))))))))))))
Dec 01 '21 04:12 sritchie