Implement bound constraints (e.g. with ADMM or Dog Leg)
Implementing bound constraints with ADMM should be relatively easy:
http://stanford.edu/~eryu/nnlsqr.html
https://web.stanford.edu/~boyd/papers/pdf/admm_slides.pdf (Slide 27)
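For reference, my reading of the splitting those slides describe for non-negative least squares (rho is the ADMM penalty parameter, u the scaled dual variable); this is what the sketch below implements:

    minimize (1/2)*||A*x - b||^2 + I_{z >= 0}(z)   subject to   x = z

    x^{k+1} = argmin_x (1/2)*||A*x - b||^2 + (rho/2)*||x - z^k + u^k||^2
    z^{k+1} = max(0, x^{k+1} + u^k)
    u^{k+1} = u^k + x^{k+1} - z^{k+1}

The x-update is an unconstrained damped least-squares problem, which is why the code below stacks [A; sqrt(rho/2)*I] and solves it with LSMR.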
Quick and dirty Python implementation:
import numpy as np
import scipy.optimize
import scipy.sparse.linalg

def lsmr_with_init(A, b, x0):
    # LSMR has no warm-start argument, so solve for a correction on top of x0
    r0 = b - scipy.sparse.linalg.aslinearoperator(A).matvec(x0)
    deltax_pack = scipy.sparse.linalg.lsmr(A, r0)
    return x0 + deltax_pack[0]

def admm_nnlsqr(A, b):
    # ADMM penalty parameter
    # Optimal choice: product of the maximum and minimum singular values
    # Heuristic choice: mean of the squared singular values, i.e. squared Frobenius norm over dimension
    rho = np.dot(A.ravel(), A.ravel()) / A.shape[1]
    sqrt_half_rho = np.sqrt(rho / 2)
    print('sqrt_half_rho =', sqrt_half_rho)
    # initialisation
    zero_init = False
    if zero_init:
        x_pack = (np.zeros(A.shape[1]), 0)
        z = np.zeros(A.shape[1])
        u = np.zeros(A.shape[1])
    else:
        # from x=z=u=0 the first iteration is normally a projection of
        # the unconstrained damped least squares. Let's forget the damping and check
        # whether we need to constrain things at all
        x_pack = scipy.sparse.linalg.lsmr(A, b)
        if np.all(x_pack[0] > 0):
            # no constraint needed
            return x_pack[0]
        z = np.clip(x_pack[0], 0, np.inf)
        u = x_pack[0] - z
    # construct the damped least squares operator [A; sqrt(rho/2)*I] (assumes a dense A)
    # todo: try and use directly the damped version of lsmr
    Adamped = scipy.sparse.linalg.LinearOperator(
        (A.shape[0] + A.shape[1], A.shape[1]),
        matvec=lambda y: np.concatenate([A.dot(y), sqrt_half_rho * y]),
        rmatvec=lambda y: y[0:A.shape[0]].dot(A) + sqrt_half_rho * y[A.shape[0]:],
    )
    tol = 1e-5
    max_iter = 5000
    diff = np.inf
    iter = 0
    while diff > tol and iter < max_iter:
        iter += 1
        xold = x_pack[0]
        # x-update: damped least squares, warm-started with the previous x
        #x_pack = scipy.sparse.linalg.lsmr(Adamped, np.concatenate([b, sqrt_half_rho * (z - u)]))
        x_pack = (lsmr_with_init(Adamped, np.concatenate([b, sqrt_half_rho * (z - u)]), xold), 0)
        zold = z
        # z-update: projection onto the non-negative orthant
        z = np.clip(x_pack[0] + u, 0, np.inf)
        # stopping criterion: primal residual (alternatives: norm(z - zold), norm(x - xold))
        diff = np.linalg.norm(x_pack[0] - z)
        # scaled dual update
        u += x_pack[0] - z
    return z
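As a quick sanity check of the sketch above (my own illustrative example, not from the references; it assumes the imports and functions above are in scope), one can compare it against scipy.optimize.nnls on a small dense problem:

    # hypothetical smoke test: non-negative ground truth with some zero entries,
    # so the unconstrained solution is not already feasible
    rng = np.random.RandomState(0)
    A = rng.randn(100, 20)
    x_true = np.clip(rng.randn(20), 0, np.inf)
    b = A.dot(x_true) + 0.01 * rng.randn(100)

    x_admm = admm_nnlsqr(A, b)
    x_ref, _ = scipy.optimize.nnls(A, b)

    print('max |x_admm - x_nnls| :', np.max(np.abs(x_admm - x_ref)))
    print('residual ADMM         :', np.linalg.norm(A.dot(x_admm) - b))
    print('residual nnls         :', np.linalg.norm(A.dot(x_ref) - b))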
An alternative could be to implement a dogleg-type optimisation routine:
http://dx.doi.org/10.1002/nla.502 from Bellavia et al.
http://www.cs.uoi.gr/~lagaris/papers/PREPRINTS/dogbox.pdf from Voglis and Lagaris
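Roughly, the scheme the sketch below follows (my paraphrase of the Voglis/Lagaris dogbox idea, not lifted from the paper): at each iteration, freeze the variables sitting on a bound with the gradient pushing outwards (the active set), and compute a step s for the remaining (passive) variables from the box-constrained local model

    min_s ||A_red*s - r||^2   subject to   lb <= s <= ub,   with r = b - A*x

Take the Gauss-Newton minimiser if it lies inside the box; otherwise take the Cauchy point s_c = (||g||^2 / ||A_red*g||^2) * g with g = A_red'*r (scaled back into the box if necessary) and move from it towards the Gauss-Newton point as far as the box allows. In the sketch below the trust-region radius delta is left at infinity, so only the bound x >= 0 actually clips the step.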
Quick and dirty Python implementation:
def dogbox_nnlsqr(A, b):
    # A Rectangular Trust Region Dogleg Approach for
    # Unconstrained and Bound Constrained Nonlinear Optimization
    Aop = scipy.sparse.linalg.aslinearoperator(A)

    def isfeasible(xx, xlb, xub):
        return np.all([xx >= xlb, xx <= xub])

    def safemin(vals):
        # guard: np.min on an empty slice raises, fall back to +inf
        return np.min(vals) if vals.size else np.inf

    x = np.zeros(A.shape[1])
    #passive_set = np.ones(A.shape[1], dtype=bool)
    delta = np.inf
    tol = 1e-5
    max_iter = 50
    iter = 0
    while iter < max_iter:
        iter += 1
        # bounds on the step: x + s >= 0 and |s_i| <= delta (trust region, inactive here)
        lb = np.fmax(-x, -delta)
        ub = delta * np.ones(A.shape[1])
        b_centered = b - Aop.matvec(x)
        # minus the gradient of 0.5*||A*x - b||^2 at the current x
        mg = Aop.rmatvec(b_centered)
        print('iter', iter)
        #print('lb', lb)
        #print('ub', ub)
        #print('mg', mg)
        print('x', x)
        # passive set: variables not blocked at a bound with the gradient pushing outwards
        passive_set = ~(((x <= lb) & (mg < 0)) | ((x >= ub) & (mg > 0)))
        print('passive_set', passive_set)

        def xreduce(xx):
            return xx[passive_set]

        def xexpand(xred):
            xe = np.zeros(A.shape[1])
            xe[passive_set] = xred
            return xe

        # restriction of A to the passive variables
        Ared = scipy.sparse.linalg.LinearOperator(
            (A.shape[0], np.count_nonzero(passive_set)),
            matvec=lambda y: Aop.matvec(xexpand(y)),
            rmatvec=lambda y: xreduce(Aop.rmatvec(y)),
        )
        xrold = xreduce(x)
        br_centered = b - Ared.matvec(xrold)
        # Gauss-Newton step on the passive variables
        xr_pack = scipy.sparse.linalg.lsmr(Ared, br_centered)
        xr_newton = xr_pack[0]
        lbr = xreduce(lb)
        ubr = xreduce(ub)
        if isfeasible(xr_newton, lbr, ubr):
            sr = xr_newton
        else:
            # Cauchy step: minimiser of the local model along the steepest descent direction
            mgr = Ared.rmatvec(br_centered)
            gTgr = np.dot(mgr, mgr)
            mAgr = Ared.matvec(mgr)
            gTATAgr = np.dot(mAgr, mAgr)
            xr_cauchy = (gTgr / gTATAgr) * mgr
            if isfeasible(xr_cauchy, lbr, ubr):
                # move from the Cauchy point towards the Newton point as far as the box allows
                NnC = xr_newton - xr_cauchy
                idx = (NnC > 0)
                alphau = safemin((ubr[idx] - xr_cauchy[idx]) / NnC[idx])
                alphal = safemin((lbr[~idx] - xr_cauchy[~idx]) / NnC[~idx])
                print('alpha', alphau, alphal)
                sr = xr_cauchy + np.fmin(alphau, alphal) * NnC
            else:
                # scale the Cauchy point back into the box, then move towards the Newton point
                idx = (xr_cauchy > 0)
                betau = safemin(ubr[idx] / xr_cauchy[idx])
                betal = safemin(lbr[~idx] / xr_cauchy[~idx])
                print('beta', betau, betal)
                PC = np.fmin(betau, betal) * xr_cauchy
                NnPC = xr_newton - PC
                idx = (NnPC > 0)
                alphau = safemin((ubr[idx] - PC[idx]) / NnPC[idx])
                alphal = safemin((lbr[~idx] - PC[~idx]) / NnPC[~idx])
                print('alpha', alphau, alphal)
                sr = PC + np.fmin(alphau, alphal) * NnPC
        if np.dot(sr, sr) < tol:
            break
        x = x + xexpand(sr)
    return x
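Same kind of illustrative sanity check for the dogleg sketch (again my own example, not from the references; it relies on the imports and definitions above and the sketch is not expected to be robust):

    # hypothetical smoke test on a random dense problem
    rng = np.random.RandomState(1)
    A = rng.randn(80, 15)
    x_true = np.clip(rng.randn(15), 0, np.inf)
    b = A.dot(x_true) + 0.01 * rng.randn(80)

    x_dog = dogbox_nnlsqr(A, b)
    x_ref, _ = scipy.optimize.nnls(A, b)

    print('max |x_dogbox - x_nnls| :', np.max(np.abs(x_dog - x_ref)))
    print('residual dogbox         :', np.linalg.norm(A.dot(x_dog) - b))
    print('residual nnls           :', np.linalg.norm(A.dot(x_ref) - b))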
It would also be nice to compare these approaches to BCLS:
Section 4.2 of https://www.cs.ubc.ca/~mpf/pdfs/2007FriedlanderHatz.pdf
https://www.cs.ubc.ca/~mpf/pubs/computing-nonnegative-tensor-factorizations/
http://www.cs.ubc.ca/~mpf/bcls/