custom_jacobian depends on structure of underlying function
Following https://github.com/casadi/casadi/discussions/3245 I came across the following problems (using Matlab):
I am trying to use the custom_jacobian option. However, if the jacobian of f has structural zeros, then my custom jacobian will be zero at those entries as well. Using a dirty fix (setting the option addVariableSum of my function to 1), I can circumvent this problem. Furthermore, I do not understand the options "jac_penalty","always_inline", "never_inline" and why they are needed in order to "successfully" supply a custom jacobian or why they have an effect on the result. Attached some code to reproduce the issue.
~~~~~CODE~~~~~
clc
x = casadi.MX.sym('x',2,1);
y = casadi.MX.sym('y',2,1);
f = [x(1)+y(2); x(2)+y(1)];
x_eval = randn(2,1);
y_eval = randn(2,1);
% regular jacobian
jac_x_f_fun = casadi.Function('jac_x_f',{x,y},{jacobian(f,x)});
jac_y_f_fun = casadi.Function('jac_y_f',{x,y},{jacobian(f,y)});
jac_x_f_eval = jac_x_f_fun(x_eval,y_eval)
jac_y_f_eval = jac_y_f_fun(x_eval,y_eval)
% custom_jacobian
custom_jac_x_f = casadi.MX(reshape((1:4),2,2));
custom_jac_y_f = casadi.MX(reshape(4+(1:4),2,2));
for ii=0:1
fprintf(['\n\n',repmat('-',1,30),'\naddVariableSum=%d\n'],ii)
f_custom_jac = createCasadiFunctionWithCustomJacobian(f,{x,y},{custom_jac_x_f,custom_jac_y_f},'addVariableSum',ii);
jac_x_f_custom_jac_fun = casadi.Function('jac_x_f_custom_jac',{x,y},{jacobian(f_custom_jac,x)});
jac_y_f_custom_jac_fun = casadi.Function('jac_y_f_custom_jac',{x,y},{jacobian(f_custom_jac,y)});
jac_x_f_custom_jac_eval = jac_x_f_custom_jac_fun(x_eval,y_eval)
jac_y_f_custom_jac_eval = jac_y_f_custom_jac_fun(x_eval,y_eval)
end
%% functions
function f_custom_jacobian = createCasadiFunctionWithCustomJacobian(f,inputVariables,customJacobians,options)
%% setup
arguments
f
inputVariables
customJacobians % has to have the form {df/d_inputVariables{1},df/d_inputVariables{2},...}
options.functionName = 'f_with_custom_jacobian'
options.addVariableSum {mustBeMember(options.addVariableSum,[0,1])} = 1
end
if ~isa(inputVariables,'cell')
inputVariables = {inputVariables}; % one input variable
end
if ~isa(customJacobians,'cell')
customJacobians = {customJacobians}; % one jacobian
end
switch class(inputVariables{1})
case 'casadi.MX'
CasadiSym = casadi.MX;
case 'casadi.SX'
CasadiSym = casadi.SX;
end
%% set custom jacobian
dummy = CasadiSym.sym('dummy',size(f,1)); % why is this needed with this dimension?
jac_f_custom_fun = casadi.Function(['jac_',options.functionName],{inputVariables{:},dummy},{customJacobians{:}});
% Create new function with custom jacobian
if options.addVariableSum==1 % jacobian of f w.r.t. all variables is nonzero everywhere
variableSum = sum(vertcat(inputVariables{:})); % suppose inputVariables={x,y} -> sum(vertcat(inputVariables{:}) = [x_1+x_2+...x_{n_x}+y_1+y_2+...+y_{n_y}]
f_dense_jac = repmat(variableSum,size(f,1),1)*10^-124; % jacobian(f_dense_jac,vertcat(inputVariables{:}))=ones(size(f,1),n_x+n_y)*10^-124
f = f + f_dense_jac; % jacobian is nonzero everywhere
end
custom_jacobian_options = struct("custom_jacobian", jac_f_custom_fun, "jac_penalty",0, "always_inline",false, "never_inline",true); % why do I need other options than custom_jacobian? what is their meaning?
f_custom_jacobian_fun = casadi.Function(options.functionName, {inputVariables{:}}, {f}, custom_jacobian_options);
f_custom_jacobian = f_custom_jacobian_fun(inputVariables{:});
end
~~~~~OUTPUT~~~~~
jac_x_f_eval =
[[1, 00],
[00, 1]]
jac_y_f_eval =
[[00, 1],
[1, 00]]
------------------------------
addVariableSum=0
jac_x_f_custom_jac_eval =
[[4, 00],
[00, 6]]
jac_y_f_custom_jac_eval =
[[00, 12],
[14, 00]]
------------------------------
addVariableSum=1
jac_x_f_custom_jac_eval =
[[1, 3],
[2, 4]]
jac_y_f_custom_jac_eval =
[[5, 7],
[6, 8]]
I've never heard of addVariableSum. Can you post/link to self-contained code?
The option addVariableSum is used in my function createCasadiFunctionWithCustomJacobian, not in the options struct in casadi.Function. The posted code should be self-contained.
Yo.
I think that's the same thing as in here: Case 1: using ad_weight_sp = -1
import casadi as ca
x_sym = ca.MX.sym('x', 1,1)
y_sym = ca.MX.sym('y', 1,1)
# out = x_sym**3*y_sym**2+y_sym**5
out = x_sym + y_sym
out = 0
# out = x_sym**4*y_sym**4 + y_sym**5*x_sym**4
# out = x_sym**3
# If using sparsity, the custom hessian/jacobian only works if the "true" hessian/jacobian wouldn't be zero
ad_weight_sp = -1 # = 1, no sparse matrices, fix above problem
f = ca.Function('t',[x_sym,y_sym],[out],
{
'never_inline':True,
# 'der_options': {'never_inline':True,'jac_penalty':0,'jacobian_options': {'never_inline':True,'jac_penalty':0}},
# 'jac_penalty':0,
# 'ad_weight_sp':ad_weight_sp,
}
)
mx_in = f.mx_in()
jac_f = f.jacobian()
jac_jac_f = jac_f.jacobian()
jac_jac_f_mx_in = jac_jac_f.mx_in()
jac_jac_f_result = jac_jac_f.call({name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)})
jac_jac_f_result['jac_jac_o0_i0_i0'] = 1.2
jac_jac_f_result['jac_jac_o0_i0_i1'] = 2
jac_jac_f_result['jac_jac_o0_i1_i0'] = 3.1
jac_jac_f_result['jac_jac_o0_i1_i1'] = 4.2
jac_jac_f = ca.Function(jac_jac_f.name(),{name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)} | {k:ca.cse(val) for k, val in jac_jac_f_result.items()}, jac_jac_f.name_in(),jac_jac_f.name_out(),
{
'print_in':False,
'never_inline':True,
'ad_weight_sp':ad_weight_sp,
})
jac_mx_in = jac_f.mx_in()
jac_result = jac_f.call({name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)})
jac_result['jac_o0_i0'] = 55
jac_result['jac_o0_i1'] = 2
jac_f = ca.Function(jac_f.name(),{name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)} | {k:ca.cse(val) for k, val in jac_result.items()},jac_f.name_in(),jac_f.name_out(),
{
'never_inline':True,
'custom_jacobian':jac_jac_f,
'jac_penalty':0,
'ad_weight_sp':ad_weight_sp,
},)
f = ca.Function('t',mx_in,[f(*mx_in)],
{
'never_inline':True,
'custom_jacobian':jac_f,
'jac_penalty':0,
'ad_weight_sp':ad_weight_sp,
}
)
print(ca.jacobian(f(x_sym,y_sym),x_sym))
print(ca.gradient(f(x_sym,y_sym),x_sym))
print(ca.jacobian(ca.jacobian(f(x_sym,y_sym),x_sym),x_sym)[0])
print(ca.hessian(f(x_sym,y_sym),ca.veccat(*mx_in))[0])
f_hess = ca.Function(f.name()+'_hess', mx_in, [ca.cse(ca.hessian(f(*mx_in),ca.veccat(*mx_in))[0])],{'cse':True})
print(f_hess(1,1))
f_jac = ca.Function(f.name()+'_jac', mx_in, [ca.cse(ca.jacobian(f(*mx_in),ca.veccat(*mx_in)))],{'cse':True})
print(f_jac(1,1))
Case 2: no ad_weight_sp option
import casadi as ca
x_sym = ca.MX.sym('x', 1,1)
y_sym = ca.MX.sym('y', 1,1)
# out = x_sym**3*y_sym**2+y_sym**5
# out = x_sym + y_sym
# out = 0
out = x_sym**4*y_sym**4 + y_sym**5*x_sym**4
# out = x_sym**3
f = ca.Function('t',[x_sym,y_sym],[out],
{
'never_inline':True,
}
)
mx_in = f.mx_in()
jac_f = f.jacobian()
jac_jac_f = jac_f.jacobian()
jac_jac_f_mx_in = jac_jac_f.mx_in()
jac_jac_f_result = jac_jac_f.call({name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)})
jac_jac_f_result['jac_jac_o0_i0_i0'] = 1.2
jac_jac_f_result['jac_jac_o0_i0_i1'] = 2
jac_jac_f_result['jac_jac_o0_i1_i0'] = 3.1
jac_jac_f_result['jac_jac_o0_i1_i1'] = 4.2
jac_jac_f = ca.Function(jac_jac_f.name(),{name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)} | {k:ca.cse(val) for k, val in jac_jac_f_result.items()}, jac_jac_f.name_in(),jac_jac_f.name_out(),
{
'print_in':False,
'never_inline':True,
})
jac_mx_in = jac_f.mx_in()
jac_result = jac_f.call({name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)})
# jac_result['jac_o0_i0'] = 55
jac_result['jac_o0_i0'] = jac_mx_in[0]
jac_result['jac_o0_i1'] = 4
jac_f = ca.Function(jac_f.name(),{name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)} | {k:ca.cse(val) for k, val in jac_result.items()},jac_f.name_in(),jac_f.name_out(),
{
'never_inline':True,
'custom_jacobian':jac_jac_f,
'jac_penalty':0,
},)
f = ca.Function('t',mx_in,[f(*mx_in)],
{
'never_inline':True,
'custom_jacobian':jac_f,
'jac_penalty':0,
}
)
print(ca.jacobian(f(x_sym,y_sym),x_sym))
print(ca.gradient(f(x_sym,y_sym),x_sym))
print(ca.jacobian(ca.jacobian(f(x_sym,y_sym),x_sym),x_sym)[0])
print(ca.hessian(f(x_sym,y_sym),ca.veccat(*mx_in))[0])
f_hess = ca.Function(f.name()+'_hess', mx_in, [ca.cse(ca.hessian(f(*mx_in),ca.veccat(*mx_in))[0])],{'cse':True})
print(f_hess(1,1))
f_jac = ca.Function(f.name()+'_jac', mx_in, [ca.cse(ca.jacobian(f(*mx_in),ca.veccat(*mx_in)))],{'cse':True})
print(f_jac(1,1))
Play around by uncommenting the different out expressions. The results from custom jacobians will only show up if the true derivative (original, as in, from automatic diff.) is not 00. So for example, if we set our out = constant, the custom jacobian I've defined won't be used.
Weird stuff also happens. For example, in case 2 set:
out = x_sym
and
jac_result = jac_f.call({name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)})
jac_result['jac_o0_i0'] = 55
jac_result['jac_o0_i1'] = 4
I get:
print(f_jac(1,1))
[[59, 00]]