casadi custom_jacobian depends on structure of underlying function

Following https://github.com/casadi/casadi/discussions/3245 I came across the following problems (using Matlab):

I am trying to use the custom_jacobian option. However, if the jacobian of f has structural zeros, then my custom jacobian will be zero at those entries as well. Using a dirty fix (setting the option addVariableSum of my function to 1), I can circumvent this problem. Furthermore, I do not understand the options "jac_penalty","always_inline", "never_inline" and why they are needed in order to "successfully" supply a custom jacobian or why they have an effect on the result. Attached some code to reproduce the issue.

~~~~~CODE~~~~~
clc
x = casadi.MX.sym('x',2,1);
y = casadi.MX.sym('y',2,1);

f = [x(1)+y(2); x(2)+y(1)];

x_eval = randn(2,1);
y_eval = randn(2,1);

% regular jacobian
    jac_x_f_fun = casadi.Function('jac_x_f',{x,y},{jacobian(f,x)});
    jac_y_f_fun = casadi.Function('jac_y_f',{x,y},{jacobian(f,y)});
    
    jac_x_f_eval = jac_x_f_fun(x_eval,y_eval)
    jac_y_f_eval = jac_y_f_fun(x_eval,y_eval)


% custom_jacobian
    custom_jac_x_f = casadi.MX(reshape((1:4),2,2));
    custom_jac_y_f = casadi.MX(reshape(4+(1:4),2,2));
    
    for ii=0:1
        fprintf(['\n\n',repmat('-',1,30),'\naddVariableSum=%d\n'],ii)
        f_custom_jac = createCasadiFunctionWithCustomJacobian(f,{x,y},{custom_jac_x_f,custom_jac_y_f},'addVariableSum',ii);
        jac_x_f_custom_jac_fun = casadi.Function('jac_x_f_custom_jac',{x,y},{jacobian(f_custom_jac,x)});
        jac_y_f_custom_jac_fun = casadi.Function('jac_y_f_custom_jac',{x,y},{jacobian(f_custom_jac,y)});
        
        jac_x_f_custom_jac_eval = jac_x_f_custom_jac_fun(x_eval,y_eval)
        jac_y_f_custom_jac_eval = jac_y_f_custom_jac_fun(x_eval,y_eval)
    end



%% functions
function f_custom_jacobian = createCasadiFunctionWithCustomJacobian(f,inputVariables,customJacobians,options)
    %% setup
    arguments
        f
        inputVariables
        customJacobians % has to have the form {df/d_inputVariables{1},df/d_inputVariables{2},...}
        options.functionName = 'f_with_custom_jacobian'
        options.addVariableSum {mustBeMember(options.addVariableSum,[0,1])} = 1
    end
    
    if ~isa(inputVariables,'cell')
        inputVariables = {inputVariables}; % one input variable
    end
    if ~isa(customJacobians,'cell')
        customJacobians = {customJacobians}; % one jacobian
    end

    switch class(inputVariables{1})
        case 'casadi.MX'
            CasadiSym = casadi.MX;
        case 'casadi.SX'
            CasadiSym = casadi.SX;
    end
       
    %% set custom jacobian
    dummy = CasadiSym.sym('dummy',size(f,1)); % why is this needed with this dimension?
    jac_f_custom_fun = casadi.Function(['jac_',options.functionName],{inputVariables{:},dummy},{customJacobians{:}});
      
    % Create new function with custom jacobian
    if options.addVariableSum==1 % jacobian of f w.r.t. all variables is nonzero everywhere
        variableSum = sum(vertcat(inputVariables{:})); % suppose inputVariables={x,y} -> sum(vertcat(inputVariables{:}) = [x_1+x_2+...x_{n_x}+y_1+y_2+...+y_{n_y}]
        f_dense_jac = repmat(variableSum,size(f,1),1)*10^-124; % jacobian(f_dense_jac,vertcat(inputVariables{:}))=ones(size(f,1),n_x+n_y)*10^-124
        f = f + f_dense_jac; % jacobian is nonzero everywhere
    end
    custom_jacobian_options = struct("custom_jacobian", jac_f_custom_fun, "jac_penalty",0, "always_inline",false, "never_inline",true); % why do I need other options than custom_jacobian? what is their meaning?
    f_custom_jacobian_fun = casadi.Function(options.functionName, {inputVariables{:}}, {f}, custom_jacobian_options);
    f_custom_jacobian = f_custom_jacobian_fun(inputVariables{:});

end



~~~~~OUTPUT~~~~~
jac_x_f_eval = 


[[1, 00], 
 [00, 1]]

jac_y_f_eval = 


[[00, 1], 
 [1, 00]]


------------------------------
addVariableSum=0

jac_x_f_custom_jac_eval = 


[[4, 00], 
 [00, 6]]

jac_y_f_custom_jac_eval = 


[[00, 12], 
 [14, 00]]


------------------------------
addVariableSum=1

jac_x_f_custom_jac_eval = 


[[1, 3], 
 [2, 4]]

jac_y_f_custom_jac_eval = 


[[5, 7], 
 [6, 8]]

Aug 28 '23 09:08 MHW-UDS

I've never heard of addVariableSum. Can you post/link to self-contained code?

Aug 28 '23 09:08 jgillis

The option addVariableSum is used in my function createCasadiFunctionWithCustomJacobian, not in the options struct in casadi.Function. The posted code should be self-contained.

Aug 28 '23 10:08 MHW-UDS

Yo.

I think that's the same thing as in here: Case 1: using ad_weight_sp = -1

import casadi as ca
x_sym = ca.MX.sym('x', 1,1)
y_sym = ca.MX.sym('y', 1,1)
# out = x_sym**3*y_sym**2+y_sym**5
out = x_sym + y_sym
out = 0
# out = x_sym**4*y_sym**4 + y_sym**5*x_sym**4
# out = x_sym**3
# If using sparsity, the custom hessian/jacobian only works if the "true" hessian/jacobian wouldn't be zero
ad_weight_sp = -1 # = 1, no sparse matrices, fix above problem
f = ca.Function('t',[x_sym,y_sym],[out],
                {
                    'never_inline':True,
                    # 'der_options': {'never_inline':True,'jac_penalty':0,'jacobian_options': {'never_inline':True,'jac_penalty':0}},
                    # 'jac_penalty':0,
                    # 'ad_weight_sp':ad_weight_sp,
                 }
                )

mx_in = f.mx_in()
jac_f = f.jacobian()
jac_jac_f = jac_f.jacobian()
jac_jac_f_mx_in = jac_jac_f.mx_in()
jac_jac_f_result = jac_jac_f.call({name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)})
jac_jac_f_result['jac_jac_o0_i0_i0'] = 1.2
jac_jac_f_result['jac_jac_o0_i0_i1'] = 2
jac_jac_f_result['jac_jac_o0_i1_i0'] = 3.1
jac_jac_f_result['jac_jac_o0_i1_i1'] = 4.2
jac_jac_f = ca.Function(jac_jac_f.name(),{name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)} |  {k:ca.cse(val) for k, val in jac_jac_f_result.items()}, jac_jac_f.name_in(),jac_jac_f.name_out(),
                        {
                            'print_in':False,
                            'never_inline':True,       
                            'ad_weight_sp':ad_weight_sp,                     
                            })


jac_mx_in = jac_f.mx_in()
jac_result = jac_f.call({name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)})
jac_result['jac_o0_i0'] = 55
jac_result['jac_o0_i1'] = 2
jac_f = ca.Function(jac_f.name(),{name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)} | {k:ca.cse(val) for k, val in jac_result.items()},jac_f.name_in(),jac_f.name_out(),
                {                    
                   'never_inline':True,
                    'custom_jacobian':jac_jac_f,
                    'jac_penalty':0,
                     'ad_weight_sp':ad_weight_sp,
                    },)


f = ca.Function('t',mx_in,[f(*mx_in)],
                
                {                    
                 'never_inline':True,
                    'custom_jacobian':jac_f,
                    'jac_penalty':0,
                     'ad_weight_sp':ad_weight_sp,
                    }
                
                )
print(ca.jacobian(f(x_sym,y_sym),x_sym))
print(ca.gradient(f(x_sym,y_sym),x_sym))
print(ca.jacobian(ca.jacobian(f(x_sym,y_sym),x_sym),x_sym)[0])
print(ca.hessian(f(x_sym,y_sym),ca.veccat(*mx_in))[0])

f_hess = ca.Function(f.name()+'_hess', mx_in, [ca.cse(ca.hessian(f(*mx_in),ca.veccat(*mx_in))[0])],{'cse':True})
print(f_hess(1,1))
f_jac = ca.Function(f.name()+'_jac', mx_in, [ca.cse(ca.jacobian(f(*mx_in),ca.veccat(*mx_in)))],{'cse':True})
print(f_jac(1,1))

Case 2: no ad_weight_sp option

import casadi as ca
x_sym = ca.MX.sym('x', 1,1)
y_sym = ca.MX.sym('y', 1,1)
# out = x_sym**3*y_sym**2+y_sym**5
# out = x_sym + y_sym
# out = 0
out = x_sym**4*y_sym**4 + y_sym**5*x_sym**4
# out = x_sym**3
f = ca.Function('t',[x_sym,y_sym],[out],
                {
                    'never_inline':True,
                 }
                )

mx_in = f.mx_in()
jac_f = f.jacobian()
jac_jac_f = jac_f.jacobian()
jac_jac_f_mx_in = jac_jac_f.mx_in()
jac_jac_f_result = jac_jac_f.call({name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)})
jac_jac_f_result['jac_jac_o0_i0_i0'] = 1.2
jac_jac_f_result['jac_jac_o0_i0_i1'] = 2
jac_jac_f_result['jac_jac_o0_i1_i0'] = 3.1
jac_jac_f_result['jac_jac_o0_i1_i1'] = 4.2
jac_jac_f = ca.Function(jac_jac_f.name(),{name_in:val for name_in,val in zip(jac_jac_f.name_in(),jac_jac_f_mx_in)} |  {k:ca.cse(val) for k, val in jac_jac_f_result.items()}, jac_jac_f.name_in(),jac_jac_f.name_out(),
                        {
                            'print_in':False,
                            'never_inline':True,                      
                            })


jac_mx_in = jac_f.mx_in()
jac_result = jac_f.call({name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)})
# jac_result['jac_o0_i0'] = 55
jac_result['jac_o0_i0'] = jac_mx_in[0]
jac_result['jac_o0_i1'] = 4
jac_f = ca.Function(jac_f.name(),{name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)} | {k:ca.cse(val) for k, val in jac_result.items()},jac_f.name_in(),jac_f.name_out(),
                {                    
                   'never_inline':True,
                    'custom_jacobian':jac_jac_f,
                    'jac_penalty':0,
                    },)


f = ca.Function('t',mx_in,[f(*mx_in)],
                
                {                    
                 'never_inline':True,
                    'custom_jacobian':jac_f,
                    'jac_penalty':0,
                    }
                
                )
print(ca.jacobian(f(x_sym,y_sym),x_sym))
print(ca.gradient(f(x_sym,y_sym),x_sym))
print(ca.jacobian(ca.jacobian(f(x_sym,y_sym),x_sym),x_sym)[0])
print(ca.hessian(f(x_sym,y_sym),ca.veccat(*mx_in))[0])

f_hess = ca.Function(f.name()+'_hess', mx_in, [ca.cse(ca.hessian(f(*mx_in),ca.veccat(*mx_in))[0])],{'cse':True})
print(f_hess(1,1))
f_jac = ca.Function(f.name()+'_jac', mx_in, [ca.cse(ca.jacobian(f(*mx_in),ca.veccat(*mx_in)))],{'cse':True})
print(f_jac(1,1))

Play around by uncommenting the different out expressions. The results from custom jacobians will only show up if the true derivative (original, as in, from automatic diff.) is not 00. So for example, if we set our out = constant, the custom jacobian I've defined won't be used.

Weird stuff also happens. For example, in case 2 set: out = x_sym and

jac_result = jac_f.call({name_in:val for name_in,val in zip(jac_f.name_in(),jac_mx_in)})
jac_result['jac_o0_i0'] = 55
jac_result['jac_o0_i1'] = 4

I get:

print(f_jac(1,1))
[[59, 00]]

Apr 27 '24 19:04 ernestds