pyscfad Orbital response for PM localized orbitals does not appear to agree with finite differences

Hi everyone,

I am trying to compare the orbital response with respect to an external electric field to finite differences. Unfortunately, I am not able to get these to agree:

import numpy as np
import jax
from jax import numpy as jnp
from pyscfad import gto, scf, config
from pyscfad.lo import pipek
from pyscfad.ops import stop_grad

jax.config.update("jax_enable_x64", True)
config.update("pyscfad_scf_implicit_diff", True)


# decimal tolerance
TOL = 5

# finite difference shift
FIN_DIFF = 1e-5

# external electric field
e_field = np.zeros(3, dtype=np.float64)

# gauge origin
gauge_origin = np.zeros(3, dtype=np.float64)


def orb_loc(e_field, mol, gauge_origin):
    """
    orbital localization
    """
    # mf calc
    mf = scf.RHF(mol)

    # electronic contribution due to electric field
    with mol.with_common_origin(gauge_origin):
        ao_dip = mol.intor_symmetric("int1e_r", comp=3)
    ext_el = jnp.einsum("x,xij->ij", e_field, ao_dip)

    # modified one-electron hamiltonian
    h1 = mf.get_hcore()
    mf.get_hcore = lambda *args, **kwargs: h1 + ext_el
    mf.kernel()

    # occupied orbitals
    occ_mo = np.where(mf.mo_occ == 2.0)[0]

    # stop tracing mo coefficients as these only serve as initial guess and the
    # localization condition is implicitly differentiated anyways
    mo_coeff_occ = np.asarray(stop_grad(mf.mo_coeff[:, occ_mo]))

    # perform localization
    mlo = pipek.PM(mol, mo_coeff_occ)
    mlo.pop_method = "mulliken"
    mlo.conv_tol = 1e-12
    _ = mlo.kernel()

    # perform Jacobi sweep to ensure stable minimum is found
    mlo = pipek.jacobi_sweep(mlo)

    # construct untraced initial guess
    u0 = mo_coeff_occ.T @ mf.get_ovlp() @ mlo.mo_coeff

    # perform orbital localization which can be implicitly differentiated, the
    # occupied space is now traced again
    mo_coeff = pipek.pm(
        mol,
        mf.mo_coeff[:, occ_mo],
        conv_tol=1e-12,
        pop_method="mulliken",
        init_guess=u0,
    )

    return mo_coeff


# init molecule
mol = gto.Mole(
    verbose=0,
    output=None,
    basis="sto-3g",
    symmetry=True,
    atom="""
        O        0.000000    0.000000    0.117790
        H        0.000000    0.755453   -0.471161
        H        0.000000   -0.755453   -0.471161
    """,
)
mol.build(trace_coords=False, trace_exp=False, trace_ctr_coeff=False)

# AD gradient
ad_grad = jax.jacrev(orb_loc)(e_field, mol, gauge_origin)

# finite differences gradient
fin_diff_grad = np.empty_like(ad_grad)
for i in range(3):
    shift = np.zeros(3, dtype=np.float64)
    shift[i] = FIN_DIFF
    c_p = orb_loc(
        e_field + shift,
        mol,
        gauge_origin,
    )
    c_m = orb_loc(
        e_field - shift,
        mol,
        gauge_origin,
    )

    # ensure numerical orbital gradient is calculated for the same orbitals
    s = c_p.T @ mol.intor_symmetric("int1e_ovlp") @ c_m
    selected_rows = np.full(s.shape[0], -1)
    signs = np.ones(s.shape[0])
    assigned_rows = set()
    assigned_cols = set()
    flat_indices = np.dstack(np.unravel_index(np.argsort(-np.abs(s).ravel()), s.shape))[
        0
    ]
    for row, col in flat_indices:
        if row not in assigned_rows and col not in assigned_cols:
            selected_rows[col] = row
            signs[col] = np.sign(s[col, row])
            assigned_rows.add(row)
            assigned_cols.add(col)
            if len(assigned_rows) == s.shape[0]:
                break
    c_m = signs * c_m[:, selected_rows]

    # ensure correct minima are found with respect to sign changes due to symmetry
    def fun(mo_m, mo_p):
        return jnp.where(
            pipek.cost_function(np.array([]), mol, mo_m[:, np.newaxis])
            - pipek.cost_function(
                np.array([]), mol, (jnp.sign(mo_p * mo_m) * mo_m)[:, np.newaxis]
            )
            < 1e-13,
            jnp.sign(mo_p * mo_m) * mo_m,
            mo_m,
        )
    c_m = jax.vmap(fun, (1, 1), 1)(c_m, c_p)

    # calculate numerical gradient
    grad = (c_p - c_m) / (2 * FIN_DIFF)

    # ensure correct orbital gradients are compared
    euc_dist = np.linalg.norm(
        np.abs(grad[:, :, np.newaxis]) - np.abs(ad_grad[:, np.newaxis, :, i]), axis=0
    )
    selected_rows = np.full(euc_dist.shape[0], -1)
    assigned_rows = set()
    assigned_cols = set()
    flat_indices = np.dstack(
        np.unravel_index(np.argsort(euc_dist.ravel()), euc_dist.shape)
    )[0]
    for row, col in flat_indices:
        if row not in assigned_rows and col not in assigned_cols:
            selected_rows[col] = row
            assigned_rows.add(row)
            assigned_cols.add(col)
            if len(assigned_rows) == euc_dist.shape[0]:
                break
    fin_diff_grad[:, :, i] = grad[:, selected_rows]

print(ad_grad)
print(fin_diff_grad)
print(np.abs(ad_grad) - np.abs(fin_diff_grad))

The code is a bit convoluted because I need to ensure that the phases of the orbitals from finite differences agree and that the correct minima are used with respect to sign changes of specific orbitals due to symmetry. Additionally, the gradients for the correct orbitals have to be compared. Nevertheless, the gradients do not appear to agree. While the majority of the elements are the same, some specific elements are completely different. When I try to do the same for IBOs, the differences are even more pronounced. It would be great if somebody has any insight about what else I need to change to get these to agree.

Nov 14 '24 16:11 jonas-greiner

@fishjojo: Do you have any insight about why this might be? It does not appear to be related to symmetry since the localized orbital response also differs for non-symmetric geometries.

Dec 05 '24 10:12 jonas-greiner

I tried a simpler system, and the result seems fine.

import jax
from pyscfad import numpy as np
from pyscfad import gto, scf
from pyscfad.lo import pipek
from pyscfad import config

config.update('pyscfad_scf_implicit_diff', True)

mol = gto.Mole()
mol.atom = '''
He 0 0 0
He 0 0 .74
'''
mol.basis = 'ccpvdz'
mol.verbose = 5
mol.build()

def pmorb(mol, e_field):
    mf = scf.RHF(mol)
    mf.conv_tol = 1e-14

    with mol.with_common_origin(np.zeros(3)):
        ao_dip = mol.intor_symmetric("int1e_r", comp=3)
    ext_el = np.einsum("x,xij->ij", e_field, ao_dip)

    h1 = mf.get_hcore()
    mf.get_hcore = lambda *a: h1 + ext_el
    ehf = mf.kernel()

    orbloc = pipek.pm(mol, mf.mo_coeff[:,mf.mo_occ>0], conv_tol=1e-10)
    return orbloc

e_field = np.zeros(3)
g = jax.jacrev(pmorb, 1)(mol, e_field)

delta = 1e-4
e_p = np.array([0, 0, delta])
e_m = np.array([0, 0,-delta])

orb_p = pmorb(mol, e_p)
orb_m = pmorb(mol, e_m)
g_fd = (orb_p-orb_m) / (2*delta)

print(g_fd)
print((g_fd - g[..., 2]).max())

My experience with finite difference is that it is difficult to get the correct result in general, because the localization can be easily trapped in different local minima, and the gauge of the orbitals is hard to fix.

Dec 05 '24 21:12 fishjojo

Hi Xing, thank you for your response. It appears you are right, the perturbations will sometimes lead the localizer to different minima. For small perturbations, it appears like this can be avoided by starting the localizer from the orbitals closest to the unperturbed localized orbitals. Unfortunately, the changes in the value of the localization cost function induced by the perturbation are sometimes so small that the orbitals are not changed when starting the localizer as close as possible to the unperturbed MO coefficients, leading to a vanishing gradient. So I agree, comparing to finite differences appears to be challenging for localized orbitals.

Nevertheless, I believe to have found another issue, please see the following code:

import numpy as np
import jax
from jax import numpy as jnp
from pyscfad import gto, scf, config
from pyscfad.lo import pipek
from pyscfad.ops import stop_grad

jax.config.update("jax_enable_x64", True)
config.update("pyscfad_scf_implicit_diff", True)


# finite difference shift
FIN_DIFF = 1e-3

# external electric field
e_field = np.zeros(3, dtype=np.float64)

# gauge origin
gauge_origin = np.zeros(3, dtype=np.float64)


def orb_loc(e_field, mol, gauge_origin, init_c=None):
    # mf calc
    mf = scf.RHF(mol)

    # electronic contribution due to electric field
    with mol.with_common_origin(gauge_origin):
        ao_dip = mol.intor_symmetric("int1e_r", comp=3)
    ext_el = jnp.einsum("x,xij->ij", e_field, ao_dip)

    # modified one-electron hamiltonian
    h1 = mf.get_hcore()
    mf.get_hcore = lambda *args, **kwargs: h1 + ext_el
    mf.kernel()

    # occupied orbitals
    occ_mo = np.where(mf.mo_occ == 2.0)[0]

    # stop tracing mo coefficients as these only serve as initial guess and the
    # localization condition is implicitly differentiated anyways
    mo_coeff_hf = np.asarray(stop_grad(mf.mo_coeff[:, occ_mo]))

    # perform localization
    mlo = pipek.PM(mol, mo_coeff_hf)
    mlo.pop_method = "iao"
    mlo.conv_tol = 1e-13
    if init_c is None:
        _ = mlo.kernel()
    else:
        # start from orbitals closest to initial orbitals
        ovlp = init_c.T @ mf.get_ovlp() @ mf.mo_coeff[:, occ_mo]
        u, _, vt = np.linalg.svd(ovlp)
        orb_rot = (u @ vt).T
        mo_coeff_init = np.asarray(stop_grad(mf.mo_coeff[:, occ_mo] @ orb_rot))
        _ = mlo.kernel(mo_coeff=mo_coeff_init)

    # perform Jacobi sweep to ensure stable minimum is found
    mlo = pipek.jacobi_sweep(mlo)

    if init_c is None:
        # construct untraced initial guess
        u0 = mo_coeff_hf.T @ mf.get_ovlp() @ mlo.mo_coeff

        # perform orbital localization which can be implicitly differentiated, the
        # occupied space is now traced again
        mo_coeff = pipek.pm(
            mol,
            mf.mo_coeff[:, occ_mo],
            conv_tol=1e-13,
            pop_method="iao",
            init_guess=u0,
        )

    else:
        mo_coeff = mlo.mo_coeff

    return mo_coeff


# init molecule
mol = gto.Mole(
    verbose=0,
    output=None,
    basis="cc-pVDZ",
    symmetry=True,
    atom="""
        O        0.000000    0.000000    0.117790
        H        0.000000    0.755453   -0.471161
        H        0.000000   -0.755453   -0.471161
    """,
)
mol.build(trace_coords=False, trace_exp=False, trace_ctr_coeff=False)

# AD gradient
mo_coeff = orb_loc(e_field, mol, gauge_origin)
ad_grad = jax.jacrev(orb_loc)(e_field, mol, gauge_origin)

# finite differences gradient
fin_diff_grad = np.empty_like(ad_grad)
for i in range(3):
    e_shift = np.zeros(3, dtype=np.float64)
    e_shift[i] = FIN_DIFF
    c_p = orb_loc(e_field + e_shift, mol, gauge_origin, init_c=mo_coeff)
    c_m = orb_loc(e_field - e_shift, mol, gauge_origin, init_c=mo_coeff)

    # calculate numerical gradient
    fin_diff_grad[:, :, i] = (c_p - c_m) / (2 * FIN_DIFF)

with np.printoptions(precision=2):
    print("MO coefficients:")
    print(mo_coeff)
    print("AD gradient in y-direction:")
    print(ad_grad[:, :, 1])
    print("Numerical gradient in y-direction:")
    print(fin_diff_grad[:, :, 1])

For some reason, the AD orbital gradient appears to be different for the two AOs (AOs 6 and 7) located at the hydrogen atoms for some of these MOs (MOs 0 and 4, affected elements marked in bold):

[[-1.26e-01 -1.71e-01 -1.71e-01 -4.59e-01  8.58e-01]
 [ 4.13e-01  1.12e+00  1.12e+00 -2.25e-02 -1.13e+00]
 [-5.15e-01 -2.44e-08 -2.25e-08  1.30e+00  1.67e-04]
 [-1.38e-01  2.70e-01 -2.70e-01 -1.06e+00 -8.30e-05]
 [ 2.76e-01 -2.33e-01 -2.33e-01 -3.29e-02 -7.33e-01]
 [-5.12e-02 -9.31e-01 -1.94e-01  1.83e-01  1.82e-01]
 [-9.29e-02 -1.94e-01 -9.31e-01 -1.49e-01  1.82e-01]]

The MO coefficients for these AOs on the other hand are equivalent:

[[ 9.88e-01 -5.02e-02  5.02e-02 -2.70e-01 -1.19e-04]
 [ 4.86e-02  3.10e-01 -3.10e-01  8.89e-01  7.64e-05]
 [ 9.60e-05 -4.86e-11  2.61e-10 -9.11e-05  1.00e+00]
 [ 1.71e-10  4.29e-01  4.29e-01  6.81e-11 -9.12e-11]
 [ 7.09e-02 -3.61e-01  3.61e-01  5.94e-01  4.73e-05]
 [-3.66e-02  5.12e-01  1.18e-01 -1.55e-01 -1.06e-05]
 [-3.66e-02 -1.18e-01 -5.12e-01 -1.55e-01 -1.06e-05]]

For these localized orbitals, MOs 0 and 4 are invariant to swapping AOs 6 and 7 and I therefore see no reason why the corresponding MO coefficients should react differently to the electric field. If I calculate the numerical orbital gradients (some of which are wrong for the aforementioned reasons), the correct behavior is reproduced:

[[-1.24e-10 -1.71e-01 -1.71e-01 -4.58e-10  7.29e-11]
 [ 4.12e-10  1.12e+00  1.12e+00 -6.38e-12 -1.70e-10]
 [-2.12e-11 -7.71e-11 -7.58e-11  1.93e-10 -5.55e-14]
 [-1.38e-01  2.70e-01 -2.70e-01 -1.06e+00 -8.31e-05]
 [ 2.72e-10 -2.33e-01 -2.33e-01 -5.18e-11 -1.14e-10]
 [ 2.08e-02 -9.30e-01 -1.94e-01  1.66e-01  1.31e-05]
 [-2.08e-02 -1.94e-01 -9.30e-01 -1.66e-01 -1.31e-05]]

It appears to only happen when I use IBOs, for Pipek-Mezey with Mulliken charges, the gradient respects the symmetry properties of the MO coefficients. Thus it might have something to do with using IAO charges.

Jan 13 '25 09:01 jonas-greiner