Orbital response for PM localized orbitals does not appear to agree with finite differences
Hi everyone,
I am trying to compare the orbital response with respect to an external electric field to finite differences. Unfortunately, I am not able to get these to agree:
import numpy as np
import jax
from jax import numpy as jnp
from pyscfad import gto, scf, config
from pyscfad.lo import pipek
from pyscfad.ops import stop_grad
jax.config.update("jax_enable_x64", True)
config.update("pyscfad_scf_implicit_diff", True)
# decimal tolerance
TOL = 5
# finite difference shift
FIN_DIFF = 1e-5
# external electric field
e_field = np.zeros(3, dtype=np.float64)
# gauge origin
gauge_origin = np.zeros(3, dtype=np.float64)
def orb_loc(e_field, mol, gauge_origin):
"""
orbital localization
"""
# mf calc
mf = scf.RHF(mol)
# electronic contribution due to electric field
with mol.with_common_origin(gauge_origin):
ao_dip = mol.intor_symmetric("int1e_r", comp=3)
ext_el = jnp.einsum("x,xij->ij", e_field, ao_dip)
# modified one-electron hamiltonian
h1 = mf.get_hcore()
mf.get_hcore = lambda *args, **kwargs: h1 + ext_el
mf.kernel()
# occupied orbitals
occ_mo = np.where(mf.mo_occ == 2.0)[0]
# stop tracing mo coefficients as these only serve as initial guess and the
# localization condition is implicitly differentiated anyways
mo_coeff_occ = np.asarray(stop_grad(mf.mo_coeff[:, occ_mo]))
# perform localization
mlo = pipek.PM(mol, mo_coeff_occ)
mlo.pop_method = "mulliken"
mlo.conv_tol = 1e-12
_ = mlo.kernel()
# perform Jacobi sweep to ensure stable minimum is found
mlo = pipek.jacobi_sweep(mlo)
# construct untraced initial guess
u0 = mo_coeff_occ.T @ mf.get_ovlp() @ mlo.mo_coeff
# perform orbital localization which can be implicitly differentiated, the
# occupied space is now traced again
mo_coeff = pipek.pm(
mol,
mf.mo_coeff[:, occ_mo],
conv_tol=1e-12,
pop_method="mulliken",
init_guess=u0,
)
return mo_coeff
# init molecule
mol = gto.Mole(
verbose=0,
output=None,
basis="sto-3g",
symmetry=True,
atom="""
O 0.000000 0.000000 0.117790
H 0.000000 0.755453 -0.471161
H 0.000000 -0.755453 -0.471161
""",
)
mol.build(trace_coords=False, trace_exp=False, trace_ctr_coeff=False)
# AD gradient
ad_grad = jax.jacrev(orb_loc)(e_field, mol, gauge_origin)
# finite differences gradient
fin_diff_grad = np.empty_like(ad_grad)
for i in range(3):
shift = np.zeros(3, dtype=np.float64)
shift[i] = FIN_DIFF
c_p = orb_loc(
e_field + shift,
mol,
gauge_origin,
)
c_m = orb_loc(
e_field - shift,
mol,
gauge_origin,
)
# ensure numerical orbital gradient is calculated for the same orbitals
s = c_p.T @ mol.intor_symmetric("int1e_ovlp") @ c_m
selected_rows = np.full(s.shape[0], -1)
signs = np.ones(s.shape[0])
assigned_rows = set()
assigned_cols = set()
flat_indices = np.dstack(np.unravel_index(np.argsort(-np.abs(s).ravel()), s.shape))[
0
]
for row, col in flat_indices:
if row not in assigned_rows and col not in assigned_cols:
selected_rows[col] = row
signs[col] = np.sign(s[col, row])
assigned_rows.add(row)
assigned_cols.add(col)
if len(assigned_rows) == s.shape[0]:
break
c_m = signs * c_m[:, selected_rows]
# ensure correct minima are found with respect to sign changes due to symmetry
def fun(mo_m, mo_p):
return jnp.where(
pipek.cost_function(np.array([]), mol, mo_m[:, np.newaxis])
- pipek.cost_function(
np.array([]), mol, (jnp.sign(mo_p * mo_m) * mo_m)[:, np.newaxis]
)
< 1e-13,
jnp.sign(mo_p * mo_m) * mo_m,
mo_m,
)
c_m = jax.vmap(fun, (1, 1), 1)(c_m, c_p)
# calculate numerical gradient
grad = (c_p - c_m) / (2 * FIN_DIFF)
# ensure correct orbital gradients are compared
euc_dist = np.linalg.norm(
np.abs(grad[:, :, np.newaxis]) - np.abs(ad_grad[:, np.newaxis, :, i]), axis=0
)
selected_rows = np.full(euc_dist.shape[0], -1)
assigned_rows = set()
assigned_cols = set()
flat_indices = np.dstack(
np.unravel_index(np.argsort(euc_dist.ravel()), euc_dist.shape)
)[0]
for row, col in flat_indices:
if row not in assigned_rows and col not in assigned_cols:
selected_rows[col] = row
assigned_rows.add(row)
assigned_cols.add(col)
if len(assigned_rows) == euc_dist.shape[0]:
break
fin_diff_grad[:, :, i] = grad[:, selected_rows]
print(ad_grad)
print(fin_diff_grad)
print(np.abs(ad_grad) - np.abs(fin_diff_grad))
The code is a bit convoluted because I need to ensure that the phases of the orbitals from finite differences agree and that the correct minima are used with respect to sign changes of specific orbitals due to symmetry. Additionally, the gradients for the correct orbitals have to be compared. Nevertheless, the gradients do not appear to agree. While the majority of the elements are the same, some specific elements are completely different. When I try to do the same for IBOs, the differences are even more pronounced. It would be great if somebody has any insight about what else I need to change to get these to agree.
@fishjojo: Do you have any insight about why this might be? It does not appear to be related to symmetry since the localized orbital response also differs for non-symmetric geometries.
I tried a simpler system, and the result seems fine.
import jax
from pyscfad import numpy as np
from pyscfad import gto, scf
from pyscfad.lo import pipek
from pyscfad import config
config.update('pyscfad_scf_implicit_diff', True)
mol = gto.Mole()
mol.atom = '''
He 0 0 0
He 0 0 .74
'''
mol.basis = 'ccpvdz'
mol.verbose = 5
mol.build()
def pmorb(mol, e_field):
mf = scf.RHF(mol)
mf.conv_tol = 1e-14
with mol.with_common_origin(np.zeros(3)):
ao_dip = mol.intor_symmetric("int1e_r", comp=3)
ext_el = np.einsum("x,xij->ij", e_field, ao_dip)
h1 = mf.get_hcore()
mf.get_hcore = lambda *a: h1 + ext_el
ehf = mf.kernel()
orbloc = pipek.pm(mol, mf.mo_coeff[:,mf.mo_occ>0], conv_tol=1e-10)
return orbloc
e_field = np.zeros(3)
g = jax.jacrev(pmorb, 1)(mol, e_field)
delta = 1e-4
e_p = np.array([0, 0, delta])
e_m = np.array([0, 0,-delta])
orb_p = pmorb(mol, e_p)
orb_m = pmorb(mol, e_m)
g_fd = (orb_p-orb_m) / (2*delta)
print(g_fd)
print((g_fd - g[..., 2]).max())
My experience with finite difference is that it is difficult to get the correct result in general, because the localization can be easily trapped in different local minima, and the gauge of the orbitals is hard to fix.
Hi Xing, thank you for your response. It appears you are right, the perturbations will sometimes lead the localizer to different minima. For small perturbations, it appears like this can be avoided by starting the localizer from the orbitals closest to the unperturbed localized orbitals. Unfortunately, the changes in the value of the localization cost function induced by the perturbation are sometimes so small that the orbitals are not changed when starting the localizer as close as possible to the unperturbed MO coefficients, leading to a vanishing gradient. So I agree, comparing to finite differences appears to be challenging for localized orbitals.
Nevertheless, I believe to have found another issue, please see the following code:
import numpy as np
import jax
from jax import numpy as jnp
from pyscfad import gto, scf, config
from pyscfad.lo import pipek
from pyscfad.ops import stop_grad
jax.config.update("jax_enable_x64", True)
config.update("pyscfad_scf_implicit_diff", True)
# finite difference shift
FIN_DIFF = 1e-3
# external electric field
e_field = np.zeros(3, dtype=np.float64)
# gauge origin
gauge_origin = np.zeros(3, dtype=np.float64)
def orb_loc(e_field, mol, gauge_origin, init_c=None):
# mf calc
mf = scf.RHF(mol)
# electronic contribution due to electric field
with mol.with_common_origin(gauge_origin):
ao_dip = mol.intor_symmetric("int1e_r", comp=3)
ext_el = jnp.einsum("x,xij->ij", e_field, ao_dip)
# modified one-electron hamiltonian
h1 = mf.get_hcore()
mf.get_hcore = lambda *args, **kwargs: h1 + ext_el
mf.kernel()
# occupied orbitals
occ_mo = np.where(mf.mo_occ == 2.0)[0]
# stop tracing mo coefficients as these only serve as initial guess and the
# localization condition is implicitly differentiated anyways
mo_coeff_hf = np.asarray(stop_grad(mf.mo_coeff[:, occ_mo]))
# perform localization
mlo = pipek.PM(mol, mo_coeff_hf)
mlo.pop_method = "iao"
mlo.conv_tol = 1e-13
if init_c is None:
_ = mlo.kernel()
else:
# start from orbitals closest to initial orbitals
ovlp = init_c.T @ mf.get_ovlp() @ mf.mo_coeff[:, occ_mo]
u, _, vt = np.linalg.svd(ovlp)
orb_rot = (u @ vt).T
mo_coeff_init = np.asarray(stop_grad(mf.mo_coeff[:, occ_mo] @ orb_rot))
_ = mlo.kernel(mo_coeff=mo_coeff_init)
# perform Jacobi sweep to ensure stable minimum is found
mlo = pipek.jacobi_sweep(mlo)
if init_c is None:
# construct untraced initial guess
u0 = mo_coeff_hf.T @ mf.get_ovlp() @ mlo.mo_coeff
# perform orbital localization which can be implicitly differentiated, the
# occupied space is now traced again
mo_coeff = pipek.pm(
mol,
mf.mo_coeff[:, occ_mo],
conv_tol=1e-13,
pop_method="iao",
init_guess=u0,
)
else:
mo_coeff = mlo.mo_coeff
return mo_coeff
# init molecule
mol = gto.Mole(
verbose=0,
output=None,
basis="cc-pVDZ",
symmetry=True,
atom="""
O 0.000000 0.000000 0.117790
H 0.000000 0.755453 -0.471161
H 0.000000 -0.755453 -0.471161
""",
)
mol.build(trace_coords=False, trace_exp=False, trace_ctr_coeff=False)
# AD gradient
mo_coeff = orb_loc(e_field, mol, gauge_origin)
ad_grad = jax.jacrev(orb_loc)(e_field, mol, gauge_origin)
# finite differences gradient
fin_diff_grad = np.empty_like(ad_grad)
for i in range(3):
e_shift = np.zeros(3, dtype=np.float64)
e_shift[i] = FIN_DIFF
c_p = orb_loc(e_field + e_shift, mol, gauge_origin, init_c=mo_coeff)
c_m = orb_loc(e_field - e_shift, mol, gauge_origin, init_c=mo_coeff)
# calculate numerical gradient
fin_diff_grad[:, :, i] = (c_p - c_m) / (2 * FIN_DIFF)
with np.printoptions(precision=2):
print("MO coefficients:")
print(mo_coeff)
print("AD gradient in y-direction:")
print(ad_grad[:, :, 1])
print("Numerical gradient in y-direction:")
print(fin_diff_grad[:, :, 1])
For some reason, the AD orbital gradient appears to be different for the two AOs (AOs 6 and 7) located at the hydrogen atoms for some of these MOs (MOs 0 and 4, affected elements marked in bold):
[[-1.26e-01 -1.71e-01 -1.71e-01 -4.59e-01 8.58e-01]
[ 4.13e-01 1.12e+00 1.12e+00 -2.25e-02 -1.13e+00]
[-5.15e-01 -2.44e-08 -2.25e-08 1.30e+00 1.67e-04]
[-1.38e-01 2.70e-01 -2.70e-01 -1.06e+00 -8.30e-05]
[ 2.76e-01 -2.33e-01 -2.33e-01 -3.29e-02 -7.33e-01]
[-5.12e-02 -9.31e-01 -1.94e-01 1.83e-01 1.82e-01]
[-9.29e-02 -1.94e-01 -9.31e-01 -1.49e-01 1.82e-01]]
The MO coefficients for these AOs on the other hand are equivalent:
[[ 9.88e-01 -5.02e-02 5.02e-02 -2.70e-01 -1.19e-04]
[ 4.86e-02 3.10e-01 -3.10e-01 8.89e-01 7.64e-05]
[ 9.60e-05 -4.86e-11 2.61e-10 -9.11e-05 1.00e+00]
[ 1.71e-10 4.29e-01 4.29e-01 6.81e-11 -9.12e-11]
[ 7.09e-02 -3.61e-01 3.61e-01 5.94e-01 4.73e-05]
[-3.66e-02 5.12e-01 1.18e-01 -1.55e-01 -1.06e-05]
[-3.66e-02 -1.18e-01 -5.12e-01 -1.55e-01 -1.06e-05]]
For these localized orbitals, MOs 0 and 4 are invariant to swapping AOs 6 and 7 and I therefore see no reason why the corresponding MO coefficients should react differently to the electric field. If I calculate the numerical orbital gradients (some of which are wrong for the aforementioned reasons), the correct behavior is reproduced:
[[-1.24e-10 -1.71e-01 -1.71e-01 -4.58e-10 7.29e-11]
[ 4.12e-10 1.12e+00 1.12e+00 -6.38e-12 -1.70e-10]
[-2.12e-11 -7.71e-11 -7.58e-11 1.93e-10 -5.55e-14]
[-1.38e-01 2.70e-01 -2.70e-01 -1.06e+00 -8.31e-05]
[ 2.72e-10 -2.33e-01 -2.33e-01 -5.18e-11 -1.14e-10]
[ 2.08e-02 -9.30e-01 -1.94e-01 1.66e-01 1.31e-05]
[-2.08e-02 -1.94e-01 -9.30e-01 -1.66e-01 -1.31e-05]]
It appears to only happen when I use IBOs, for Pipek-Mezey with Mulliken charges, the gradient respects the symmetry properties of the MO coefficients. Thus it might have something to do with using IAO charges.