Skip to content

Commit 3ca7958

Browse files
authored
Merge pull request #148 from PiaLJP/feature/correlated_combined_fit
Feature/correlated combined fit
2 parents ae68e5b + b0b5d88 commit 3ca7958

File tree

2 files changed

+128
-26
lines changed

2 files changed

+128
-26
lines changed

pyerrors/fits.py

Lines changed: 73 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -705,9 +705,6 @@ def chisqfunc_compact(d):
705705

706706
def _combined_fit(x, y, func, silent=False, **kwargs):
707707

708-
if kwargs.get('correlated_fit') is True:
709-
raise Exception("Correlated fit has not been implemented yet")
710-
711708
output = Fit_result()
712709
output.fit_function = func
713710

@@ -735,6 +732,9 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
735732
if len(x_all.shape) > 2:
736733
raise Exception('Unknown format for x values')
737734

735+
if np.any(np.asarray(dy_f) <= 0.0):
736+
raise Exception('No y errors available, run the gamma method first.')
737+
738738
# number of fit parameters
739739
n_parms_ls = []
740740
for key in key_ls:
@@ -766,6 +766,22 @@ def _combined_fit(x, y, func, silent=False, **kwargs):
766766
else:
767767
x0 = [0.1] * n_parms
768768

769+
if kwargs.get('correlated_fit') is True:
770+
corr = covariance(y_all, correlation=True, **kwargs)
771+
covdiag = np.diag(1 / np.asarray(dy_f))
772+
condn = np.linalg.cond(corr)
773+
if condn > 0.1 / np.finfo(float).eps:
774+
raise Exception(f"Cannot invert correlation matrix as its condition number exceeds machine precision ({condn:1.2e})")
775+
if condn > 1e13:
776+
warnings.warn("Correlation matrix may be ill-conditioned, condition number: {%1.2e}" % (condn), RuntimeWarning)
777+
chol = np.linalg.cholesky(corr)
778+
chol_inv = scipy.linalg.solve_triangular(chol, covdiag, lower=True)
779+
780+
def chisqfunc_corr(p):
781+
model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in key_ls])
782+
chisq = anp.sum(anp.dot(chol_inv, (y_f - model)) ** 2)
783+
return chisq
784+
769785
def chisqfunc(p):
770786
func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
771787
model = anp.array([func_list[i](p, x_all[i]) for i in range(len(x_all))])
@@ -782,30 +798,46 @@ def chisqfunc(p):
782798
if 'tol' in kwargs:
783799
tolerance = kwargs.get('tol')
784800
fit_result = iminuit.minimize(chisqfunc, x0, tol=tolerance) # Stopping criterion 0.002 * tol * errordef
801+
if kwargs.get('correlated_fit') is True:
802+
fit_result = iminuit.minimize(chisqfunc_corr, fit_result.x, tol=tolerance)
785803
output.iterations = fit_result.nfev
786804
else:
787805
tolerance = 1e-12
788806
if 'tol' in kwargs:
789807
tolerance = kwargs.get('tol')
790808
fit_result = scipy.optimize.minimize(chisqfunc, x0, method=kwargs.get('method'), tol=tolerance)
809+
if kwargs.get('correlated_fit') is True:
810+
fit_result = scipy.optimize.minimize(chisqfunc_corr, fit_result.x, method=kwargs.get('method'), tol=tolerance)
791811
output.iterations = fit_result.nit
792812

793813
chisquare = fit_result.fun
794814

795815
else:
816+
if kwargs.get('correlated_fit') is True:
817+
def chisqfunc_residuals_corr(p):
818+
model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in key_ls])
819+
chisq = anp.dot(chol_inv, (y_f - model))
820+
return chisq
821+
796822
def chisqfunc_residuals(p):
797823
model = np.concatenate([np.array(func[key](p, np.asarray(x[key]))) for key in key_ls])
798824
chisq = ((y_f - model) / dy_f)
799825
return chisq
826+
800827
if 'tol' in kwargs:
801828
print('tol cannot be set for Levenberg-Marquardt')
829+
802830
fit_result = scipy.optimize.least_squares(chisqfunc_residuals, x0, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
831+
if kwargs.get('correlated_fit') is True:
832+
fit_result = scipy.optimize.least_squares(chisqfunc_residuals_corr, fit_result.x, method='lm', ftol=1e-15, gtol=1e-15, xtol=1e-15)
803833

804834
chisquare = np.sum(fit_result.fun ** 2)
805-
assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
806-
output.iterations = fit_result.nfev
835+
if kwargs.get('correlated_fit') is True:
836+
assert np.isclose(chisquare, chisqfunc_corr(fit_result.x), atol=1e-14)
837+
else:
838+
assert np.isclose(chisquare, chisqfunc(fit_result.x), atol=1e-14)
807839

808-
output.message = fit_result.message
840+
output.iterations = fit_result.nfev
809841

810842
if not fit_result.success:
811843
raise Exception('The minimization procedure did not converge.')
@@ -818,17 +850,12 @@ def chisqfunc_residuals(p):
818850
else:
819851
output.chisquare_by_dof = float('nan')
820852

853+
output.message = fit_result.message
821854
if not silent:
822855
print(fit_result.message)
823856
print('chisquare/d.o.f.:', output.chisquare_by_dof)
824857
print('fit parameters', fit_result.x)
825858

826-
def chisqfunc_compact(d):
827-
func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
828-
model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))])
829-
chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
830-
return chisq
831-
832859
def prepare_hat_matrix():
833860
hat_vector = []
834861
for key in key_ls:
@@ -838,16 +865,43 @@ def prepare_hat_matrix():
838865
hat_vector = [item for sublist in hat_vector for item in sublist]
839866
return hat_vector
840867

841-
fitp = fit_result.x
868+
if kwargs.get('expected_chisquare') is True:
869+
if kwargs.get('correlated_fit') is not True:
870+
W = np.diag(1 / np.asarray(dy_f))
871+
cov = covariance(y_all)
872+
hat_vector = prepare_hat_matrix()
873+
A = W @ hat_vector # hat_vector = 'jacobian(func)(fit_result.x, x)'
874+
P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
875+
expected_chisquare = np.trace((np.identity(x_all.shape[-1]) - P_phi) @ W @ cov @ W)
876+
output.chisquare_by_expected_chisquare = output.chisquare / expected_chisquare
877+
if not silent:
878+
print('chisquare/expected_chisquare:', output.chisquare_by_expected_chisquare)
842879

880+
fitp = fit_result.x
843881
if np.any(np.asarray(dy_f) <= 0.0):
844882
raise Exception('No y errors available, run the gamma method first.')
845883

846884
try:
847-
hess = hessian(chisqfunc)(fitp)
885+
if kwargs.get('correlated_fit') is True:
886+
hess = hessian(chisqfunc_corr)(fitp)
887+
else:
888+
hess = hessian(chisqfunc)(fitp)
848889
except TypeError:
849890
raise Exception("It is required to use autograd.numpy instead of numpy within fit functions, see the documentation for details.") from None
850891

892+
if kwargs.get('correlated_fit') is True:
893+
def chisqfunc_compact(d):
894+
func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
895+
model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))])
896+
chisq = anp.sum(anp.dot(chol_inv, (d[n_parms:] - model)) ** 2)
897+
return chisq
898+
else:
899+
def chisqfunc_compact(d):
900+
func_list = np.concatenate([[func[k]] * len(x[k]) for k in key_ls])
901+
model = anp.array([func_list[i](d[:n_parms], x_all[i]) for i in range(len(x_all))])
902+
chisq = anp.sum(((d[n_parms:] - model) / dy_f) ** 2)
903+
return chisq
904+
851905
jac_jac_y = hessian(chisqfunc_compact)(np.concatenate((fitp, y_f)))
852906

853907
# Compute hess^{-1} @ jac_jac_y[:n_parms + m, n_parms + m:] using LAPACK dgesv
@@ -856,24 +910,17 @@ def prepare_hat_matrix():
856910
except np.linalg.LinAlgError:
857911
raise Exception("Cannot invert hessian matrix.")
858912

859-
if kwargs.get('expected_chisquare') is True:
860-
if kwargs.get('correlated_fit') is not True:
861-
W = np.diag(1 / np.asarray(dy_f))
862-
cov = covariance(y_all)
863-
hat_vector = prepare_hat_matrix()
864-
A = W @ hat_vector # hat_vector = 'jacobian(func)(fit_result.x, x)'
865-
P_phi = A @ np.linalg.pinv(A.T @ A) @ A.T
866-
expected_chisquare = np.trace((np.identity(x_all.shape[-1]) - P_phi) @ W @ cov @ W)
867-
output.chisquare_by_expected_chisquare = output.chisquare / expected_chisquare
868-
if not silent:
869-
print('chisquare/expected_chisquare:', output.chisquare_by_expected_chisquare)
870-
871913
result = []
872914
for i in range(n_parms):
873915
result.append(derived_observable(lambda x_all, **kwargs: (x_all[0] + np.finfo(np.float64).eps) / (y_all[0].value + np.finfo(np.float64).eps) * fitp[i], list(y_all), man_grad=list(deriv_y[i])))
874916

875917
output.fit_parameters = result
876918

919+
if kwargs.get('correlated_fit') is True:
920+
n_cov = np.min(np.vectorize(lambda x_all: x_all.N)(y_all))
921+
output.t2_p_value = 1 - scipy.stats.f.cdf((n_cov - output.dof) / (output.dof * (n_cov - 1)) * output.chisquare,
922+
output.dof, n_cov - output.dof)
923+
877924
return output
878925

879926

tests/fits_test.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -703,6 +703,8 @@ def func_valid(a,x):
703703
yvals.append(pe.pseudo_Obs(x + np.random.normal(0.0, err), err, 'test1') + pe.pseudo_Obs(0, err / 100, 'test2', samples=87))
704704
with pytest.raises(Exception):
705705
pe.least_squares({'a':xvals}, {'b':yvals}, {'a':func_valid})
706+
with pytest.raises(Exception):
707+
pe.least_squares({'a':xvals}, {'a':yvals}, {'a':func_valid})
706708

707709
def test_combined_fit_no_autograd():
708710

@@ -833,6 +835,59 @@ def func_auto_b(a,x):
833835
assert(no_order_x_y[0] == order[0])
834836
assert(no_order_x_y[1] == order[1])
835837

838+
def test_correlated_combined_fit_vs_correlated_standard_fit():
839+
840+
x_const = {'a':[0, 1, 2, 3, 4, 5, 6, 7, 8, 9], 'b':np.arange(10, 20)}
841+
y_const = {'a':[pe.Obs([np.random.normal(1, val, 1000)], ['ensemble1'])
842+
for val in [0.25, 0.3, 0.01, 0.2, 0.5, 1.3, 0.26, 0.4, 0.1, 1.0]],
843+
'b':[pe.Obs([np.random.normal(1, val, 1000)], ['ensemble1'])
844+
for val in [0.5, 1.12, 0.26, 0.25, 0.3, 0.01, 0.2, 1.0, 0.38, 0.1]]}
845+
for key in y_const.keys():
846+
[item.gamma_method() for item in y_const[key]]
847+
y_const_ls = np.concatenate([np.array(o) for o in y_const.values()])
848+
x_const_ls = np.arange(0, 20)
849+
850+
def func_const(a,x):
851+
return 0 * x + a[0]
852+
853+
funcs_const = {"a": func_const,"b": func_const}
854+
for method_kw in ['Levenberg-Marquardt', 'migrad', 'Powell', 'Nelder-Mead']:
855+
res = []
856+
res.append(pe.fits.least_squares(x_const, y_const, funcs_const, method = method_kw, correlated_fit=True))
857+
res.append(pe.fits.least_squares(x_const_ls, y_const_ls, func_const, method = method_kw, correlated_fit=True))
858+
[item.gamma_method for item in res]
859+
assert np.isclose(0.0, (res[0].chisquare_by_dof - res[1].chisquare_by_dof), 1e-14, 1e-8)
860+
assert np.isclose(0.0, (res[0].p_value - res[1].p_value), 1e-14, 1e-8)
861+
assert np.isclose(0.0, (res[0].t2_p_value - res[1].t2_p_value), 1e-14, 1e-8)
862+
assert (res[0][0] - res[1][0]).is_zero(atol=1e-8)
863+
864+
def test_combined_fit_hotelling_t():
865+
xvals_b = np.arange(0,6)
866+
xvals_a = np.arange(0,8)
867+
868+
def func_exp1(x):
869+
return 0.3*np.exp(0.5*x)
870+
871+
def func_exp2(x):
872+
return 0.3*np.exp(0.8*x)
873+
874+
def func_a(a,x):
875+
return a[0]*anp.exp(a[1]*x)
876+
877+
def func_b(a,x):
878+
return a[0]*anp.exp(a[2]*x)
879+
880+
funcs = {'a':func_a, 'b':func_b}
881+
xs = {'a':xvals_a, 'b':xvals_b}
882+
yobs_a = [pe.Obs([np.random.normal(item, item*1.5, 1000)],['ensemble1']) for item in func_exp1(xvals_a)]
883+
yobs_b = [pe.Obs([np.random.normal(item, item*1.4, 1000)],['ensemble1']) for item in func_exp2(xvals_b)]
884+
ys = {'a': yobs_a, 'b': yobs_b}
885+
886+
for key in funcs.keys():
887+
[item.gamma_method() for item in ys[key]]
888+
ft = pe.fits.least_squares(xs, ys, funcs, correlated_fit=True)
889+
assert ft.t2_p_value >= ft.p_value
890+
836891
def fit_general(x, y, func, silent=False, **kwargs):
837892
"""Performs a non-linear fit to y = func(x) and returns a list of Obs corresponding to the fit parameters.
838893

0 commit comments

Comments
 (0)