Skip to content

Fix non overlapping configurations #147

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Feb 2, 2023
52 changes: 26 additions & 26 deletions pyerrors/input/dobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,7 @@ def read_pobs(fname, full_output=False, gz=True, separator_insertion=None):


# this is based on Mattia Bruno's implementation at https://github.com/mbruno46/pyobs/blob/master/pyobs/IO/xml.py
def import_dobs_string(content, noempty=False, full_output=False, separator_insertion=True):
def import_dobs_string(content, full_output=False, separator_insertion=True):
"""Import a list of Obs from a string in the Zeuthen dobs format.

Tags are not written or recovered automatically.
Expand All @@ -406,9 +406,6 @@ def import_dobs_string(content, noempty=False, full_output=False, separator_inse
----------
content : str
XML string containing the data
noemtpy : bool
If True, ensembles with no contribution to the Obs are not included.
If False, ensembles are included as written in the file, possibly with vanishing entries.
full_output : bool
If True, a dict containing auxiliary information and the data is returned.
If False, only the data is returned as list.
Expand Down Expand Up @@ -457,7 +454,6 @@ def import_dobs_string(content, noempty=False, full_output=False, separator_inse
_check(dobs[4].tag == "ne")
ne = int(dobs[4].text.strip())
_check(dobs[5].tag == "nc")
nc = int(dobs[5].text.strip())

idld = {}
deltad = {}
Expand Down Expand Up @@ -507,7 +503,11 @@ def import_dobs_string(content, noempty=False, full_output=False, separator_inse

for name in names:
for i in range(len(deltad[name])):
deltad[name][i] = np.array(deltad[name][i]) + mean[i]
tmp = np.zeros_like(deltad[name][i])
for j in range(len(deltad[name][i])):
if deltad[name][i][j] != 0.:
tmp[j] = deltad[name][i][j] + mean[i]
deltad[name][i] = tmp

res = []
for i in range(len(mean)):
Expand All @@ -516,25 +516,30 @@ def import_dobs_string(content, noempty=False, full_output=False, separator_inse
obs_names = []
for name in names:
h = np.unique(deltad[name][i])
if len(h) == 1 and np.all(h == mean[i]) and noempty:
if len(h) == 1 and np.all(h == mean[i]):
continue
deltas.append(deltad[name][i])
obs_names.append(name)
idl.append(idld[name])
repdeltas = []
repidl = []
for j in range(len(deltad[name][i])):
if deltad[name][i][j] != 0.:
repdeltas.append(deltad[name][i][j])
repidl.append(idld[name][j])
if len(repdeltas) > 0:
obs_names.append(name)
deltas.append(repdeltas)
idl.append(repidl)

res.append(Obs(deltas, obs_names, idl=idl))
res[-1]._value = mean[i]
_check(len(e_names) == ne)

cnames = list(covd.keys())
for i in range(len(res)):
new_covobs = {name: Covobs(0, covd[name], name, grad=gradd[name][i]) for name in cnames}
if noempty:
for name in cnames:
if np.all(new_covobs[name].grad == 0):
del new_covobs[name]
cnames_loc = list(new_covobs.keys())
else:
cnames_loc = cnames
for name in cnames:
if np.all(new_covobs[name].grad == 0):
del new_covobs[name]
cnames_loc = list(new_covobs.keys())
for name in cnames_loc:
res[i].names.append(name)
res[i].shape[name] = 1
Expand All @@ -546,8 +551,6 @@ def import_dobs_string(content, noempty=False, full_output=False, separator_inse
res[i].tag = symbol[i]
if res[i].tag == 'None':
res[i].tag = None
if not noempty:
_check(len(res[0].covobs.keys()) == nc)
if full_output:
retd = {}
tool = file_origin.get('tool', None)
Expand All @@ -568,7 +571,7 @@ def import_dobs_string(content, noempty=False, full_output=False, separator_inse
return res


def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insertion=True):
def read_dobs(fname, full_output=False, gz=True, separator_insertion=True):
"""Import a list of Obs from an xml.gz file in the Zeuthen dobs format.

Tags are not written or recovered automatically.
Expand All @@ -577,9 +580,6 @@ def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insert
----------
fname : str
Filename of the input file.
noemtpy : bool
If True, ensembles with no contribution to the Obs are not included.
If False, ensembles are included as written in the file.
full_output : bool
If True, a dict containing auxiliary information and the data is returned.
If False, only the data is returned as list.
Expand Down Expand Up @@ -615,7 +615,7 @@ def read_dobs(fname, noempty=False, full_output=False, gz=True, separator_insert
with open(fname, 'r') as fin:
content = fin.read()

return import_dobs_string(content, noempty, full_output, separator_insertion=separator_insertion)
return import_dobs_string(content, full_output, separator_insertion=separator_insertion)


def _dobsdict_to_xmlstring(d):
Expand Down Expand Up @@ -782,7 +782,7 @@ def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=N
o = obsl[oi]
if repname in o.idl:
if counters[oi] < 0:
num = offsets[oi]
num = 0
if num == 0:
data += '0 '
else:
Expand All @@ -798,7 +798,7 @@ def create_dobs_string(obsl, name, spec='dobs v1.0', origin='', symbol=[], who=N
if counters[oi] >= len(o.idl[repname]):
counters[oi] = -1
else:
num = offsets[oi]
num = 0
if num == 0:
data += '0 '
else:
Expand Down
2 changes: 1 addition & 1 deletion pyerrors/obs.py
Original file line number Diff line number Diff line change
Expand Up @@ -1097,7 +1097,7 @@ def _expand_deltas_for_merge(deltas, idx, shape, new_idx):
ret = np.zeros(new_idx[-1] - new_idx[0] + 1)
for i in range(shape):
ret[idx[i] - new_idx[0]] = deltas[i]
return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))])
return np.array([ret[new_idx[i] - new_idx[0]] for i in range(len(new_idx))]) * len(new_idx) / len(idx)


def derived_observable(func, data, array_mode=False, **kwargs):
Expand Down
2 changes: 1 addition & 1 deletion tests/json_io_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,7 @@ def test_dobsio():

dobsio.write_dobs(ol, fname, 'TEST')

rl = dobsio.read_dobs(fname, noempty=True)
rl = dobsio.read_dobs(fname)
os.remove(fname + '.xml.gz')
[o.gamma_method() for o in rl]

Expand Down
101 changes: 99 additions & 2 deletions tests/obs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,7 @@ def test_intersection_reduce():
intersection = pe.obs._intersection_idx([o.idl["ens"] for o in [obs1, obs_merge]])
coll = pe.obs._reduce_deltas(obs_merge.deltas["ens"], obs_merge.idl["ens"], range1)

assert np.all(coll == obs1.deltas["ens"])
assert np.allclose(coll, obs1.deltas["ens"] * (len(obs_merge.idl["ens"]) / len(range1)))


def test_irregular_error_propagation():
Expand Down Expand Up @@ -878,7 +878,7 @@ def test_correlation_intersection_of_idls():
cov1 = pe.covariance([obs1, obs2_a])
corr1 = pe.covariance([obs1, obs2_a], correlation=True)

obs2_b = obs2_a + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])
obs2_b = (obs2_a + pe.Obs([np.random.normal(1.0, 0.1, len(range2))], ["ens"], idl=[range2])) / 2
obs2_b.gamma_method()

cov2 = pe.covariance([obs1, obs2_b])
Expand Down Expand Up @@ -1038,6 +1038,7 @@ def test_hash():
assert hash(obs) != hash(o1)
assert hash(o1) != hash(o2)


def test_gm_alias():
samples = np.random.rand(500)

Expand All @@ -1049,3 +1050,99 @@ def test_gm_alias():

assert np.isclose(tt1.dvalue, tt2.dvalue)


def test_overlapping_missing_cnfgs():
length = 200000

l_samp = np.random.normal(2.87, 0.5, length)
s_samp = np.random.normal(7.87, 0.7, length // 2)

o1 = pe.Obs([l_samp], ["test"])
o2 = pe.Obs([s_samp], ["test"], idl=[range(1, length, 2)])

a2 = pe.Obs([s_samp], ["alt"])
t1 = o1 + o2
t1.gm(S=0)

t2 = o1 + a2
t2.gm(S=0)
assert np.isclose(t1.value, t2.value)
assert np.isclose(t1.dvalue, t2.dvalue, rtol=0.01)


def test_non_overlapping_missing_cnfgs():
length = 100000

xsamp = np.random.normal(1.0, 1.0, length)


full = pe.Obs([xsamp], ["ensemble"], idl=[range(0, length)])
full.gm()

even = pe.Obs([xsamp[0:length:2]], ["ensemble"], idl=[range(0, length, 2)])
odd = pe.Obs([xsamp[1:length:2]], ["ensemble"], idl=[range(1, length, 2)])

average = (even + odd) / 2
average.gm(S=0)
assert np.isclose(full.value, average.value)
assert np.isclose(full.dvalue, average.dvalue, rtol=0.01)


def test_non_overlapping_operations():
length = 100000

samples = np.random.normal(0.93, 0.5, length)

e = pe.Obs([samples[0:length:2]], ["ensemble"], idl=[range(0, length, 2)])
o = pe.Obs([samples[1:length:2]], ["ensemble"], idl=[range(1, length, 2)])


e2 = pe.Obs([samples[0:length:2]], ["even"])
o2 = pe.Obs([samples[1:length:2]], ["odd"])

for func in [lambda a, b: a + b,
lambda a, b: a - b,
lambda a, b: a * b,
lambda a, b: a / b,
lambda a, b: a ** b]:

res1 = func(e, o)
res1.gm(S=0)
res2 = func(e2, o2)
res2.gm(S=0)

print(res1, res2)
print((res1.dvalue - res2.dvalue) / res1.dvalue)

assert np.isclose(res1.value, res2.value)
assert np.isclose(res1.dvalue, res2.dvalue, rtol=0.01)


def test_non_overlapping_operations_different_lengths():
length = 100000

samples = np.random.normal(0.93, 0.5, length)
first = samples[:length // 5]
second = samples[length // 5:]

f1 = pe.Obs([first], ["ensemble"], idl=[range(1, length // 5 + 1)])
s1 = pe.Obs([second], ["ensemble"], idl=[range(length // 5, length)])


f2 = pe.Obs([first], ["first"])
s2 = pe.Obs([second], ["second"])

for func in [lambda a, b: a + b,
lambda a, b: a - b,
lambda a, b: a * b,
lambda a, b: a / b,
lambda a, b: a ** b,
lambda a, b: a ** 2 + b ** 2 / a]:

res1 = func(f1, f1)
res1.gm(S=0)
res2 = func(f2, f2)
res2.gm(S=0)

assert np.isclose(res1.value, res2.value)
assert np.isclose(res1.dvalue, res2.dvalue, rtol=0.01)