
Commit 56a2657

Merge pull request #95 from equinor/support-higher-stream-converter-samples-precision

feat: add option for higher sample precision in StreamConverter

2 parents: 91ad5f4 + ee2af72

File tree: 5 files changed, +64 -15 lines


docs/file-specification.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -71,5 +71,5 @@ Storing whether trace header fields are duplicates of previous ones reduces the
 
 **** Blockshape in IL direction is set to 1 for 2D files, also no bytes for 3D geometry between 4-40 are set.
 
-***** This value may be overridden to provide greater precision by bytes 3273–3280 in the SEG-Y header, or equivalent in ZGY file.
+***** This value may be overridden to provide higher precision by bytes 3273–3280 in the SEG-Y header, or equivalent in ZGY file.
 These bytes were allocated in rev 2.0 for "Extended sample interval", as an IEEE double-precision float.
```
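The note above points at the SEG-Y rev 2.0 "Extended sample interval" field. For orientation, here is a minimal sketch of pulling that override out of a SEG-Y file; the helper name and the big-endian read are assumptions, not code from this PR:

```python
import struct

# Hypothetical helper (not part of this PR): read the rev 2.0
# "Extended sample interval" override. Bytes 3273-3280 (1-indexed)
# live in the 400-byte binary header that follows the 3200-byte
# textual header, i.e. at 0-indexed file offset 3272.
def read_extended_sample_interval(segy_path):
    with open(segy_path, "rb") as f:
        f.seek(3272)
        raw = f.read(8)
    # SEG-Y is conventionally big-endian; the field is an IEEE double.
    (value,) = struct.unpack(">d", raw)
    return value  # typically 0.0 when the override is not set
```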

seismic_zfp/conversion.py

Lines changed: 8 additions & 0 deletions

```diff
@@ -505,6 +505,7 @@ def __init__(
         bits_per_voxel=4,
         blockshape=(4, 4, -1),
         trace_headers={},
+        use_higher_samples_precision=False,
     ):
         """
         Parameters
@@ -537,6 +538,12 @@ def __init__(
             key, value pairs of:
              - Member of segyio.tracefield.TraceField Enum
              - 2D numpy array of integers in inline-major order, representing trace header values to be inserted
+
+        use_higher_samples_precision : bool, optional
+            Specifies whether to use higher precision for the sample interval and sample time.
+            Default is `False`. When set to `True`, stores sample interval and sample time as
+            64-bit floating-point numbers for increased precision. If `False`, they are stored
+            as 32-bit integers.
         """
         # Get ilines axis. If overspecified check consistency, and generate if unspecified.
         if segyio.tracefield.TraceField.INLINE_3D in trace_headers:
@@ -592,6 +599,7 @@ def __init__(
             self.header_info,
             self.geom,
             total_shape,
+            use_higher_samples_precision=use_higher_samples_precision
         )
 
     def write(self, data_array):
```
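Taken together, the new keyword is accepted by `StreamConverter.__init__` and forwarded to the header writer. A minimal usage sketch, modelled on the tests further down; the axes, output path, and bit rate are illustrative only:

```python
import numpy as np
from seismic_zfp.conversion import StreamConverter

# A samples axis starting at 2.5 would be truncated by the default
# 32-bit-integer storage; the new flag preserves it as float64.
ilines = np.arange(8)
xlines = np.arange(12)
samples = 2.5 + np.arange(16)
array = np.random.rand(8, 12, 16).astype(np.float32)

with StreamConverter(
    "volume.sgz",                       # illustrative output path
    ilines=ilines,
    xlines=xlines,
    samples=samples,
    bits_per_voxel=8,
    use_higher_samples_precision=True,
) as converter:
    # Feed the volume in inline chunks; the default blockshape is (4, 4, -1).
    for i in range(0, array.shape[0], 4):
        converter.write(array[i:i + 4, :, :])
```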

seismic_zfp/conversion_utils.py

Lines changed: 15 additions & 2 deletions

```diff
@@ -50,7 +50,7 @@ def make_header_seismic_file(seismicfile, bits_per_voxel, blockshape, geom, head
     return buffer
 
 
-def make_header_numpy(bits_per_voxel, blockshape, source, header_info, geom):
+def make_header_numpy(bits_per_voxel, blockshape, source, header_info, geom, use_higher_samples_precision=False):
     """Generate header for SGZ file from numpy arrays representing axis and header values"""
 
     # Nothing clever to identify duplicated header arrays yet, just include everything we're given.
@@ -59,6 +59,11 @@ def make_header_numpy(bits_per_voxel, blockshape, source, header_info, geom):
                               header_info, bits_per_voxel, blockshape, geom)
     # These 4 bytes indicate the data source for the SGZ file. Use 20 to indicate numpy.
     buffer[76:80] = int_to_bytes(20)
+    if use_higher_samples_precision:
+        # higher precision minimum sample time/depth
+        buffer[84:92] = double_to_bytes(source.samples[0])
+        # higher precision sample interval (μs/m)
+        buffer[92:100] = double_to_bytes(1000.0 * (source.samples[1] - source.samples[0]))
     return buffer
 
 
@@ -454,6 +459,7 @@ def __init__(
         geom,
         total_shape,
         queue_size=16,
+        use_higher_samples_precision=False,
     ):
         """
         Parameters
@@ -474,11 +480,18 @@ def __init__(
             Total shape of the full input array.
         queue_size : int, optional
             Maximum size of the compression and writing queues.
+        use_higher_samples_precision : bool, optional
+            Whether to use higher precision for sample interval and sample time.
         """
         self.total_shape = total_shape
         self.blockshape = blockshape
         self.header = make_header_numpy(
-            bits_per_voxel, blockshape, axes, header_info, geom
+            bits_per_voxel,
+            blockshape,
+            axes,
+            header_info,
+            geom,
+            use_higher_samples_precision=use_higher_samples_precision,
         )
         # Maxsize can be reduced for machines with little memory
         # ... or for files which are so big they might be very useful.
```
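The first hunk above writes two IEEE doubles at fixed offsets: bytes 84–92 hold the first sample and bytes 92–100 the sample interval scaled by 1000 (e.g. ms to μs for time data). `double_to_bytes` is defined in `seismic_zfp.utils` rather than in this diff, so the packing below is a sketch; the little-endian format string is an assumption:

```python
import struct

def double_to_bytes(value):
    # Assumed implementation: pack one IEEE 754 double.
    # "<d" (little-endian) is a guess; the real helper fixes the byte order.
    return struct.pack("<d", float(value))

# Mirroring the hunk with a toy samples axis (in ms):
samples = [2.5, 6.5, 10.5]
header = bytearray(128)  # stand-in for the SGZ header buffer
header[84:92] = double_to_bytes(samples[0])                           # first sample
header[92:100] = double_to_bytes(1000.0 * (samples[1] - samples[0]))  # interval
```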

seismic_zfp/utils.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -77,7 +77,7 @@ def read_range_blob(file, offset, length):
     return file.download_blob(offset=offset, length=length).readall()
 
 
-def generate_fake_seismic(n_ilines, n_xlines, n_samples, min_iline=0, min_xline=0):
+def generate_fake_seismic(n_ilines, n_xlines, n_samples, min_iline=0, min_xline=0, min_sample=0):
     # Generate an array which looks a *bit* like an impulse-response test...
     ilines, xlines, samples = np.arange(n_ilines), np.arange(n_xlines), np.arange(n_samples)
     array_shape = (n_ilines, n_xlines, n_samples)
@@ -87,7 +87,7 @@ def generate_fake_seismic(n_ilines, n_xlines, n_samples, min_iline=0, min_xline=
     s = np.broadcast_to(samples - n_samples / 4, array_shape).astype(np.float32)
     array = 0.01 + (np.sin(0.1 + np.sqrt(2.0 + (i+0.01) ** 2 + x ** 2 + (s*0.75) ** 2) / 8.0) /
                     (0.1 * np.sqrt(2.0 + (i+0.01) ** 2 + x ** 2 + (s*0.50) ** 2)))
-    return array, ilines+min_iline, xlines+min_xline, samples
+    return array, ilines+min_iline, xlines+min_xline, samples+min_sample
```
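With the new `min_sample` argument, the fake survey's samples axis can start anywhere, which is what lets the tests exercise a non-integer origin. A quick usage sketch with illustrative values:

```python
from seismic_zfp.utils import generate_fake_seismic

# e.g. a survey whose first sample sits at 2.5 ms:
array, ilines, xlines, samples = generate_fake_seismic(
    8, 12, 16, min_iline=100, min_xline=200, min_sample=2.5
)
print(samples[:3])  # [2.5 3.5 4.5] -- np.arange(16) shifted by min_sample
```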

tests/test_compress.py

Lines changed: 38 additions & 10 deletions

```diff
@@ -483,6 +483,7 @@ def compress_numpy_and_compare_data(n_samples, min_iline, n_ilines, min_xline, n
         # print(np.max(np.abs(reader.read_volume()-array)/np.abs(array)))
         assert np.allclose(reader.ilines, ilines, rtol=rtol)
         assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, samples, rtol=rtol)
         assert np.allclose(reader.read_volume(), array, rtol=rtol)
         assert 20 == reader.get_file_source_code()
 
@@ -507,6 +508,7 @@ def test_compress_numpy_data(tmp_path):
 
 
 def compress_stream_and_compare_data(
+    min_sample,
     n_samples,
     min_iline,
     n_ilines,
@@ -518,12 +520,13 @@ def compress_stream_and_compare_data(
     blockshape=(4, 4, -1),
 ):
 
-    out_sgz = os.path.join(str(tmp_path), "from-stream.sgz")
+    out_sgz_lower_samples_precision = os.path.join(str(tmp_path), "from-stream-lower-samples-precision.sgz")
+    out_sgz_higher_samples_precision = os.path.join(str(tmp_path), "from-stream-higher-samples-precision.sgz")
     out_sgz_numpy = os.path.join(str(tmp_path), "from-numpy.sgz")
     out_sgz_no_headers = os.path.join(str(tmp_path), "from-stream_no_headers.sgz")
 
     array, ilines, xlines, samples = generate_fake_seismic(
-        n_ilines, n_xlines, n_samples, min_iline=min_iline, min_xline=min_xline
+        n_ilines, n_xlines, n_samples, min_iline=min_iline, min_xline=min_xline, min_sample=min_sample
     )
 
     trace_headers = {
@@ -534,9 +537,10 @@ def compress_stream_and_compare_data(
             xlines, (n_ilines, n_xlines)
         ),
     }
+    lower_samples_precision = samples.astype(int)
 
     with StreamConverter(
-        out_sgz,
+        out_sgz_lower_samples_precision,
         ilines=ilines,
         xlines=xlines,
         samples=samples,
@@ -548,10 +552,31 @@ def compress_stream_and_compare_data(
             end = min(i + blockshape[0], array.shape[0])
             chunk = array[i:end, :, :]
             converter.write(chunk)
+    with SgzReader(out_sgz_lower_samples_precision) as reader:
+        assert np.allclose(reader.ilines, ilines, rtol=rtol)
+        assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, lower_samples_precision, rtol=rtol)
+        assert np.allclose(reader.read_volume(), array, rtol=rtol)
+        assert 20 == reader.get_file_source_code()
 
-    with SgzReader(out_sgz) as reader:
+    with StreamConverter(
+        out_sgz_higher_samples_precision,
+        ilines=ilines,
+        xlines=xlines,
+        samples=samples,
+        bits_per_voxel=bits_per_voxel,
+        blockshape=blockshape,
+        trace_headers=trace_headers,
+        use_higher_samples_precision=True
+    ) as converter:
+        for i in range(0, array.shape[0], blockshape[0]):
+            end = min(i + blockshape[0], array.shape[0])
+            chunk = array[i:end, :, :]
+            converter.write(chunk)
+    with SgzReader(out_sgz_higher_samples_precision) as reader:
         assert np.allclose(reader.ilines, ilines, rtol=rtol)
         assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, samples, rtol=rtol)
         assert np.allclose(reader.read_volume(), array, rtol=rtol)
         assert 20 == reader.get_file_source_code()
 
@@ -563,6 +588,7 @@ def compress_stream_and_compare_data(
         bits_per_voxel=bits_per_voxel,
         blockshape=blockshape,
         trace_headers=trace_headers,
+        use_higher_samples_precision=False
     ) as converter:
         for i in range(0, array.shape[0], blockshape[0]):
             end = min(i + blockshape[0], array.shape[0])
@@ -572,6 +598,7 @@ def compress_stream_and_compare_data(
     with SgzReader(out_sgz_no_headers) as reader:
         assert np.allclose(reader.ilines, ilines, rtol=rtol)
         assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, lower_samples_precision, rtol=rtol)
         assert np.allclose(reader.read_volume(), array, rtol=rtol)
         assert 20 == reader.get_file_source_code()
         stream_hash = reader.get_source_data_hash()
@@ -586,14 +613,15 @@ def compress_stream_and_compare_data(
 
 
 def test_compress_stream(tmp_path):
-    compress_stream_and_compare_data(16, 0, 8, 8, 12, tmp_path, 8, 1e-3)
-    compress_stream_and_compare_data(801, 0, 8, 8, 12, tmp_path, 8, 1e-3)
-    compress_stream_and_compare_data(512, 0, 9, 8, 12, tmp_path, 8, 1e-3)
-    compress_stream_and_compare_data(512, 0, 8, 8, 13, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 16, 0, 8, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(2.5, 16, 0, 8, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 801, 0, 8, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 512, 0, 9, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 512, 0, 8, 8, 13, tmp_path, 8, 1e-3)
 
     compress_stream_and_compare_data(
-        17, 0, 65, 8, 65, tmp_path, 8, 1e-2, blockshape=(32, 32, 4)
+        0, 17, 0, 65, 8, 65, tmp_path, 8, 1e-2, blockshape=(32, 32, 4)
     )
     compress_stream_and_compare_data(
-        801, 0, 9, 8, 13, tmp_path, 8, 1e-2, blockshape=(16, 16, 16)
+        0, 801, 0, 9, 8, 13, tmp_path, 8, 1e-2, blockshape=(16, 16, 16)
     )
```
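The paired reader blocks above are the point of the test: the same non-integer samples axis round-trips exactly when the flag is set, and collapses to `samples.astype(int)` when it is not. Read back outside the test, that difference looks roughly like this (the import path for `SgzReader` is assumed, and the file names come from the test setup):

```python
from seismic_zfp.read import SgzReader  # assumed import path

with SgzReader("from-stream-higher-samples-precision.sgz") as reader:
    print(reader.zslices[:2])  # e.g. [2.5, 3.5]: float64 preserved
with SgzReader("from-stream-lower-samples-precision.sgz") as reader:
    print(reader.zslices[:2])  # e.g. [2, 3]: truncated to integers
```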
