
Commit 56a2657

Merge pull request #95 from equinor/support-higher-stream-converter-samples-precision

feat: add option for higher sample precision in StreamConverter

2 parents: 91ad5f4 + ee2af72

File tree: 5 files changed, +64 -15 lines


docs/file-specification.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -71,5 +71,5 @@ Storing whether trace header fields are duplicates of previous ones reduces the
 
 **** Blockshape in IL direction is set to 1 for 2D files, also no bytes for 3D geometry between 4-40 are set.
 
-***** This value may be overridden to provide greater precision by bytes 3273–3280 in the SEG-Y header, or equivalent in ZGY file.
+***** This value may be overridden to provide higher precision by bytes 3273–3280 in the SEG-Y header, or equivalent in ZGY file.
 These bytes were allocated in rev 2.0 for "Extended sample interval", as an IEEE double-precision float.
```
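The note above points at the SEG-Y rev 2.0 "Extended sample interval" field. For orientation, here is a minimal sketch of pulling that override out of a SEG-Y file; the helper name and the big-endian read are assumptions, not code from this PR:

```python
import struct

# Hypothetical helper (not part of this PR): read the rev 2.0
# "Extended sample interval" override. Bytes 3273-3280 (1-indexed)
# live in the 400-byte binary header that follows the 3200-byte
# textual header, i.e. at 0-indexed file offset 3272.
def read_extended_sample_interval(segy_path):
    with open(segy_path, "rb") as f:
        f.seek(3272)
        raw = f.read(8)
    # SEG-Y is conventionally big-endian; the field is an IEEE double.
    (value,) = struct.unpack(">d", raw)
    return value  # typically 0.0 when the override is not set
```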

seismic_zfp/conversion.py

Lines changed: 8 additions & 0 deletions

```diff
@@ -505,6 +505,7 @@ def __init__(
         bits_per_voxel=4,
         blockshape=(4, 4, -1),
         trace_headers={},
+        use_higher_samples_precision=False,
     ):
         """
         Parameters
@@ -537,6 +538,12 @@ def __init__(
             key, value pairs of:
              - Member of segyio.tracefield.TraceField Enum
              - 2D numpy array of integers in inline-major order, representing trace header values to be inserted
+
+        use_higher_samples_precision : bool, optional
+            Specifies whether to use higher precision for the sample interval and sample time.
+            Default is `False`. When set to `True`, stores sample interval and sample time as
+            64-bit floating-point numbers for increased precision. If `False`, they are stored
+            as 32-bit integers.
         """
         # Get ilines axis. If overspecified check consistency, and generate if unspecified.
         if segyio.tracefield.TraceField.INLINE_3D in trace_headers:
@@ -592,6 +599,7 @@ def __init__(
             self.header_info,
             self.geom,
             total_shape,
+            use_higher_samples_precision=use_higher_samples_precision
         )
 
     def write(self, data_array):
```
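Taken together, the new keyword is accepted by `StreamConverter.__init__` and forwarded to the header writer. A minimal usage sketch, modelled on the tests further down; the axes, output path, and bit rate are illustrative only:

```python
import numpy as np
from seismic_zfp.conversion import StreamConverter

# A samples axis starting at 2.5 would be truncated by the default
# 32-bit-integer storage; the new flag preserves it as float64.
ilines = np.arange(8)
xlines = np.arange(12)
samples = 2.5 + np.arange(16)
array = np.random.rand(8, 12, 16).astype(np.float32)

with StreamConverter(
    "volume.sgz",                       # illustrative output path
    ilines=ilines,
    xlines=xlines,
    samples=samples,
    bits_per_voxel=8,
    use_higher_samples_precision=True,
) as converter:
    # Feed the volume in inline chunks; the default blockshape is (4, 4, -1).
    for i in range(0, array.shape[0], 4):
        converter.write(array[i:i + 4, :, :])
```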

seismic_zfp/conversion_utils.py

Lines changed: 15 additions & 2 deletions

```diff
@@ -50,7 +50,7 @@ def make_header_seismic_file(seismicfile, bits_per_voxel, blockshape, geom, head
     return buffer
 
 
-def make_header_numpy(bits_per_voxel, blockshape, source, header_info, geom):
+def make_header_numpy(bits_per_voxel, blockshape, source, header_info, geom, use_higher_samples_precision=False):
     """Generate header for SGZ file from numpy arrays representing axis and header values"""
 
     # Nothing clever to identify duplicated header arrays yet, just include everything we're given.
@@ -59,6 +59,11 @@ def make_header_numpy(bits_per_voxel, blockshape, source, header_info, geom):
                               header_info, bits_per_voxel, blockshape, geom)
     # These 4 bytes indicate the data source for the SGZ file. Use 20 to indicate numpy.
     buffer[76:80] = int_to_bytes(20)
+    if use_higher_samples_precision:
+        # higher precision minimum sample time/depth
+        buffer[84:92] = double_to_bytes(source.samples[0])
+        # higher precision sample interval (μs/m)
+        buffer[92:100] = double_to_bytes(1000.0 * (source.samples[1] - source.samples[0]))
     return buffer
 
 
@@ -454,6 +459,7 @@ def __init__(
         geom,
         total_shape,
         queue_size=16,
+        use_higher_samples_precision=False,
     ):
         """
         Parameters
@@ -474,11 +480,18 @@ def __init__(
             Total shape of the full input array.
         queue_size : int, optional
             Maximum size of the compression and writing queues.
+        use_higher_samples_precision : bool, optional
+            Whether to use higher precision for sample interval and sample time.
         """
         self.total_shape = total_shape
         self.blockshape = blockshape
         self.header = make_header_numpy(
-            bits_per_voxel, blockshape, axes, header_info, geom
+            bits_per_voxel,
+            blockshape,
+            axes,
+            header_info,
+            geom,
+            use_higher_samples_precision=use_higher_samples_precision,
         )
         # Maxsize can be reduced for machines with little memory
         # ... or for files which are so big they might be very useful.
```
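The first hunk above writes two IEEE doubles at fixed offsets: bytes 84–92 hold the first sample and bytes 92–100 the sample interval scaled by 1000 (e.g. ms to μs for time data). `double_to_bytes` is defined in `seismic_zfp.utils` rather than in this diff, so the packing below is a sketch; the little-endian format string is an assumption:

```python
import struct

def double_to_bytes(value):
    # Assumed implementation: pack one IEEE 754 double.
    # "<d" (little-endian) is a guess; the real helper fixes the byte order.
    return struct.pack("<d", float(value))

# Mirroring the hunk with a toy samples axis (in ms):
samples = [2.5, 6.5, 10.5]
header = bytearray(128)  # stand-in for the SGZ header buffer
header[84:92] = double_to_bytes(samples[0])                           # first sample
header[92:100] = double_to_bytes(1000.0 * (samples[1] - samples[0]))  # interval
```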

seismic_zfp/utils.py

Lines changed: 2 additions & 2 deletions

```diff
@@ -77,7 +77,7 @@ def read_range_blob(file, offset, length):
     return file.download_blob(offset=offset, length=length).readall()
 
 
-def generate_fake_seismic(n_ilines, n_xlines, n_samples, min_iline=0, min_xline=0):
+def generate_fake_seismic(n_ilines, n_xlines, n_samples, min_iline=0, min_xline=0, min_sample=0):
     # Generate an array which looks a *bit* like an impulse-response test...
     ilines, xlines, samples = np.arange(n_ilines), np.arange(n_xlines), np.arange(n_samples)
     array_shape = (n_ilines, n_xlines, n_samples)
@@ -87,7 +87,7 @@ def generate_fake_seismic(n_ilines, n_xlines, n_samples, min_iline=0, min_xline=
     s = np.broadcast_to(samples - n_samples / 4, array_shape).astype(np.float32)
     array = 0.01 + (np.sin(0.1 + np.sqrt(2.0 + (i+0.01) ** 2 + x ** 2 + (s*0.75) ** 2) / 8.0) /
                     (0.1 * np.sqrt(2.0 + (i+0.01) ** 2 + x ** 2 + (s*0.50) ** 2)))
-    return array, ilines+min_iline, xlines+min_xline, samples
+    return array, ilines+min_iline, xlines+min_xline, samples+min_sample
```
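With the new `min_sample` argument, the fake survey's samples axis can start anywhere, which is what lets the tests exercise a non-integer origin. A quick usage sketch with illustrative values:

```python
from seismic_zfp.utils import generate_fake_seismic

# e.g. a survey whose first sample sits at 2.5 ms:
array, ilines, xlines, samples = generate_fake_seismic(
    8, 12, 16, min_iline=100, min_xline=200, min_sample=2.5
)
print(samples[:3])  # [2.5 3.5 4.5] -- np.arange(16) shifted by min_sample
```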

tests/test_compress.py

Lines changed: 38 additions & 10 deletions

```diff
@@ -483,6 +483,7 @@ def compress_numpy_and_compare_data(n_samples, min_iline, n_ilines, min_xline, n
         # print(np.max(np.abs(reader.read_volume()-array)/np.abs(array)))
         assert np.allclose(reader.ilines, ilines, rtol=rtol)
         assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, samples, rtol=rtol)
         assert np.allclose(reader.read_volume(), array, rtol=rtol)
         assert 20 == reader.get_file_source_code()
 
@@ -507,6 +508,7 @@ def test_compress_numpy_data(tmp_path):
 
 
 def compress_stream_and_compare_data(
+    min_sample,
     n_samples,
     min_iline,
     n_ilines,
@@ -518,12 +520,13 @@ def compress_stream_and_compare_data(
     blockshape=(4, 4, -1),
 ):
 
-    out_sgz = os.path.join(str(tmp_path), "from-stream.sgz")
+    out_sgz_lower_samples_precision = os.path.join(str(tmp_path), "from-stream-lower-samples-precision.sgz")
+    out_sgz_higher_samples_precision = os.path.join(str(tmp_path), "from-stream-higher-samples-precision.sgz")
     out_sgz_numpy = os.path.join(str(tmp_path), "from-numpy.sgz")
     out_sgz_no_headers = os.path.join(str(tmp_path), "from-stream_no_headers.sgz")
 
     array, ilines, xlines, samples = generate_fake_seismic(
-        n_ilines, n_xlines, n_samples, min_iline=min_iline, min_xline=min_xline
+        n_ilines, n_xlines, n_samples, min_iline=min_iline, min_xline=min_xline, min_sample=min_sample
     )
 
     trace_headers = {
@@ -534,9 +537,10 @@ def compress_stream_and_compare_data(
             xlines, (n_ilines, n_xlines)
         ),
     }
+    lower_samples_precision = samples.astype(int)
 
     with StreamConverter(
-        out_sgz,
+        out_sgz_lower_samples_precision,
         ilines=ilines,
         xlines=xlines,
         samples=samples,
@@ -548,10 +552,31 @@ def compress_stream_and_compare_data(
             end = min(i + blockshape[0], array.shape[0])
             chunk = array[i:end, :, :]
             converter.write(chunk)
+    with SgzReader(out_sgz_lower_samples_precision) as reader:
+        assert np.allclose(reader.ilines, ilines, rtol=rtol)
+        assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, lower_samples_precision, rtol=rtol)
+        assert np.allclose(reader.read_volume(), array, rtol=rtol)
+        assert 20 == reader.get_file_source_code()
 
-    with SgzReader(out_sgz) as reader:
+    with StreamConverter(
+        out_sgz_higher_samples_precision,
+        ilines=ilines,
+        xlines=xlines,
+        samples=samples,
+        bits_per_voxel=bits_per_voxel,
+        blockshape=blockshape,
+        trace_headers=trace_headers,
+        use_higher_samples_precision=True
+    ) as converter:
+        for i in range(0, array.shape[0], blockshape[0]):
+            end = min(i + blockshape[0], array.shape[0])
+            chunk = array[i:end, :, :]
+            converter.write(chunk)
+    with SgzReader(out_sgz_higher_samples_precision) as reader:
         assert np.allclose(reader.ilines, ilines, rtol=rtol)
         assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, samples, rtol=rtol)
         assert np.allclose(reader.read_volume(), array, rtol=rtol)
         assert 20 == reader.get_file_source_code()
 
@@ -563,6 +588,7 @@ def compress_stream_and_compare_data(
         bits_per_voxel=bits_per_voxel,
         blockshape=blockshape,
         trace_headers=trace_headers,
+        use_higher_samples_precision=False
     ) as converter:
         for i in range(0, array.shape[0], blockshape[0]):
             end = min(i + blockshape[0], array.shape[0])
@@ -572,6 +598,7 @@ def compress_stream_and_compare_data(
     with SgzReader(out_sgz_no_headers) as reader:
         assert np.allclose(reader.ilines, ilines, rtol=rtol)
         assert np.allclose(reader.xlines, xlines, rtol=rtol)
+        assert np.allclose(reader.zslices, lower_samples_precision, rtol=rtol)
         assert np.allclose(reader.read_volume(), array, rtol=rtol)
         assert 20 == reader.get_file_source_code()
         stream_hash = reader.get_source_data_hash()
@@ -586,14 +613,15 @@ def compress_stream_and_compare_data(
 
 
 def test_compress_stream(tmp_path):
-    compress_stream_and_compare_data(16, 0, 8, 8, 12, tmp_path, 8, 1e-3)
-    compress_stream_and_compare_data(801, 0, 8, 8, 12, tmp_path, 8, 1e-3)
-    compress_stream_and_compare_data(512, 0, 9, 8, 12, tmp_path, 8, 1e-3)
-    compress_stream_and_compare_data(512, 0, 8, 8, 13, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 16, 0, 8, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(2.5, 16, 0, 8, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 801, 0, 8, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 512, 0, 9, 8, 12, tmp_path, 8, 1e-3)
+    compress_stream_and_compare_data(0, 512, 0, 8, 8, 13, tmp_path, 8, 1e-3)
 
     compress_stream_and_compare_data(
-        17, 0, 65, 8, 65, tmp_path, 8, 1e-2, blockshape=(32, 32, 4)
+        0, 17, 0, 65, 8, 65, tmp_path, 8, 1e-2, blockshape=(32, 32, 4)
     )
     compress_stream_and_compare_data(
-        801, 0, 9, 8, 13, tmp_path, 8, 1e-2, blockshape=(16, 16, 16)
+        0, 801, 0, 9, 8, 13, tmp_path, 8, 1e-2, blockshape=(16, 16, 16)
     )
```
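The paired reader blocks above are the point of the test: the same non-integer samples axis round-trips exactly when the flag is set, and collapses to `samples.astype(int)` when it is not. Read back outside the test, that difference looks roughly like this (the import path for `SgzReader` is assumed, and the file names come from the test setup):

```python
from seismic_zfp.read import SgzReader  # assumed import path

with SgzReader("from-stream-higher-samples-precision.sgz") as reader:
    print(reader.zslices[:2])  # e.g. [2.5, 3.5]: float64 preserved
with SgzReader("from-stream-lower-samples-precision.sgz") as reader:
    print(reader.zslices[:2])  # e.g. [2, 3]: truncated to integers
```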
