[segyio] 209/376: subtr support for start/stop/step
Jørgen Kvalsvik
jokva-guest at moszumanska.debian.org
Wed Sep 20 08:04:34 UTC 2017
This is an automated email from the git hooks/post-receive script.
jokva-guest pushed a commit to branch debian
in repository segyio.
commit 0edba852c434f214358a4be04179a51cea12589e
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date: Wed Feb 22 12:41:51 2017 +0100
subtr support for start/stop/step
Replaces the first-last interface of read/write_subtr with
start/stop/step, analoguous to python slices. With a step of 1, the most
common case, this covers the old chunk read, but the step allows greater
flexibility and maps nicely to features commonly found in higher-level
languages.
---
lib/include/segyio/segy.h | 31 +++++--
lib/src/segy.c | 158 ++++++++++++++++++++++++++++++-----
lib/test/segy.c | 208 +++++++++++++++++++++++++++++++++++++++++++++-
python/segyio/_segyio.c | 2 +
4 files changed, 367 insertions(+), 32 deletions(-)
diff --git a/lib/include/segyio/segy.h b/lib/include/segyio/segy.h
index 6247ef4..d7982b1 100644
--- a/lib/include/segyio/segy.h
+++ b/lib/include/segyio/segy.h
@@ -150,23 +150,44 @@ int segy_writetrace( segy_file*,
/*
* read/write sub traces, with the same assumption and requirements as
- * segy_readtrace. fst and lst are *indices*, not byte offsets, so
+ * segy_readtrace. start and stop are *indices*, not byte offsets, so
* segy_readsubtr(fp, traceno, 10, 12, ...) reads samples 10 through 12, and
* not bytes 10 through 12.
+ *
+ * start and stop are in the range [start,stop), so start=0, stop=5, step=2
+ * yields [0, 2, 4], whereas stop=4 yields [0, 2]
+ *
+ * When step is negative, the subtrace will be read in reverse. If step is
+ * negative and [0,n) is desired, pass use -1 for stop. Other negative values
+ * are undefined. If the range [n, m) where m is larger than the samples is
+ * considered undefined. Any [n, m) where distance(n,m) > samples is undefined.
+ *
+ * The parameter rangebuf is a pointer to a buffer of at least abs(stop-start)
+ * size. This is largely intended for script-C boundaries. In code paths where
+ * step is not 1 or -1, and mmap is not activated, these functions will
+ * *allocate* a buffer to read data from file in chunks. This is a significant
+ * speedup over multiple fread calls, at the cost of a clunkier interface. This
+ * is a tradeoff, since this function is often called in an inner loop. If
+ * you're fine with these functions allocating and freeing this buffer for you,
+ * rangebuf can be NULL.
*/
int segy_readsubtr( segy_file*,
int traceno,
- int fst,
- int lst,
+ int start,
+ int stop,
+ int step,
float* buf,
+ float* rangebuf,
long trace0,
int trace_bsize );
int segy_writesubtr( segy_file*,
int traceno,
- int fst,
- int lst,
+ int start,
+ int stop,
+ int step,
const float* buf,
+ float* rangebuf,
long trace0,
int trace_bsize );
diff --git a/lib/src/segy.c b/lib/src/segy.c
index 83e8838..785e370 100644
--- a/lib/src/segy.c
+++ b/lib/src/segy.c
@@ -1032,7 +1032,7 @@ int segy_lines_count( segy_file* fp,
return SEGY_OK;
}
-/*
+/*
* segy_*line_length is rather pointless as a computation, but serve a purpose
* as an abstraction as the detail on how exactly a length is defined is usually uninteresting
*/
@@ -1090,52 +1090,119 @@ int segy_crossline_indices( segy_file* fp,
static inline int subtr_seek( segy_file* fp,
int traceno,
- int fst,
- int lst,
+ int start,
+ int stop,
long trace0,
int trace_bsize ) {
/*
* Optimistically assume that indices are correct by the time they're given
* to subtr_seek.
*/
- assert( lst >= fst && fst >= 0 );
+ int min = start < stop ? start : stop + 1;
assert( sizeof( float ) == 4 );
- assert( (lst - fst) * sizeof( float ) <= (size_t)trace_bsize );
+ assert( start >= 0 );
+ assert( stop >= -1 );
+ assert( abs(stop - start) * (int)sizeof( float ) <= trace_bsize );
- // skip the trace header and skip everything before fst.
- trace0 += (fst * sizeof( float )) + SEGY_TRACE_HEADER_SIZE;
+ // skip the trace header and skip everything before min
+ trace0 += (min * (int)sizeof( float )) + SEGY_TRACE_HEADER_SIZE;
return segy_seek( fp, traceno, trace0, trace_bsize );
}
+static int slicelength( int start, int stop, int step ) {
+ if( ( step < 0 && stop >= start ) ||
+ ( step > 0 && start >= stop ) ) return 0;
+
+ if( step < 0 ) return (stop - start + 1) / step + 1;
+
+ return (stop - start - 1) / step + 1;
+}
+
+static int reverse( float* arr, int elems ) {
+ float tmp;
+ const int last = elems - 1;
+ for( int i = 0; i < elems / 2; ++i ) {
+ tmp = arr[ i ];
+ arr[ i ] = arr[ last - i ];
+ arr[ last - i ] = tmp;
+ }
+
+ return SEGY_OK;
+}
int segy_readtrace( segy_file* fp,
int traceno,
float* buf,
long trace0,
int trace_bsize ) {
- const int lst = trace_bsize / sizeof( float );
- return segy_readsubtr( fp, traceno, 0, lst, buf, trace0, trace_bsize );
+ const int stop = trace_bsize / sizeof( float );
+ return segy_readsubtr( fp, traceno, 0, stop, 1, buf, NULL, trace0, trace_bsize );
}
int segy_readsubtr( segy_file* fp,
int traceno,
- int fst,
- int lst,
+ int start,
+ int stop,
+ int step,
float* buf,
+ float* rangebuf,
long trace0,
int trace_bsize ) {
- int err = subtr_seek( fp, traceno, fst, lst, trace0, trace_bsize );
+ int err = subtr_seek( fp, traceno, start, stop, trace0, trace_bsize );
if( err != SEGY_OK ) return err;
+ const size_t elems = abs( stop - start );
+
+ // most common case: step == abs(1), reading contiguously
+ if( step == 1 || step == -1 ) {
+
+ if( fp->addr ) {
+ memcpy( buf, fp->cur, sizeof( float ) * elems );
+ } else {
+ const size_t readc = fread( buf, sizeof( float ), elems, fp->fp );
+ if( readc != elems ) return SEGY_FREAD_ERROR;
+ }
+
+ if( step == -1 ) reverse( buf, elems );
+
+ return SEGY_OK;
+ }
+
+ // step != 1, i.e. do strided reads
+ int defstart = start < stop ? 0 : elems - 1;
+ int slicelen = slicelength( start, stop, step );
+
if( fp->addr ) {
- memcpy( buf, fp->cur, sizeof( float ) * ( lst - fst ) );
+ float* cur = (float*)fp->cur + defstart;
+ for( ; slicelen > 0; cur += step, ++buf, --slicelen )
+ *buf = *cur;
+
return SEGY_OK;
}
- const size_t readc = fread( buf, sizeof( float ), lst - fst, fp->fp );
- if( readc != lst - fst ) return SEGY_FREAD_ERROR;
+ /*
+ * fread fallback: read the full chunk [start, stop) to avoid multiple
+ * fread calls (which are VERY expensive, measured to about 10x the cost of
+ * a single read when reading every other trace). If rangebuf is NULL, the
+ * caller has not supplied a buffer for us to use (likely if it's a
+ * one-off, and we heap-alloc a buffer. This way the function is safer to
+ * use, but with a significant performance penalty when no buffer is
+ * supplied.
+ */
+ float* tracebuf = rangebuf ? rangebuf : malloc( elems * sizeof( float ) );
+
+ const size_t readc = fread( tracebuf, sizeof( float ), elems, fp->fp );
+ if( readc != elems ) {
+ free( tracebuf );
+ return SEGY_FREAD_ERROR;
+ }
+
+ float* cur = tracebuf + defstart;
+ for( ; slicelen > 0; cur += step, --slicelen, ++buf )
+ *buf = *cur;
+ free( tracebuf );
return SEGY_OK;
}
@@ -1145,28 +1212,73 @@ int segy_writetrace( segy_file* fp,
long trace0,
int trace_bsize ) {
- const int lst = trace_bsize / sizeof( float );
- return segy_writesubtr( fp, traceno, 0, lst, buf, trace0, trace_bsize );
+ const int stop = trace_bsize / sizeof( float );
+ return segy_writesubtr( fp, traceno, 0, stop, 1, buf, NULL, trace0, trace_bsize );
}
int segy_writesubtr( segy_file* fp,
int traceno,
- int fst,
- int lst,
+ int start,
+ int stop,
+ int step,
const float* buf,
+ float* rangebuf,
long trace0,
int trace_bsize ) {
- int err = subtr_seek( fp, traceno, fst, lst, trace0, trace_bsize );
+ int err = subtr_seek( fp, traceno, start, stop, trace0, trace_bsize );
if( err != SEGY_OK ) return err;
+ const size_t elems = abs( stop - start );
+
+ if( step == 1 ) {
+ /*
+ * most common case: step == 1, writing contiguously
+ * -1 is not covered here as it would require reversing the input buffer
+ * (which is const), which in turn may require a memory allocation. It will
+ * be handled by the stride-aware code path
+ */
+ if( fp->addr ) {
+ memcpy( fp->cur, buf, sizeof( float ) * elems );
+ } else {
+ const size_t writec = fwrite( buf, sizeof( float ), elems, fp->fp );
+ if( writec != elems ) return SEGY_FWRITE_ERROR;
+ }
+
+ return SEGY_OK;
+ }
+
+ // step != 1, i.e. do strided reads
+ int defstart = start < stop ? 0 : elems - 1;
+ int slicelen = slicelength( start, stop, step );
+
if( fp->addr ) {
- memcpy( fp->cur, buf, sizeof( float ) * ( lst - fst ) );
+ /* if mmap is on, strided write is trivial and fast */
+ float* cur = (float*)fp->cur + defstart;
+ for( ; slicelen > 0; cur += step, ++buf, --slicelen )
+ *cur = *buf;
+
return SEGY_OK;
}
- const size_t writec = fwrite( buf, sizeof( float ), lst - fst, fp->fp );
- if( writec != lst - fst ) return SEGY_FWRITE_ERROR;
+ const int elemsize = elems * sizeof( float );
+ float* tracebuf = rangebuf ? rangebuf : malloc( elemsize );
+
+ // like in readsubtr, read a larger chunk and then step through that
+ const size_t readc = fread( tracebuf, sizeof( float ), elems, fp->fp );
+ if( readc != elems ) { free( tracebuf ); return SEGY_FREAD_ERROR; }
+ /* rewind, because fread advances the file pointer */
+ err = fseek( fp->fp, -elemsize, SEEK_CUR );
+ if( err != 0 ) { free( tracebuf ); return SEGY_FSEEK_ERROR; }
+
+ float* cur = tracebuf + defstart;
+ for( ; slicelen > 0; cur += step, --slicelen, ++buf )
+ *cur = *buf;
+
+ const size_t writec = fwrite( tracebuf, sizeof( float ), elems, fp->fp );
+ free( tracebuf );
+
+ if( writec != elems ) return SEGY_FWRITE_ERROR;
return SEGY_OK;
}
diff --git a/lib/test/segy.c b/lib/test/segy.c
index d6a9164..90a4073 100644
--- a/lib/test/segy.c
+++ b/lib/test/segy.c
@@ -108,6 +108,193 @@ static void test_interpret_file() {
segy_close( fp );
}
+static void test_read_subtr( bool mmap ) {
+ const char *file = "test-data/small.sgy";
+ segy_file* fp = segy_open( file, "rb" );
+
+ const int format = SEGY_IBM_FLOAT_4_BYTE;
+ int trace0 = 3600;
+ int trace_bsize = 50 * 4;
+ int err = 0;
+
+ if( mmap ) segy_mmap( fp );
+
+ float buf[ 5 ];
+ /* read a strided chunk across the middle of all traces */
+ /* should read samples 3, 8, 13, 18 */
+ err = segy_readsubtr( fp,
+ 10,
+ 3, /* start */
+ 19, /* stop */
+ 5, /* step */
+ buf,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly read subtrace" );
+ segy_to_native( format, 4, buf );
+ assertClose( 3.20003, buf[ 0 ], 1e-6 );
+ assertClose( 3.20008, buf[ 1 ], 1e-6 );
+ assertClose( 3.20013, buf[ 2 ], 1e-6 );
+ assertClose( 3.20018, buf[ 3 ], 1e-6 );
+
+ err = segy_readsubtr( fp,
+ 10,
+ 18, /* start */
+ 2, /* stop */
+ -5, /* step */
+ buf,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly read subtrace" );
+ segy_to_native( format, 4, buf );
+ assertClose( 3.20003, buf[ 3 ], 1e-6 );
+ assertClose( 3.20008, buf[ 2 ], 1e-6 );
+ assertClose( 3.20013, buf[ 1 ], 1e-6 );
+ assertClose( 3.20018, buf[ 0 ], 1e-6 );
+
+ err = segy_readsubtr( fp,
+ 10,
+ 3, /* start */
+ -1, /* stop */
+ -1, /* step */
+ buf,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly read subtrace" );
+ segy_to_native( format, 4, buf );
+ assertClose( 3.20000, buf[ 3 ], 1e-6 );
+ assertClose( 3.20001, buf[ 2 ], 1e-6 );
+ assertClose( 3.20002, buf[ 1 ], 1e-6 );
+ assertClose( 3.20003, buf[ 0 ], 1e-6 );
+
+ err = segy_readsubtr( fp,
+ 10,
+ 24, /* start */
+ -1, /* stop */
+ -5, /* step */
+ buf,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly read subtrace" );
+ segy_to_native( format, 5, buf );
+ assertClose( 3.20004, buf[ 4 ], 1e-6 );
+ assertClose( 3.20009, buf[ 3 ], 1e-6 );
+ assertClose( 3.20014, buf[ 2 ], 1e-6 );
+ assertClose( 3.20019, buf[ 1 ], 1e-6 );
+ assertClose( 3.20024, buf[ 0 ], 1e-6 );
+
+ segy_close( fp );
+}
+
+static void test_write_subtr( bool mmap ) {
+ const char *file = "test-data/write-subtr.sgy";
+ segy_file* fp = segy_open( file, "w+b" );
+
+ const int format = SEGY_IBM_FLOAT_4_BYTE;
+ int trace0 = 3600;
+ int trace_bsize = 50 * 4;
+ int err = 0;
+
+ if( mmap ) segy_mmap( fp );
+
+ // since write-subtr.segy is an empty file we must pre-write a full trace
+ // to make sure *something* exists, otherwise we'd get a read error.
+ float dummytrace[ 50 ] = { 0 };
+ err = segy_writetrace( fp, 10, dummytrace, trace0, trace_bsize );
+ assertTrue( err == 0, "Error in write-subtr setup." );
+
+ float buf[ 5 ];
+ float out[ 5 ] = { 2.0, 2.1, 2.2, 2.3, 2.4 };
+ segy_from_native( format, 5, out );
+
+ /* read a strided chunk across the middle of all traces */
+ /* should read samples 3, 8, 13, 18 */
+ err = segy_writesubtr( fp,
+ 10,
+ 3, /* start */
+ 19, /* stop */
+ 5, /* step */
+ out,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly write subtrace" );
+ err = segy_readsubtr( fp,
+ 10,
+ 3, /* start */
+ 19, /* stop */
+ 5, /* step */
+ buf,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly read subtrace" );
+ segy_to_native( format, 4, buf );
+ assertClose( 2.0, buf[ 0 ], 1e-6 );
+ assertClose( 2.1, buf[ 1 ], 1e-6 );
+ assertClose( 2.2, buf[ 2 ], 1e-6 );
+ assertClose( 2.3, buf[ 3 ], 1e-6 );
+ err = segy_writesubtr( fp,
+ 10,
+ 18, /* start */
+ 2, /* stop */
+ -5, /* step */
+ out,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly write subtrace" );
+ err = segy_readsubtr( fp,
+ 10,
+ 18, /* start */
+ 2, /* stop */
+ -5, /* step */
+ buf,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly read subtrace" );
+ segy_to_native( format, 4, buf );
+ assertClose( 2.3, buf[ 3 ], 1e-6 );
+ assertClose( 2.2, buf[ 2 ], 1e-6 );
+ assertClose( 2.1, buf[ 1 ], 1e-6 );
+ assertClose( 2.0, buf[ 0 ], 1e-6 );
+
+ /* write back-to-front, read front-to-back */
+ err = segy_writesubtr( fp,
+ 10,
+ 24, /* start */
+ -1, /* stop */
+ -5, /* step */
+ out,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly write subtrace" );
+ err = segy_readsubtr( fp,
+ 10,
+ 4, /* start */
+ 25, /* stop */
+ 5, /* step */
+ buf,
+ NULL,
+ trace0,
+ trace_bsize );
+ assertTrue( err == 0, "Unable to correctly read subtrace" );
+ segy_to_native( format, 5, buf );
+ assertClose( 2.0, buf[ 4 ], 1e-6 );
+ assertClose( 2.1, buf[ 3 ], 1e-6 );
+ assertClose( 2.2, buf[ 2 ], 1e-6 );
+ assertClose( 2.3, buf[ 1 ], 1e-6 );
+ assertClose( 2.4, buf[ 0 ], 1e-6 );
+
+ segy_close( fp );
+}
+
/* Prestack test for when we have an approperiate prestack file
void test_interpret_file_prestack() {
@@ -550,16 +737,16 @@ static void test_file_error_codes() {
assertTrue( err == SEGY_FSEEK_ERROR, "Could seek in invalid file." );
int l1, l2;
- err = segy_readsubtr( fp, 0, 2, 0, NULL, 3600, 350 );
+ err = segy_readsubtr( fp, 0, 2, 0, 1, NULL, NULL, 3600, 350 );
assertTrue( err == SEGY_INVALID_ARGS, "Could pass fst larger than lst" );
- err = segy_readsubtr( fp, 0, -1, 10, NULL, 3600, 350 );
+ err = segy_readsubtr( fp, 0, -1, 10, 1, NULL, NULL, 3600, 350 );
assertTrue( err == SEGY_INVALID_ARGS, "Could pass negative fst" );
- err = segy_writesubtr( fp, 0, 2, 0, NULL, 3600, 350 );
+ err = segy_writesubtr( fp, 0, 2, 0, 1, NULL, NULL, 3600, 350 );
assertTrue( err == SEGY_INVALID_ARGS, "Could pass fst larger than lst" );
- err = segy_writesubtr( fp, 0, -1, 10, NULL, 3600, 350 );
+ err = segy_writesubtr( fp, 0, -1, 10, 1, NULL, NULL, 3600, 350 );
assertTrue( err == SEGY_INVALID_ARGS, "Could pass negative fst" );
err = segy_count_lines( fp, 0, 1, &l1, &l2, 3600, 350 );
@@ -617,6 +804,19 @@ int main() {
test_text_header();
puts("test traceh");
test_trace_header_errors();
+
+ puts("test readsubtr(mmap)");
+ test_read_subtr( true );
+
+ puts("test readsubtr(no-mmap)");
+ test_read_subtr( false );
+
+ /*
+ * this test *creates* a new file, which is currently won't mmap (since we
+ * cannot determine the size, as this does not require any geometry.
+ */
+ puts("test writesubtr(no-mmap)");
+ test_write_subtr( false );
/*
* due to its barely-defined behavorial nature, this test is commented out
* for most runs, as it would trip up the memcheck test
diff --git a/python/segyio/_segyio.c b/python/segyio/_segyio.c
index 347dd9f..8e71778 100644
--- a/python/segyio/_segyio.c
+++ b/python/segyio/_segyio.c
@@ -988,7 +988,9 @@ static PyObject *py_read_depth_slice(PyObject *self, PyObject *args) {
trace_no * offsets,
depth,
depth + 1,
+ 1,
buf++,
+ NULL,
trace0, trace_bsize);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git
More information about the debian-science-commits
mailing list