[segyio] 211/376: segy_field_forall - read word from set of headers

Jørgen Kvalsvik jokva-guest at moszumanska.debian.org
Wed Sep 20 08:04:35 UTC 2017


This is an automated email from the git hooks/post-receive script.

jokva-guest pushed a commit to branch debian
in repository segyio.

commit 4e39c08b75e3207658b8568fb85cad89bc94f8c8
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date:   Fri Feb 24 12:42:59 2017 +0100

    segy_field_forall - read word from set of headers
    
    A common use case is to inspect some property of a file by scanning
    through all or a subset of headers and look at one field in particular.
    Doing this operation in a scripting language over medium and large files
    is very computationally expensive, but it's rather cheap if you drop
    down in C.
---
 lib/include/segyio/segy.h |  9 +++++
 lib/src/segy.c            | 68 ++++++++++++++++++++++++++++-----
 lib/src/segy.def          |  1 +
 lib/test/segy.c           | 97 ++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 164 insertions(+), 11 deletions(-)

diff --git a/lib/include/segyio/segy.h b/lib/include/segyio/segy.h
index 8922b9f..2d13d32 100644
--- a/lib/include/segyio/segy.h
+++ b/lib/include/segyio/segy.h
@@ -61,6 +61,15 @@ int segy_get_bfield( const char* binheader, int field, int32_t* f );
 int segy_set_field( char* traceheader, int field, int32_t val );
 int segy_set_bfield( char* binheader, int field, int32_t val );
 
+int segy_field_forall( segy_file*,
+                       int field,
+                       int start,
+                       int stop,
+                       int step,
+                       int* buf,
+                       long trace0,
+                       int trace_bsize );
+
 /*
  * exception: segy_trace_bsize computes the size of the traces in bytes. Cannot
  * fail.
diff --git a/lib/src/segy.c b/lib/src/segy.c
index 379e038..5b82640 100644
--- a/lib/src/segy.c
+++ b/lib/src/segy.c
@@ -531,6 +531,65 @@ int segy_set_bfield( char* binheader, int field, int val ) {
     return set_field( binheader, bfield_size, field, val );
 }
 
+static int slicelength( int start, int stop, int step ) {
+    if( ( step < 0 && stop >= start ) ||
+        ( step > 0 && start >= stop ) ) return 0;
+
+    if( step < 0 ) return (stop - start + 1) / step + 1;
+
+    return (stop - start - 1) / step + 1;
+}
+
+int segy_field_forall( segy_file* fp,
+                       int field,
+                       int start,
+                       int stop,
+                       int step,
+                       int* buf,
+                       long trace0,
+                       int trace_bsize ) {
+    int err;
+    // do a dummy-read of a zero-init'd buffer to check args
+    int32_t f;
+    char header[ SEGY_TRACE_HEADER_SIZE ] = { 0 };
+    err = segy_get_field( header, field, &f );
+    if( err != SEGY_OK ) return SEGY_INVALID_ARGS;
+
+    int slicelen = slicelength( start, stop, step );
+
+    if( fp->addr ) {
+        for( int i = start; slicelen > 0; i += step, ++buf, --slicelen ) {
+            err = segy_seek( fp, i, trace0, trace_bsize );
+            if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+            segy_get_field( fp->cur, field, &f );
+            *buf = f;
+        }
+
+        return SEGY_OK;
+    }
+
+    /*
+     * non-mmap path. Doing multiple freads is slow, so instead the *actual*
+     * offset is computed, not just the start of the header, and that's copied
+     * into the correct offset in our local buffer.
+     *
+     * Always read 4 bytes to be sure, there's no significant cost difference.
+     */
+    size_t readc;
+    for( int i = start; slicelen > 0; i += step, ++buf, --slicelen ) {
+        err = segy_seek( fp, i, trace0 + field, trace_bsize );
+        if( err != 0 ) return SEGY_FSEEK_ERROR;
+        readc = fread( header + field, sizeof( uint32_t ), 1, fp->fp );
+        if( readc != 1 ) return SEGY_FREAD_ERROR;
+
+        segy_get_field( header, field, &f );
+        *buf = f;
+    }
+
+    return SEGY_OK;
+}
+
 int segy_binheader( segy_file* fp, char* buf ) {
     if(fp == NULL) {
         return SEGY_INVALID_ARGS;
@@ -1109,15 +1168,6 @@ static inline int subtr_seek( segy_file* fp,
     return segy_seek( fp, traceno, trace0, trace_bsize );
 }
 
-static int slicelength( int start, int stop, int step ) {
-    if( ( step < 0 && stop >= start ) ||
-        ( step > 0 && start >= stop ) ) return 0;
-
-    if( step < 0 ) return (stop - start + 1) / step + 1;
-
-    return (stop - start - 1) / step + 1;
-}
-
 static int reverse( float* arr, int elems ) {
     float tmp;
     const int last = elems - 1;
diff --git a/lib/src/segy.def b/lib/src/segy.def
index 43907ee..90411f7 100644
--- a/lib/src/segy.def
+++ b/lib/src/segy.def
@@ -14,6 +14,7 @@ segy_get_field
 segy_get_bfield
 segy_set_field
 segy_set_bfield
+segy_field_forall
 segy_trace_bsize
 segy_trace0
 segy_traces
diff --git a/lib/test/segy.c b/lib/test/segy.c
index 90a4073..3cb243f 100644
--- a/lib/test/segy.c
+++ b/lib/test/segy.c
@@ -193,12 +193,11 @@ static void test_read_subtr( bool mmap ) {
 static void test_write_subtr( bool mmap ) {
     const char *file = "test-data/write-subtr.sgy";
     segy_file* fp = segy_open( file, "w+b" );
-
-    const int format = SEGY_IBM_FLOAT_4_BYTE;
     int trace0 = 3600;
     int trace_bsize = 50 * 4;
     int err = 0;
 
+    const int format = SEGY_IBM_FLOAT_4_BYTE;
     if( mmap ) segy_mmap( fp );
 
     // since write-subtr.segy is an empty file we must pre-write a full trace
@@ -295,6 +294,94 @@ static void test_write_subtr( bool mmap ) {
     segy_close( fp );
 }
 
+static const int inlines[] = {
+    1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3,
+    4, 4, 4, 4, 4,
+    5, 5, 5, 5, 5,
+};
+
+static const int crosslines[] = {
+    20, 21, 22, 23, 24,
+    20, 21, 22, 23, 24,
+    20, 21, 22, 23, 24,
+    20, 21, 22, 23, 24,
+    20, 21, 22, 23, 24,
+};
+
+static void test_scan_headers( bool mmap ) {
+    const char *file = "test-data/small.sgy";
+    segy_file* fp = segy_open( file, "rb" );
+
+    int trace0 = 3600;
+    int trace_bsize = 50 * 4;
+    int err = 0;
+
+    if( mmap) segy_mmap( fp );
+
+    int full[ 25 ];
+    err = segy_field_forall( fp,
+                           SEGY_TR_INLINE,
+                           0,        /* start */
+                           25,       /* stop */
+                           1,        /* step */
+                           full,
+                           trace0,
+                           trace_bsize );
+    assertTrue( err == 0, "Unable to correctly scan headers" );
+    assertTrue( memcmp( full, inlines, sizeof( full ) ) == 0,
+                "Mismatching inline numbers." );
+
+    err = segy_field_forall( fp,
+                             SEGY_TR_CROSSLINE,
+                             0,        /* start */
+                             25,       /* stop */
+                             1,        /* step */
+                             full,
+                             trace0,
+                             trace_bsize );
+    assertTrue( err == 0, "Unable to correctly scan headers" );
+    assertTrue( memcmp( full, crosslines, sizeof( full ) ) == 0,
+                "Mismatching inline numbers." );
+
+    const int xlines_skip[] = {
+        crosslines[ 1 ],  crosslines[ 4 ],  crosslines[ 7 ],  crosslines[ 10 ],
+        crosslines[ 13 ], crosslines[ 16 ], crosslines[ 19 ], crosslines[ 22 ],
+    };
+
+    int xlbuf[ sizeof( xlines_skip ) / sizeof( int ) ];
+    err = segy_field_forall( fp,
+                             SEGY_TR_CROSSLINE,
+                             1,        /* start */
+                             25,       /* stop */
+                             3,        /* step */
+                             xlbuf,
+                             trace0,
+                             trace_bsize );
+    assertTrue( err == 0, "Unable to correctly scan headers" );
+    assertTrue( memcmp( xlbuf, xlines_skip, sizeof( xlbuf ) ) == 0,
+                "Mismatching skipped crossline numbers." );
+
+    const int xlines_skip_rev[] = {
+        crosslines[ 22 ], crosslines[ 19 ], crosslines[ 16 ], crosslines[ 13 ],
+        crosslines[ 10 ], crosslines[ 7 ],  crosslines[ 4 ],  crosslines[ 1 ],
+    };
+    err = segy_field_forall( fp,
+                             SEGY_TR_CROSSLINE,
+                             22,      /* start */
+                             0,       /* stop */
+                             -3,      /* step */
+                             xlbuf,
+                             trace0,
+                             trace_bsize );
+    assertTrue( err == 0, "Unable to correctly scan headers" );
+    assertTrue( memcmp( xlbuf, xlines_skip_rev, sizeof( xlbuf ) ) == 0,
+                "Mismatching reverse skipped crossline numbers." );
+
+    segy_close( fp );
+}
+
 /* Prestack test for when we have an approperiate prestack file
 
 void test_interpret_file_prestack() {
@@ -817,6 +904,12 @@ int main() {
      */
     puts("test writesubtr(no-mmap)");
     test_write_subtr( false );
+
+    puts("test scan headers(mmap)");
+    test_scan_headers( true );
+
+    puts("test scan headers(no-mmap)");
+    test_scan_headers( false );
     /*
      * due to its barely-defined behavorial nature, this test is commented out
      * for most runs, as it would trip up the memcheck test

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git



More information about the debian-science-commits mailing list