[segyio] 176/376: Handle 4G+ files on sizeof(long) == 4 platforms.

Jørgen Kvalsvik jokva-guest at moszumanska.debian.org
Wed Sep 20 08:04:28 UTC 2017


This is an automated email from the git hooks/post-receive script.

jokva-guest pushed a commit to branch debian
in repository segyio.

commit dabfa4b9db5c25eda53db5554adda53650ac27bd
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date:   Fri Feb 10 10:35:59 2017 +0100

    Handle 4G+ files on sizeof(long) == 4 platforms.
    
    Using size_t for file positions relies on an implicit conversion to
    long in calls to fseek and friends. Long is *not* guaranteed to be
    64-bit, so this effectively restricts portable support for very large
    files.
    
    segy_seek has been rewritten to take LONG_MAX into account and to do
    repeated relative seeks should the position overflow a long. On most
    platforms this path should never be taken and should be optimised out.
---
 cmake/check_includes.cmake | 11 +++++++
 lib/src/segy.c             | 73 ++++++++++++++++++++++++++++++++++++----------
 lib/src/segyio/util.h      |  4 +--
 lib/test/segy.c            | 16 ++++++----
 4 files changed, 81 insertions(+), 23 deletions(-)

diff --git a/cmake/check_includes.cmake b/cmake/check_includes.cmake
index 04d36b5..a7d022c 100644
--- a/cmake/check_includes.cmake
+++ b/cmake/check_includes.cmake
@@ -1,4 +1,5 @@
 include(CheckIncludeFile)
+include(CheckFunctionExists)
 
 # Portability checks; look for htons function
 check_include_file("netinet/in.h" HAVE_NETINET_IN_H)
@@ -20,3 +21,13 @@ check_include_file("sys/mman.h" HAVE_SYS_MMAN_H)
 if (HAVE_SYS_MMAN_H)
     add_definitions("-DHAVE_MMAP")
 endif()
+
+check_include_file("sys/stat.h" HAVE_SYS_STAT_H)
+if (HAVE_SYS_STAT_H)
+    add_definitions("-DHAVE_SYS_STAT_H")
+
+    check_function_exists(_fstati64 HAVE_FSTATI64)
+    if (HAVE_FSTATI64)
+        add_definitions("-DHAVE_FSTATI64")
+    endif ()
+endif()
diff --git a/lib/src/segy.c b/lib/src/segy.c
index b33c950..ec1d7c3 100644
--- a/lib/src/segy.c
+++ b/lib/src/segy.c
@@ -11,10 +11,16 @@
 #include <winsock2.h>
 #endif
 
+#ifdef HAVE_SYS_STAT_H
+  #include <sys/types.h>
+  #include <sys/stat.h>
+#endif //HAVE_SYS_STAT_H
+
 #include <assert.h>
+#include <limits.h>
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <stdio.h>
 
 #include <segyio/segy.h>
 #include <segyio/util.h>
@@ -305,20 +311,27 @@ static int bfield_size[] = {
  * pointer will be reset to wherever it was before this call. If this call
  * fails for some reason, the return value is 0 and the file pointer location
  * will be determined by the behaviour of fseek.
+ *
+ * sys/stat.h is POSIX, but is well enough supported by Windows. The long long
+ * data type is required to support files >4G (as long only guarantees 32 bits).
  */
-static int file_size( FILE* fp, size_t* size ) {
-    const long prev_pos = ftell( fp );
-
-    int err = fseek( fp, 0, SEEK_END );
-    if( err != 0 ) return SEGY_FSEEK_ERROR;
+#ifdef HAVE_SYS_STAT_H
+static int file_size( FILE* fp, long long* size ) {
+#ifdef HAVE_FSTATI64
+    // this means we're on windows where fstat is unreliable for filesizes >2G
+    // because long is only 4 bytes
+    struct _stati64 st;
+    const int err = _fstati64( fileno( fp ), &st );
+#else
+    struct stat st;
+    const int err = fstat( fileno( fp ), &st );
+#endif
 
-    const size_t sz = ftell( fp );
-    err = fseek( fp, prev_pos, SEEK_SET );
     if( err != 0 ) return SEGY_FSEEK_ERROR;
-
-    *size = sz;
+    *size = st.st_size;
     return SEGY_OK;
 }
+#endif //HAVE_SYS_STAT_H
 
 /*
  * addr is NULL if mmap is not found under compilation or if the file is
@@ -355,9 +368,11 @@ int segy_mmap( segy_file* fp ) {
     return SEGY_MMAP_INVALID;
 #else
 
-    int err = file_size( fp->fp, &fp->fsize );
+    long long fsize;
+    int err = file_size( fp->fp, &fsize );
 
     if( err != 0 ) return SEGY_FSEEK_ERROR;
+    fp->fsize = fsize;
 
     bool rw = strstr( fp->mode, "+" ) || strstr( fp->mode, "w" );
     const int prot =  rw ? PROT_READ | PROT_WRITE : PROT_READ;
@@ -392,8 +407,15 @@ int segy_flush( segy_file* fp, bool async ) {
     return SEGY_OK;
 }
 
-long segy_ftell( segy_file* fp ) {
+long long segy_ftell( segy_file* fp ) {
+#ifdef HAVE_FSTATI64
+    // assuming we're on windows. This function is a little rough, but only
+    // meant for testing - it's not a part of the public interface.
+    return _ftelli64( fp->fp );
+#else
+    assert( sizeof( long ) == sizeof( long long ) );
     return ftell( fp->fp );
+#endif
 }
 
 int segy_close( segy_file* fp ) {
@@ -551,12 +573,12 @@ long segy_trace0( const char* binheader ) {
 }
 
 int segy_seek( segy_file* fp,
-               unsigned int trace,
+               int trace,
                long trace0,
                unsigned int trace_bsize ) {
 
     trace_bsize += SEGY_TRACE_HEADER_SIZE;
-    const long pos = trace0 + ( (long)trace * (long)trace_bsize );
+    long long pos = (long long)trace0 + (trace * (long long)trace_bsize);
 
     if( fp->addr ) {
         if( (size_t)pos >= fp->fsize ) return SEGY_FSEEK_ERROR;
@@ -565,7 +587,26 @@ int segy_seek( segy_file* fp,
         return SEGY_OK;
     }
 
-    const int err = fseek( fp->fp, pos, SEEK_SET );
+    int err = SEGY_OK;
+    if( sizeof( long ) == sizeof( long long ) ) {
+        err = fseek( fp->fp, pos, SEEK_SET );
+    } else {
+        /*
+         * If long is 32bit on our platform (hello, windows), we do skips according
+         * to LONG_MAX and seek relative to our cursor rather than absolute on file
+         * begin.
+         */
+        rewind( fp->fp );
+        while( pos >= LONG_MAX && err == SEGY_OK ) {
+            err = fseek( fp->fp, LONG_MAX, SEEK_CUR );
+            pos -= LONG_MAX;
+        }
+
+        if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+        err = fseek( fp->fp, pos, SEEK_CUR );
+    }
+
     if( err != 0 ) return SEGY_FSEEK_ERROR;
     return SEGY_OK;
 }
@@ -625,7 +666,7 @@ int segy_traces( segy_file* fp,
                  long trace0,
                  unsigned int trace_bsize ) {
 
-    size_t fsize;
+    long long fsize;
     int err = file_size( fp->fp, &fsize );
     if( err != 0 ) return err;
 
diff --git a/lib/src/segyio/util.h b/lib/src/segyio/util.h
index 7582f09..a2494c3 100644
--- a/lib/src/segyio/util.h
+++ b/lib/src/segyio/util.h
@@ -18,8 +18,8 @@ void ebcdic2ascii( const char* ebcdic, char* ascii );
 void ascii2ebcdic( const char* ascii, char* ebcdic );
 void ibm2ieee(void* to, const void* from);
 void ieee2ibm(void* to, const void* from);
-int segy_seek( struct segy_file_handle*, unsigned int, long, unsigned int );
-long segy_ftell( struct segy_file_handle* );
+int segy_seek( struct segy_file_handle*, int, long, unsigned int );
+long long segy_ftell( struct segy_file_handle* );
 
 #ifdef __cplusplus
 }
diff --git a/lib/test/segy.c b/lib/test/segy.c
index f4f053e..5b2eebb 100644
--- a/lib/test/segy.c
+++ b/lib/test/segy.c
@@ -1,3 +1,4 @@
+#include <limits.h>
 #include <math.h>
 #include <stdlib.h>
 
@@ -574,13 +575,18 @@ static void test_error_codes_sans_file() {
 static void test_file_size_above_4GB(){
     segy_file* fp = segy_open( "4gbfile", "w+b" );
 
-    unsigned int trace = 5e6;
-    unsigned int trace_bsize = 1e3;
+    unsigned int trace = 5000000;
+    unsigned int trace_bsize = 1000;
+    long long tracesize = trace_bsize + SEGY_TRACE_HEADER_SIZE;
     long trace0 = 0;
-    int err = segy_seek( fp, trace, trace0, trace_bsize);
+
+    int err = segy_seek( fp, trace, trace0, trace_bsize );
     assertTrue(err==0, "");
-    long pos = segy_ftell( fp );
-    assertTrue(pos == (long)trace*((long)trace_bsize+SEGY_TRACE_HEADER_SIZE), "seek overflow");
+
+    long long pos = segy_ftell( fp );
+    assertTrue(pos > (long long)INT_MAX, "pos smaller than INT_MAX. "
+                              "This means there's an overflow somewhere" );
+    assertTrue(pos == trace * tracesize, "seek overflow");
     segy_close(fp);
 }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git



More information about the debian-science-commits mailing list