[segyio] 176/376: Handle 4G+ files on sizeof(long) == 4 platforms.
Jørgen Kvalsvik
jokva-guest at moszumanska.debian.org
Wed Sep 20 08:04:28 UTC 2017
This is an automated email from the git hooks/post-receive script.
jokva-guest pushed a commit to branch debian
in repository segyio.
commit dabfa4b9db5c25eda53db5554adda53650ac27bd
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date: Fri Feb 10 10:35:59 2017 +0100
Handle 4G+ files on sizeof(long) == 4 platforms.
Using size_t for file positions relies on an implicit conversion to
long in calls to fseek and friends. Long is *not* guaranteed to be
64-bit, so this effectively restricts portable support for very large
files.
segy_seek has been rewritten to consider LONG_MAX and do repeated
relative seeks should the position overflow a long. On most platforms
this path should not be taken and should be optimised out.
---
cmake/check_includes.cmake | 11 +++++++
lib/src/segy.c | 73 ++++++++++++++++++++++++++++++++++++----------
lib/src/segyio/util.h | 4 +--
lib/test/segy.c | 16 ++++++----
4 files changed, 81 insertions(+), 23 deletions(-)
diff --git a/cmake/check_includes.cmake b/cmake/check_includes.cmake
index 04d36b5..a7d022c 100644
--- a/cmake/check_includes.cmake
+++ b/cmake/check_includes.cmake
@@ -1,4 +1,5 @@
include(CheckIncludeFile)
+include(CheckFunctionExists)
# Portability checks; look for htons function
check_include_file("netinet/in.h" HAVE_NETINET_IN_H)
@@ -20,3 +21,13 @@ check_include_file("sys/mman.h" HAVE_SYS_MMAN_H)
if (HAVE_SYS_MMAN_H)
add_definitions("-DHAVE_MMAP")
endif()
+
+check_include_file("sys/stat.h" HAVE_SYS_STAT_H)
+if (HAVE_SYS_STAT_H)
+ add_definitions("-DHAVE_SYS_STAT_H")
+
+ check_function_exists(_fstati64 HAVE_FSTATI64)
+ if (HAVE_FSTATI64)
+ add_definitions("-DHAVE_FSTATI64")
+ endif ()
+endif()
diff --git a/lib/src/segy.c b/lib/src/segy.c
index b33c950..ec1d7c3 100644
--- a/lib/src/segy.c
+++ b/lib/src/segy.c
@@ -11,10 +11,16 @@
#include <winsock2.h>
#endif
+#ifdef HAVE_SYS_STAT_H
+ #include <sys/types.h>
+ #include <sys/stat.h>
+#endif //HAVE_SYS_STAT_H
+
#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <stdio.h>
#include <segyio/segy.h>
#include <segyio/util.h>
@@ -305,20 +311,27 @@ static int bfield_size[] = {
* pointer will be reset to wherever it was before this call. If this call
* fails for some reason, the return value is 0 and the file pointer location
* will be determined by the behaviour of fseek.
+ *
+ * sys/stat.h is POSIX, but is well enough supported by Windows. The long long
+ * data type is required to support files >4G (as long only guarantees 32 bits).
*/
-static int file_size( FILE* fp, size_t* size ) {
- const long prev_pos = ftell( fp );
-
- int err = fseek( fp, 0, SEEK_END );
- if( err != 0 ) return SEGY_FSEEK_ERROR;
+#ifdef HAVE_SYS_STAT_H
+static int file_size( FILE* fp, long long* size ) {
+#ifdef HAVE_FSTATI64
+ // this means we're on windows where fstat is unreliable for filesizes >2G
+ // because long is only 4 bytes
+ struct _stati64 st;
+ const int err = _fstati64( fileno( fp ), &st );
+#else
+ struct stat st;
+ const int err = fstat( fileno( fp ), &st );
+#endif
- const size_t sz = ftell( fp );
- err = fseek( fp, prev_pos, SEEK_SET );
if( err != 0 ) return SEGY_FSEEK_ERROR;
-
- *size = sz;
+ *size = st.st_size;
return SEGY_OK;
}
+#endif //HAVE_SYS_STAT_H
/*
* addr is NULL if mmap is not found under compilation or if the file is
@@ -355,9 +368,11 @@ int segy_mmap( segy_file* fp ) {
return SEGY_MMAP_INVALID;
#else
- int err = file_size( fp->fp, &fp->fsize );
+ long long fsize;
+ int err = file_size( fp->fp, &fsize );
if( err != 0 ) return SEGY_FSEEK_ERROR;
+ fp->fsize = fsize;
bool rw = strstr( fp->mode, "+" ) || strstr( fp->mode, "w" );
const int prot = rw ? PROT_READ | PROT_WRITE : PROT_READ;
@@ -392,8 +407,15 @@ int segy_flush( segy_file* fp, bool async ) {
return SEGY_OK;
}
-long segy_ftell( segy_file* fp ) {
+long long segy_ftell( segy_file* fp ) {
+#ifdef HAVE_FSTATI64
+ // assuming we're on windows. This function is a little rough, but only
+ // meant for testing - it's not a part of the public interface.
+ return _ftelli64( fp->fp );
+#else
+ assert( sizeof( long ) == sizeof( long long ) );
return ftell( fp->fp );
+#endif
}
int segy_close( segy_file* fp ) {
@@ -551,12 +573,12 @@ long segy_trace0( const char* binheader ) {
}
int segy_seek( segy_file* fp,
- unsigned int trace,
+ int trace,
long trace0,
unsigned int trace_bsize ) {
trace_bsize += SEGY_TRACE_HEADER_SIZE;
- const long pos = trace0 + ( (long)trace * (long)trace_bsize );
+ long long pos = (long long)trace0 + (trace * (long long)trace_bsize);
if( fp->addr ) {
if( (size_t)pos >= fp->fsize ) return SEGY_FSEEK_ERROR;
@@ -565,7 +587,26 @@ int segy_seek( segy_file* fp,
return SEGY_OK;
}
- const int err = fseek( fp->fp, pos, SEEK_SET );
+ int err = SEGY_OK;
+ if( sizeof( long ) == sizeof( long long ) ) {
+ err = fseek( fp->fp, pos, SEEK_SET );
+ } else {
+ /*
+ * If long is 32bit on our platform (hello, windows), we do skips according
+ * to LONG_MAX and seek relative to our cursor rather than absolute on file
+ * begin.
+ */
+ rewind( fp->fp );
+ while( pos >= LONG_MAX && err == SEGY_OK ) {
+ err = fseek( fp->fp, LONG_MAX, SEEK_CUR );
+ pos -= LONG_MAX;
+ }
+
+ if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+ err = fseek( fp->fp, pos, SEEK_CUR );
+ }
+
if( err != 0 ) return SEGY_FSEEK_ERROR;
return SEGY_OK;
}
@@ -625,7 +666,7 @@ int segy_traces( segy_file* fp,
long trace0,
unsigned int trace_bsize ) {
- size_t fsize;
+ long long fsize;
int err = file_size( fp->fp, &fsize );
if( err != 0 ) return err;
diff --git a/lib/src/segyio/util.h b/lib/src/segyio/util.h
index 7582f09..a2494c3 100644
--- a/lib/src/segyio/util.h
+++ b/lib/src/segyio/util.h
@@ -18,8 +18,8 @@ void ebcdic2ascii( const char* ebcdic, char* ascii );
void ascii2ebcdic( const char* ascii, char* ebcdic );
void ibm2ieee(void* to, const void* from);
void ieee2ibm(void* to, const void* from);
-int segy_seek( struct segy_file_handle*, unsigned int, long, unsigned int );
-long segy_ftell( struct segy_file_handle* );
+int segy_seek( struct segy_file_handle*, int, long, unsigned int );
+long long segy_ftell( struct segy_file_handle* );
#ifdef __cplusplus
}
diff --git a/lib/test/segy.c b/lib/test/segy.c
index f4f053e..5b2eebb 100644
--- a/lib/test/segy.c
+++ b/lib/test/segy.c
@@ -1,3 +1,4 @@
+#include <limits.h>
#include <math.h>
#include <stdlib.h>
@@ -574,13 +575,18 @@ static void test_error_codes_sans_file() {
static void test_file_size_above_4GB(){
segy_file* fp = segy_open( "4gbfile", "w+b" );
- unsigned int trace = 5e6;
- unsigned int trace_bsize = 1e3;
+ unsigned int trace = 5000000;
+ unsigned int trace_bsize = 1000;
+ long long tracesize = trace_bsize + SEGY_TRACE_HEADER_SIZE;
long trace0 = 0;
- int err = segy_seek( fp, trace, trace0, trace_bsize);
+
+ int err = segy_seek( fp, trace, trace0, trace_bsize );
assertTrue(err==0, "");
- long pos = segy_ftell( fp );
- assertTrue(pos == (long)trace*((long)trace_bsize+SEGY_TRACE_HEADER_SIZE), "seek overflow");
+
+ long long pos = segy_ftell( fp );
+ assertTrue(pos > (long long)INT_MAX, "pos smaller than INT_MAX. "
+ "This means there's an overflow somewhere" );
+ assertTrue(pos == trace * tracesize, "seek overflow");
segy_close(fp);
}
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git
More information about the debian-science-commits
mailing list