[segyio] 104/376: Primitive mmap support
Jørgen Kvalsvik
jokva-guest at moszumanska.debian.org
Wed Sep 20 08:04:16 UTC 2017
This is an automated email from the git hooks/post-receive script.
jokva-guest pushed a commit to branch debian
in repository segyio.
commit 51401568d8b17019b50424eaed9a4d5d84f8ac11
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date: Wed Nov 2 15:55:48 2016 +0100
Primitive mmap support
If the system has the mmap system call available (really any
POSIX-compliant system), the segy_mmap function will attempt to memory-map
the file in question.
This is considered an experimental, optional feature, and the scheme is
rather naïve - the full file will be mmap'd, not just a view into it,
meaning it will likely cause some form of system failure for
way-larger-than-memory files. However, with non-regular access patterns
(such as reading lines, every nth trace, or similar), performance is vastly
improved.
---
applications/segyinfo.c | 8 ++-
applications/segyinspect.c | 15 +++--
cmake/check_includes.cmake | 5 +-
python/segyio/_segyio.c | 3 +
src/segyio/segy.c | 144 +++++++++++++++++++++++++++++++++++++--------
src/segyio/segy.h | 5 +-
6 files changed, 148 insertions(+), 32 deletions(-)
diff --git a/applications/segyinfo.c b/applications/segyinfo.c
index 5e59cd6..47f45f1 100644
--- a/applications/segyinfo.c
+++ b/applications/segyinfo.c
@@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <time.h>
#include <segyio/segy.h>
@@ -27,9 +28,9 @@ static inline int maximum( int x, int y ) {
int main(int argc, char* argv[]) {
- if( argc != 2 ) {
+ if( argc < 2 ) {
puts("Missing argument, expected run signature:");
- printf(" %s <segy_file>\n", argv[0]);
+ printf(" %s <segy_file> [mmap]\n", argv[0]);
exit(1);
}
@@ -39,6 +40,9 @@ int main(int argc, char* argv[]) {
exit( 3 );
}
+ if( argc > 2 && strcmp( argv[ 2 ], "mmap" ) == 0 )
+ segy_mmap( fp );
+
int err;
char header[ SEGY_BINARY_HEADER_SIZE ];
err = segy_binheader( fp, header );
diff --git a/applications/segyinspect.c b/applications/segyinspect.c
index 61750c4..30ca5da 100644
--- a/applications/segyinspect.c
+++ b/applications/segyinspect.c
@@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include <time.h>
#include <segyio/segy.h>
@@ -37,9 +38,9 @@ static const char* getFastestDirectionName( int sorting ) {
int main(int argc, char* argv[]) {
- if (!(argc == 2 || argc == 4)) {
+ if( argc < 2 ) {
puts("Missing argument, expected run signature:");
- printf(" %s <segy_file> [INLINE_BYTE CROSSLINE_BYTE]\n", argv[0]);
+ printf(" %s <segy_file> [mmap] [INLINE_BYTE CROSSLINE_BYTE]\n", argv[0]);
printf(" Inline and crossline bytes default to: 189 and 193\n");
exit(1);
}
@@ -47,9 +48,13 @@ int main(int argc, char* argv[]) {
int xl_field = CROSSLINE_3D;
int il_field = INLINE_3D;
- if (argc == 4) {
- il_field = atoi(argv[2]);
- xl_field = atoi(argv[3]);
+ bool memory_map = argc > 2 && strcmp( argv[ 2 ], "mmap" ) == 0;
+
+ if( ( memory_map && argc > 4 ) || ( !memory_map && argc > 2 ) ) {
+ int argindex = memory_map ? 2 : 3;
+
+ il_field = atoi(argv[ argindex + 0 ]);
+ xl_field = atoi(argv[ argindex + 1 ]);
}
clock_t start = clock();
diff --git a/cmake/check_includes.cmake b/cmake/check_includes.cmake
index 9ca9dcd..04d36b5 100644
--- a/cmake/check_includes.cmake
+++ b/cmake/check_includes.cmake
@@ -16,4 +16,7 @@ else()
message(FATAL_ERROR "Could not find htons.")
endif()
-
+check_include_file("sys/mman.h" HAVE_SYS_MMAN_H)
+if (HAVE_SYS_MMAN_H)
+ add_definitions("-DHAVE_MMAP")
+endif()
diff --git a/python/segyio/_segyio.c b/python/segyio/_segyio.c
index 156f1e9..d0120c4 100644
--- a/python/segyio/_segyio.c
+++ b/python/segyio/_segyio.c
@@ -70,6 +70,9 @@ static PyObject *py_FILE_open(PyObject *self, PyObject *args) {
if (p_FILE == NULL) {
return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
}
+
+ segy_mmap( p_FILE );
+
return PyCapsule_New(p_FILE, "segy_file*", (PyCapsule_Destructor) py_FILE_destructor);
}
diff --git a/src/segyio/segy.c b/src/segyio/segy.c
index 4b7bbb2..fa630fd 100644
--- a/src/segyio/segy.c
+++ b/src/segyio/segy.c
@@ -1,3 +1,8 @@
+#ifdef HAVE_MMAP
+ #define _POSIX_SOURCE
+ #include <sys/mman.h>
+#endif //HAVE_MMAP
+
#ifdef HAVE_NETINET_IN_H
#include <netinet/in.h>
#elif HAVE_ARPA_INET_H
@@ -9,6 +14,7 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
+#include <stdio.h>
#include <segyio/segy.h>
#include <segyio/util.h>
@@ -294,8 +300,36 @@ static int bfield_size[] = {
[- HEADER_SIZE + BIN_Unassigned2] = 0,
};
+/*
+ * Determine the file size in bytes. If this function succeeds, the file
+ * pointer will be reset to wherever it was before this call. If this call
+ * fails for some reason, the return value is SEGY_FSEEK_ERROR and the file
+ * pointer location will be determined by the behaviour of fseek.
+ */
+static int file_size( FILE* fp, size_t* size ) {
+ const long prev_pos = ftell( fp );
+
+ int err = fseek( fp, 0, SEEK_END );
+ if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+ const size_t sz = ftell( fp );
+ err = fseek( fp, prev_pos, SEEK_SET );
+ if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+ *size = sz;
+ return SEGY_OK;
+}
+
+/*
+ * addr is NULL if mmap was not available at compile time, or if memory
+ * mapping the file was not requested. If so, the FILE* fallback path is taken.
+ */
struct segy_file_handle {
+ void* addr;
+ void* cur;
FILE* fp;
+ size_t fsize;
+ char mode[ 4 ];
};
segy_file* segy_open( const char* path, const char* mode ) {
@@ -303,7 +337,7 @@ segy_file* segy_open( const char* path, const char* mode ) {
if( !fp ) return NULL;
- segy_file* file = malloc( sizeof( segy_file ) );
+ segy_file* file = calloc( 1, sizeof( segy_file ) );
if( !file ) {
fclose( fp );
@@ -311,11 +345,51 @@ segy_file* segy_open( const char* path, const char* mode ) {
}
file->fp = fp;
+ strncpy( file->mode, mode, 3 );
+
return file;
}
+int segy_mmap( segy_file* fp ) {
+#ifndef HAVE_MMAP
+ return SEGY_MMAP_INVALID;
+#else
+
+ int err = file_size( fp->fp, &fp->fsize );
+
+ if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+ bool rw = strstr( fp->mode, "+" ) || strstr( fp->mode, "w" );
+ const int prot = rw ? PROT_READ | PROT_WRITE : PROT_READ;
+
+ int fd = fileno( fp->fp );
+ void* addr = mmap( NULL, fp->fsize, prot, MAP_SHARED, fd, 0 );
+
+ if( addr == MAP_FAILED )
+ return SEGY_MMAP_ERROR;
+
+ fp->addr = fp->cur = addr;
+ return SEGY_OK;
+#endif //HAVE_MMAP
+}
+
int segy_flush( segy_file* fp, bool async ) {
- return fflush( fp->fp );
+ int syncerr = 0;
+
+#ifdef HAVE_MMAP
+ if( fp->addr ) {
+ int flag = async ? MS_ASYNC : MS_SYNC;
+ syncerr = msync( fp->addr, fp->fsize, flag );
+ }
+#endif //HAVE_MMAP
+
+ if( syncerr != 0 ) return syncerr;
+
+ int flusherr = fflush( fp->fp );
+
+ if( flusherr != 0 ) return SEGY_FWRITE_ERROR;
+
+ return SEGY_OK;
}
long segy_ftell( segy_file* fp ) {
@@ -323,7 +397,19 @@ long segy_ftell( segy_file* fp ) {
}
int segy_close( segy_file* fp ) {
- int err = fclose( fp->fp );
+ int err = segy_flush( fp, false );
+
+#ifdef HAVE_MMAP
+ if( !fp->addr ) goto no_mmap;
+
+ err = munmap( fp->addr, fp->fsize );
+ if( err != 0 )
+ err = SEGY_MMAP_ERROR;
+
+no_mmap:
+#endif //HAVE_MMAP
+
+ fclose( fp->fp );
free( fp );
return err;
}
@@ -471,6 +557,14 @@ int segy_seek( segy_file* fp,
trace_bsize += SEGY_TRACE_HEADER_SIZE;
const long pos = trace0 + ( (long)trace * (long)trace_bsize );
+
+ if( fp->addr ) {
+ if( (size_t)pos >= fp->fsize ) return SEGY_FSEEK_ERROR;
+
+ fp->cur = (char*)fp->addr + pos;
+ return SEGY_OK;
+ }
+
const int err = fseek( fp->fp, pos, SEEK_SET );
if( err != 0 ) return SEGY_FSEEK_ERROR;
return SEGY_OK;
@@ -485,6 +579,11 @@ int segy_traceheader( segy_file* fp,
const int err = segy_seek( fp, traceno, trace0, trace_bsize );
if( err != 0 ) return err;
+ if( fp->addr ) {
+ memcpy( buf, fp->cur, SEGY_TRACE_HEADER_SIZE );
+ return SEGY_OK;
+ }
+
const size_t readc = fread( buf, 1, SEGY_TRACE_HEADER_SIZE, fp->fp );
if( readc != SEGY_TRACE_HEADER_SIZE )
@@ -502,6 +601,11 @@ int segy_write_traceheader( segy_file* fp,
const int err = segy_seek( fp, traceno, trace0, trace_bsize );
if( err != 0 ) return err;
+ if( fp->addr ) {
+ memcpy( fp->cur, buf, SEGY_TRACE_HEADER_SIZE );
+ return SEGY_OK;
+ }
+
const size_t writec = fwrite( buf, 1, SEGY_TRACE_HEADER_SIZE, fp->fp );
if( writec != SEGY_TRACE_HEADER_SIZE )
@@ -511,26 +615,6 @@ int segy_write_traceheader( segy_file* fp,
}
/*
- * Determine the file size in bytes. If this function succeeds, the file
- * pointer will be reset to wherever it was before this call. If this call
- * fails for some reason, the return value is 0 and the file pointer location
- * will be determined by the behaviour of fseek.
- */
-static int file_size( FILE* fp, size_t* size ) {
- const long prev_pos = ftell( fp );
-
- int err = fseek( fp, 0, SEEK_END );
- if( err != 0 ) return SEGY_FSEEK_ERROR;
-
- const size_t sz = ftell( fp );
- err = fseek( fp, prev_pos, SEEK_SET );
- if( err != 0 ) return SEGY_FSEEK_ERROR;
-
- *size = sz;
- return SEGY_OK;
-}
-
-/*
* Return the number of traces in the file. The file pointer won't change after
* this call unless fseek itself fails.
*
@@ -879,6 +963,10 @@ int segy_crossline_indices( segy_file* fp,
static int skip_traceheader( segy_file* fp ) {
+ if( fp->addr ) {
+ fp->cur = (char*)fp->cur + SEGY_TRACE_HEADER_SIZE;
+ return SEGY_OK;
+ }
const int err = fseek( fp->fp, SEGY_TRACE_HEADER_SIZE, SEEK_CUR );
if( err != 0 ) return SEGY_FSEEK_ERROR;
return SEGY_OK;
@@ -896,6 +984,11 @@ int segy_readtrace( segy_file* fp,
err = skip_traceheader( fp );
if( err != 0 ) return err;
+ if( fp->addr ) {
+ memcpy( buf, fp->cur, trace_bsize );
+ return SEGY_OK;
+ }
+
const size_t readc = fread( buf, 1, trace_bsize, fp->fp );
if( readc != trace_bsize ) return SEGY_FREAD_ERROR;
@@ -916,6 +1009,11 @@ int segy_writetrace( segy_file* fp,
err = skip_traceheader( fp );
if( err != 0 ) return err;
+ if( fp->addr ) {
+ memcpy( fp->cur, buf, trace_bsize );
+ return SEGY_OK;
+ }
+
const size_t writec = fwrite( buf, 1, trace_bsize, fp->fp );
if( writec != trace_bsize )
return SEGY_FWRITE_ERROR;
diff --git a/src/segyio/segy.h b/src/segyio/segy.h
index 9479f62..14b0e82 100644
--- a/src/segyio/segy.h
+++ b/src/segyio/segy.h
@@ -32,6 +32,7 @@ struct segy_file_handle;
typedef struct segy_file_handle segy_file;
segy_file* segy_open( const char* path, const char* mode );
+int segy_mmap( segy_file* );
int segy_flush( segy_file*, bool async );
int segy_close( segy_file* );
@@ -385,7 +386,9 @@ typedef enum {
SEGY_MISSING_LINE_INDEX,
SEGY_INVALID_OFFSETS,
SEGY_TRACE_SIZE_MISMATCH,
- SEGY_INVALID_ARGS
+ SEGY_INVALID_ARGS,
+ SEGY_MMAP_ERROR,
+ SEGY_MMAP_INVALID,
} SEGY_ERROR;
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git
More information about the debian-science-commits
mailing list