[arrayfire] 221/408: FEAT Added saveArray and readArray functions for file read/write

Ghislain Vaillant ghisvail-guest at moszumanska.debian.org
Mon Sep 21 19:12:02 UTC 2015


This is an automated email from the git hooks/post-receive script.

ghisvail-guest pushed a commit to branch debian/sid
in repository arrayfire.

commit 50699ccd9926e5656888971096fa8d67443a7f2e
Author: Shehzan Mohammed <shehzan at arrayfire.com>
Date:   Mon Aug 10 15:28:41 2015 -0400

    FEAT Added saveArray and readArray functions for file read/write
    
    The format of the file (version 1) is as follows:
    
    Header:
    Description | Data Type | Size (Bytes) | Detailed Desc
    ------------|-----------|--------------|--------------
    Version     | Char      | 1            | ArrayFire File Format Version for future use. Currently set to 1
    Array Count | Int       | 4            | No. of Arrays stored in file
    
    Per Array:
    Description             | Data Type | Size (Bytes) | Detailed Desc
    ------------------------|-----------|--------------|--------------
    Length of Key String    | Int       | 4            | No. of characters (excluding null ending) in the key string
    Key                     | Char []   | length       | Key of the Array. Used when reading from file
    Offset                  | Int64     | 8            | No of bytes between offset and start of next array
    Array Type              | Char      | 1            | Type corresponding to af_dtype enum
    Dims (4 values)         | Int64     | 4 * 8 = 32   | Dimensions of the Array
    Data                    | Type      | sizeof(Type) * dims.elements() | Actual data of the array
    
    The offset is equal to 1 byte (type) + 32 bytes (dims) + size of data.
---
 docs/details/util.dox | 137 +++++++++++++++++++
 include/af/util.h     |  93 +++++++++++++
 src/api/c/stream.cpp  | 356 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/api/cpp/util.cpp  |  27 ++++
 4 files changed, 613 insertions(+)

diff --git a/docs/details/util.dox b/docs/details/util.dox
new file mode 100644
index 0000000..5f70a4d
--- /dev/null
+++ b/docs/details/util.dox
@@ -0,0 +1,137 @@
+/**
+\addtogroup arrayfire_func
+@{
+\defgroup print_func_print print
+
+\brief Print the array to screen
+
+Print Array and dimensions to screen
+
+\ingroup arrayfire_func
+
+=======================================================================
+
+\defgroup stream_func_read readArray
+
+\brief Load an array from a file
+
+The readArray function lets users read arrays saved in files.
+Arrays can either be read using the index in the file (0-indexed), or using
+the key that was used along with the Array.
+
+Note that if there are multiple arrays with the same key, only the first one
+will be read.
+
+The format of the file (version 1) is as follows:
+
+Header:
+Description | Data Type | Size (Bytes) | Detailed Desc
+------------|-----------|--------------|--------------
+Version     | Char      | 1            | ArrayFire File Format Version for future use. Currently set to 1
+Array Count | Int       | 4            | No. of Arrays stored in file
+
+
+Per Array:
+Description             | Data Type | Size (Bytes) | Detailed Desc
+------------------------|-----------|--------------|--------------
+Length of Key String    | Int       | 4            | No. of characters (excluding null ending) in the key string
+Key                     | Char []   | length       | Key of the Array. Used when reading from file
+Offset                  | Int64     | 8            | No of bytes between offset and start of next array
+Array Type              | Char      | 1            | Type corresponding to af_dtype enum
+Dims (4 values)         | Int64     | 4 * 8 = 32   | Dimensions of the Array
+Data                    | Type      | sizeof(Type) * dims.elements() | Actual data of the array
+
+The offset is equal to 1 byte (type) + 32 bytes (dims) + size of data.
+
+A file with 2 arrays would look like this (representative):
+
+> 1\n
+> 2\n
+> Array 1 Key Length\n
+> Array 1 Key\n
+> Array 1 Offset\n
+> Array 1 Type\n
+> Array 1 Dims\n
+> Array 1 Data\n
+> Array 2 Key Length\n
+> Array 2 Key\n
+> Array 2 Offset\n
+> Array 2 Type\n
+> Array 2 Dims\n
+> Array 2 Data\n
+
+\ingroup stream_func
+\ingroup arrayfire_func
+
+=======================================================================
+
+\defgroup stream_func_save saveArray
+
+\brief Save an array to a binary file
+
+The saveArray and readArray functions are designed to provide store and
+read access to arrays using files written to disk.
+
+The format of the file (version 1) is as follows:
+
+Header:
+Description | Data Type | Size (Bytes) | Detailed Desc
+------------|-----------|--------------|--------------
+Version     | Char      | 1            | ArrayFire File Format Version for future use. Currently set to 1
+Array Count | Int       | 4            | No. of Arrays stored in file
+
+
+Per Array:
+Description             | Data Type | Size (Bytes) | Detailed Desc
+------------------------|-----------|--------------|--------------
+Length of Key String    | Int       | 4            | No. of characters (excluding null ending) in the key string
+Key                     | Char []   | length       | Key of the Array. Used when reading from file
+Offset                  | Int64     | 8            | No of bytes between offset and start of next array
+Array Type              | Char      | 1            | Type corresponding to af_dtype enum
+Dims (4 values)         | Int64     | 4 * 8 = 32   | Dimensions of the Array
+Data                    | Type      | sizeof(Type) * dims.elements() | Actual data of the array
+
+The offset is equal to 1 byte (type) + 32 bytes (dims) + size of data.
+
+A file with 2 arrays would look like this (representative):
+
+> 1\n
+> 2\n
+> Array 1 Key Length\n
+> Array 1 Key\n
+> Array 1 Offset\n
+> Array 1 Type\n
+> Array 1 Dims\n
+> Array 1 Data\n
+> Array 2 Key Length\n
+> Array 2 Key\n
+> Array 2 Offset\n
+> Array 2 Type\n
+> Array 2 Dims\n
+> Array 2 Data\n
+
+saveArray allows you to append any number of arrays to the same file using
+the append argument. If the append argument is false, the existing contents of
+the file are discarded and only the new array is written.
+
+On each append, the array counter in the header is incremented and the new
+array is written to the end of the file. This function does not check if the
+tag is unique or not.
+
+\ingroup stream_func
+\ingroup arrayfire_func
+
+=======================================================================
+
+\defgroup data_func_randn randn
+
+\brief Create a random array sampled from a normal distribution
+
+The distribution is centered around 0
+
+\ingroup data_mat
+\ingroup arrayfire_func
+
+@}
+*/
+
diff --git a/include/af/util.h b/include/af/util.h
index 2c7b7ac..81dbd1a 100644
--- a/include/af/util.h
+++ b/include/af/util.h
@@ -32,6 +32,54 @@ namespace af
     */
     AFAPI void print(const char *exp, const array &arr, const int precision);
 
+    /**
+        \param[in] key is an expression used as tag/key for the array during \ref readArray
+        \param[in] arr is the array to be written
+        \param[in] filename is the path to the location on disk
+        \param[in] append is used to append to an existing file when true and create or
+        overwrite an existing file when false
+
+        \ingroup stream_func_save
+    */
+    AFAPI void saveArray(const char *key, const array &arr, const char *filename, const bool append = false);
+
+    /**
+        \param[in] filename is the path to the location on disk
+        \param[in] index is the 0-based sequential location of the array to be read
+
+        \returns array read from the index location
+
+        \note This function will throw an exception if the index is out of bounds
+
+        \ingroup stream_func_read
+    */
+    AFAPI array readArray(const char *filename, const unsigned index);
+
+    /**
+        \param[in] filename is the path to the location on disk
+        \param[in] key is the tag/name of the array to be read. The key needs to have an exact match.
+
+        \returns array read by key
+
+        \note This function will throw an exception if the key is not found.
+
+        \ingroup stream_func_read
+    */
+    AFAPI array readArray(const char *filename, const char *key);
+
+    /**
+        When reading by key, it may be a good idea to run this function first to check for the key
+        and then call the readArray using the index. This will avoid exceptions in case of key not found.
+
+        \param[in] filename is the path to the location on disk
+        \param[in] key is the tag/name of the array to be read. The key needs to have an exact match.
+
+        \returns index of the array in the file if the key is found. -1 if key is not found.
+
+        \ingroup stream_func_read
+    */
+    AFAPI int readArrayCheck(const char *filename, const char *key);
+
     // Purpose of Addition: "How to add Function" documentation
     AFAPI array exampleFunction(const array& in, const af_someenum_t param);
 }
@@ -110,6 +158,51 @@ extern "C" {
     */
     AFAPI af_err af_print_array_p(const char *exp, const af_array arr, const int precision);
 
+    /**
+        \param[in] key is an expression used as tag/key for the array during \ref readArray
+        \param[in] arr is the array to be written
+        \param[in] filename is the path to the location on disk
+        \param[in] append is used to append to an existing file when true and create or
+        overwrite an existing file when false
+
+        \ingroup stream_func_save
+    */
+    AFAPI af_err af_save_array(const char* key, const af_array arr, const char *filename, const bool append);
+
+    /**
+        \param[out] out is the array read from index
+        \param[in] filename is the path to the location on disk
+        \param[in] index is the 0-based sequential location of the array to be read
+
+        \note This function will throw an exception if the index is out of bounds.
+
+        \ingroup stream_func_read
+    */
+    AFAPI af_err af_read_array_index(af_array *out, const char *filename, const unsigned index);
+
+    /**
+        \param[out] out is the array read from key
+        \param[in] filename is the path to the location on disk
+        \param[in] key is the tag/name of the array to be read. The key needs to have an exact match.
+
+        \note This function will throw an exception if the key is not found.
+
+        \ingroup stream_func_read
+    */
+    AFAPI af_err af_read_array_key(af_array *out, const char *filename, const char* key);
+
+    /**
+        When reading by key, it may be a good idea to run this function first to check for the key
+        and then call the readArray using the index. This will avoid exceptions in case of key not found.
+
+        \param[out] index of the array in the file if the key is found. -1 if key is not found.
+        \param[in] filename is the path to the location on disk
+        \param[in] key is the tag/name of the array to be read. The key needs to have an exact match.
+
+        \ingroup stream_func_read
+    */
+    AFAPI af_err af_read_array_key_check(int *index, const char *filename, const char* key);
+
     // Purpose of Addition: "How to add Function" documentation
     AFAPI af_err af_example_function(af_array* out, const af_array in, const af_someenum_t param);
 
diff --git a/src/api/c/stream.cpp b/src/api/c/stream.cpp
new file mode 100644
index 0000000..d966699
--- /dev/null
+++ b/src/api/c/stream.cpp
@@ -0,0 +1,356 @@
+/*******************************************************
+ * Copyright (c) 2014, ArrayFire
+ * All rights reserved.
+ *
+ * This file is distributed under 3-clause BSD license.
+ * The complete license agreement can be obtained at:
+ * http://arrayfire.com/licenses/BSD-3-Clause
+ ********************************************************/
+
+#include <iostream>
+#include <fstream>
+#include <iomanip>
+#include <vector>
+
+#include <af/array.h>
+#include <ArrayInfo.hpp>
+#include <handle.hpp>
+#include <backend.hpp>
+#include <err_common.hpp>
+#include <type_util.hpp>
+
+#include <af/index.h>
+
+using namespace detail;
+
+#define STREAM_FORMAT_VERSION 0x1
+static const char sfv_char = STREAM_FORMAT_VERSION;
+
+// Writes one array record to `filename` in stream format version 1.
+//
+// key      : tag stored in front of the array; only its characters (no
+//            terminating null) are written
+// arr      : array whose contents are copied to host memory and written out
+// filename : path of the file to create, overwrite or extend
+// append   : when true, an existing file is kept and its array counter is
+//            incremented; when false, the file is truncated first
+//
+// All fields are written as the raw in-memory bytes of the host variables.
+// NOTE(review): that presumably makes the file dependent on the host's
+// int/intl sizes and byte order - confirm before relying on portability.
+template<typename T>
+static void save(const char *key, const af_array arr, const char *filename, const bool append = false)
+{
+    // (char     )   Version (Once)
+    // (int      )   No. of Arrays (Once)
+        // (int    )   Length of the key
+        // (cstring)   Key
+        // (intl   )   Offset bytes to next array (type + dims + data)
+        // (char   )   Type
+        // (intl   )   dim4 (x 4)
+        // (T      )   data (x elements)
+
+    // Setup all the data structures that need to be written to file
+    ///////////////////////////////////////////////////////////////////////////
+    std::string k(key);
+    int klen = k.size();
+
+    const ArrayInfo info = getInfo(arr);
+    std::vector<T> data(info.elements());
+
+    // Copy the array contents into the host buffer
+    // NOTE(review): &data.front() on an empty vector (zero-element array) is
+    // undefined behaviour - confirm whether empty arrays can reach this point.
+    AF_CHECK(af_get_data_ptr(&data.front(), arr));
+
+    // The dtype enum value is stored in a single byte
+    char type = info.getType();
+
+    intl odims[4];
+    for(int i = 0; i < 4; i++) {
+        odims[i] = info.dims()[i];
+    }
+
+    // Bytes that follow the offset field within this record: type + dims + data
+    intl offset = sizeof(char) + 4 * sizeof(intl) + info.elements() * sizeof(T);
+    ///////////////////////////////////////////////////////////////////////////
+
+    std::fstream fs;
+    int n_arrays = 0;
+
+    if(append) {
+        // Probe for the file first: an existing file is opened for both
+        // reading and writing, a missing one is opened for writing only
+        std::ifstream checkIfExists(filename);
+        bool exists = checkIfExists.good();
+        checkIfExists.close();
+        if(exists) {
+            fs.open(filename, std::fstream::in | std::fstream::out | std::fstream::binary);
+        } else {
+            fs.open(filename, std::fstream::out | std::fstream::binary);
+        }
+
+        // Throw exception if file is not open
+        if(!fs.is_open()) AF_ERROR("File failed to open", AF_ERR_ARG);
+
+        // Assert Version
+        if(fs.peek() == std::fstream::traits_type::eof()) {
+            // File is empty
+            // Reset the stream state changed by the failed peek so that the
+            // writes below are not rejected
+            fs.clear();
+        } else {
+            char prev_version = 0;
+            fs.read(&prev_version, sizeof(char));
+
+            AF_ASSERT(prev_version == sfv_char, "ArrayFire data format has changed. Can't append to file");
+
+            // Existing array count; incremented below for the new record
+            fs.read((char*)&n_arrays, sizeof(int));
+        }
+    } else {
+        fs.open(filename, std::fstream::out | std::fstream::binary | std::fstream::trunc);
+
+        // Throw exception if file is not open
+        if(!fs.is_open()) AF_ERROR("File failed to open", AF_ERR_ARG);
+    }
+
+    n_arrays++;
+
+    // Write version and n_arrays to top of file
+    // (for an existing file this overwrites the header in place)
+    fs.seekp(0);
+    fs.write(&sfv_char, 1);
+    fs.write((char*)&n_arrays, sizeof(int));
+
+    // Write array to end of file. Irrespective of new or append
+    // NOTE(review): none of the writes below are checked for failure.
+    fs.seekp(0, std::ios_base::end);
+    fs.write((char*)&klen, sizeof(int));
+    fs.write(k.c_str(), klen);
+    fs.write((char*)&offset, sizeof(intl));
+    fs.write(&type, sizeof(char));
+    fs.write((char*)&odims, sizeof(intl) * 4);
+    fs.write((char*)&data.front(), sizeof(T) * data.size());
+    fs.close();
+}
+
+// C entry point for saving an array: validates the string arguments, then
+// forwards to the save<T> instantiation that matches the array's dtype.
+// Returns AF_SUCCESS when no error is raised inside the try block; failures
+// are handled by CATCHALL.
+af_err af_save_array(const char *key, const af_array arr, const char *filename, const bool append)
+{
+    try {
+        ARG_ASSERT(0, key != NULL);
+        ARG_ASSERT(2, filename != NULL);
+
+        const af_dtype dtype = getInfo(arr).getType();
+        switch(dtype) {
+            // Real floating point
+            case f32:   save<float>   (key, arr, filename, append);   break;
+            case f64:   save<double>  (key, arr, filename, append);   break;
+            // Complex floating point
+            case c32:   save<cfloat>  (key, arr, filename, append);   break;
+            case c64:   save<cdouble> (key, arr, filename, append);   break;
+            // Boolean and integer types
+            case b8:    save<char>    (key, arr, filename, append);   break;
+            case u8:    save<uchar>   (key, arr, filename, append);   break;
+            case s32:   save<int>     (key, arr, filename, append);   break;
+            case u32:   save<unsigned>(key, arr, filename, append);   break;
+            case s64:   save<intl>    (key, arr, filename, append);   break;
+            case u64:   save<uintl>   (key, arr, filename, append);   break;
+            default:    TYPE_ERROR(1, dtype);
+        }
+    }
+    CATCHALL;
+    return AF_SUCCESS;
+}
+
+// Reads the dims and data fields of one record from the current position of
+// `fs` and returns a handle to a newly created array holding that data.
+//
+// The stream must be positioned just after the record's type byte. Four intl
+// dimension values are read, followed by dims.elements() values of type T.
+//
+// Raises AF_ERR_ARG when the file ends before all expected bytes were read or
+// when a stored dimension is negative.
+template<typename T>
+static af_array readDataToArray(std::fstream &fs)
+{
+    intl dims[4];
+    fs.read((char*)&dims, 4 * sizeof(intl));
+    // A short read sets failbit; without this check garbage dims would be used
+    if(fs.fail()) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+    dim4 d;
+    for(int i = 0; i < 4; i++) {
+        // A negative extent can only come from a damaged file
+        if(dims[i] < 0) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+        d[i] = dims[i];
+    }
+
+    intl size = d.elements();
+
+    std::vector<T> data(size);
+
+    // front() must not be called on an empty vector, so only take the buffer
+    // address (and only read) when there is at least one element
+    T *host = NULL;
+    if(!data.empty()) {
+        host = &data.front();
+        fs.read((char*)host, size * sizeof(T));
+        if(fs.fail()) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+    }
+
+    return getHandle(createHostDataArray<T>(d, host));
+}
+
+// Consumes the key-length, key and offset fields of the record at the current
+// position of `fs` and returns the stored offset (the number of bytes that
+// follow the offset field within the record: type + dims + data).
+// Raises AF_ERR_ARG when the fields cannot be read or hold negative values.
+static intl skipKeyAndReadOffsetV1(std::fstream &fs)
+{
+    // (int    )   Length of the key
+    int klen = -1;
+    fs.read((char*)&klen, sizeof(int));
+    if(fs.fail() || klen < 0) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+    // (cstring)   Key - not needed when reading by index, skip it
+    fs.seekg(klen, std::ios_base::cur);
+
+    // (intl   )   Offset bytes to next array (type + dims + data)
+    intl offset = -1;
+    fs.read((char*)&offset, sizeof(intl));
+    if(fs.fail() || offset < 0) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+    return offset;
+}
+
+// Reads the array stored at 0-based position `index` of a version 1 file.
+//
+// filename : path of the file to read
+// index    : sequential position of the wanted array in the file
+//
+// Returns a handle to the newly created array.
+// Raises AF_ERR_ARG when the file cannot be opened, is empty, or ends before
+// the requested record was read completely. The index is checked against the
+// array count of the header with AF_ASSERT.
+static af_array readArrayV1(const char *filename, const unsigned index)
+{
+    char version = 0;
+    int n_arrays = 0;
+
+    std::fstream fs(filename, std::fstream::in | std::fstream::binary);
+
+    // Throw exception if file is not open
+    if(!fs.is_open()) AF_ERROR("File failed to open", AF_ERR_ARG);
+
+    if(fs.peek() == std::fstream::traits_type::eof()) {
+        AF_ERROR("File is empty", AF_ERR_ARG);
+    }
+
+    // Header: version byte (already validated by the caller) and array count
+    fs.read(&version, sizeof(char));
+    fs.read((char*)&n_arrays, sizeof(int));
+    if(fs.fail()) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+    // Compare as unsigned: casting index to int would turn values above
+    // INT_MAX negative and let them slip through the bounds check
+    AF_ASSERT((n_arrays > 0 && index < (unsigned)n_arrays), "Index out of bounds");
+
+    // Jump over every record in front of the requested one
+    for(unsigned i = 0; i < index; i++) {
+        const intl offset = skipKeyAndReadOffsetV1(fs);
+        fs.seekg(offset, std::ios_base::cur);
+    }
+
+    // Position the stream on the type byte of the requested record
+    skipKeyAndReadOffsetV1(fs);
+
+    // Read type; dims and data are read by readDataToArray
+    char type_ = -1;
+    fs.read(&type_, sizeof(char));
+    if(fs.fail()) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+    af_dtype type = (af_dtype)type_;
+
+    af_array out = 0;
+    switch(type) {
+        case f32 : out = readDataToArray<float>  (fs);  break;
+        case c32 : out = readDataToArray<cfloat> (fs);  break;
+        case f64 : out = readDataToArray<double> (fs);  break;
+        case c64 : out = readDataToArray<cdouble>(fs);  break;
+        case b8  : out = readDataToArray<char>   (fs);  break;
+        case s32 : out = readDataToArray<int>    (fs);  break;
+        case u32 : out = readDataToArray<uint>   (fs);  break;
+        case u8  : out = readDataToArray<uchar>  (fs);  break;
+        case s64 : out = readDataToArray<intl>   (fs);  break;
+        case u64 : out = readDataToArray<uintl>  (fs);  break;
+        default:    TYPE_ERROR(1, type);
+    }
+    fs.close();
+
+    return out;
+}
+
+// Reads the leading version byte of `filename` and forwards to the reader
+// for that format version. Only version 1 is handled; any other value raises
+// "Invalid version". An unopenable or empty file raises AF_ERR_ARG as well.
+static af_array checkVersionAndRead(const char *filename, const unsigned index)
+{
+    std::fstream probe(filename, std::fstream::in | std::fstream::binary);
+    // Throw exception if file is not open
+    if(!probe.is_open()) AF_ERROR("File failed to open", AF_ERR_ARG);
+
+    const bool isEmpty = (probe.peek() == std::fstream::traits_type::eof());
+    if(isEmpty) AF_ERROR("File is empty", AF_ERR_ARG);
+
+    char fileVersion = 0;
+    probe.read(&fileVersion, sizeof(char));
+    // The version specific reader reopens the file itself
+    probe.close();
+
+    if(fileVersion == 1) return readArrayV1(filename, index);
+
+    AF_ERROR("Invalid version", AF_ERR_ARG);
+}
+
+// Searches `filename` for the first array stored under the key `k`.
+//
+// filename : path of the file to search
+// k        : key to look for; compared as a null-terminated string and must
+//            match exactly
+//
+// Returns the 0-based position of the first matching array, or -1 when no
+// stored key matches.
+// Raises AF_ERR_ARG when the file cannot be opened, is empty, has a version
+// other than 1, or ends before a record header could be read completely.
+int checkVersionAndFindIndex(const char *filename, const char *k)
+{
+    char version = 0;
+    std::string key(k);
+
+    std::ifstream fs(filename, std::ifstream::in | std::ifstream::binary);
+    // Throw exception if file is not open
+    if(!fs.is_open()) AF_ERROR("File failed to open", AF_ERR_ARG);
+
+    if(fs.peek() == std::ifstream::traits_type::eof()) {
+        AF_ERROR("File is empty", AF_ERR_ARG);
+    }
+    fs.read(&version, sizeof(char));
+
+    if(version != 1) AF_ERROR("Invalid version", AF_ERR_ARG);
+
+    int n_arrays = -1;
+    fs.read((char*)&n_arrays, sizeof(int));
+    if(fs.fail()) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+    int index = -1;
+    for(int i = 0; i < n_arrays; i++) {
+        // A failed read would leave klen at -1; using it unchecked would
+        // write the terminator in front of the key buffer
+        int klen = -1;
+        fs.read((char*)&klen, sizeof(int));
+        if(fs.fail() || klen < 0) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+        // One extra zero-initialised element acts as the string terminator;
+        // the vector releases the buffer on every exit path
+        std::vector<char> readKey((size_t)klen + 1, '\0');
+        fs.read(&readKey[0], klen);
+        if(fs.fail()) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+
+        if(key == &readKey[0]) {
+            // Key matches, stop searching
+            index = i;
+            break;
+        }
+
+        // Key doesn't match. Skip the data
+        intl offset = -1;
+        fs.read((char*)&offset, sizeof(intl));
+        if(fs.fail() || offset < 0) AF_ERROR("File is truncated or corrupted", AF_ERR_ARG);
+        fs.seekg(offset, std::ios_base::cur);
+    }
+    fs.close();
+
+    return index;
+}
+
+// C entry point for reading the array at 0-based position `index` of
+// `filename`. On success the new handle is stored in *out and AF_SUCCESS is
+// returned; failures are handled by CATCHALL.
+af_err af_read_array_index(af_array *out, const char *filename, const unsigned index)
+{
+    try {
+        AF_CHECK(af_init());
+
+        ARG_ASSERT(1, filename != NULL);
+
+        // *out is only written once the read has completed without error
+        *out = checkVersionAndRead(filename, index);
+    }
+    CATCHALL;
+    return AF_SUCCESS;
+}
+
+// C entry point for reading the first array stored under `key` in
+// `filename`. The key is resolved to a position first and the array is then
+// read by that position. Raises "Key not found" (AF_ERR_INVALID_ARRAY) when
+// no stored key matches. On success the new handle is stored in *out.
+af_err af_read_array_key(af_array *out, const char *filename, const char *key)
+{
+    try {
+        AF_CHECK(af_init());
+        ARG_ASSERT(1, filename != NULL);
+        ARG_ASSERT(2, key != NULL);
+
+        // Find index of key. Then call read by index
+        const int found = checkVersionAndFindIndex(filename, key);
+        if(found == -1) {
+            AF_ERROR("Key not found", AF_ERR_INVALID_ARRAY);
+        }
+
+        *out = checkVersionAndRead(filename, found);
+    }
+    CATCHALL;
+    return AF_SUCCESS;
+}
+
+// C entry point for looking up `key` in `filename` without reading any array
+// data. Stores the 0-based position of the first matching array in *index,
+// or -1 when no stored key matches.
+af_err af_read_array_key_check(int *index, const char *filename, const char* key)
+{
+    try {
+        ARG_ASSERT(1, filename != NULL);
+        ARG_ASSERT(2, key != NULL);
+
+        AF_CHECK(af_init());
+
+        // Only the position is reported; the caller decides whether to read
+        *index = checkVersionAndFindIndex(filename, key);
+    }
+    CATCHALL;
+    return AF_SUCCESS;
+}
diff --git a/src/api/cpp/util.cpp b/src/api/cpp/util.cpp
index 5ce7aa4..4ce0bb5 100644
--- a/src/api/cpp/util.cpp
+++ b/src/api/cpp/util.cpp
@@ -28,4 +28,31 @@ namespace af
         AF_THROW(af_print_array_p(exp, arr.get(), precision));
         return;
     }
+
+    // C++ wrapper around af_save_array; the returned status is passed to
+    // AF_THROW.
+    void saveArray(const char *key, const array &arr, const char *filename, const bool append)
+    {
+        AF_THROW(af_save_array(key, arr.get(), filename, append));
+    }
+
+    // C++ wrapper around af_read_array_index: reads the array at the given
+    // 0-based position and wraps the resulting handle in an af::array.
+    array readArray(const char *filename, const unsigned index)
+    {
+        af_array handle = 0;
+        AF_THROW(af_read_array_index(&handle, filename, index));
+        return array(handle);
+    }
+
+    // C++ wrapper around af_read_array_key: reads the array stored under
+    // the given key and wraps the resulting handle in an af::array.
+    array readArray(const char *filename, const char *key)
+    {
+        af_array handle = 0;
+        AF_THROW(af_read_array_key(&handle, filename, key));
+        return array(handle);
+    }
+
+    // C++ wrapper around af_read_array_key_check: returns the position
+    // reported for the key. The result starts out as -1.
+    int readArrayCheck(const char *filename, const char *key)
+    {
+        int position = -1;
+        AF_THROW(af_read_array_key_check(&position, filename, key));
+        return position;
+    }
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/arrayfire.git



More information about the debian-science-commits mailing list