Commit 84c91bf1 authored by Lukas Riedel

Merge branch 'h5file-cleanup' into 'master'

Improve and cleanup H5File

See merge request !109
parents a7372802 784720cb
......@@ -85,12 +85,20 @@
See [#86](https://ts-gitlab.iup.uni-heidelberg.de/dorie/dorie/issues/86).
* Improved and refurbished `Dorie::H5File`.
The source file was renamed to `dune/dorie/solver/util_h5file.hh`. The
`read_dataset` function can now directly open dataset paths that contain
groups, as sketched below.
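  A minimal sketch of the new call, with illustrative file name, dataset
  path, and element type (not taken from the code base):
  ```cpp
  Dune::Dorie::H5File file("field.h5", /*verbose=*/1);
  std::vector<int> data;
  std::vector<unsigned int> shape;
  // the group "group" is opened internally before the dataset is read
  file.read_dataset("group/dataset", H5T_NATIVE_INT, data, shape);
  ```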
### Deprecated
* The configuration file key `[parameters.interpolation]` is deprecated due to
the new scheme for storing parameterization data. DORiE now only supports
nearest-neighbor interpolation of parameters and scaling factors.
### Removed
* The class `H5File::AttributeReader` was completely removed.
## 1.1.1 (2018-08-21)
......
......@@ -7,6 +7,9 @@
#include <string>
#include <iostream>
#include <cstdio>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <dune/common/exceptions.hh>
#include <dune/common/parametertree.hh>
......
......@@ -26,7 +26,7 @@
#include <dune/grid/utility/structuredgridfactory.hh>
#include "util_grid_mapper.hh"
#include "util_h5tools.hh"
#include "util_h5file.hh"
namespace Dune {
namespace Dorie {
......@@ -308,22 +308,12 @@ private:
// create H5 file reader
H5File file(file_path, _verbose);
// split group from dataset name if applicable, and open
const auto pos = dataset_name.rfind('/');
if (pos != std::string::npos) {
const auto group = dataset_name.substr(0, pos);
dataset_name = dataset_name.substr(pos+1);
file.open(group);
}
else {
file.open();
}
// Read the data
file.read_dataset(element_index_map,
cells,
dataset_name,
H5T_NATIVE_INT);
file.read_dataset(dataset_name,
H5T_NATIVE_INT,
element_index_map,
cells
);
// check if cells has correct size and extensions
if (cells.size() != Grid::dimension) {
......
#ifndef DUNE_DORIE_UTIL_H5FILE_HH
#define DUNE_DORIE_UTIL_H5FILE_HH
#include <string>
#include <cassert>
#include <iostream>
#include <vector>
#include <exception>
#include <hdf5.h>
namespace Dune {
namespace Dorie {
/// A very simple HDF5 dataset reader. Represents an open H5 file.
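/** Usage sketch (the file path and verbosity value are illustrative;
 * requires an initialized MPI environment):
 * \code
 * H5File file("data.h5", 1); // opens the file and its base group
 * // ... read datasets via read_dataset() ...
 * \endcode
 * The file and the currently opened group are closed automatically when
 * the object is destroyed.
 */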
class H5File
{
private:
//! Verbosity of this class
const int _verbose;
//! ID of the H5 file
hid_t _file_id;
//! ID of the currently opened group
hid_t _group_id;
public:
/// Open the H5 file and its base group.
/** \param file_path Path to the H5 file
* \param verbose Verbosity of the output
*/
explicit H5File(const std::string& file_path, const int verbose):
_verbose(verbose),
_file_id(-1),
_group_id(-1)
{
// set up property list for collective I/O
hid_t h5_plist_id = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio(h5_plist_id, MPI_COMM_WORLD, MPI_INFO_NULL);
assert(h5_plist_id > -1);
// open the file for reading
_file_id = H5Fopen(file_path.c_str(), H5F_ACC_RDONLY, h5_plist_id);
assert(_file_id > -1);
// release properties
herr_t status = H5Pclose(h5_plist_id);
assert(status > -1);
// check for errors regardless of build type
if (_file_id < 0) {
throw std::runtime_error("Cannot open H5 file " + file_path);
}
// open the base group
open_group();
}
/// Close the H5 file when this object is destroyed
~H5File ()
{
// close the opened group
herr_t status = H5Gclose(_group_id);
assert(status > -1);
// close the opened file
status = H5Fclose(_file_id);
assert(status > -1);
}
/// Open a group
/** \param group_path Full internal path to the group.
* Defaults to the base group.
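*
* Example (illustrative path): open_group("parameters/soil") closes the
* currently opened group and opens the given one.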
*/
void open_group(const std::string& group_path="./")
{
// close the group if we already opened it
if (_group_id >= 0) {
herr_t status = H5Gclose(_group_id);
assert(status > -1);
}
// open the new group
_group_id = H5Gopen(_file_id, group_path.c_str(), H5P_DEFAULT);
if (_group_id < 0) {
throw std::runtime_error("Cannot open H5 group " + group_path);
}
}
/// Read data from a multi-dimensional dataset.
/** Data is read collectively from all parallel processes.
* The output data arrays are resized automatically.
*
* \tparam data_t The data type to read the H5 array entries into.
* Must be compatible with the given H5 data type.
* \tparam ext_t The data type for array extensions.
* hsize_t must be implicitly convertible to this type.
* \param[in] dataset_path The full internal path of the dataset.
* May contain subgroups.
* \param[in] data_type The H5 internal data type of the array to read.
* \param[out] data The vector for reading the data into. Is resized
* automatically.
* \param[out] shape The vector containing the extensions of the dataset.
* This shape is the reverse of the H5-internal shape;
* it indicates the directions x, y[, z].
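*
* Example (illustrative names; `file` is an H5File instance):
* \code
* std::vector<double> data;
* std::vector<unsigned int> shape;
* file.read_dataset("group/dataset", H5T_NATIVE_DOUBLE, data, shape);
* \endcode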
*/
template<typename data_t, typename ext_t>
void read_dataset(const std::string& dataset_path,
const hid_t data_type,
std::vector<data_t>& data,
std::vector<ext_t>& shape
)
{
if(_verbose > 1) {
std::cout << "Reading H5 parameter field " << dataset_path
<< std::endl;
}
// open a subgroup if the dataset path is nested
const auto dataset_name = open_nested_dataset(dataset_path);
// open the dataset
hid_t dataset_id = H5Dopen(_group_id,
dataset_name.c_str(),
H5P_DEFAULT);
if (dataset_id < 0) {
throw std::runtime_error("Cannot open dataset " + dataset_path);
}
// get the dataspace
hid_t dataspace_id = H5Dget_space(dataset_id);
assert(dataspace_id > -1);
// get the dimension (2-d or 3-d)
const hsize_t arr_dim = H5Sget_simple_extent_ndims(dataspace_id);
// get the size of the problem
std::vector<hsize_t> dims(arr_dim, 0);
herr_t status = H5Sget_simple_extent_dims(dataspace_id,
dims.data(),
0);
assert(status > -1);
const auto local_size = std::accumulate(dims.begin(),
dims.end(),
1,
std::multiplies<hsize_t>());
const std::vector<hsize_t> offset(arr_dim, 0);
// reverse order of dimensions!
shape.resize(arr_dim);
std::copy(dims.rbegin(), dims.rend(), shape.begin());
// create the memory space
hid_t memspace_id = H5Screate_simple(arr_dim, dims.data(), NULL);
//select the hyperslab
status = H5Sselect_hyperslab(memspace_id, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL);
assert(status > -1);
//resize the return data
data.resize(local_size);
// set up the collective transfer properties list
hid_t h5_plist_id = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(h5_plist_id, H5FD_MPIO_COLLECTIVE);
assert(h5_plist_id > -1);
// read the actual data
status = H5Dread(dataset_id,
data_type,
memspace_id,
dataspace_id,
h5_plist_id,
data.data()
);
if (status < 0) {
throw std::runtime_error("Error reading data from dataset "
+ dataset_path);
}
// close the identifiers
status = H5Dclose(dataset_id);
status = H5Sclose(dataspace_id);
status = H5Sclose(memspace_id);
status = H5Pclose(h5_plist_id);
assert(status > -1);
}
private:
/// Split the dataset path and open its group, if applicable
/** \param dataset_path (Possibly nested) dataset path.
* \return The name of the actual dataset to open.
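*
* Example (illustrative path): "soil/layer_1/Ks" opens the group
* "soil/layer_1" and returns "Ks".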
*/
std::string open_nested_dataset(std::string dataset_path)
{
const auto pos = dataset_path.rfind('/');
// split group from dataset name
if (pos != std::string::npos) {
const auto group = dataset_path.substr(0, pos);
dataset_path = dataset_path.substr(pos+1);
open_group(group);
}
return dataset_path;
}
};
} // namespace Dorie
} // namespace Dune
#endif
// -*- tab-width: 4; indent-tabs-mode: nil -*-
#ifndef DUNE_DORIE_UTIL_H5FILE_HH
#define DUNE_DORIE_UTIL_H5FILE_HH
#include <string>
#include <cassert>
#include <iostream>
#include <vector>
#include <array>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <hdf5.h>
namespace Dune {
namespace Dorie {
class H5File
{
template<typename OUT>
struct AttributeReader;
protected:
bool fileOpen;
const std::string filePath;
const int verbose;
hid_t h5_file_id;
hid_t h5_group_id;
public:
/// Initialize this file reader. This does not open the H5 file!
/** \param filePath_ Path to the H5 file
* \param verbose_ Verbosity of the output
*/
explicit H5File(const std::string& filePath_, const int verbose_):
fileOpen(false), filePath(filePath_), verbose(verbose_)
{
if(!file_exists(filePath))
DUNE_THROW(Exception,"File '" << filePath << "' is not readable");
}
/// Close the H5 file when this object is destroyed
~H5File ()
{
if (fileOpen)
close();
}
/// Open a group inside the file
/** \param group_name Internal name/path of the group
*/
void open(const std::string& group_name="./")
{
if(fileOpen)
close();
// set up property list for collective I/O
hid_t h5_plist_id = H5Pcreate(H5P_FILE_ACCESS);
H5Pset_fapl_mpio(h5_plist_id, MPI_COMM_WORLD, MPI_INFO_NULL);
assert(h5_plist_id > -1);
// open the file for reading
h5_file_id = H5Fopen(filePath.c_str(), H5F_ACC_RDONLY, h5_plist_id);
assert(h5_file_id > -1);
// open the group
h5_group_id = H5Gopen(h5_file_id, group_name.c_str(), H5P_DEFAULT);
assert(h5_group_id > -1);
// release properties
H5Pclose(h5_plist_id);
fileOpen = true;
}
/** Read a dataset from a multi-dimensional array in the HDF5 file.
*
* \param local_data is a 'return value' which gets the data that belongs to the current processor (current hyperslab)
* \param local_cells is a container for storing the number of cells in each dimension
* \param dataset_name is the group and name of the dataset to be read
* \param data_type The H5 data type to read.
*/
template<typename VEC, typename CELLVEC>
void read_dataset(VEC& local_data,
CELLVEC& local_cells,
const std::string& dataset_name,
const hid_t data_type)
{
if (not fileOpen)
DUNE_THROW(Exception,"File must be opened before reading data");
// open the dataset
hid_t dataset_id = H5Dopen(h5_group_id, dataset_name.c_str(), H5P_DEFAULT);
assert( dataset_id > -1 );
// get the dataspace
hid_t dataspace_id=H5Dget_space(dataset_id);
assert(dataspace_id>-1);
// some needed variables
herr_t status;
hsize_t arr_dim;
// get the dimension (2-d or 3-d)
arr_dim = H5Sget_simple_extent_ndims(dataspace_id);
// get the size of the problem
std::vector<hsize_t> dims(arr_dim, 0);
status = H5Sget_simple_extent_dims(dataspace_id , dims.data(), 0);
assert(status > -1);
const auto local_size = std::accumulate(dims.begin(),
dims.end(),
1,
std::multiplies<hsize_t>());
const std::vector<hsize_t> offset(arr_dim, 0);
// reverse order of dimensions!
local_cells.resize(arr_dim);
std::copy(dims.rbegin(), dims.rend(), local_cells.begin());
// create the memory space
hid_t memspace_id = H5Screate_simple(arr_dim, dims.data(), NULL);
//select the hyperslab
status = H5Sselect_hyperslab(memspace_id, H5S_SELECT_SET, offset.data(), NULL, dims.data(), NULL);
assert(status>-1);
//resize the return data
local_data.resize(local_size);
// set up the collective transfer properties list
hid_t h5_plist_id = H5Pcreate(H5P_DATASET_XFER);
H5Pset_dxpl_mpio(h5_plist_id, H5FD_MPIO_COLLECTIVE);
assert(h5_plist_id > -1);
status = H5Dread(dataset_id
, data_type
, memspace_id
, dataspace_id
, h5_plist_id
, local_data.data()
);
assert(status>-1);
if(verbose > 1)
std::cout << "Read H5 parameter field " << dataset_name << ";" << std::endl;
// close the identifiers
H5Dclose(dataset_id);
H5Sclose(dataspace_id);
H5Sclose(memspace_id);
H5Pclose(h5_plist_id);
}
/// Close the H5 file
void close()
{
if (not fileOpen)
DUNE_THROW(Exception,"HDF file must be opened before closing it");
herr_t status = H5Gclose(h5_group_id);
assert( status > -1 );
status = H5Fclose(h5_file_id);
assert( status > -1 );
fileOpen = false;
}
/// Reads an attribute array from HDF5 file
template<typename OUT>
OUT read_attribute(const std::string attribute_name)
{
if(!fileOpen)
DUNE_THROW(Exception,"File must be opened before reading attributes");
hid_t attr_id = H5Aopen_name(h5_group_id, attribute_name.c_str());
assert(attr_id > -1);
// get attribute datatype, space, and rank
hid_t atype = H5Aget_type(attr_id);
hid_t aspace = H5Aget_space(attr_id);
hid_t rank = H5Sget_simple_extent_ndims(aspace);
std::vector<hsize_t> dims;
dims.resize(rank);
herr_t status = H5Sget_simple_extent_dims(aspace , &(dims[0]), 0);
assert(status > -1);
hid_t atype_mem = H5Tget_native_type(atype, H5T_DIR_ASCEND);
// read attribute
OUT out = AttributeReader<OUT>::read(attr_id, atype_mem, rank, dims);
if(verbose > 1)
std::cout << "Reading H5 attribute: " << attribute_name << "; value: " << out << std::endl;
// close the identifiers
H5Tclose(atype);
H5Sclose(aspace);
status = H5Aclose(attr_id);
assert(status > -1);
return out;
}
template<typename OUT>
OUT read_attribute(const std::string attribute_name, const std::string group_name)
{
open(group_name);
OUT out = read_attribute<OUT>(attribute_name);
close();
return out;
}
protected:
/// This simple function checks if a file exists.
const bool file_exists(const std::string& filename) const
{
struct stat buf;
if(stat(filename.c_str(), &buf) != -1)
{
int filesize = buf.st_size;
if(verbose>2)
{
std::cout << "File found: " << filename
<< ", file size = " << filesize
<< std::endl;
}
if(filesize > 0)
return true;
}
return false;
}
};
template<typename OUT>
struct H5File::AttributeReader {
static OUT read(const hid_t attr_id, const hid_t atype_mem, const hsize_t rank, const std::vector<hsize_t> dims)
{
std::vector<hsize_t>::size_type flat_size = 1;
for(std::vector<hsize_t>::size_type i=0; i<rank; i++)
flat_size *= dims[i];
OUT out;
if(flat_size > 1)
DUNE_THROW(Exception,"Cannot convert attribute of size " << flat_size << " to scalar");
herr_t status = H5Aread(attr_id, atype_mem, &out);
assert(status > -1);
return out;
}
};
template<>
struct H5File::AttributeReader<std::string> {
static std::string read(const hid_t attr_id, const hid_t atype_mem, const hsize_t rank, const std::vector<hsize_t> dims)
{
if(rank > 1)
DUNE_THROW(Exception,"Cannot convert attribute with rank " << rank << " to string");
size_t size = H5Tget_size(atype_mem);
char attr_str[size+1];
herr_t status = H5Aread(attr_id, atype_mem, &attr_str);
assert(status > -1);
attr_str[size] = '\0';
return std::string(attr_str);
}
};
template<typename A>
struct H5File::AttributeReader<std::vector<A>> {
static std::vector<A> read(const hid_t attr_id, const hid_t atype_mem, const hsize_t rank, const std::vector<hsize_t> dims)
{
std::vector<hsize_t>::size_type flat_size = 1;
for(std::vector<hsize_t>::size_type i=0; i<rank; i++)
flat_size *= dims[i];
std::vector<A> out;
if(out.size() != flat_size)
out.resize(flat_size);
herr_t status = H5Aread(attr_id, atype_mem, &out);
assert(status > -1);
return out;
}
};
template<typename A, int B>
struct H5File::AttributeReader<Dune::FieldVector<A,B>> {
static Dune::FieldVector<A,B> read(const hid_t attr_id, const hid_t atype_mem, const hsize_t rank, const std::vector<hsize_t> dims)
{
std::vector<hsize_t>::size_type flat_size = 1;
for(std::vector<hsize_t>::size_type i=0; i<rank; i++)
flat_size *= dims[i];
Dune::FieldVector<A,B> out;
if(out.size() != flat_size)
DUNE_THROW(Exception, "Container size mismatch: Attribute has size " << flat_size);
herr_t status = H5Aread(attr_id, atype_mem, &out);
assert(status > -1);
return out;
}
};
template<typename A, int B>
struct H5File::AttributeReader<std::array<A,B>> {
static std::array<A,B> read(const hid_t attr_id, const hid_t atype_mem, const hsize_t rank, const std::vector<hsize_t> dims)
{
std::vector<hsize_t>::size_type flat_size = 1;
for(std::vector<hsize_t>::size_type i=0; i<rank; i++)
flat_size *= dims[i];
std::array<A,B> out;
if(out.size() != flat_size)
DUNE_THROW(Exception, "Container size mismatch: Attribute has size " << flat_size);
herr_t status = H5Aread(attr_id, atype_mem, &out);
assert(status > -1);
return out;
}
};
}
}
#endif
include ${CMAKE_BINARY_DIR}/doc/default_files/config.ini
__name = param
_asset_path = "${PROJECT_SOURCE_DIR}/test/"
_asset_path = "${PROJECT_SOURCE_DIR}/test"
output.fileName = param | unique name
output.outputPath = param | unique name
......@@ -16,7 +16,7 @@ grid.cells = 50 50
grid.FEorder = 1
grid.extensions = 1 1
grid.mappingFile = "{_asset_path}/maps/cell_ids.h5"
grid.mappingFileDataset = param_test
grid.mappingFileDataset = ut/param_test
boundary.file = "{_asset_path}/bcs/infiltration_2d.dat"
......
......@@ -34,7 +34,7 @@ def create_and_write_datasets(args):
f.create_dataset("ode_layered_160", data=ode_layered_160)
f.create_dataset("ode_layered_40", data=ode_layered_40)
f.create_dataset("ode_layered_20", data=ode_layered_20)
f.create_dataset("param_test", data=param_test)
f.create_dataset("ut/param_test", data=param_test)
f.create_dataset("parallel_reference_2d", data=parallel_reference_2d)
f.create_dataset("parallel_reference_3d", data=parallel_reference_3d)
f.close()
......