Skip to content

Commit

Permalink
Added support for type override when reading datasets
Browse files Browse the repository at this point in the history
* Permits reading subsets of datasets
* Makes it possible to use versioning of user-supplied structs
  • Loading branch information
DavidAce committed Feb 23, 2021
1 parent 6263843 commit 330a549
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 13 deletions.
13 changes: 8 additions & 5 deletions h5pp/include/h5pp/details/h5ppFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -621,24 +621,27 @@ namespace h5pp {
h5pp::hdf5::readDataset(data, dataInfo, dsetInfo, plists);
}
template<typename DataType, typename = std::enable_if_t<not std::is_const_v<DataType>>>
[[nodiscard]] DataType readDataset(std::string_view datasetPath, const Options &options) const {
[[nodiscard]] DataType readDataset(std::string_view dsetPath, const Options &options) const {
Options options_internal = options;
options_internal.linkPath = dsetPath;
DataType data;
readDataset(data, options);
readDataset(data, options_internal);
return data;
}

template<typename DataType>
void readDataset(DataType &data, std::string_view dsetPath, const OptDimsType &dataDims = std::nullopt) const {
void readDataset(DataType &data, std::string_view dsetPath, const OptDimsType &dataDims = std::nullopt, std::optional<hid::h5t> h5Type = std::nullopt) const {
Options options;
options.linkPath = dsetPath;
options.dataDims = dataDims;
options.h5Type = std::move(h5Type);
readDataset(data, options);
}

template<typename DataType, typename = std::enable_if_t<not std::is_const_v<DataType>>>
[[nodiscard]] DataType readDataset(std::string_view datasetPath, const OptDimsType &dataDims = std::nullopt) const {
[[nodiscard]] DataType readDataset(std::string_view datasetPath, const OptDimsType &dataDims = std::nullopt, std::optional<hid::h5t> h5Type = std::nullopt) const {
DataType data;
readDataset(data, datasetPath, dataDims);
readDataset(data, datasetPath, dataDims, std::move(h5Type));
return data;
}

Expand Down
41 changes: 34 additions & 7 deletions h5pp/include/h5pp/details/h5ppHdf5.h
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,7 @@ namespace h5pp::hdf5 {
}

template<typename DataType>
void assertReadBufferIsLargeEnough(const DataType &data, const hid::h5s &space, const hid::h5t &type) {
void assertReadSpaceIsLargeEnough(const DataType &data, const hid::h5s &space, const hid::h5t &type) {
if(H5Tget_class(type) == H5T_STRING) {
if(H5Tis_variable_str(type)) return; // These are resized on the fly
if constexpr(h5pp::type::sfinae::is_text_v<DataType>) {
Expand Down Expand Up @@ -367,7 +367,6 @@ namespace h5pp::hdf5 {

template<typename DataType, typename = std::enable_if_t<not std::is_base_of_v<hid::hid_base<DataType>, DataType>>>
void assertBytesPerElemMatch(const hid::h5t &h5Type) {
// if(h5pp::type::sfinae::is_container_of_v<DataType,std::string>) return; // Each element is potentially a different length!
size_t dsetTypeSize = h5pp::hdf5::getBytesPerElem(h5Type);
size_t dataTypeSize = h5pp::util::getBytesPerElem<DataType>();
if(H5Tget_class(h5Type) == H5T_STRING) dsetTypeSize = H5Tget_size(H5T_C_S1);
Expand All @@ -387,6 +386,33 @@ namespace h5pp::hdf5 {
}
}

/*!
 * Compares the size in bytes of one element of DataType against the size of one
 * element of the given HDF5 type, before reading from file into memory.
 *
 * For HDF5 string types the HDF5 element size is taken to be that of H5T_C_S1.
 * If the sizes agree nothing else happens. Otherwise the HDF5 size is recomputed
 * from the native type of h5Type (the given type may have been generated by H5Tpack), and then:
 *   - DataType larger than the native type  : a debug message is logged.
 *   - DataType smaller than the native type : std::runtime_error is thrown.
 *   - sizes equal after all                 : a warning about packed types is logged.
 *
 * \tparam DataType the type of the elements in the memory buffer
 * \param h5Type    the HDF5 type to compare against
 * \throws std::runtime_error if DataType is smaller than the native HDF5 type
 */
template<typename DataType, typename = std::enable_if_t<not std::is_base_of_v<hid::hid_base<DataType>, DataType>>>
void assertReadTypeIsLargeEnough(const hid::h5t &h5Type) {
    size_t       h5Bytes  = h5pp::hdf5::getBytesPerElem(h5Type);
    const size_t memBytes = h5pp::util::getBytesPerElem<DataType>();
    if(H5Tget_class(h5Type) == H5T_STRING) h5Bytes = H5Tget_size(H5T_C_S1);

    // Sizes agree: nothing more to check
    if(memBytes == h5Bytes) return;

    // The given type may have been generated by H5Tpack, so the comparison is redone against the native type
    const size_t packedBytes = h5Bytes;
    hid::h5t     nativeType  = H5Tget_native_type(h5Type, H5T_DIR_ASCEND);
    h5Bytes                  = h5pp::hdf5::getBytesPerElem(nativeType);

    if(memBytes < h5Bytes) {
        throw std::runtime_error(h5pp::format(
            "Given data-type is too small: elements of type [{}] are [{}] bytes (each) | target HDF5 type is [{}] bytes",
            h5pp::type::sfinae::type_name<DataType>(), memBytes, h5Bytes));
    }
    if(memBytes > h5Bytes) {
        h5pp::logger::log->debug(
            "Given data-type is too large: elements of type [{}] are [{}] bytes (each) | target HDF5 type is [{}] bytes",
            h5pp::type::sfinae::type_name<DataType>(), memBytes, h5Bytes);
        return;
    }
    // Only the packed size differed from the in-memory size
    h5pp::logger::log->warn(
        "Detected packed HDF5 type: packed size {} bytes | native size {} bytes. This is not supported by h5pp yet!",
        packedBytes,
        memBytes);
}


template<typename DataType>
inline void setStringSize(const DataType &data, hid::h5t &type, hsize_t &size, size_t &bytes, std::vector<hsize_t> &dims) {
H5T_class_t dataClass = H5Tget_class(type);
Expand Down Expand Up @@ -745,8 +771,8 @@ namespace h5pp::hdf5 {
const hid::h5s & h5Space,
const hid::h5t & h5Type) {
TypeInfo typeInfo;
typeInfo.h5Name = objectName;
typeInfo.h5Path = objectPath;
typeInfo.h5Name = std::move(objectName);
typeInfo.h5Path = std::move(objectPath);
typeInfo.h5Type = h5Type;
typeInfo.h5Rank = h5pp::hdf5::getRank(h5Space);
typeInfo.h5Size = h5pp::hdf5::getSize(h5Space);
Expand Down Expand Up @@ -1857,9 +1883,10 @@ namespace h5pp::hdf5 {
dataInfo.assertReadReady();
h5pp::logger::log->debug("Reading into memory {}", dataInfo.string(h5pp::logger::logIf(1)));
h5pp::logger::log->debug("Reading from dataset {}", dsetInfo.string(h5pp::logger::logIf(1)));
h5pp::hdf5::assertReadBufferIsLargeEnough(data, dataInfo.h5Space.value(), dsetInfo.h5Type.value());
h5pp::hdf5::assertReadTypeIsLargeEnough<DataType>(dsetInfo.h5Type.value());
h5pp::hdf5::assertReadSpaceIsLargeEnough(data, dataInfo.h5Space.value(), dsetInfo.h5Type.value());
h5pp::hdf5::assertSpacesEqual(dataInfo.h5Space.value(), dsetInfo.h5Space.value(), dsetInfo.h5Type.value());
h5pp::hdf5::assertBytesPerElemMatch<DataType>(dsetInfo.h5Type.value());
// h5pp::hdf5::assertBytesPerElemMatch<DataType>(dsetInfo.h5Type.value());
herr_t retval = 0;

// Get the memory address to the data buffer
Expand Down Expand Up @@ -2007,7 +2034,7 @@ namespace h5pp::hdf5 {
attrInfo.assertReadReady();
h5pp::logger::log->debug("Reading into memory {}", dataInfo.string(h5pp::logger::logIf(1)));
h5pp::logger::log->debug("Reading from file {}", attrInfo.string(h5pp::logger::logIf(1)));
h5pp::hdf5::assertReadBufferIsLargeEnough(data, dataInfo.h5Space.value(), attrInfo.h5Type.value());
h5pp::hdf5::assertReadSpaceIsLargeEnough(data, dataInfo.h5Space.value(), attrInfo.h5Type.value());
h5pp::hdf5::assertBytesPerElemMatch<DataType>(attrInfo.h5Type.value());
h5pp::hdf5::assertSpacesEqual(dataInfo.h5Space.value(), attrInfo.h5Space.value(), attrInfo.h5Type.value());
herr_t retval = 0;
Expand Down
2 changes: 1 addition & 1 deletion h5pp/include/h5pp/details/h5ppLogger.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ namespace h5pp::logger {

inline void setLogger(const std::string &name, std::optional<size_t> levelZeroToFive = std::nullopt, std::optional<bool> timestamp = std::nullopt) {
if(spdlog::get(name) == nullptr)
log = spdlog::stdout_color_mt(name);
log = spdlog::stdout_color_mt(name, spdlog::color_mode::automatic);
else
log = spdlog::get(name);
log->set_pattern("[%n]%^[%=8l]%$ %v"); // Disabled timestamp is the default
Expand Down
6 changes: 6 additions & 0 deletions h5pp/include/h5pp/details/h5ppScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ namespace h5pp::scan {
"[h5pp::hid::h5f], [h5pp::hid::h5g] or [h5pp::hid::h5o]");

if(not options.linkPath and not info.dsetPath) throw std::runtime_error("Could not read dataset info: No dataset path was given");
// Start by copying fields in options which override later analysis
if(not info.h5Type) info.h5Type = options.h5Type;
if(not info.dsetSlab) info.dsetSlab = options.dsetSlab;
if(not info.dsetPath) info.dsetPath = h5pp::util::safe_str(options.linkPath.value());
h5pp::logger::log->debug("Scanning metadata of dataset [{}]", info.dsetPath.value());
Expand Down Expand Up @@ -358,6 +360,7 @@ namespace h5pp::scan {
if(not options.linkPath and not info.linkPath) throw std::runtime_error("Could not read attribute info: No link path was given");
if(not options.attrName and not info.attrName) throw std::runtime_error("Could not read attribute info: No attribute name was given");
if(not info.linkPath) info.linkPath = h5pp::util::safe_str(options.linkPath.value());
if(not info.h5Type) info.h5Type = options.h5Type;
if(not info.attrName) info.attrName = h5pp::util::safe_str(options.attrName.value());
if(not info.attrSlab) info.attrSlab = options.attrSlab;
h5pp::logger::log->debug("Scanning metadata of attribute [{}] in link [{}]", info.attrName.value(), info.linkPath.value());
Expand Down Expand Up @@ -603,7 +606,10 @@ namespace h5pp::scan {
"[h5pp::hid::h5f], [h5pp::hid::h5g] or [h5pp::hid::h5o]");
if(not options.linkPath and not info.tablePath)
throw std::runtime_error("Could not read table info: No table path was given");
// Copy fields from options to override later analysis
if(not info.tablePath) info.tablePath = h5pp::util::safe_str(options.linkPath.value());
if(not info.h5Type) info.h5Type = options.h5Type;

h5pp::logger::log->debug("Scanning metadata of table [{}]", info.tablePath.value());

// Copy the location
Expand Down
114 changes: 114 additions & 0 deletions tests/test-userTypeVers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@

#include <h5pp/h5pp.h>
#include <cstring>
#include <iostream>

/// Version 1 of the user-defined particle record: two coordinates, an id and a version tag.
struct ParticleV1 {
    double x           = 0;
    double y           = 0;
    int    id          = 0;
    char   version[10] = ""; // NOTE(review): the original author asks whether this could be a std::string or another resizable type -- unresolved

    /// Two particles are equal when all coordinates, the id and the version tag match.
    bool operator==(const ParticleV1 &p) const {
        if(x != p.x or y != p.y) return false;
        if(id != p.id) return false;
        return strncmp(version, p.version, sizeof(version)) == 0;
    }
    /// Negation of operator==.
    bool operator!=(const ParticleV1 &p) const { return not operator==(p); }
};

/// Version 2 of the user-defined particle record: four coordinates, an id and a version tag.
struct ParticleV2 {
    double x           = 0;
    double y           = 0;
    double z           = 0;
    double t           = 0;
    int    id          = 0;
    char   version[10] = ""; // NOTE(review): the original author asks whether this could be a std::string or another resizable type -- unresolved

    /// Two particles are equal when all coordinates, the id and the version tag match.
    bool operator==(const ParticleV2 &p) const {
        if(x != p.x or y != p.y) return false;
        if(z != p.z or t != p.t) return false;
        if(id != p.id) return false;
        return strncmp(version, p.version, sizeof(version)) == 0;
    }
    /// Negation of operator==.
    bool operator!=(const ParticleV2 &p) const { return not operator==(p); }
};


/// Prints msg followed by the x, y, id and version fields of a ParticleV1 on a single line.
void print_particle(const ParticleV1 &p, const std::string &msg = "") {
    h5pp::print("{} \t x: {} \t y: {} \t id: {} \t version: {}\n",
                msg,
                p.x,
                p.y,
                p.id,
                p.version);
}
/// Prints msg followed by the x, y, z, t, id and version fields of a ParticleV2 on a single line.
void print_particle(const ParticleV2 &p, const std::string &msg = "") {
    h5pp::print("{} \t x: {} \t y: {} \t z: {} \t t: {} \t id: {} \t version: {}\n",
                msg,
                p.x,
                p.y,
                p.z,
                p.t,
                p.id,
                p.version);
}


int main() {
h5pp::File file("output/userTypeVers.h5", h5pp::FilePermission::REPLACE, 2);

// Create a type for the char array from the template H5T_C_S1
// The template describes a string with a single char.
// Set the size with H5Tset_size.
h5pp::hid::h5t H5_VERS_TYPE = H5Tcopy(H5T_C_S1);
H5Tset_size(H5_VERS_TYPE, 10);
// Optionally set the null terminator '\0' and possibly padding.
H5Tset_strpad(H5_VERS_TYPE, H5T_STR_NULLTERM);

// Register the compound type
h5pp::hid::h5t H5_PARTICLE_V1 = H5Tcreate(H5T_COMPOUND, sizeof(ParticleV1));
H5Tinsert(H5_PARTICLE_V1, "x", HOFFSET(ParticleV1, x), H5T_NATIVE_DOUBLE);
H5Tinsert(H5_PARTICLE_V1, "y", HOFFSET(ParticleV1, y), H5T_NATIVE_DOUBLE);
H5Tinsert(H5_PARTICLE_V1, "id", HOFFSET(ParticleV1, id), H5T_NATIVE_INT);
H5Tinsert(H5_PARTICLE_V1, "version", HOFFSET(ParticleV1, version), H5_VERS_TYPE);


// Register the compound type
h5pp::hid::h5t H5_PARTICLE_V2 = H5Tcreate(H5T_COMPOUND, sizeof(ParticleV2));
H5Tinsert(H5_PARTICLE_V2, "x", HOFFSET(ParticleV2, x), H5T_NATIVE_DOUBLE);
H5Tinsert(H5_PARTICLE_V2, "y", HOFFSET(ParticleV2, y), H5T_NATIVE_DOUBLE);
H5Tinsert(H5_PARTICLE_V2, "z", HOFFSET(ParticleV2, z), H5T_NATIVE_DOUBLE);
H5Tinsert(H5_PARTICLE_V2, "t", HOFFSET(ParticleV2, t), H5T_NATIVE_DOUBLE);
H5Tinsert(H5_PARTICLE_V2, "id", HOFFSET(ParticleV2, id), H5T_NATIVE_INT);
H5Tinsert(H5_PARTICLE_V2, "version", HOFFSET(ParticleV2, version), H5_VERS_TYPE);

// Define a single particle version 1
ParticleV1 p1;
p1.x = 1;
p1.y = 2;
p1.id = 1111;
strncpy(p1.version, "v1", 10);
// Define a single particle version 2
ParticleV2 p2;
p2.x = 1;
p2.y = 2;
p2.z = 3;
p2.t = 4;
p2.id = 2222;
strncpy(p2.version, "v2", 10);


// Write both particles
file.writeDataset(p1, "singleParticle1", H5_PARTICLE_V1);
file.writeDataset(p2, "singleParticle2", H5_PARTICLE_V2);


auto p1_as_v1_h1 = file.readDataset<ParticleV1>("singleParticle1", std::nullopt, H5_PARTICLE_V1);
print_particle(p1_as_v1_h1, "p1 as v1|h1: Should work:");

try{
auto p1_as_v1_h2 = file.readDataset<ParticleV1>("singleParticle1", std::nullopt, H5_PARTICLE_V2);
print_particle(p1_as_v1_h2,"p1 as v1|h2");
}catch (const std::exception & ex){
h5pp::print("p1 as v1|h2: Caught expected error: {}\n", ex.what());
}

auto p1_as_v2_h1 = file.readDataset<ParticleV2>("singleParticle1", std::nullopt, H5_PARTICLE_V1);
print_particle(p1_as_v2_h1, "p1 as v2|h1: Should fail:");

auto p1_as_v2_h2 = file.readDataset<ParticleV2>("singleParticle1", std::nullopt, H5_PARTICLE_V2);
print_particle(p1_as_v2_h2, "p1 as v2|h2: Should work:");

auto p2_as_v1_h1 = file.readDataset<ParticleV1>("singleParticle2", std::nullopt, H5_PARTICLE_V1);
print_particle(p2_as_v1_h1, "p2 as v1|h1: Should work:");

try{
auto p2_as_v1_h2 = file.readDataset<ParticleV1>("singleParticle2", std::nullopt, H5_PARTICLE_V2);
print_particle(p2_as_v1_h2,"p2 as v1|h2");

}catch (const std::exception & ex){
h5pp::print("p2 as v1|h2: Caught expected error: {}\n", ex.what());
}


auto p2_as_v2_h1 = file.readDataset<ParticleV2>("singleParticle2", std::nullopt, H5_PARTICLE_V1);
print_particle(p2_as_v2_h1,"p2 as v2|h1: Should fail:");

auto p2_as_v2_h2 = file.readDataset<ParticleV2>("singleParticle2", std::nullopt, H5_PARTICLE_V2);
print_particle(p2_as_v2_h2,"p2 as v2|h2: Should work:");

return 0;
}

0 comments on commit 330a549

Please sign in to comment.