Commit 0b166c28 authored by Nils Erik Krumnack

Merge branch 'add-lossy-floats' into '21.2'

Add a flag to save floats at half precision

See merge request atlas/athena!24738
parents 22eed2ad 58a030e6
CMakeLists.txt

@@ -12,7 +12,7 @@ find_package( Boost 1.54.0 REQUIRED COMPONENTS program_options)
# Add the hdf tuple library
atlas_add_library(HDF5Utils
-  Root/HdfTuple.cxx Root/common.cxx Root/H5Traits.cxx
+  Root/HdfTuple.cxx Root/common.cxx Root/H5Traits.cxx Root/CompressedTypes.cxx
PUBLIC_HEADERS HDF5Utils
PRIVATE_INCLUDE_DIRS ${HDF5_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS}
LINK_LIBRARIES ${HDF5_LIBRARIES} ${ZLIB_LIBRARIES})

HDF5Utils/CompressedTypes.h (new file)

// this is -*- C++ -*-
/*
  Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
*/
#ifndef COMPRESSED_TYPES_H
#define COMPRESSED_TYPES_H

#include "H5Traits.h"

#include "H5Cpp.h"

#include <stdexcept>

namespace H5Utils {

  enum class Compression {STANDARD, HALF_PRECISION};

  namespace internal {
    // Generic case: only STANDARD (native precision) is supported.
    template <typename T>
    H5::DataType getCompressedType(Compression comp) {
      if (comp != Compression::STANDARD) {
        throw std::logic_error("compression not supported for this type");
      }
      return H5Traits<T>::type;
    }
    // Floats can also be written at half precision.
    template <>
    H5::DataType getCompressedType<float>(Compression comp);
  }
}

#endif

HDF5Utils/HdfTuple.h

// this is -*- C++ -*-
/*
- Copyright (C) 2002-2018 CERN for the benefit of the ATLAS collaboration
+ Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
*/
#ifndef HDF_TUPLE_HH
#define HDF_TUPLE_HH
@@ -14,6 +14,7 @@
**/
#include "H5Traits.h"
#include "CompressedTypes.h"
#include "common.h"
#include "H5Cpp.h"
@@ -47,6 +48,7 @@ namespace H5Utils {
virtual data_buffer_t getBuffer(I) const = 0;
virtual data_buffer_t getDefault() const = 0;
virtual H5::DataType getType() const = 0;
virtual H5::DataType getWriteType() const = 0;
virtual std::string name() const = 0;
typedef I input_type;
};
@@ -58,23 +60,28 @@ namespace H5Utils {
public:
DataConsumer(const std::string&,
const std::function<T(I)>&,
-              const T default_value = T());
+              const T default_value = T(),
+              Compression = Compression::STANDARD);
data_buffer_t getBuffer(I) const override;
data_buffer_t getDefault() const override;
H5::DataType getType() const override;
H5::DataType getWriteType() const override;
std::string name() const override;
private:
std::function<T(I)> m_getter;
std::string m_name;
T m_default_value;
H5::DataType m_write_type;
};
template <typename T, typename I>
DataConsumer<T, I>::DataConsumer(const std::string& name,
const std::function<T(I)>& func,
-                                  const T default_value):
+                                  const T default_value,
+                                  Compression comp):
m_getter(func),
m_name(name),
-   m_default_value(default_value)
+   m_default_value(default_value),
+   m_write_type(getCompressedType<T>(comp))
{
}
template <typename T, typename I>
@@ -94,9 +101,14 @@ namespace H5Utils {
return H5Traits<T>::type;
}
template <typename T, typename I>
H5::DataType DataConsumer<T, I>::getWriteType() const {
return m_write_type;
}
template <typename T, typename I>
std::string DataConsumer<T, I>::name() const {
return m_name;
}
}
/// @}
@@ -118,14 +130,17 @@ namespace H5Utils {
/// This should be the only method you need in this class
template <typename T>
void add(const std::string& name, const std::function<T(I)>&,
-          const T& default_value = T());
+          const T& default_value = T(),
+          Compression = Compression::STANDARD);
/// overload to cast lambdas into functions
template <typename T, typename F>
- void add(const std::string& name, const F func, const T& def = T()) {
-   add(name, std::function<T(I)>(func), def);
+ void add(const std::string& name, const F func, const T& def = T(),
+          Compression comp = Compression::STANDARD) {
+   add(name, std::function<T(I)>(func), def, comp);
}
std::vector<SharedConsumer<I> > getConsumers() const;
typedef I input_type;
@@ -140,15 +155,17 @@ namespace H5Utils {
template <typename T>
void Consumers<I>::add(const std::string& name,
const std::function<T(I)>& fun,
-                        const T& def_val)
+                        const T& def_val,
+                        Compression comp)
{
if (m_used.count(name)) {
throw std::logic_error("tried to insert '" + name + "' twice");
}
m_consumers.push_back(
-   std::make_shared<internal::DataConsumer<T, I> >(name, fun, def_val));
+   std::make_shared<internal::DataConsumer<T,I>>(name, fun, def_val, comp));
m_used.insert(name);
}
template <typename I>
std::vector<SharedConsumer<I> > Consumers<I>::getConsumers() const {
return m_consumers;
@@ -251,6 +268,17 @@ namespace H5Utils {
}
return type;
}
/// Build the type used when writing to the file. Members here use each
/// consumer's (possibly compressed) write type rather than the buffer type.
template<typename I>
H5::CompType buildWriteType(const std::vector<SharedConsumer<I> >& con) {
  H5::CompType type(con.size() * sizeof(data_buffer_t));
  size_t dt_offset = 0;
  for (const SharedConsumer<I>& filler: con) {
    type.insertMember(filler->name(), dt_offset, filler->getWriteType());
    dt_offset += sizeof(data_buffer_t);
  }
  type.pack();
  return type;
}
/// Constant parameters for the writer
template <typename I, size_t N>
@@ -345,7 +373,6 @@ namespace H5Utils {
m_consumers(consumers.getConsumers()),
m_file_space(H5S_SIMPLE)
{
-   using internal::packed;
using internal::data_buffer_t;
if (batch_size < 1) {
throw std::logic_error("batch size must be > 0");
@@ -362,7 +389,8 @@ namespace H5Utils {
// create ds
internal::throwIfExists(name, group);
-   m_ds = group.createDataSet(name, packed(m_par.type), space, params);
+   H5::CompType packed_type = buildWriteType(consumers.getConsumers());
+   m_ds = group.createDataSet(name, packed_type, space, params);
m_file_space = m_ds.getSpace();
m_file_space.selectNone();
}

README.md

@@ -133,6 +133,23 @@
are free to create several writers, each of which will write a dataset
corresponding to a different type of object. These can write to the
same file, but aligning the indices after the fact is up to you!

### Lossy Compression

Many machine learning frameworks use 'half-precision' (16 bit)
floats, so you probably won't gain much by saving your data as 32 bit
`float`s, much less 64 bit `double`s. By default atomic types are
saved at their native precision, but if you want to reduce this you
can specify a `Compression`:

```C++
consumers.add(name, function, default_value, COMPRESSION);
```

Currently we support:

- `STANDARD`: use standard native precision
- `HALF_PRECISION`: 16 bit
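
For instance, a minimal sketch (the `Track` struct and its `pt` member
are hypothetical, just for illustration):

```C++
#include "HDF5Utils/HdfTuple.h"

#include <cmath>

// hypothetical input object
struct Track {
  float pt;
};

void setupConsumers(H5Utils::Consumers<const Track&>& consumers) {
  // "pt" is stored on disk as a 16 bit float; the getter and the
  // in-memory buffers still use 32 bit floats
  consumers.add<float>("pt", [](const Track& t) { return t.pt; }, NAN,
                       H5Utils::Compression::HALF_PRECISION);
}
```

Note that only the on-disk representation changes: values are rounded
to 11 significant bits (roughly 3 decimal digits) when written.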
Hacking This Code
=================

Root/CompressedTypes.cxx (new file)

/*
Copyright (C) 2002-2019 CERN for the benefit of the ATLAS collaboration
*/
#include "HDF5Utils/CompressedTypes.h"
namespace {
  // build an HDF5 type for a 16 bit (IEEE 754 half-precision) float
  H5::DataType halfPrecisionFloat() {
    // start with native float
    H5::FloatType type(H5Tcopy(H5::PredType::NATIVE_FLOAT.getId()));
    // These definitions are copied from h5py, see:
    //
    // https://github.com/h5py/h5py/blob/596748d52c351258c851bb56c8df1c25d3673110/h5py/h5t.pyx#L212-L217
    //
    // sign bit at position 15, 5 exponent bits at position 10,
    // 10 mantissa bits at position 0
    type.setFields(15, 10, 5, 0, 10);
    // 2 bytes total, with the IEEE 754 half-precision exponent bias
    type.setSize(2);
    type.setEbias(15);
    return type;
  }
}
namespace H5Utils {
  namespace internal {
    template <>
    H5::DataType getCompressedType<float>(Compression comp) {
      switch (comp) {
      case Compression::STANDARD: return H5Traits<float>::type;
      case Compression::HALF_PRECISION: return halfPrecisionFloat();
      default: throw std::logic_error("unknown float compression");
      }
    }
  }
}
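
As a quick illustration of what half precision does to a value, one can
round-trip a float through the new type with the HDF5 C API's
`H5Tconvert` (a standalone sketch, not part of this merge request):

```C++
#include "HDF5Utils/CompressedTypes.h"

#include "H5Cpp.h"

#include <cstring>
#include <iostream>

int main() {
  using H5Utils::Compression;
  using H5Utils::internal::getCompressedType;
  H5::DataType half = getCompressedType<float>(Compression::HALF_PRECISION);
  float value = 0.1f;
  // convert in place: float -> half -> float; the buffer must be large
  // enough for the bigger of the two types (4 bytes here)
  unsigned char buf[sizeof(float)];
  std::memcpy(buf, &value, sizeof(float));
  H5Tconvert(H5::PredType::NATIVE_FLOAT.getId(), half.getId(), 1,
             buf, nullptr, H5P_DEFAULT);
  H5Tconvert(half.getId(), H5::PredType::NATIVE_FLOAT.getId(), 1,
             buf, nullptr, H5P_DEFAULT);
  std::memcpy(&value, buf, sizeof(float));
  std::cout << value << "\n"; // ~0.0999756: precision lost to rounding
  return 0;
}
```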